From 24d1df5bc803d30a048ac5ded353f8506bea8e98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Pedro=20Alves?= Date: Tue, 24 Mar 2026 16:59:43 -0300 Subject: [PATCH 1/3] feat: enhance user AI recommendations logic --- .claude/launch.json | 17 + .claude/settings.local.json | 9 + .../user-stats/get-user-ai-recommendations.ts | 507 +++++++++++++----- .../src/infra/http/schemas/user-stats.ts | 1 + apps/web/next-env.d.ts | 2 +- 5 files changed, 388 insertions(+), 148 deletions(-) create mode 100644 .claude/launch.json create mode 100644 .claude/settings.local.json diff --git a/.claude/launch.json b/.claude/launch.json new file mode 100644 index 000000000..21f97839e --- /dev/null +++ b/.claude/launch.json @@ -0,0 +1,17 @@ +{ + "version": "0.0.1", + "configurations": [ + { + "name": "web", + "runtimeExecutable": "pnpm", + "runtimeArgs": ["--filter", "web", "run", "dev"], + "port": 3000 + }, + { + "name": "backend", + "runtimeExecutable": "sh", + "runtimeArgs": ["-c", "docker compose -f apps/backend/docker-compose.yml up -d --scale app=0 && pnpm --filter plotwist-api run dev"], + "port": 3333 + } + ] +} diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 000000000..7a36170e8 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,9 @@ +{ + "permissions": { + "allow": [ + "Bash(gh pr:*)", + "Bash(gh api:*)", + "Bash(git add:*)" + ] + } +} diff --git a/apps/backend/src/domain/services/user-stats/get-user-ai-recommendations.ts b/apps/backend/src/domain/services/user-stats/get-user-ai-recommendations.ts index 82edc9b66..511ec3b5d 100644 --- a/apps/backend/src/domain/services/user-stats/get-user-ai-recommendations.ts +++ b/apps/backend/src/domain/services/user-stats/get-user-ai-recommendations.ts @@ -16,12 +16,21 @@ type Input = { } const MIN_VOTE_COUNT = 3000 +const COLD_START_THRESHOLD = 5 + +type Candidate = { + tmdbId: number + title: string + year?: number + mediaType: 'movie' | 'tv' +} type Rec = { title: string reason: string mediaType: 'movie' | 'tv' year?: number + tmdbId?: number } type SearchHit = { @@ -40,64 +49,137 @@ function hitTitle(h: SearchHit): string { return (h.title ?? h.name ?? '').trim() } -async function filterOutWatched( - recs: Rec[], - watchedSet: Set, +const languageMap: Record = { + 'en-US': 'Respond in English.', + 'pt-BR': 'Responda em português brasileiro.', + 'es-ES': 'Responde en español.', + 'fr-FR': 'Réponds en français.', + 'de-DE': 'Antworte auf Deutsch.', + 'it-IT': 'Rispondi in italiano.', + 'ja-JP': '日本語で回答してください。', +} + +async function buildCandidatePool( + seeds: Array<{ tmdb_id: number; media_type: string }>, + exclusionSet: Set, language: Language -): Promise { - const kept: Rec[] = [] - for (const rec of recs) { - try { - const search = await tmdb.search.multi(rec.title, language) - const results = ((search as { results?: SearchHit[] }).results ?? - []) as SearchHit[] - const mediaType = rec.mediaType === 'tv' ? 'tv' : 'movie' - const candidates = results.filter( - (r: SearchHit) => - (r.media_type === 'movie' || r.media_type === 'tv') && - r.media_type === mediaType - ) - if (candidates.length === 0) { - console.log('[ai-recommendations] filterOutWatched: no TMDB match', { - title: rec.title, - mediaType, - }) - continue - } - const recNorm = normalizeTitle(rec.title) - const byTitleMatch = candidates.filter( - (r) => normalizeTitle(hitTitle(r)) === recNorm - ) - const pool = byTitleMatch.length > 0 ? byTitleMatch : candidates - const match = pool.reduce((best, r) => - (r.vote_count ?? 0) > (best.vote_count ?? 0) ? r : best - ) - if (watchedSet.has(`${match.id}-${mediaType}`)) { - console.log('[ai-recommendations] filterOutWatched: already watched', { - title: rec.title, - tmdbId: match.id, - }) - continue - } - const votes = match.vote_count ?? 0 - if (votes < MIN_VOTE_COUNT) { - console.log('[ai-recommendations] filterOutWatched: low votes', { - title: rec.title, - votes, - min: MIN_VOTE_COUNT, - }) - continue +): Promise { + const allCandidates: Candidate[] = [] + + await Promise.all( + seeds.map(async seed => { + try { + const isMovie = seed.media_type === 'MOVIE' + + if (isMovie) { + const related = await tmdb.movies.related( + seed.tmdb_id, + 'recommendations', + language + ) + for (const r of related.results ?? []) { + if ( + !exclusionSet.has(`${r.id}-movie`) && + (r.vote_count ?? 0) >= MIN_VOTE_COUNT + ) { + allCandidates.push({ + tmdbId: r.id, + title: r.title, + year: r.release_date + ? Number.parseInt(r.release_date.split('-')[0]) + : undefined, + mediaType: 'movie', + }) + } + } + } else { + const related = await tmdb.tv.related( + seed.tmdb_id, + 'recommendations', + language + ) + for (const r of related.results ?? []) { + if ( + !exclusionSet.has(`${r.id}-tv`) && + (r.vote_count ?? 0) >= MIN_VOTE_COUNT + ) { + allCandidates.push({ + tmdbId: r.id, + title: r.name, + year: r.first_air_date + ? Number.parseInt(r.first_air_date.split('-')[0]) + : undefined, + mediaType: 'tv', + }) + } + } + } + } catch { + // Ignore individual TMDB errors — other seeds will still contribute } - kept.push(rec) - } catch (e) { - console.log( - '[ai-recommendations] filterOutWatched: error for', - rec.title, - e instanceof Error ? e.message : e - ) + }) + ) + + // Deduplicate by tmdbId-mediaType + const seen = new Set() + return allCandidates.filter(c => { + const key = `${c.tmdbId}-${c.mediaType}` + if (seen.has(key)) return false + seen.add(key) + return true + }) +} + +async function resolveTmdbId( + rec: Omit, + exclusionSet: Set, + language: Language +): Promise { + try { + const search = await tmdb.search.multi(rec.title, language) + const results = ((search as { results?: SearchHit[] }).results ?? + []) as SearchHit[] + const mediaType = rec.mediaType + const candidates = results.filter( + r => + (r.media_type === 'movie' || r.media_type === 'tv') && + r.media_type === mediaType + ) + if (candidates.length === 0) return null + + const recNorm = normalizeTitle(rec.title) + const byTitleMatch = candidates.filter( + r => normalizeTitle(hitTitle(r)) === recNorm + ) + const pool = byTitleMatch.length > 0 ? byTitleMatch : candidates + const match = pool.reduce((best, r) => + (r.vote_count ?? 0) > (best.vote_count ?? 0) ? r : best + ) + + if (exclusionSet.has(`${match.id}-${mediaType}`)) return null + if ((match.vote_count ?? 0) < MIN_VOTE_COUNT) return null + + return { ...rec, tmdbId: match.id } + } catch { + return null + } +} + +async function fetchItemTitle( + tmdbId: number, + mediaType: string, + language: Language +): Promise { + try { + if (mediaType === 'MOVIE') { + const d = await tmdb.movies.details(tmdbId, language) + return (d as { title?: string }).title ?? null } + const d = await tmdb.tv.details(tmdbId, language) + return (d as { name?: string }).name ?? null + } catch { + return null } - return kept } export async function getUserAIRecommendationsService({ @@ -107,128 +189,259 @@ export async function getUserAIRecommendationsService({ period = 'all', dateRange, }: Input) { - const cacheKey = `user-stats:${userId}:ai-recommendations:v4:${language}:${period}` + const cacheKey = `user-stats:${userId}:ai-recommendations:v5:${language}:${period}` const cached = await redis.get(cacheKey) if (cached) return JSON.parse(cached) const itemDateFilter = dateRange?.startDate && dateRange?.endDate - ? sql` AND added_at >= ${dateRange.startDate.toISOString()} AND added_at <= ${dateRange.endDate.toISOString()}` - : dateRange?.startDate - ? sql` AND added_at >= ${dateRange.startDate.toISOString()}` - : sql`` - const reviewDateFilter = - dateRange?.startDate && dateRange?.endDate - ? sql` AND created_at >= ${dateRange.startDate.toISOString()} AND created_at <= ${dateRange.endDate.toISOString()}` + ? sql` AND ui.added_at >= ${dateRange.startDate.toISOString()} AND ui.added_at <= ${dateRange.endDate.toISOString()}` : dateRange?.startDate - ? sql` AND created_at >= ${dateRange.startDate.toISOString()}` + ? sql` AND ui.added_at >= ${dateRange.startDate.toISOString()}` : sql`` - const [rows, watchedRows] = await Promise.all([ - db.execute<{ movie_count: number; series_count: number }>(sql` - SELECT - COUNT(*) FILTER (WHERE media_type = 'MOVIE')::int as movie_count, - COUNT(*) FILTER (WHERE media_type = 'TV_SHOW')::int as series_count - FROM user_items - WHERE user_id = ${userId} AND status = 'WATCHED' ${itemDateFilter} - `), - db.execute<{ tmdb_id: number; media_type: string }>(sql` - SELECT tmdb_id, media_type - FROM user_items - WHERE user_id = ${userId} AND status = 'WATCHED' - `), - ]) - - const ratingRows = await db.execute(sql` - SELECT COALESCE(AVG(rating), 0)::numeric(3,1) as avg_rating, - COUNT(*)::int as total - FROM reviews WHERE user_id = ${userId} ${reviewDateFilter} + // Watched items with average rating (used for seeding and prompt context) + const watchedWithRatings = await db.execute<{ + tmdb_id: number + media_type: string + avg_rating: string | null + added_at: string + }>(sql` + SELECT ui.tmdb_id, ui.media_type, ui.added_at, + AVG(r.rating)::numeric(3,1)::text AS avg_rating + FROM user_items ui + LEFT JOIN reviews r + ON r.tmdb_id = ui.tmdb_id + AND r.user_id = ui.user_id + AND r.media_type = ui.media_type + WHERE ui.user_id = ${userId} AND ui.status = 'WATCHED' ${itemDateFilter} + GROUP BY ui.tmdb_id, ui.media_type, ui.added_at + ORDER BY avg_rating DESC NULLS LAST, ui.added_at DESC `) - const movieCount = Number(rows[0]?.movie_count || 0) - const seriesCount = Number(rows[0]?.series_count || 0) - const avgRating = Number(ratingRows[0]?.avg_rating || 0) + // Exclusion set: ALL statuses (WATCHED, WATCHING, DROPPED, WATCHLIST) + const allEngagedRows = await db.execute<{ + tmdb_id: number + media_type: string + }>(sql` + SELECT tmdb_id, media_type FROM user_items WHERE user_id = ${userId} + `) - const watchedSet = new Set( - (watchedRows ?? []).map( + const exclusionSet = new Set( + allEngagedRows.map( r => `${r.tmdb_id}-${r.media_type === 'TV_SHOW' ? 'tv' : 'movie'}` ) ) - const languageMap: Record = { - 'en-US': 'Respond in English.', - 'pt-BR': 'Responda em português brasileiro.', - 'es-ES': 'Responde en español.', - 'fr-FR': 'Réponds en français.', - 'de-DE': 'Antworte auf Deutsch.', - 'it-IT': 'Rispondi in italiano.', - 'ja-JP': '日本語で回答してください。', + const watchedCount = watchedWithRatings.length + const toRating = (v: string | null) => (v != null ? parseFloat(v) : null) + + // Seeds: items rated >= 3 (or unrated) — used to drive TMDB related API + const seeds = watchedWithRatings + .filter(r => { + const rating = toRating(r.avg_rating) + return rating === null || rating >= 3 + }) + .slice(0, 10) + + // Loved / disliked for prompt context + const lovedItems = watchedWithRatings + .filter(r => (toRating(r.avg_rating) ?? 0) >= 4) + .slice(0, 3) + + const dislikedItems = watchedWithRatings + .filter(r => { + const rating = toRating(r.avg_rating) + return rating !== null && rating <= 2 + }) + .slice(0, 3) + + const openai = new OpenAI({ apiKey: config.openai.OPENAI_API_KEY }) + const systemPrompt = + 'You are a personal film & TV curator. Given candidate titles and a user taste profile, select the best matches. Respond ONLY with valid JSON, no markdown.' + + let recommendations: Rec[] = [] + + if (watchedCount >= COLD_START_THRESHOLD) { + // Standard path: build candidate pool via TMDB + fetch titles for context + const [candidates, lovedTitles, dislikedTitles] = await Promise.all([ + buildCandidatePool(seeds, exclusionSet, language), + Promise.all( + lovedItems.map(async item => ({ + title: await fetchItemTitle(item.tmdb_id, item.media_type, language), + rating: toRating(item.avg_rating), + })) + ), + Promise.all( + dislikedItems.map(async item => ({ + title: await fetchItemTitle(item.tmdb_id, item.media_type, language), + rating: toRating(item.avg_rating), + })) + ), + ]) + + if (candidates.length < 3) { + // Not enough candidates (edge case: all related items already watched) + // Fall through to cold start path below by setting watchedCount signal + console.log( + '[ai-recommendations] candidate pool too small, falling back to cold start', + { candidates: candidates.length } + ) + } else { + const lovedLine = lovedTitles + .filter(r => r.title) + .map(r => `"${r.title}" (${r.rating}/5)`) + .join(', ') + + const dislikedLine = dislikedTitles + .filter(r => r.title) + .map(r => `"${r.title}" (${r.rating}/5)`) + .join(', ') + + const candidateList = candidates + .slice(0, 20) + .map(c => + JSON.stringify({ + title: c.title, + year: c.year, + mediaType: c.mediaType, + tmdbId: c.tmdbId, + }) + ) + .join('\n') + + const prompt = `User taste profile: +- Watched: ${watchedCount} titles total +${lovedLine ? `- Loved: ${lovedLine}` : ''} +${dislikedLine ? `- Disliked (avoid similar): ${dislikedLine}` : ''} + +Candidate titles — pick the 5 best matches for this user: +${candidateList} + +${dislikedLine ? 'Do NOT recommend anything tonally or stylistically similar to the disliked titles.' : ''} +${languageMap[language] || languageMap['en-US']} + +Return ONLY a valid JSON array with exactly 5 objects: +[{"title":"exact title from list","reason":"1-sentence reason in user's language","mediaType":"movie or tv","year":2020,"tmdbId":12345}]` + + try { + const completion = await openai.chat.completions.create({ + model: 'gpt-4o-mini', + messages: [ + { role: 'system', content: systemPrompt }, + { role: 'user', content: prompt }, + ], + temperature: 0.5, + max_tokens: 600, + }) + + const raw = completion.choices[0]?.message?.content?.trim() || '[]' + const parsed: Rec[] = JSON.parse(raw) + + console.log('[ai-recommendations] OpenAI raw count', parsed.length, { + candidates: candidates.length, + lovedCount: lovedTitles.filter(r => r.title).length, + dislikedCount: dislikedTitles.filter(r => r.title).length, + }) + + // Match back to candidate pool for reliable tmdbId + const candidateMap = new Map( + candidates.map(c => [normalizeTitle(c.title), c]) + ) + recommendations = parsed + .map(rec => { + const mediaType = rec.mediaType === 'tv' ? 'tv' : 'movie' + const poolMatch = candidateMap.get(normalizeTitle(rec.title)) + const tmdbId = poolMatch?.tmdbId ?? rec.tmdbId + return { ...rec, mediaType, tmdbId } as Rec + }) + .filter( + rec => + rec.tmdbId && + !exclusionSet.has(`${rec.tmdbId}-${rec.mediaType}`) + ) + } catch (err) { + console.error( + '[ai-recommendations] OpenAI error (standard):', + err instanceof Error ? err.message : err + ) + recommendations = [] + } + } } - const prompt = `Based on this viewer profile, recommend exactly 3 popular, well-known titles they'd love. ${languageMap[language] || languageMap['en-US']} + // Cold start path: fewer than threshold watched items OR candidate pool too small + if (recommendations.length === 0) { + const movieCount = watchedWithRatings.filter( + r => r.media_type === 'MOVIE' + ).length + const seriesCount = watchedWithRatings.filter( + r => r.media_type === 'TV_SHOW' + ).length -CRITICAL: Recommend ONLY mainstream, widely known titles: big releases, award winners, or titles with broad appeal (thousands of votes on TMDB). Do NOT suggest: hidden gems, underrated films, niche titles, obscure films, shorts, or little-known releases. Do NOT recommend any title the user has already watched. + const prompt = `Based on this viewer profile, recommend exactly 5 popular, well-known titles. ${languageMap[language] || languageMap['en-US']} + +CRITICAL: Only mainstream titles with thousands of TMDB votes. No hidden gems or obscure titles. Profile: -- Watched ${movieCount} movies and ${seriesCount} series -- Average rating: ${avgRating}/5 +- Watched: ${movieCount} movies, ${seriesCount} series - Preference: ${movieCount > seriesCount * 1.5 ? 'Strong movie lover' : seriesCount > movieCount * 1.5 ? 'Series binge-watcher' : 'Balanced viewer'} -Return ONLY valid JSON array with exactly 5 objects (popular titles only; obscure ones will be filtered out), no markdown: -[{"title":"Exact English title as on TMDB","reason":"Short 1-sentence reason in the user's language","mediaType":"movie or tv","year":2020}]` +Return ONLY valid JSON: +[{"title":"Exact English title as on TMDB","reason":"Short reason in user's language","mediaType":"movie or tv","year":2020}]` - let recommendations: Array<{ - title: string - reason: string - mediaType: 'movie' | 'tv' - year?: number - }> = [] + try { + const completion = await openai.chat.completions.create({ + model: 'gpt-4o-mini', + messages: [ + { role: 'system', content: systemPrompt }, + { role: 'user', content: prompt }, + ], + temperature: 0.6, + max_tokens: 400, + }) - try { - const openai = new OpenAI({ apiKey: config.openai.OPENAI_API_KEY }) - const completion = await openai.chat.completions.create({ - model: 'gpt-4o-mini', - messages: [ - { - role: 'system', - content: - 'You are a film curator. Recommend only popular, mainstream titles (high visibility, many votes on TMDB). Do not suggest hidden gems or obscure titles. Always return valid JSON.', - }, - { role: 'user', content: prompt }, - ], - temperature: 0.6, - max_tokens: 400, - }) + const raw = completion.choices[0]?.message?.content?.trim() || '[]' + const parsed: Array> = JSON.parse(raw) - const raw = completion.choices[0]?.message?.content?.trim() || '[]' - recommendations = JSON.parse(raw) - console.log( - '[ai-recommendations] OpenAI raw count', - recommendations.length, - 'titles:', - recommendations.map(r => r.title) - ) - } catch (err) { - console.error( - '[ai-recommendations] OpenAI error:', - err instanceof Error ? err.message : err - ) - recommendations = [] + console.log( + '[ai-recommendations] cold start raw count', + parsed.length, + parsed.map(r => r.title) + ) + + const resolved = await Promise.all( + parsed.map(rec => + resolveTmdbId( + { + ...rec, + mediaType: rec.mediaType === 'tv' ? 'tv' : 'movie', + }, + exclusionSet, + language + ) + ) + ) + + recommendations = resolved.filter((r): r is Rec => r !== null) + } catch (err) { + console.error( + '[ai-recommendations] OpenAI error (cold start):', + err instanceof Error ? err.message : err + ) + recommendations = [] + } } - const filtered = await filterOutWatched(recommendations, watchedSet, language) - const result = { recommendations: filtered.slice(0, 3) } - console.log( - '[ai-recommendations] after filter', - filtered.length, - 'returning', - result.recommendations.length, - 'titles:', - result.recommendations.map(r => r.title) - ) + const result = { recommendations: recommendations.slice(0, 3) } + + console.log('[ai-recommendations] returning', result.recommendations.length, { + titles: result.recommendations.map(r => r.title), + hasTmdbIds: result.recommendations.every(r => r.tmdbId != null), + }) - if (filtered.length > 0) { + if (result.recommendations.length > 0) { await redis.set(cacheKey, JSON.stringify(result), 'EX', 60 * 60 * 24 * 7) } diff --git a/apps/backend/src/infra/http/schemas/user-stats.ts b/apps/backend/src/infra/http/schemas/user-stats.ts index b2cf5150a..6283a26bc 100644 --- a/apps/backend/src/infra/http/schemas/user-stats.ts +++ b/apps/backend/src/infra/http/schemas/user-stats.ts @@ -177,6 +177,7 @@ export const getUserAIRecommendationsResponseSchema = { reason: z.string(), mediaType: z.string(), year: z.number().optional(), + tmdbId: z.number().optional(), }) ), }), diff --git a/apps/web/next-env.d.ts b/apps/web/next-env.d.ts index 9edff1c7c..c4b7818fb 100644 --- a/apps/web/next-env.d.ts +++ b/apps/web/next-env.d.ts @@ -1,6 +1,6 @@ /// /// -import "./.next/types/routes.d.ts"; +import "./.next/dev/types/routes.d.ts"; // NOTE: This file should not be edited // see https://nextjs.org/docs/app/api-reference/config/typescript for more information. From d02f4d067b50ed88ab4d2a668bd87f6440cda5b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Pedro=20Alves?= Date: Tue, 24 Mar 2026 17:28:41 -0300 Subject: [PATCH 2/3] feat: improve AI recommendation system and database interactions --- .../user-stats/get-user-ai-recommendations.ts | 469 ++++++++++-------- apps/backend/src/infra/adapters/open-ai.ts | 20 + .../db/repositories/user-item-repository.ts | 44 ++ apps/backend/src/infra/ports/ai-service.ts | 6 + 4 files changed, 333 insertions(+), 206 deletions(-) diff --git a/apps/backend/src/domain/services/user-stats/get-user-ai-recommendations.ts b/apps/backend/src/domain/services/user-stats/get-user-ai-recommendations.ts index 511ec3b5d..e7285cdfa 100644 --- a/apps/backend/src/domain/services/user-stats/get-user-ai-recommendations.ts +++ b/apps/backend/src/domain/services/user-stats/get-user-ai-recommendations.ts @@ -1,10 +1,12 @@ import type { FastifyRedis } from '@fastify/redis' import type { Language } from '@plotwist_app/tmdb' -import { sql } from 'drizzle-orm' -import OpenAI from 'openai' -import { config } from '@/config' import { tmdb } from '@/infra/adapters/tmdb' -import { db } from '@/infra/db' +import { + selectAllUserItems, + selectWatchedItemsWithAvgRating, +} from '@/infra/db/repositories/user-item-repository' +import { selectUserPreferences } from '@/infra/db/repositories/user-preferences' +import { createAIService } from '@/infra/factories/ai-provider-factory' import type { StatsPeriod } from '@/infra/http/schemas/common' type Input = { @@ -18,6 +20,9 @@ type Input = { const MIN_VOTE_COUNT = 3000 const COLD_START_THRESHOLD = 5 +const SYSTEM_PROMPT = + 'You are a personal film & TV curator. Given candidate titles and a user taste profile, select the best matches. Respond ONLY with valid JSON, no markdown.' + type Candidate = { tmdbId: number title: string @@ -33,6 +38,13 @@ type Rec = { tmdbId?: number } +type WatchedRow = { + tmdbId: number + mediaType: string + avgRating: string | null + addedAt: Date +} + type SearchHit = { id: number media_type?: string @@ -41,15 +53,33 @@ type SearchHit = { name?: string } -function normalizeTitle(s: string): string { - return s.trim().toLowerCase() -} - -function hitTitle(h: SearchHit): string { - return (h.title ?? h.name ?? '').trim() +const TMDB_GENRE_MAP: Record = { + 28: 'Action', + 12: 'Adventure', + 16: 'Animation', + 35: 'Comedy', + 80: 'Crime', + 99: 'Documentary', + 18: 'Drama', + 10751: 'Family', + 14: 'Fantasy', + 36: 'History', + 27: 'Horror', + 10402: 'Music', + 9648: 'Mystery', + 10749: 'Romance', + 878: 'Science Fiction', + 53: 'Thriller', + 10752: 'War', + 37: 'Western', + 10759: 'Action & Adventure', + 10762: 'Kids', + 10765: 'Sci-Fi & Fantasy', + 10766: 'Soap', + 10768: 'War & Politics', } -const languageMap: Record = { +const LANGUAGE_INSTRUCTION: Record = { 'en-US': 'Respond in English.', 'pt-BR': 'Responda em português brasileiro.', 'es-ES': 'Responde en español.', @@ -59,70 +89,82 @@ const languageMap: Record = { 'ja-JP': '日本語で回答してください。', } -async function buildCandidatePool( - seeds: Array<{ tmdb_id: number; media_type: string }>, +// --- Data helpers --- + +function toRating(v: string | null): number | null { + return v != null ? parseFloat(v) : null +} + +function normalizeTitle(s: string): string { + return s.trim().toLowerCase() +} + +function hitTitle(h: SearchHit): string { + return (h.title ?? h.name ?? '').trim() +} + +function buildExclusionKey(tmdbId: number, mediaType: string): string { + return `${tmdbId}-${mediaType === 'TV_SHOW' ? 'tv' : 'movie'}` +} + +function isGoodSeed(row: WatchedRow): boolean { + const rating = toRating(row.avgRating) + return rating === null || rating >= 3 +} + +// --- TMDB helpers --- + +async function fetchMovieCandidates( + seed: WatchedRow, exclusionSet: Set, language: Language ): Promise { - const allCandidates: Candidate[] = [] + const related = await tmdb.movies.related(seed.tmdbId, 'recommendations', language) + return (related.results ?? []) + .filter(r => !exclusionSet.has(`${r.id}-movie`) && (r.vote_count ?? 0) >= MIN_VOTE_COUNT) + .map(r => ({ + tmdbId: r.id, + title: r.title, + year: r.release_date ? Number.parseInt(r.release_date.split('-')[0], 10) : undefined, + mediaType: 'movie' as const, + })) +} + +async function fetchTvCandidates( + seed: WatchedRow, + exclusionSet: Set, + language: Language +): Promise { + const related = await tmdb.tv.related(seed.tmdbId, 'recommendations', language) + return (related.results ?? []) + .filter(r => !exclusionSet.has(`${r.id}-tv`) && (r.vote_count ?? 0) >= MIN_VOTE_COUNT) + .map(r => ({ + tmdbId: r.id, + title: r.name, + year: r.first_air_date ? Number.parseInt(r.first_air_date.split('-')[0], 10) : undefined, + mediaType: 'tv' as const, + })) +} - await Promise.all( +async function buildCandidatePool( + seeds: WatchedRow[], + exclusionSet: Set, + language: Language +): Promise { + const results = await Promise.all( seeds.map(async seed => { try { - const isMovie = seed.media_type === 'MOVIE' - - if (isMovie) { - const related = await tmdb.movies.related( - seed.tmdb_id, - 'recommendations', - language - ) - for (const r of related.results ?? []) { - if ( - !exclusionSet.has(`${r.id}-movie`) && - (r.vote_count ?? 0) >= MIN_VOTE_COUNT - ) { - allCandidates.push({ - tmdbId: r.id, - title: r.title, - year: r.release_date - ? Number.parseInt(r.release_date.split('-')[0]) - : undefined, - mediaType: 'movie', - }) - } - } - } else { - const related = await tmdb.tv.related( - seed.tmdb_id, - 'recommendations', - language - ) - for (const r of related.results ?? []) { - if ( - !exclusionSet.has(`${r.id}-tv`) && - (r.vote_count ?? 0) >= MIN_VOTE_COUNT - ) { - allCandidates.push({ - tmdbId: r.id, - title: r.name, - year: r.first_air_date - ? Number.parseInt(r.first_air_date.split('-')[0]) - : undefined, - mediaType: 'tv', - }) - } - } - } + return seed.mediaType === 'MOVIE' + ? fetchMovieCandidates(seed, exclusionSet, language) + : fetchTvCandidates(seed, exclusionSet, language) } catch { - // Ignore individual TMDB errors — other seeds will still contribute + return [] } }) ) - // Deduplicate by tmdbId-mediaType const seen = new Set() - return allCandidates.filter(c => { + return results.flat().filter(c => { const key = `${c.tmdbId}-${c.mediaType}` if (seen.has(key)) return false seen.add(key) @@ -130,6 +172,23 @@ async function buildCandidatePool( }) } +async function fetchItemTitle( + tmdbId: number, + mediaType: string, + language: Language +): Promise { + try { + if (mediaType === 'MOVIE' || mediaType === 'movie') { + const d = await tmdb.movies.details(tmdbId, language) + return (d as { title?: string }).title ?? null + } + const d = await tmdb.tv.details(tmdbId, language) + return (d as { name?: string }).name ?? null + } catch { + return null + } +} + async function resolveTmdbId( rec: Omit, exclusionSet: Set, @@ -139,7 +198,8 @@ async function resolveTmdbId( const search = await tmdb.search.multi(rec.title, language) const results = ((search as { results?: SearchHit[] }).results ?? []) as SearchHit[] - const mediaType = rec.mediaType + const { mediaType } = rec + const candidates = results.filter( r => (r.media_type === 'movie' || r.media_type === 'tv') && @@ -165,23 +225,70 @@ async function resolveTmdbId( } } -async function fetchItemTitle( - tmdbId: number, - mediaType: string, +// --- Prompt builders --- + +function buildStandardPrompt(params: { + watchedCount: number + preferredGenres: string + lovedLine: string + dislikedLine: string + candidateList: string language: Language -): Promise { - try { - if (mediaType === 'MOVIE') { - const d = await tmdb.movies.details(tmdbId, language) - return (d as { title?: string }).title ?? null - } - const d = await tmdb.tv.details(tmdbId, language) - return (d as { name?: string }).name ?? null - } catch { - return null - } +}): string { + const { + watchedCount, + preferredGenres, + lovedLine, + dislikedLine, + candidateList, + language, + } = params + + return `User taste profile: +- Watched: ${watchedCount} titles total +${preferredGenres ? `- Preferred genres: ${preferredGenres}` : ''} +${lovedLine ? `- Loved: ${lovedLine}` : ''} +${dislikedLine ? `- Disliked (avoid similar): ${dislikedLine}` : ''} + +Candidate titles — pick the 5 best matches for this user: +${candidateList} + +${dislikedLine ? 'Do NOT recommend anything tonally or stylistically similar to the disliked titles.' : ''} +${LANGUAGE_INSTRUCTION[language] || LANGUAGE_INSTRUCTION['en-US']} + +Return ONLY a valid JSON array with exactly 5 objects: +[{"title":"exact title from list","reason":"1-sentence reason in user's language","mediaType":"movie or tv","year":2020,"tmdbId":12345}]` } +function buildColdStartPrompt(params: { + movieCount: number + seriesCount: number + preferredGenres: string + language: Language +}): string { + const { movieCount, seriesCount, preferredGenres, language } = params + const preference = + movieCount > seriesCount * 1.5 + ? 'Strong movie lover' + : seriesCount > movieCount * 1.5 + ? 'Series binge-watcher' + : 'Balanced viewer' + + return `Based on this viewer profile, recommend exactly 5 popular, well-known titles. ${LANGUAGE_INSTRUCTION[language] || LANGUAGE_INSTRUCTION['en-US']} + +CRITICAL: Only mainstream titles with thousands of TMDB votes. No hidden gems or obscure titles. + +Profile: +- Watched: ${movieCount} movies, ${seriesCount} series +- Preference: ${preference} +${preferredGenres ? `- Preferred genres: ${preferredGenres}` : ''} + +Return ONLY valid JSON: +[{"title":"Exact English title as on TMDB","reason":"Short reason in user's language","mediaType":"movie or tv","year":2020}]` +} + +// --- Main service --- + export async function getUserAIRecommendationsService({ userId, redis, @@ -194,99 +301,72 @@ export async function getUserAIRecommendationsService({ const cached = await redis.get(cacheKey) if (cached) return JSON.parse(cached) - const itemDateFilter = - dateRange?.startDate && dateRange?.endDate - ? sql` AND ui.added_at >= ${dateRange.startDate.toISOString()} AND ui.added_at <= ${dateRange.endDate.toISOString()}` - : dateRange?.startDate - ? sql` AND ui.added_at >= ${dateRange.startDate.toISOString()}` - : sql`` - - // Watched items with average rating (used for seeding and prompt context) - const watchedWithRatings = await db.execute<{ - tmdb_id: number - media_type: string - avg_rating: string | null - added_at: string - }>(sql` - SELECT ui.tmdb_id, ui.media_type, ui.added_at, - AVG(r.rating)::numeric(3,1)::text AS avg_rating - FROM user_items ui - LEFT JOIN reviews r - ON r.tmdb_id = ui.tmdb_id - AND r.user_id = ui.user_id - AND r.media_type = ui.media_type - WHERE ui.user_id = ${userId} AND ui.status = 'WATCHED' ${itemDateFilter} - GROUP BY ui.tmdb_id, ui.media_type, ui.added_at - ORDER BY avg_rating DESC NULLS LAST, ui.added_at DESC - `) - - // Exclusion set: ALL statuses (WATCHED, WATCHING, DROPPED, WATCHLIST) - const allEngagedRows = await db.execute<{ - tmdb_id: number - media_type: string - }>(sql` - SELECT tmdb_id, media_type FROM user_items WHERE user_id = ${userId} - `) + const [watchedWithRatings, allEngagedRows, prefs] = await Promise.all([ + selectWatchedItemsWithAvgRating( + userId, + dateRange?.startDate, + dateRange?.endDate + ) as Promise, + selectAllUserItems(userId), + selectUserPreferences(userId), + ]) const exclusionSet = new Set( - allEngagedRows.map( - r => `${r.tmdb_id}-${r.media_type === 'TV_SHOW' ? 'tv' : 'movie'}` - ) + allEngagedRows.map(r => buildExclusionKey(r.tmdbId, r.mediaType)) ) - const watchedCount = watchedWithRatings.length - const toRating = (v: string | null) => (v != null ? parseFloat(v) : null) - - // Seeds: items rated >= 3 (or unrated) — used to drive TMDB related API - const seeds = watchedWithRatings - .filter(r => { - const rating = toRating(r.avg_rating) - return rating === null || rating >= 3 - }) - .slice(0, 10) - - // Loved / disliked for prompt context - const lovedItems = watchedWithRatings - .filter(r => (toRating(r.avg_rating) ?? 0) >= 4) - .slice(0, 3) - - const dislikedItems = watchedWithRatings - .filter(r => { - const rating = toRating(r.avg_rating) - return rating !== null && rating <= 2 - }) - .slice(0, 3) - - const openai = new OpenAI({ apiKey: config.openai.OPENAI_API_KEY }) - const systemPrompt = - 'You are a personal film & TV curator. Given candidate titles and a user taste profile, select the best matches. Respond ONLY with valid JSON, no markdown.' + const preferredGenres = (prefs[0]?.genreIds ?? []) + .map(id => TMDB_GENRE_MAP[id]) + .filter(Boolean) + .join(', ') + const watchedCount = watchedWithRatings.length + const aiService = createAIService('openAI') let recommendations: Rec[] = [] if (watchedCount >= COLD_START_THRESHOLD) { - // Standard path: build candidate pool via TMDB + fetch titles for context + const seeds = [ + ...watchedWithRatings + .filter(r => r.mediaType === 'MOVIE' && isGoodSeed(r)) + .slice(0, 5), + ...watchedWithRatings + .filter(r => r.mediaType === 'TV_SHOW' && isGoodSeed(r)) + .slice(0, 5), + ] + + const lovedItems = watchedWithRatings + .filter(r => (toRating(r.avgRating) ?? 0) >= 4) + .slice(0, 3) + + const dislikedItems = watchedWithRatings + .filter(r => { + const rating = toRating(r.avgRating) + return rating !== null && rating <= 2 + }) + .slice(0, 3) + const [candidates, lovedTitles, dislikedTitles] = await Promise.all([ buildCandidatePool(seeds, exclusionSet, language), Promise.all( lovedItems.map(async item => ({ - title: await fetchItemTitle(item.tmdb_id, item.media_type, language), - rating: toRating(item.avg_rating), + title: await fetchItemTitle(item.tmdbId, item.mediaType, language), + rating: toRating(item.avgRating), })) ), Promise.all( dislikedItems.map(async item => ({ - title: await fetchItemTitle(item.tmdb_id, item.media_type, language), - rating: toRating(item.avg_rating), + title: await fetchItemTitle(item.tmdbId, item.mediaType, language), + rating: toRating(item.avgRating), })) ), ]) if (candidates.length < 3) { - // Not enough candidates (edge case: all related items already watched) - // Fall through to cold start path below by setting watchedCount signal console.log( '[ai-recommendations] candidate pool too small, falling back to cold start', - { candidates: candidates.length } + { + candidates: candidates.length, + } ) } else { const lovedLine = lovedTitles @@ -311,41 +391,33 @@ export async function getUserAIRecommendationsService({ ) .join('\n') - const prompt = `User taste profile: -- Watched: ${watchedCount} titles total -${lovedLine ? `- Loved: ${lovedLine}` : ''} -${dislikedLine ? `- Disliked (avoid similar): ${dislikedLine}` : ''} - -Candidate titles — pick the 5 best matches for this user: -${candidateList} - -${dislikedLine ? 'Do NOT recommend anything tonally or stylistically similar to the disliked titles.' : ''} -${languageMap[language] || languageMap['en-US']} - -Return ONLY a valid JSON array with exactly 5 objects: -[{"title":"exact title from list","reason":"1-sentence reason in user's language","mediaType":"movie or tv","year":2020,"tmdbId":12345}]` - try { - const completion = await openai.chat.completions.create({ - model: 'gpt-4o-mini', - messages: [ - { role: 'system', content: systemPrompt }, - { role: 'user', content: prompt }, - ], + const raw = await aiService.generateJSON({ + system: SYSTEM_PROMPT, + user: buildStandardPrompt({ + watchedCount, + preferredGenres, + lovedLine, + dislikedLine, + candidateList, + language, + }), temperature: 0.5, - max_tokens: 600, + maxTokens: 600, }) - const raw = completion.choices[0]?.message?.content?.trim() || '[]' const parsed: Rec[] = JSON.parse(raw) - console.log('[ai-recommendations] OpenAI raw count', parsed.length, { - candidates: candidates.length, - lovedCount: lovedTitles.filter(r => r.title).length, - dislikedCount: dislikedTitles.filter(r => r.title).length, - }) + console.log( + '[ai-recommendations] standard path raw count', + parsed.length, + { + candidates: candidates.length, + lovedCount: lovedTitles.filter(r => r.title).length, + dislikedCount: dislikedTitles.filter(r => r.title).length, + } + ) - // Match back to candidate pool for reliable tmdbId const candidateMap = new Map( candidates.map(c => [normalizeTitle(c.title), c]) ) @@ -358,12 +430,11 @@ Return ONLY a valid JSON array with exactly 5 objects: }) .filter( rec => - rec.tmdbId && - !exclusionSet.has(`${rec.tmdbId}-${rec.mediaType}`) + rec.tmdbId && !exclusionSet.has(`${rec.tmdbId}-${rec.mediaType}`) ) } catch (err) { console.error( - '[ai-recommendations] OpenAI error (standard):', + '[ai-recommendations] error (standard):', err instanceof Error ? err.message : err ) recommendations = [] @@ -371,38 +442,27 @@ Return ONLY a valid JSON array with exactly 5 objects: } } - // Cold start path: fewer than threshold watched items OR candidate pool too small if (recommendations.length === 0) { const movieCount = watchedWithRatings.filter( - r => r.media_type === 'MOVIE' + r => r.mediaType === 'MOVIE' ).length const seriesCount = watchedWithRatings.filter( - r => r.media_type === 'TV_SHOW' + r => r.mediaType === 'TV_SHOW' ).length - const prompt = `Based on this viewer profile, recommend exactly 5 popular, well-known titles. ${languageMap[language] || languageMap['en-US']} - -CRITICAL: Only mainstream titles with thousands of TMDB votes. No hidden gems or obscure titles. - -Profile: -- Watched: ${movieCount} movies, ${seriesCount} series -- Preference: ${movieCount > seriesCount * 1.5 ? 'Strong movie lover' : seriesCount > movieCount * 1.5 ? 'Series binge-watcher' : 'Balanced viewer'} - -Return ONLY valid JSON: -[{"title":"Exact English title as on TMDB","reason":"Short reason in user's language","mediaType":"movie or tv","year":2020}]` - try { - const completion = await openai.chat.completions.create({ - model: 'gpt-4o-mini', - messages: [ - { role: 'system', content: systemPrompt }, - { role: 'user', content: prompt }, - ], + const raw = await aiService.generateJSON({ + system: SYSTEM_PROMPT, + user: buildColdStartPrompt({ + movieCount, + seriesCount, + preferredGenres, + language, + }), temperature: 0.6, - max_tokens: 400, + maxTokens: 400, }) - const raw = completion.choices[0]?.message?.content?.trim() || '[]' const parsed: Array> = JSON.parse(raw) console.log( @@ -414,10 +474,7 @@ Return ONLY valid JSON: const resolved = await Promise.all( parsed.map(rec => resolveTmdbId( - { - ...rec, - mediaType: rec.mediaType === 'tv' ? 'tv' : 'movie', - }, + { ...rec, mediaType: rec.mediaType === 'tv' ? 'tv' : 'movie' }, exclusionSet, language ) @@ -427,7 +484,7 @@ Return ONLY valid JSON: recommendations = resolved.filter((r): r is Rec => r !== null) } catch (err) { console.error( - '[ai-recommendations] OpenAI error (cold start):', + '[ai-recommendations] error (cold start):', err instanceof Error ? err.message : err ) recommendations = [] diff --git a/apps/backend/src/infra/adapters/open-ai.ts b/apps/backend/src/infra/adapters/open-ai.ts index 45f44ac56..70c3b2677 100644 --- a/apps/backend/src/infra/adapters/open-ai.ts +++ b/apps/backend/src/infra/adapters/open-ai.ts @@ -26,8 +26,28 @@ async function generateMessage(prompt: string, content: string) { return response.choices[0].message.content || '' } +async function generateJSON(params: { + system: string + user: string + temperature?: number + maxTokens?: number +}) { + const response = await openai.chat.completions.create({ + model: 'gpt-4o-mini', + messages: [ + { role: 'system', content: params.system }, + { role: 'user', content: params.user }, + ], + temperature: params.temperature ?? 0.5, + max_tokens: params.maxTokens ?? 600, + }) + + return response.choices[0].message.content?.trim() || '[]' +} + const OpenAIService: AIService = { generateMessage: (prefix, content) => generateMessage(prefix, content), + generateJSON: params => generateJSON(params), } export { OpenAIService } diff --git a/apps/backend/src/infra/db/repositories/user-item-repository.ts b/apps/backend/src/infra/db/repositories/user-item-repository.ts index 76c9eb857..70da90c37 100644 --- a/apps/backend/src/infra/db/repositories/user-item-repository.ts +++ b/apps/backend/src/infra/db/repositories/user-item-repository.ts @@ -225,6 +225,50 @@ export async function selectAllUserItems(userId: string) { .where(eq(schema.userItems.userId, userId)) } +export async function selectWatchedItemsWithAvgRating( + userId: string, + startDate?: Date, + endDate?: Date +) { + const whereConditions = [ + eq(schema.userItems.userId, userId), + eq(schema.userItems.status, 'WATCHED'), + ] + + if (startDate && endDate) { + whereConditions.push(between(schema.userItems.addedAt, startDate, endDate)) + } else if (startDate) { + whereConditions.push(gte(schema.userItems.addedAt, startDate)) + } + + return db + .select({ + tmdbId: schema.userItems.tmdbId, + mediaType: schema.userItems.mediaType, + addedAt: schema.userItems.addedAt, + avgRating: sql`AVG(${schema.reviews.rating})::numeric(3,1)::text`, + }) + .from(schema.userItems) + .leftJoin( + schema.reviews, + and( + eq(schema.reviews.tmdbId, schema.userItems.tmdbId), + eq(schema.reviews.userId, schema.userItems.userId), + eq(schema.reviews.mediaType, schema.userItems.mediaType) + ) + ) + .where(and(...whereConditions)) + .groupBy( + schema.userItems.tmdbId, + schema.userItems.mediaType, + schema.userItems.addedAt + ) + .orderBy( + sql`AVG(${schema.reviews.rating}) DESC NULLS LAST`, + desc(schema.userItems.addedAt) + ) +} + export async function selectUserItemsCount(userId: string) { const result = await db .select({ diff --git a/apps/backend/src/infra/ports/ai-service.ts b/apps/backend/src/infra/ports/ai-service.ts index 864557ba1..d0bbe62dc 100644 --- a/apps/backend/src/infra/ports/ai-service.ts +++ b/apps/backend/src/infra/ports/ai-service.ts @@ -1,3 +1,9 @@ export interface AIService { generateMessage(prefix: string, content: string): Promise + generateJSON(params: { + system: string + user: string + temperature?: number + maxTokens?: number + }): Promise } From 0e433f9406053eb4357c42d96f8cdfb394a13609 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Pedro=20Alves?= Date: Wed, 25 Mar 2026 11:26:33 -0300 Subject: [PATCH 3/3] feat: update AI recommendation logic and enhance configuration --- .claude/launch.json | 2 +- .claude/settings.local.json | 6 +- apps/backend/.env.example | 24 +- apps/backend/src/config.ts | 5 +- .../get-user-ai-recommendations.spec.ts | 291 ++++++++++++++++++ .../user-stats/get-user-ai-recommendations.ts | 151 +++++++-- .../services/user-stats/get-user-taste-dna.ts | 2 +- .../user-stats/get-user-viewer-profile.ts | 7 +- apps/backend/src/infra/adapters/open-ai.ts | 2 +- apps/backend/src/test/global-setup.ts | 1 + 10 files changed, 445 insertions(+), 46 deletions(-) create mode 100644 apps/backend/src/domain/services/user-stats/get-user-ai-recommendations.spec.ts diff --git a/.claude/launch.json b/.claude/launch.json index 21f97839e..0d48481ba 100644 --- a/.claude/launch.json +++ b/.claude/launch.json @@ -10,7 +10,7 @@ { "name": "backend", "runtimeExecutable": "sh", - "runtimeArgs": ["-c", "docker compose -f apps/backend/docker-compose.yml up -d --scale app=0 && pnpm --filter plotwist-api run dev"], + "runtimeArgs": ["-c", "docker compose -f apps/backend/docker-compose.yml up -d && pnpm --filter plotwist-api run dev"], "port": 3333 } ] diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 7a36170e8..be519bd86 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -3,7 +3,11 @@ "allow": [ "Bash(gh pr:*)", "Bash(gh api:*)", - "Bash(git add:*)" + "Bash(git add:*)", + "Bash(source ~/.zshrc)", + "Bash(npx @biomejs/biome check --changed)", + "Bash(npx @biomejs/biome check src/config.ts src/infra/adapters/open-ai.ts src/domain/services/user-stats/get-user-ai-recommendations.ts src/domain/services/user-stats/get-user-ai-recommendations.spec.ts)", + "Bash(pnpm run:*)" ] } } diff --git a/apps/backend/.env.example b/apps/backend/.env.example index c695348fe..165cf3fd0 100644 --- a/apps/backend/.env.example +++ b/apps/backend/.env.example @@ -3,17 +3,17 @@ APP_ENV=dev # General PORT=3333 BASE_URL=http://localhost:3333 -JWT_SECRET= +JWT_SECRET="banana" CLIENT_URL=http://localhost:3000 +RATE_LIMIT_MAX=100 +RATE_LIMIT_TIME_WINDOW_MS=60000 IOS_TOKEN= # Database -DATABASE_URL="postgresql://postgres:postgres@localhost:5432" +DATABASE_URL="postgresql://postgres:postgres@localhost:5432/plotwist_db" -# Stripe -STRIPE_SECRET_KEY="" -# Webhook signing secret (whsec_...) – use for constructEvent in production -STRIPE_WEBHOOK_SECRET="" +# Stripe +STRIPE_SECRET_KEY= # TMDB TMDB_ACCESS_TOKEN= @@ -31,6 +31,7 @@ CLOUDFLARE_PUBLIC_URL='https://cloudfront.net' # SQS AWS_REGION=sa-east-1 LOCALSTACK_ENDPOINT=http://localhost:4566 + AWS_ACCESS_KEY_ID=banana AWS_SECRET_ACCESS_KEY=banana @@ -42,18 +43,19 @@ IMPORT_SERIES_QUEUE=import-series-queue MAL_CLIENT_ID=banana # Feature Flags -ENABLE_IMPORT_MOVIES=true -ENABLE_IMPORT_SERIES=true -ENABLE_SQS=true +ENABLE_IMPORT_MOVIES=false +ENABLE_IMPORT_SERIES=false +ENABLE_SQS=false ENABLE_CRON_JOBS=false # OpenAI OPENAI_API_KEY= +RECOMMENDATION_AI_PROVIDER=openAI # Monitors ENABLE_MONITORS=true MONITOR_CRON_TIME="*/30 * * * *" # Telemetry -OTEL_EXPORTER_OTLP_ENDPOINT=localhost -OTEL_EXPORTER_OTLP_HEADERS= +OTEL_EXPORTER_OTLP_ENDPOINT="localhost" +OTEL_EXPORTER_OTLP_HEADERS="" diff --git a/apps/backend/src/config.ts b/apps/backend/src/config.ts index 238c26b68..c60247ea2 100644 --- a/apps/backend/src/config.ts +++ b/apps/backend/src/config.ts @@ -15,7 +15,7 @@ export const config = { sqsQueues: loadSQSQueues(), featureFlags: loadFeatureFlags(), myAnimeList: loadMALEnvs(), - openai: loadOpenAIEnvs(), + intelligence: loadAIEnvs(), google: loadGoogleEnvs(), monitors: loadMonitorsEnvs(), telemetry: loadTelemetryEnvs(), @@ -115,9 +115,10 @@ function loadMALEnvs() { return schema.parse(process.env) } -function loadOpenAIEnvs() { +function loadAIEnvs() { const schema = z.object({ OPENAI_API_KEY: z.string(), + RECOMMENDATION_AI_PROVIDER: z.enum(['openAI', 'llama']).default('openAI'), }) return schema.parse(process.env) diff --git a/apps/backend/src/domain/services/user-stats/get-user-ai-recommendations.spec.ts b/apps/backend/src/domain/services/user-stats/get-user-ai-recommendations.spec.ts new file mode 100644 index 000000000..3e27e3046 --- /dev/null +++ b/apps/backend/src/domain/services/user-stats/get-user-ai-recommendations.spec.ts @@ -0,0 +1,291 @@ +import { beforeEach, describe, expect, it, type Mock, vi } from 'vitest' +import { tmdb } from '@/infra/adapters/tmdb' +import { createAIService } from '@/infra/factories/ai-provider-factory' +import { makeUser } from '@/test/factories/make-user' +import { makeUserItem } from '@/test/factories/make-user-item' +import { redisClient } from '@/test/mocks/redis' +import { getUserAIRecommendationsService } from './get-user-ai-recommendations' + +vi.mock('@/infra/factories/ai-provider-factory') + +vi.mock('@/infra/adapters/tmdb', () => ({ + tmdb: { + movies: { + related: vi.fn(), + details: vi.fn(), + }, + tv: { + related: vi.fn(), + details: vi.fn(), + }, + search: { + multi: vi.fn(), + }, + }, +})) + +const INCEPTION = { + id: 27205, + title: 'Inception', + release_date: '2010-07-16', + vote_count: 35_000, +} + +const BREAKING_BAD = { + id: 1396, + name: 'Breaking Bad', + first_air_date: '2008-01-20', + vote_count: 15_000, +} + +describe('get user ai recommendations', () => { + let generateJSONMock: Mock + + beforeEach(async () => { + await redisClient.flushall() + + generateJSONMock = vi.fn() + ;(createAIService as Mock).mockReturnValue({ + generateMessage: vi.fn(), + generateJSON: generateJSONMock, + }) + + ;(tmdb.movies.related as Mock).mockResolvedValue({ results: [INCEPTION] }) + ;(tmdb.tv.related as Mock).mockResolvedValue({ results: [BREAKING_BAD] }) + ;(tmdb.movies.details as Mock).mockResolvedValue({ title: INCEPTION.title }) + ;(tmdb.tv.details as Mock).mockResolvedValue({ name: BREAKING_BAD.name }) + ;(tmdb.search.multi as Mock).mockResolvedValue({ + results: [ + { + id: INCEPTION.id, + media_type: 'movie', + vote_count: INCEPTION.vote_count, + title: INCEPTION.title, + }, + ], + }) + }) + + describe('cold start (fewer than 5 watched items)', () => { + it('should return AI-generated recommendations resolved via TMDB search', async () => { + const user = await makeUser() + + generateJSONMock.mockResolvedValue( + JSON.stringify([ + { + title: INCEPTION.title, + reason: 'A mind-bending sci-fi thriller.', + mediaType: 'movie', + year: 2010, + }, + ]) + ) + + const result = await getUserAIRecommendationsService({ + userId: user.id, + redis: redisClient, + language: 'en-US', + }) + + expect(generateJSONMock).toHaveBeenCalledOnce() + expect(result.recommendations).toHaveLength(1) + expect(result.recommendations[0]).toMatchObject({ + title: INCEPTION.title, + mediaType: 'movie', + tmdbId: INCEPTION.id, + }) + }) + + it('should return empty recommendations when AI returns no resolvable titles', async () => { + const user = await makeUser() + + generateJSONMock.mockResolvedValue(JSON.stringify([])) + + const result = await getUserAIRecommendationsService({ + userId: user.id, + redis: redisClient, + language: 'en-US', + }) + + expect(result.recommendations).toHaveLength(0) + }) + }) + + describe('standard path (5 or more watched items)', () => { + it('should use TMDB candidate pool and AI curation', async () => { + const user = await makeUser() + + for (let i = 0; i < 5; i++) { + await makeUserItem({ + userId: user.id, + status: 'WATCHED', + mediaType: 'MOVIE', + }) + } + + generateJSONMock.mockResolvedValue( + JSON.stringify([ + { + title: INCEPTION.title, + reason: 'A classic.', + mediaType: 'movie', + year: 2010, + tmdbId: INCEPTION.id, + }, + ]) + ) + + const result = await getUserAIRecommendationsService({ + userId: user.id, + redis: redisClient, + language: 'en-US', + }) + + expect(tmdb.movies.related).toHaveBeenCalled() + expect(generateJSONMock).toHaveBeenCalledOnce() + expect(result.recommendations).toHaveLength(1) + expect(result.recommendations[0]).toMatchObject({ + title: INCEPTION.title, + tmdbId: INCEPTION.id, + mediaType: 'movie', + }) + }) + + it('should include both movie and TV candidates when user has both', async () => { + const user = await makeUser() + + for (let i = 0; i < 3; i++) { + await makeUserItem({ + userId: user.id, + status: 'WATCHED', + mediaType: 'MOVIE', + }) + } + for (let i = 0; i < 3; i++) { + await makeUserItem({ + userId: user.id, + status: 'WATCHED', + mediaType: 'TV_SHOW', + }) + } + + // Cold start path is triggered when candidate pool < 3 (1 movie + 1 TV = 2) + // Mock search.multi to resolve both titles correctly + ;(tmdb.search.multi as Mock).mockImplementation((title: string) => { + if (title === BREAKING_BAD.name) { + return Promise.resolve({ + results: [ + { + id: BREAKING_BAD.id, + media_type: 'tv', + vote_count: BREAKING_BAD.vote_count, + name: BREAKING_BAD.name, + }, + ], + }) + } + return Promise.resolve({ + results: [ + { + id: INCEPTION.id, + media_type: 'movie', + vote_count: INCEPTION.vote_count, + title: INCEPTION.title, + }, + ], + }) + }) + + generateJSONMock.mockResolvedValue( + JSON.stringify([ + { + title: INCEPTION.title, + reason: 'Great film.', + mediaType: 'movie', + year: 2010, + }, + { + title: BREAKING_BAD.name, + reason: 'Unmissable series.', + mediaType: 'tv', + year: 2008, + }, + ]) + ) + + const result = await getUserAIRecommendationsService({ + userId: user.id, + redis: redisClient, + language: 'en-US', + }) + + expect(tmdb.movies.related).toHaveBeenCalled() + expect(tmdb.tv.related).toHaveBeenCalled() + expect(result.recommendations).toHaveLength(2) + }) + }) + + describe('exclusion', () => { + it('should not recommend items already in any user list status', async () => { + const user = await makeUser() + + await makeUserItem({ + userId: user.id, + tmdbId: INCEPTION.id, + mediaType: 'MOVIE', + status: 'WATCHING', + }) + + generateJSONMock.mockResolvedValue( + JSON.stringify([ + { + title: INCEPTION.title, + reason: 'A classic.', + mediaType: 'movie', + year: 2010, + }, + ]) + ) + + const result = await getUserAIRecommendationsService({ + userId: user.id, + redis: redisClient, + language: 'en-US', + }) + + const hasExcluded = result.recommendations.some( + (r: { tmdbId?: number }) => r.tmdbId === INCEPTION.id + ) + expect(hasExcluded).toBe(false) + }) + }) + + describe('caching', () => { + it('should return cached result on second call without calling AI again', async () => { + const user = await makeUser() + + generateJSONMock.mockResolvedValue( + JSON.stringify([ + { + title: INCEPTION.title, + reason: 'A classic.', + mediaType: 'movie', + year: 2010, + }, + ]) + ) + + const params = { + userId: user.id, + redis: redisClient, + language: 'en-US' as const, + } + + const first = await getUserAIRecommendationsService(params) + const second = await getUserAIRecommendationsService(params) + + expect(generateJSONMock).toHaveBeenCalledOnce() + expect(second).toEqual(first) + }) + }) +}) diff --git a/apps/backend/src/domain/services/user-stats/get-user-ai-recommendations.ts b/apps/backend/src/domain/services/user-stats/get-user-ai-recommendations.ts index e7285cdfa..6dfee0fda 100644 --- a/apps/backend/src/domain/services/user-stats/get-user-ai-recommendations.ts +++ b/apps/backend/src/domain/services/user-stats/get-user-ai-recommendations.ts @@ -1,5 +1,7 @@ import type { FastifyRedis } from '@fastify/redis' import type { Language } from '@plotwist_app/tmdb' +import type { MediaTypeEnum } from '@/@types/media-type-enum' +import { config } from '@/config' import { tmdb } from '@/infra/adapters/tmdb' import { selectAllUserItems, @@ -17,17 +19,20 @@ type Input = { dateRange?: { startDate: Date | undefined; endDate: Date | undefined } } -const MIN_VOTE_COUNT = 3000 +const MOVIE_MIN_VOTE_COUNT = 2000 +const TV_MIN_VOTE_COUNT = 200 const COLD_START_THRESHOLD = 5 +const ANIME_GENRE_ID = 16 const SYSTEM_PROMPT = - 'You are a personal film & TV curator. Given candidate titles and a user taste profile, select the best matches. Respond ONLY with valid JSON, no markdown.' + 'You are a personal film, TV & anime curator. Given candidate titles and a user taste profile, select the best matches. Respond ONLY with valid JSON, no markdown.' type Candidate = { tmdbId: number title: string year?: number mediaType: 'movie' | 'tv' + genres: string[] } type Rec = { @@ -40,7 +45,7 @@ type Rec = { type WatchedRow = { tmdbId: number - mediaType: string + mediaType: MediaTypeEnum avgRating: string | null addedAt: Date } @@ -103,7 +108,7 @@ function hitTitle(h: SearchHit): string { return (h.title ?? h.name ?? '').trim() } -function buildExclusionKey(tmdbId: number, mediaType: string): string { +function buildExclusionKey(tmdbId: number, mediaType: MediaTypeEnum): string { return `${tmdbId}-${mediaType === 'TV_SHOW' ? 'tv' : 'movie'}` } @@ -112,6 +117,10 @@ function isGoodSeed(row: WatchedRow): boolean { return rating === null || rating >= 3 } +function mapGenreIds(genreIds: number[]): string[] { + return genreIds.map(id => TMDB_GENRE_MAP[id]).filter(Boolean) as string[] +} + // --- TMDB helpers --- async function fetchMovieCandidates( @@ -119,14 +128,25 @@ async function fetchMovieCandidates( exclusionSet: Set, language: Language ): Promise { - const related = await tmdb.movies.related(seed.tmdbId, 'recommendations', language) + const related = await tmdb.movies.related( + seed.tmdbId, + 'recommendations', + language + ) return (related.results ?? []) - .filter(r => !exclusionSet.has(`${r.id}-movie`) && (r.vote_count ?? 0) >= MIN_VOTE_COUNT) + .filter( + r => + !exclusionSet.has(`${r.id}-movie`) && + (r.vote_count ?? 0) >= MOVIE_MIN_VOTE_COUNT + ) .map(r => ({ tmdbId: r.id, title: r.title, - year: r.release_date ? Number.parseInt(r.release_date.split('-')[0], 10) : undefined, + year: r.release_date + ? Number.parseInt(r.release_date.split('-')[0], 10) + : undefined, mediaType: 'movie' as const, + genres: mapGenreIds(r.genre_ids ?? []), })) } @@ -135,14 +155,25 @@ async function fetchTvCandidates( exclusionSet: Set, language: Language ): Promise { - const related = await tmdb.tv.related(seed.tmdbId, 'recommendations', language) + const related = await tmdb.tv.related( + seed.tmdbId, + 'recommendations', + language + ) return (related.results ?? []) - .filter(r => !exclusionSet.has(`${r.id}-tv`) && (r.vote_count ?? 0) >= MIN_VOTE_COUNT) + .filter( + r => + !exclusionSet.has(`${r.id}-tv`) && + (r.vote_count ?? 0) >= TV_MIN_VOTE_COUNT + ) .map(r => ({ tmdbId: r.id, title: r.name, - year: r.first_air_date ? Number.parseInt(r.first_air_date.split('-')[0], 10) : undefined, + year: r.first_air_date + ? Number.parseInt(r.first_air_date.split('-')[0], 10) + : undefined, mediaType: 'tv' as const, + genres: mapGenreIds(r.genre_ids ?? []), })) } @@ -174,11 +205,11 @@ async function buildCandidatePool( async function fetchItemTitle( tmdbId: number, - mediaType: string, + mediaType: MediaTypeEnum, language: Language ): Promise { try { - if (mediaType === 'MOVIE' || mediaType === 'movie') { + if (mediaType === 'MOVIE') { const d = await tmdb.movies.details(tmdbId, language) return (d as { title?: string }).title ?? null } @@ -217,7 +248,7 @@ async function resolveTmdbId( ) if (exclusionSet.has(`${match.id}-${mediaType}`)) return null - if ((match.vote_count ?? 0) < MIN_VOTE_COUNT) return null + if ((match.vote_count ?? 0) < TV_MIN_VOTE_COUNT) return null return { ...rec, tmdbId: match.id } } catch { @@ -225,6 +256,45 @@ async function resolveTmdbId( } } +// --- Anime detection --- + +function detectIsAnimeFanFromCandidates( + candidates: Candidate[], + preferredGenres: string +): boolean { + if (preferredGenres.includes('Animation')) return true + const animeCandidates = candidates.filter(c => + c.genres.includes('Animation') + ).length + return candidates.length > 0 && animeCandidates / candidates.length >= 0.3 +} + +async function detectIsAnimeFanFromWatched( + watchedItems: WatchedRow[], + language: Language +): Promise { + const tvSeeds = watchedItems + .filter(r => r.mediaType === 'TV_SHOW') + .slice(0, 3) + + if (tvSeeds.length === 0) return false + + const checks = await Promise.all( + tvSeeds.map(async item => { + try { + const details = await tmdb.tv.details(item.tmdbId, language) + return ( + (details as { genres?: { id: number }[] }).genres ?? [] + ).some(g => g.id === ANIME_GENRE_ID) + } catch { + return false + } + }) + ) + + return checks.some(Boolean) +} + // --- Prompt builders --- function buildStandardPrompt(params: { @@ -233,6 +303,7 @@ function buildStandardPrompt(params: { lovedLine: string dislikedLine: string candidateList: string + isAnimeFan: boolean language: Language }): string { const { @@ -241,12 +312,14 @@ function buildStandardPrompt(params: { lovedLine, dislikedLine, candidateList, + isAnimeFan, language, } = params return `User taste profile: - Watched: ${watchedCount} titles total ${preferredGenres ? `- Preferred genres: ${preferredGenres}` : ''} +${isAnimeFan ? '- This user watches a lot of anime — prioritize anime recommendations.' : ''} ${lovedLine ? `- Loved: ${lovedLine}` : ''} ${dislikedLine ? `- Disliked (avoid similar): ${dislikedLine}` : ''} @@ -264,9 +337,11 @@ function buildColdStartPrompt(params: { movieCount: number seriesCount: number preferredGenres: string + isAnimeFan: boolean language: Language }): string { - const { movieCount, seriesCount, preferredGenres, language } = params + const { movieCount, seriesCount, preferredGenres, isAnimeFan, language } = + params const preference = movieCount > seriesCount * 1.5 ? 'Strong movie lover' @@ -277,6 +352,7 @@ function buildColdStartPrompt(params: { return `Based on this viewer profile, recommend exactly 5 popular, well-known titles. ${LANGUAGE_INSTRUCTION[language] || LANGUAGE_INSTRUCTION['en-US']} CRITICAL: Only mainstream titles with thousands of TMDB votes. No hidden gems or obscure titles. +${isAnimeFan ? 'IMPORTANT: This user watches a lot of anime — recommend anime titles.' : ''} Profile: - Watched: ${movieCount} movies, ${seriesCount} series @@ -296,7 +372,7 @@ export async function getUserAIRecommendationsService({ period = 'all', dateRange, }: Input) { - const cacheKey = `user-stats:${userId}:ai-recommendations:v5:${language}:${period}` + const cacheKey = `user-stats:${userId}:ai-recommendations:v7:${language}:${period}` const cached = await redis.get(cacheKey) if (cached) return JSON.parse(cached) @@ -321,18 +397,26 @@ export async function getUserAIRecommendationsService({ .join(', ') const watchedCount = watchedWithRatings.length - const aiService = createAIService('openAI') + const aiService = createAIService( + config.intelligence.RECOMMENDATION_AI_PROVIDER + ) let recommendations: Rec[] = [] + // isAnimeFan is determined once and shared between both paths + let isAnimeFan = + preferredGenres.includes('Animation') || + prefs[0]?.genreIds?.includes(ANIME_GENRE_ID) === true + if (watchedCount >= COLD_START_THRESHOLD) { - const seeds = [ - ...watchedWithRatings - .filter(r => r.mediaType === 'MOVIE' && isGoodSeed(r)) - .slice(0, 5), - ...watchedWithRatings - .filter(r => r.mediaType === 'TV_SHOW' && isGoodSeed(r)) - .slice(0, 5), - ] + const movieSeeds = watchedWithRatings + .filter(r => r.mediaType === 'MOVIE' && isGoodSeed(r)) + .slice(0, 5) + + const tvSeeds = watchedWithRatings + .filter(r => r.mediaType === 'TV_SHOW' && isGoodSeed(r)) + .slice(0, 5) + + const seeds = [...movieSeeds, ...tvSeeds] const lovedItems = watchedWithRatings .filter(r => (toRating(r.avgRating) ?? 0) >= 4) @@ -361,12 +445,18 @@ export async function getUserAIRecommendationsService({ ), ]) + // Update isAnimeFan from candidate pool (carries over to cold start fallback) + isAnimeFan = + isAnimeFan || detectIsAnimeFanFromCandidates(candidates, preferredGenres) + if (candidates.length < 3) { + // Try to detect anime from watched TV items via TMDB details + if (!isAnimeFan) { + isAnimeFan = await detectIsAnimeFanFromWatched(tvSeeds, language) + } console.log( '[ai-recommendations] candidate pool too small, falling back to cold start', - { - candidates: candidates.length, - } + { candidates: candidates.length, isAnimeFan } ) } else { const lovedLine = lovedTitles @@ -386,6 +476,7 @@ export async function getUserAIRecommendationsService({ title: c.title, year: c.year, mediaType: c.mediaType, + genres: c.genres.length > 0 ? c.genres.join(', ') : undefined, tmdbId: c.tmdbId, }) ) @@ -400,6 +491,7 @@ export async function getUserAIRecommendationsService({ lovedLine, dislikedLine, candidateList, + isAnimeFan, language, }), temperature: 0.5, @@ -413,6 +505,7 @@ export async function getUserAIRecommendationsService({ parsed.length, { candidates: candidates.length, + isAnimeFan, lovedCount: lovedTitles.filter(r => r.title).length, dislikedCount: dislikedTitles.filter(r => r.title).length, } @@ -440,6 +533,9 @@ export async function getUserAIRecommendationsService({ recommendations = [] } } + } else if (!isAnimeFan && watchedWithRatings.length > 0) { + // Pure cold start: detect anime from watched TV items via TMDB details + isAnimeFan = await detectIsAnimeFanFromWatched(watchedWithRatings, language) } if (recommendations.length === 0) { @@ -457,6 +553,7 @@ export async function getUserAIRecommendationsService({ movieCount, seriesCount, preferredGenres, + isAnimeFan, language, }), temperature: 0.6, diff --git a/apps/backend/src/domain/services/user-stats/get-user-taste-dna.ts b/apps/backend/src/domain/services/user-stats/get-user-taste-dna.ts index 665623f90..c282e1290 100644 --- a/apps/backend/src/domain/services/user-stats/get-user-taste-dna.ts +++ b/apps/backend/src/domain/services/user-stats/get-user-taste-dna.ts @@ -153,7 +153,7 @@ When review snippets are provided above, use them to reflect how they talk about let traits: string[] = [] try { - const openai = new OpenAI({ apiKey: config.openai.OPENAI_API_KEY }) + const openai = new OpenAI({ apiKey: config.intelligence.OPENAI_API_KEY }) const completion = await openai.chat.completions.create({ model: 'gpt-4o-mini', messages: [ diff --git a/apps/backend/src/domain/services/user-stats/get-user-viewer-profile.ts b/apps/backend/src/domain/services/user-stats/get-user-viewer-profile.ts index bfc068bf3..24528332a 100644 --- a/apps/backend/src/domain/services/user-stats/get-user-viewer-profile.ts +++ b/apps/backend/src/domain/services/user-stats/get-user-viewer-profile.ts @@ -89,7 +89,7 @@ Write ONLY the profile text, no labels or headers.` let profile = '' try { - const openai = new OpenAI({ apiKey: config.openai.OPENAI_API_KEY }) + const openai = new OpenAI({ apiKey: config.intelligence.OPENAI_API_KEY }) const completion = await openai.chat.completions.create({ model: 'gpt-4o-mini', messages: [ @@ -105,7 +105,10 @@ Write ONLY the profile text, no labels or headers.` }) profile = completion.choices[0]?.message?.content?.trim() || '' } catch (err) { - console.error('[viewer-profile] OpenAI error:', err instanceof Error ? err.message : err) + console.error( + '[viewer-profile] OpenAI error:', + err instanceof Error ? err.message : err + ) } const result = { viewerProfile: profile } diff --git a/apps/backend/src/infra/adapters/open-ai.ts b/apps/backend/src/infra/adapters/open-ai.ts index 70c3b2677..4f5698c5b 100644 --- a/apps/backend/src/infra/adapters/open-ai.ts +++ b/apps/backend/src/infra/adapters/open-ai.ts @@ -3,7 +3,7 @@ import { config } from '@/config' import type { AIService } from '@/infra/ports/ai-service' const openai = new OpenAI({ - apiKey: config.openai.OPENAI_API_KEY, + apiKey: config.intelligence.OPENAI_API_KEY, }) async function generateMessage(prompt: string, content: string) { diff --git a/apps/backend/src/test/global-setup.ts b/apps/backend/src/test/global-setup.ts index 94740aedc..f7907438c 100644 --- a/apps/backend/src/test/global-setup.ts +++ b/apps/backend/src/test/global-setup.ts @@ -78,6 +78,7 @@ export async function setup() { // OpenAI process.env.OPENAI_API_KEY = 'open_api_key' + process.env.RECOMMENDATION_AI_PROVIDER = 'openAI' await setupDatabase() await setupLocalStack()