From 7f94757dc545a021f7ed1924a6ac5089a5603e2c Mon Sep 17 00:00:00 2001 From: Vasyl Vdovychenko Date: Fri, 19 Jun 2026 02:28:45 -0400 Subject: [PATCH] =?UTF-8?q?feat(rag):=20Ask=20this=20book=20=E2=80=94=20co?= =?UTF-8?q?nversational=20streaming=20chat=20+=20gpt-4.1-mini?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns the single-turn FAQ into a real chat (grounding + citations + spoiler gate unchanged). - Model: rag.ask -> gpt-4.1-mini (dedicated openai-rag provider, mirrors openai-explain; translate/podcast stay nano). - Companion system prompt: warm, conversational, handles greetings; grounding is STRICT and overrides everything (covers themes/symbolism/interpretation, not just plot) -> every book-fact cites [n]; greeting/meta the only no-cite case. - Multi-turn memory: AskRequest.History (client-held, server-clamped last 6 turns / 4000 chars each); real LlmMessage[] (system -> excerpts -> history -> question); retrieval still per latest question. Eval call passes []. - SSE streaming (content-negotiated, copies Explain): delta* then terminal done {citations,lastReadOrd,insufficient}; empty chunks -> friendly delta + done, NO model call (spoiler/hallucination short-circuit, both paths). JSON fallback unchanged. Both catalog + user-book ask. MaxOutputTokens 320->400. - Web: streaming token render + blinking caret, suggested starters on empty thread, last-6 history sent, abort on new-question/unmount. Mobile keeps JSON. architect -> backend+web -> adversarial QA (SHIP; closed grounding loophole + added empty-chunk-no-LLM-call tests). 878 unit + 564 web green. NOTE: re-run the paid grounding/citation eval on /ai-quality post-deploy. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 8 + apps/web/src/api/ask.ts | 71 ++++++- apps/web/src/components/reader/AskPanel.tsx | 43 ++++- .../reader/__tests__/AskPanel.test.tsx | 31 ++- apps/web/src/hooks/useAsk.test.ts | 140 ++++++++++---- apps/web/src/hooks/useAsk.ts | 131 +++++++++++-- apps/web/src/locales/en.json | 9 +- apps/web/src/styles/reader.css | 37 ++++ .../TextStack.Ai.EvalSuite/RagEvalRunner.cs | 2 +- backend/src/Api/Endpoints/AskEndpoints.cs | 41 ++-- backend/src/Api/Endpoints/AskSse.cs | 144 ++++++++++++++ .../src/Api/Endpoints/UserBookAskEndpoints.cs | 45 ++--- backend/src/Api/appsettings.json | 5 +- backend/src/Application/Ai/RagAskHistory.cs | 74 ++++++++ backend/src/Application/Ai/RagAskPrompt.cs | 36 ++-- .../src/Application/DependencyInjection.cs | 11 +- backend/src/Application/Rag/RagAskService.cs | 152 +++++++++++++-- backend/src/Contracts/Books/AskDtos.cs | 15 +- .../Persistence/ModelRegistrySeeder.cs | 2 +- packages/shared/src/types/api.ts | 9 + tests/TextStack.AiEvals/RagEvalRunnerTests.cs | 4 +- .../AskEndpointTests.cs | 54 ++++++ tests/TextStack.UnitTests/AskSseTests.cs | 103 ++++++++++ .../TextStack.UnitTests/RagAskHistoryTests.cs | 127 +++++++++++++ .../TextStack.UnitTests/RagAskPromptTests.cs | 80 ++++++-- .../TextStack.UnitTests/RagAskServiceTests.cs | 178 ++++++++++++++++++ 26 files changed, 1403 insertions(+), 149 deletions(-) create mode 100644 backend/src/Api/Endpoints/AskSse.cs create mode 100644 backend/src/Application/Ai/RagAskHistory.cs create mode 100644 tests/TextStack.UnitTests/AskSseTests.cs create mode 100644 tests/TextStack.UnitTests/RagAskHistoryTests.cs create mode 100644 tests/TextStack.UnitTests/RagAskServiceTests.cs diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c02d583..e367e9ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ ## [Unreleased] +### Ask this book — conversational, streaming web chat — backend (AI-028) (2026-06-19) + +Backend for the conversational "Ask this book" upgrade: **model bump + multi-turn memory + warm-companion prompt + SSE streaming**, with grounding, citations, and the spoiler gate intact. `rag.ask` now routes to a dedicated keyed provider `openai-rag` on **gpt-4.1-mini** (was gpt-4.1-nano), mirroring `openai-explain` (`OpenAI:RagAsk:Model`, `Ai:Routes:rag.ask → openai-rag`, decorator-loop entry, `ModelRegistrySeeder` row). The system prompt is rewritten from "answer ONLY from excerpts else refuse" to a **warm reading companion** that is still strictly grounded — every book-fact claim must come from the numbered excerpts and cite `[n]` (citation contract + parser unchanged), but greetings/meta ("hi", "what can you do") get a warm invite with **no forced citation and no refusal**, and a genuine question with no matching excerpt gets a graceful "I don't see that in what you've read so far" rather than an invented fact. **Multi-turn**: `AskRequest` gains `History: AskTurnDto[]` (role `"user"`/`"assistant"`); the server defensively clamps to the **last 6 turns**, caps each turn at 4000 chars, normalizes roles, and assembles a real chat (system → numbered-excerpts context block → prior turns → new question last). Retrieval still runs on the latest question only, so the grounding eval is byte-identical with `[]` history. **SSE** (content-negotiated, mirrors Explain): `Accept: text/event-stream` → `delta` events (token fragments) then a terminal `done` carrying `{ citations, lastReadOrd, insufficient }` (camelCase, identical citation shape to the JSON path); empty-chunks → one friendly `delta` + `done {insufficient:true}` with **no model call**; provider/mid-stream failure → terminal `error`. JSON path returns the unchanged `AskResponse` (eval + mobile keep working). Ask `MaxOutputTokens` raised 320 → 400 for conversational length. `dotnet build -c Release` clean; 868 unit tests green (history clamp, multi-turn message assembly, SSE event sequencing over a fake delta stream, companion greeting-vs-content prompt structure) + integration (catalog spoiler-gate, owner-404, SSE content-type + framing, JSON history passthrough — skip-on-unavailable). **Note: the grounding golden eval (`RagEvalRunner`) MUST be re-run on mini post-deploy** — the companion prompt loosened the refusal rule, so this is the real hallucination-risk gate (paid; not runnable in CI). Frontend = parallel agent (AI-026e). + +### Ask this book — conversational, streaming web chat (AI-026e) (2026-06-19) + +"Ask this book" goes from one-shot Q&A to a streaming, multi-turn reading companion on the web. Answers now stream **token-by-token**: the in-progress assistant turn grows live with a subtle blinking caret, citation chips render under it once the stream's `done` event lands. The panel sends the **last 6 turns** of history (bounded client-side) on each request for follow-up context, and shows **3–4 suggested starter questions** ("Summarize what I've read so far", "Who are the main characters?", "Explain the key idea so far", "What should I pay attention to?") on an empty, Ready thread — one click submits. Reuses the existing `postSse` SSE-over-POST client (same path Explain streams on) — `askStream(target, question, history, {onDelta,onDone,onError,signal})` POSTs `{question, history, currentChapterId?}` with `Accept: text/event-stream`, routing by `target.kind` to catalog `/books/{id}/ask` or userbook `/me/books/{id}/ask`. Browsers that can't stream fall back to the JSON single-turn endpoint; 401 → sign-in. Grounding, citation chips + jump-to-passage, and the Prepare → Preparing N/M → Ready/Failed index state machine are **unchanged** (server owns grounding). Shared `AskTurnDto` added to `@textstack/shared` (web + mobile consume; mobile keeps its JSON path). tsc + web build clean; mobile tsc clean; 564 web tests green (extended `useAsk` for delta accumulation → done-sets-citations, history bounded to 6, abort-on-unmount; `AskPanel` for starters render + submit). Backend = parallel agent. + ### Ask this book — on-demand indexing, Phase 2 (user-uploaded books) — backend (2026-06-19) Backend for "Ask this book" over **user-uploaded books** (`UserBook`/`UserChapter`), mirroring the P1 catalog path with **per-user isolation as a hard requirement** (a user must never retrieve another user's chunks). New **isolated** `user_chapter_chunk` table (NOT polymorphic with `chapter_chunk`) carries a **denormalized `user_id`** alongside `user_book_id`; retrieval filters on **both** in SQL (defense in depth), in both the vector and lexical branches. `UserBook` gains the same `rag_status/rag_chunk_count/rag_embedded_count/rag_indexed_at/rag_error` fields as `Edition`. `BookChunkingService.ChunkUserBookAsync` chunks `UserChapter.PlainText` into the new table (stamping the owner id from the book); the existing `ChapterEmbeddingWorker` now runs a **second batch poll** over `user_chapter_chunk` on the **same single OpenAI drain** (no second worker) and flips `user_books.rag_status → Ready` when `embedded == chunk`. `IRagService.RetrieveUserBookAsync` reuses the identical RRF hybrid (vector NN + lexical FTS) via a shared private helper, so fusion/vector-format/timeout stay byte-identical to the catalog path. Owner-scoped endpoints: `POST/GET /me/books/{id}/index` (atomic claim `WHERE id=@id AND user_id=@uid AND rag_status IN (0,3)`, clears stale chunks before re-chunk, rate-limited `rag.index`) and `POST /me/books/{id}/ask` (**no spoiler gate** — full-book retrieval over the user's own document, no private-notes corpus; reuses `RagAskService.AskFromChunksAsync`; 404 if not the owner's book). `GET /me/books/{id}` detail DTO gains `ragStatus`/counts. P1 catalog path unchanged. Migration `AddUserBookRagIndex` (creates `user_chapter_chunk` + HNSW/GIN/`(user_id, user_book_id)` indexes + generated `search_vector` + cascade FKs from both `user_books` and `user_chapters`, plus the `user_books.rag_*` columns; Up/Down verified against pgvector). 852 unit tests green (incl. `ChunkUserBookAsync` row shape + a SQL-level isolation guard asserting both `user_id`/`user_book_id` filters in both retrievers) + integration tests (unauth→401, non-owner→404, owner→202/200/answer). Frontend = parallel agent. diff --git a/apps/web/src/api/ask.ts b/apps/web/src/api/ask.ts index 34b021ae..03e9d0fd 100644 --- a/apps/web/src/api/ask.ts +++ b/apps/web/src/api/ask.ts @@ -1,10 +1,77 @@ import { authFetch } from './client' -import type { AskResponse } from '@textstack/shared' +import type { AskResponse, AskCitation, AskTurnDto } from '@textstack/shared' +import { postSse } from '../lib/sse' import type { RagIndexState, RagIndexStatus } from '../types/api' -export type { AskResponse, AskCitation } from '@textstack/shared' +export type { AskResponse, AskCitation, AskTurnDto } from '@textstack/shared' export type { RagIndexState, RagIndexStatus } from '../types/api' +// API_BASE: host (dev) or '' (prod, nginx strips /api). Backend route has no prefix. +const API_BASE = import.meta.env.VITE_API_URL ?? '' + +/** Most recent conversation turns to send back for multi-turn context (AI-026e). */ +export const MAX_HISTORY_TURNS = 6 + +/** Terminal `done` payload of a streamed ask (AI-026e). */ +export interface AskDone { + citations: AskCitation[] + lastReadOrd: number + insufficient: boolean +} + +interface AskStreamCallbacks { + onDelta: (fragment: string) => void + onDone: (done: AskDone) => void + onError: (message: string) => void + signal?: AbortSignal +} + +/** + * Streaming "Ask this book" (AI-026e). POSTs the question + bounded `history` with + * `Accept: text/event-stream` and consumes SSE via {@link postSse}: `delta` fragments append to the + * answer, `done` carries citations/insufficient, `error` surfaces a message. URL routes by + * `target.kind` (catalog vs userbook). Throws `SseUnauthorizedError`/`SseUnsupportedError` from + * `postSse` for the caller to handle (sign-in / JSON fallback). + */ +export function askStream( + target: AskTarget, + question: string, + history: AskTurnDto[], + { onDelta, onDone, onError, signal }: AskStreamCallbacks, + currentChapterId?: string, +): Promise { + const url = + target.kind === 'userbook' + ? `${API_BASE}/me/books/${target.id}/ask` + : `${API_BASE}/books/${target.id}/ask` + const body = { + question, + history: history.slice(-MAX_HISTORY_TURNS), + ...(currentChapterId ? { currentChapterId } : {}), + } + return postSse( + url, + body, + e => { + if (signal?.aborted) return + if (e.event === 'delta') onDelta(e.data) + else if (e.event === 'done') { + try { + const parsed = JSON.parse(e.data) as Partial + onDone({ + citations: parsed.citations ?? [], + lastReadOrd: parsed.lastReadOrd ?? 0, + insufficient: Boolean(parsed.insufficient), + }) + } catch { + onDone({ citations: [], lastReadOrd: 0, insufficient: false }) + } + } else if (e.event === 'error') onError(e.data || 'Ask failed') + }, + signal, + ) +} + /** * Identifies what the "Ask this book" panel is pointed at (AI-027 P2). A catalog `edition` * routes to `/books/{id}/...`; a user-uploaded `userbook` routes to `/me/books/{id}/...`. diff --git a/apps/web/src/components/reader/AskPanel.tsx b/apps/web/src/components/reader/AskPanel.tsx index b8f95acd..6c7b1d5f 100644 --- a/apps/web/src/components/reader/AskPanel.tsx +++ b/apps/web/src/components/reader/AskPanel.tsx @@ -52,6 +52,15 @@ export function AskPanel({ setInput('') } + const submitStarter = (q: string) => { + if (isLoading) return + ask(q) + } + + // Suggested starter questions, shown only on an empty, Ready thread (AI-026e). + const starterKeys = ['summary', 'characters', 'keyIdea', 'attention'] as const + const showStarters = history.length === 0 && status === 'Ready' && isAuthenticated && !isLoading + return ( <>
@@ -69,11 +78,35 @@ export function AskPanel({ {history.length === 0 && !isLoading && (

{t('reader.ask.empty')}

)} + {showStarters && ( +
+

{t('reader.ask.startersTitle')}

+ {starterKeys.map(key => ( + + ))} +
+ )} {history.map((turn, i) => (

{turn.question}

-

{turn.answer}

- {turn.citations.length > 0 && ( +

+ {turn.answer} + {turn.streaming &&

+ {turn.streaming && !turn.answer && ( +
+ + {t('reader.ask.thinking')} +
+ )} + {!turn.streaming && turn.citations.length > 0 && (
{turn.citations.map(c => (
diff --git a/apps/web/src/components/reader/__tests__/AskPanel.test.tsx b/apps/web/src/components/reader/__tests__/AskPanel.test.tsx index 25959232..9e619314 100644 --- a/apps/web/src/components/reader/__tests__/AskPanel.test.tsx +++ b/apps/web/src/components/reader/__tests__/AskPanel.test.tsx @@ -38,6 +38,7 @@ const baseProps = { afterEach(() => { cleanup() askState.history = [] + askState.ask = vi.fn() ragState.status = 'Ready' ragState.chunkCount = 0 ragState.embeddedCount = 0 @@ -96,7 +97,7 @@ describe('AskPanel', () => { it('renders a citation chip and navigates on click', () => { const citation = { marker: 1, chunkId: 'c1', chapterId: 'ch1', chapterOrd: 4, charStart: 0, charEnd: 1, preview: 'snippet' } - askState.history = [{ question: 'q', answer: 'a [1]', citations: [citation], insufficient: false }] + askState.history = [{ question: 'q', answer: 'a [1]', citations: [citation], insufficient: false, streaming: false }] const onNavigateToCitation = vi.fn() render() @@ -105,4 +106,32 @@ describe('AskPanel', () => { expect(onNavigateToCitation).toHaveBeenCalledWith(citation) }) + + it('shows starter questions on an empty, Ready thread and submits one on click', () => { + askState.history = [] + ragState.status = 'Ready' + const ask = vi.fn() + askState.ask = ask + + render() + + expect(screen.getByText('reader.ask.startersTitle')).toBeTruthy() + const starter = screen.getByText('reader.ask.starters.summary') + fireEvent.click(starter) + expect(ask).toHaveBeenCalledWith('reader.ask.starters.summary') + }) + + it('hides starters once the thread has a turn', () => { + askState.history = [{ question: 'q', answer: 'a', citations: [], insufficient: false, streaming: false }] + ragState.status = 'Ready' + render() + expect(screen.queryByText('reader.ask.startersTitle')).toBeNull() + }) + + it('does not show starters until the index is Ready', () => { + askState.history = [] + ragState.status = 'NotIndexed' + render() + expect(screen.queryByText('reader.ask.startersTitle')).toBeNull() + }) }) diff --git a/apps/web/src/hooks/useAsk.test.ts b/apps/web/src/hooks/useAsk.test.ts index 75927c04..6285e3ae 100644 --- a/apps/web/src/hooks/useAsk.test.ts +++ b/apps/web/src/hooks/useAsk.test.ts @@ -1,12 +1,24 @@ import { describe, it, expect, vi, beforeEach } from 'vitest' import { renderHook, act, waitFor } from '@testing-library/react' -vi.mock('../api/ask', () => ({ ask: vi.fn(), askUserBook: vi.fn() })) -import { ask as askApi, askUserBook as askUserBookApi, type AskTarget } from '../api/ask' +vi.mock('../api/ask', () => ({ + ask: vi.fn(), + askUserBook: vi.fn(), + askStream: vi.fn(), + MAX_HISTORY_TURNS: 6, +})) +import { + ask as askJsonApi, + askUserBook as askUserBookJsonApi, + askStream as askStreamApi, + type AskTarget, +} from '../api/ask' +import { SseUnsupportedError } from '../lib/sse' import { useAsk } from './useAsk' -const mockAsk = askApi as unknown as ReturnType -const mockAskUserBook = askUserBookApi as unknown as ReturnType +const mockAskStream = askStreamApi as unknown as ReturnType +const mockAskJson = askJsonApi as unknown as ReturnType +const mockAskUserBookJson = askUserBookJsonApi as unknown as ReturnType const edition: AskTarget = { kind: 'edition', id: 'ed-1' } const userbook: AskTarget = { kind: 'userbook', id: 'ub-1' } @@ -15,69 +27,133 @@ const citation = { marker: 1, chunkId: 'c1', chapterId: 'ch1', chapterOrd: 2, charStart: 0, charEnd: 5, preview: 'preview', } -describe('useAsk', () => { +type Callbacks = { + onDelta: (s: string) => void + onDone: (d: { citations: unknown[]; lastReadOrd: number; insufficient: boolean }) => void + onError: (m: string) => void + signal?: AbortSignal +} + +/** Drives askStream: emit deltas then a done payload. */ +function streamDeltas(deltas: string[], done: Partial<{ citations: unknown[]; insufficient: boolean }> = {}) { + mockAskStream.mockImplementationOnce( + async (_t: AskTarget, _q: string, _h: unknown, cb: Callbacks) => { + for (const d of deltas) cb.onDelta(d) + cb.onDone({ citations: done.citations ?? [], lastReadOrd: 0, insufficient: Boolean(done.insufficient) }) + }, + ) +} + +describe('useAsk (streaming)', () => { beforeEach(() => { - mockAsk.mockReset() - mockAskUserBook.mockReset() + mockAskStream.mockReset() + mockAskJson.mockReset() + mockAskUserBookJson.mockReset() }) - it('appends a turn on success', async () => { - mockAsk.mockResolvedValueOnce({ answer: 'Because [1].', citations: [citation], lastReadOrd: 3, insufficient: false }) + it('accumulates deltas into the turn answer, then done sets citations', async () => { + streamDeltas(['Because ', 'of [1].'], { citations: [citation] }) const { result } = renderHook(() => useAsk(edition)) await act(() => result.current.ask('why?')) - await waitFor(() => expect(result.current.history).toHaveLength(1)) - expect(result.current.history[0]).toMatchObject({ question: 'why?', answer: 'Because [1].' }) + await waitFor(() => expect(result.current.isLoading).toBe(false)) + expect(result.current.history).toHaveLength(1) + expect(result.current.history[0]).toMatchObject({ question: 'why?', answer: 'Because of [1].', streaming: false }) expect(result.current.history[0].citations).toHaveLength(1) expect(result.current.error).toBeNull() }) - it('sets error and keeps history empty on failure', async () => { - mockAsk.mockRejectedValueOnce(new Error('boom')) + it('flags insufficient turns from the done payload', async () => { + streamDeltas(['Read more first.'], { insufficient: true }) const { result } = renderHook(() => useAsk(edition)) await act(() => result.current.ask('why?')) - await waitFor(() => expect(result.current.error).toBe('boom')) - expect(result.current.history).toHaveLength(0) + await waitFor(() => expect(result.current.history).toHaveLength(1)) + expect(result.current.history[0].insufficient).toBe(true) }) - it('flags insufficient turns', async () => { - mockAsk.mockResolvedValueOnce({ answer: 'Read more first.', citations: [], lastReadOrd: 0, insufficient: true }) + it('sends bounded last-6-turn history on the next question', async () => { + // Seed 7 prior answered turns via repeated streamed asks. const { result } = renderHook(() => useAsk(edition)) + for (let i = 0; i < 7; i++) { + streamDeltas([`answer-${i}`]) + await act(() => result.current.ask(`q-${i}`)) + await waitFor(() => expect(result.current.isLoading).toBe(false)) + } + + // The 8th question should carry only the last 6 turns (12 user/assistant messages). + streamDeltas(['final']) + await act(() => result.current.ask('q-final')) + await waitFor(() => expect(mockAskStream).toHaveBeenCalledTimes(8)) + + const lastCall = mockAskStream.mock.calls[7] + const sentHistory = lastCall[2] as { role: string; content: string }[] + // Hook bounds to last 6 *turns* (slice(-6)); each turn = 2 messages → 12 messages. + expect(sentHistory.length).toBeLessThanOrEqual(12) + // Earliest sent message must not be q-0 (it was dropped by the bound). + expect(sentHistory[0].content).not.toBe('q-0') + }) + + it('routes a userbook target to the userbook ask url', async () => { + streamDeltas(['ok']) + const { result } = renderHook(() => useAsk(userbook, 'chap-guid-9')) await act(() => result.current.ask('why?')) - await waitFor(() => expect(result.current.history).toHaveLength(1)) - expect(result.current.history[0].insufficient).toBe(true) + await waitFor(() => expect(mockAskStream).toHaveBeenCalled()) + expect(mockAskStream.mock.calls[0][0]).toEqual(userbook) + expect(mockAskStream.mock.calls[0][4]).toBe('chap-guid-9') }) - it('forwards currentChapterId to the api when provided', async () => { - mockAsk.mockResolvedValueOnce({ answer: 'ok', citations: [], lastReadOrd: 0, insufficient: false }) - const { result } = renderHook(() => useAsk(edition, 'chap-guid-4')) + it('surfaces a stream error event and clears streaming flag', async () => { + mockAskStream.mockImplementationOnce(async (_t, _q, _h, cb: Callbacks) => { + cb.onDelta('Partial ') + cb.onError('Ask service unavailable') + }) + const { result } = renderHook(() => useAsk(edition)) await act(() => result.current.ask('why?')) - await waitFor(() => expect(mockAsk).toHaveBeenCalled()) - expect(mockAsk).toHaveBeenCalledWith('ed-1', 'why?', undefined, expect.anything(), 'chap-guid-4') + await waitFor(() => expect(result.current.error).toBe('Ask service unavailable')) + expect(result.current.history[0].answer).toBe('Partial ') + expect(result.current.history[0].streaming).toBe(false) }) - it('routes a userbook target to askUserBook (not the catalog ask)', async () => { - mockAskUserBook.mockResolvedValueOnce({ answer: 'ok', citations: [], lastReadOrd: 0, insufficient: false }) - const { result } = renderHook(() => useAsk(userbook, 'chap-guid-9')) + it('falls back to the JSON request when streaming is unsupported', async () => { + mockAskStream.mockRejectedValueOnce(new SseUnsupportedError()) + mockAskJson.mockResolvedValueOnce({ answer: 'Plain answer.', citations: [citation], lastReadOrd: 1, insufficient: false }) + const { result } = renderHook(() => useAsk(edition)) await act(() => result.current.ask('why?')) - await waitFor(() => expect(mockAskUserBook).toHaveBeenCalled()) - expect(mockAskUserBook).toHaveBeenCalledWith('ub-1', 'why?', undefined, expect.anything(), 'chap-guid-9') - expect(mockAsk).not.toHaveBeenCalled() + await waitFor(() => expect(result.current.history[0].answer).toBe('Plain answer.')) + expect(mockAskJson).toHaveBeenCalledTimes(1) + expect(result.current.error).toBeNull() + }) + + it('aborts the in-flight stream on unmount', async () => { + let captured: AbortSignal | undefined + mockAskStream.mockImplementationOnce( + (_t, _q, _h, cb: Callbacks) => { + captured = cb.signal + return new Promise(() => {}) // never resolves + }, + ) + const { result, unmount } = renderHook(() => useAsk(edition)) + + act(() => { void result.current.ask('why?') }) + await waitFor(() => expect(captured).toBeDefined()) + expect(captured!.aborted).toBe(false) + + unmount() + expect(captured!.aborted).toBe(true) }) it('no-ops without a target', async () => { const { result } = renderHook(() => useAsk(undefined)) await act(() => result.current.ask('why?')) - expect(mockAsk).not.toHaveBeenCalled() - expect(mockAskUserBook).not.toHaveBeenCalled() + expect(mockAskStream).not.toHaveBeenCalled() }) }) diff --git a/apps/web/src/hooks/useAsk.ts b/apps/web/src/hooks/useAsk.ts index 9c38e2b2..f25270ea 100644 --- a/apps/web/src/hooks/useAsk.ts +++ b/apps/web/src/hooks/useAsk.ts @@ -1,35 +1,87 @@ import { useState, useCallback, useRef, useEffect } from 'react' -import { ask as askApi, askUserBook as askUserBookApi, type AskCitation, type AskTarget } from '../api/ask' +import { + ask as askJsonApi, + askUserBook as askUserBookJsonApi, + askStream, + MAX_HISTORY_TURNS, + type AskCitation, + type AskTarget, + type AskTurnDto, +} from '../api/ask' import { ApiError } from '../api/client' +import { SseUnauthorizedError, SseUnsupportedError } from '../lib/sse' export interface AskTurn { question: string answer: string citations: AskCitation[] insufficient: boolean + /** True while the answer is still streaming in (token-by-token). */ + streaming: boolean +} + +/** Flattens the visible Q&A history into the user/assistant turn list the server expects. */ +function toDto(history: AskTurn[]): AskTurnDto[] { + const turns: AskTurnDto[] = [] + for (const t of history) { + turns.push({ role: 'user', content: t.question }) + if (t.answer) turns.push({ role: 'assistant', content: t.answer }) + } + return turns } /** - * Session "Ask this book" state (AI-026a): an in-memory Q&A history (not persisted), plus loading - * and error. `ask` appends a turn; in-flight requests are aborted on a new question / unmount. + * Session "Ask this book" state (AI-026e): a streamed, multi-turn in-memory Q&A history (not + * persisted), plus loading and error. `ask` optimistically appends a turn, streams the answer + * token-by-token (`delta` → append; `done` → citations/insufficient), and sends the last 6 turns + * for context. In-flight requests abort on a new question / unmount. * - * `target.kind` (AI-027 P2) routes the POST — catalog `/books/{id}/ask` vs user-upload - * `/me/books/{id}/ask`. + * `target.kind` (AI-027 P2) routes the request — catalog `/books/{id}/ask` vs user-upload + * `/me/books/{id}/ask`. Browsers that can't stream fall back to the JSON single-turn endpoint. */ export function useAsk(target: AskTarget | undefined, currentChapterId?: string) { - const id = target?.id - const kind = target?.kind const [history, setHistory] = useState([]) const [isLoading, setIsLoading] = useState(false) const [error, setError] = useState(null) const abortRef = useRef(null) + const mountedRef = useRef(true) - useEffect(() => () => abortRef.current?.abort(), []) + useEffect(() => { + mountedRef.current = true + return () => { + mountedRef.current = false + abortRef.current?.abort() + } + }, []) + + /** Append a fragment to the last (streaming) turn's answer. */ + const appendDelta = useCallback((fragment: string) => { + setHistory(prev => { + if (prev.length === 0) return prev + const next = prev.slice() + const last = next[next.length - 1] + next[next.length - 1] = { ...last, answer: last.answer + fragment } + return next + }) + }, []) + + const finishTurn = useCallback( + (patch: Partial) => { + setHistory(prev => { + if (prev.length === 0) return prev + const next = prev.slice() + const last = next[next.length - 1] + next[next.length - 1] = { ...last, ...patch, streaming: false } + return next + }) + }, + [], + ) const ask = useCallback( async (question: string) => { const q = question.trim() - if (!q || !id || isLoading) return + if (!q || !target || isLoading) return abortRef.current?.abort() const ctrl = new AbortController() @@ -37,22 +89,63 @@ export function useAsk(target: AskTarget | undefined, currentChapterId?: string) setIsLoading(true) setError(null) + // History to send is the last N turns *before* this question, flattened to messages. + const priorHistory = toDto(history.slice(-MAX_HISTORY_TURNS)) + setHistory(prev => [ + ...prev, + { question: q, answer: '', citations: [], insufficient: false, streaming: true }, + ]) + try { - const fn = kind === 'userbook' ? askUserBookApi : askApi - const res = await fn(id, q, undefined, ctrl.signal, currentChapterId) - setHistory(prev => [ - ...prev, - { question: q, answer: res.answer, citations: res.citations, insufficient: res.insufficient }, - ]) + await askStream( + target, + q, + priorHistory, + { + onDelta: appendDelta, + onDone: done => { + if (ctrl.signal.aborted) return + finishTurn({ citations: done.citations, insufficient: done.insufficient }) + }, + onError: msg => { + if (ctrl.signal.aborted) return + finishTurn({}) + setError(msg) + }, + signal: ctrl.signal, + }, + currentChapterId, + ) + if (!ctrl.signal.aborted) finishTurn({}) } catch (err) { if ((err as { name?: string })?.name === 'AbortError') return - if (err instanceof ApiError && err.status === 401) setError('auth') - else setError(err instanceof Error ? err.message : 'Ask failed') + if (ctrl.signal.aborted) return + + // No streaming support → one-shot JSON request (single-turn fallback). + if (err instanceof SseUnsupportedError) { + try { + const fn = target.kind === 'userbook' ? askUserBookJsonApi : askJsonApi + const res = await fn(target.id, q, undefined, ctrl.signal, currentChapterId) + if (ctrl.signal.aborted) return + finishTurn({ answer: res.answer, citations: res.citations, insufficient: res.insufficient }) + } catch (fallbackErr) { + if ((fallbackErr as { name?: string })?.name === 'AbortError') return + finishTurn({}) + if (fallbackErr instanceof ApiError && fallbackErr.status === 401) setError('auth') + else setError(fallbackErr instanceof Error ? fallbackErr.message : 'Ask failed') + } + } else if (err instanceof SseUnauthorizedError) { + finishTurn({}) + setError('auth') + } else { + finishTurn({}) + setError(err instanceof Error ? err.message : 'Ask failed') + } } finally { - if (abortRef.current === ctrl) setIsLoading(false) + if (abortRef.current === ctrl && mountedRef.current) setIsLoading(false) } }, - [id, kind, isLoading, currentChapterId], + [target, isLoading, history, currentChapterId, appendDelta, finishTurn], ) return { history, isLoading, error, ask } diff --git a/apps/web/src/locales/en.json b/apps/web/src/locales/en.json index b88eca93..3c6e471d 100644 --- a/apps/web/src/locales/en.json +++ b/apps/web/src/locales/en.json @@ -200,7 +200,14 @@ "prepareCta": "Prepare this book for questions", "preparing": "Preparing this book… {{done}}/{{total}}", "indexFailed": "Preparation failed.", - "indexRetry": "Retry" + "indexRetry": "Retry", + "startersTitle": "Try asking", + "starters": { + "summary": "Summarize what I've read so far", + "characters": "Who are the main characters?", + "keyIdea": "Explain the key idea so far", + "attention": "What should I pay attention to?" + } }, "studyBuddy": { "title": "Help me understand this", diff --git a/apps/web/src/styles/reader.css b/apps/web/src/styles/reader.css index 6752d3db..ac619130 100644 --- a/apps/web/src/styles/reader.css +++ b/apps/web/src/styles/reader.css @@ -2270,6 +2270,43 @@ html[data-theme="dark"] .vocab-translation-overlay__item { font-size: 14px; margin: 0; } +/* Streaming caret on the in-progress answer (AI-026e) */ +.ask-panel__cursor { + display: inline-block; + width: 2px; + height: 1em; + margin-left: 2px; + vertical-align: text-bottom; + background: var(--reader-accent, #2563eb); + animation: stream-cursor-blink 1s steps(2, start) infinite; +} +/* Suggested starter questions on an empty thread (AI-026e) */ +.ask-panel__starters { + display: flex; + flex-direction: column; + gap: 8px; + margin-top: 8px; +} +.ask-panel__starters-title { + margin: 0 0 2px; + font-size: 12px; + text-transform: uppercase; + letter-spacing: 0.04em; + color: var(--reader-secondary); +} +.ask-panel__starter { + text-align: left; + font-size: 14px; + padding: 10px 12px; + border-radius: 10px; + border: 1px solid var(--reader-border); + background: var(--reader-bar-bg); + color: var(--reader-text); + cursor: pointer; +} +.ask-panel__starter:hover { + border-color: var(--reader-text); +} .ask-panel__composer { flex: 0 0 auto; border-top: 1px solid var(--reader-border); diff --git a/backend/src/Ai/TextStack.Ai.EvalSuite/RagEvalRunner.cs b/backend/src/Ai/TextStack.Ai.EvalSuite/RagEvalRunner.cs index 15cb6166..1ce45dbe 100644 --- a/backend/src/Ai/TextStack.Ai.EvalSuite/RagEvalRunner.cs +++ b/backend/src/Ai/TextStack.Ai.EvalSuite/RagEvalRunner.cs @@ -153,7 +153,7 @@ private async Task JudgeCitationsAsync( { ct.ThrowIfCancellationRequested(); // lastReadOrd is irrelevant here — chunks are supplied directly (no user gating). - var answer = await ask.AskFromChunksAsync(question, chunks, [], lastReadOrd: int.MaxValue, ct); + var answer = await ask.AskFromChunksAsync(question, chunks, [], [], lastReadOrd: int.MaxValue, ct); if (answer.Insufficient || answer.Citations.Count == 0) continue; answersGenerated++; diff --git a/backend/src/Api/Endpoints/AskEndpoints.cs b/backend/src/Api/Endpoints/AskEndpoints.cs index c8890502..4a8d7674 100644 --- a/backend/src/Api/Endpoints/AskEndpoints.cs +++ b/backend/src/Api/Endpoints/AskEndpoints.cs @@ -1,5 +1,6 @@ using Api.Extensions; using Api.Sites; +using Application.Ai; using Application.Auth; using Application.Common.Interfaces; using Application.Rag; @@ -11,14 +12,14 @@ namespace Api.Endpoints; /// -/// Public "Ask this book" (Phase 4 RAG, AI-025). Authenticated question over a catalog edition → -/// spoiler-safe retrieval () → grounded answer with citations. JSON -/// for now; SSE/token streaming lands in AI-028. The reader UI + citation deep-links are AI-026. +/// Public "Ask this book" (Phase 4 RAG, AI-025; conversational upgrade AI-028). Authenticated question +/// over a catalog edition → spoiler-safe retrieval () → grounded, +/// conversational answer with citations. Content-negotiated: Accept: text/event-stream streams +/// tokens (SSE — delta* then done); anything else returns the original JSON +/// (eval + mobile keep working). Multi-turn history is carried on the request. /// public static class AskEndpoints { - private const int PreviewChars = 200; - public static void MapAskEndpoints(this WebApplication app) { app.MapPost("/books/{editionId:guid}/ask", Ask) @@ -33,6 +34,7 @@ private static async Task Ask( AuthService authService, IAppDbContext db, IServiceProvider services, + ILogger logger, CancellationToken ct) { var userId = httpContext.GetUserId(authService); @@ -48,9 +50,11 @@ private static async Task Ask( // RagAskService construction pulls in the embedder, which throws without an OpenAI key — // surface that as a clean 503 rather than a generic 500. + RagContextService context; RagAskService ask; try { + context = services.GetRequiredService(); ask = services.GetRequiredService(); } catch (InvalidOperationException) @@ -58,17 +62,25 @@ private static async Task Ask( return Results.Problem("Ask is not configured (no OpenAI key).", statusCode: 503); } + var k = request.K is > 0 ? request.K.Value : IRagService.DefaultK; + var history = RagAskHistory.Clamp(request.History); + + if (AskSse.WantsSse(httpContext)) + { + // Build the spoiler-safe context first (the gate/retrieval), then stream the answer. + var ctx = await context.BuildAsync(userId.Value, siteId, editionId, request.Question, k, request.CurrentChapterId, ct); + var noteTexts = ctx.Notes.Select(n => n.Text).ToList(); + return AskSse.Stream( + httpContext, + ct2 => ask.StreamAsync(request.Question, ctx.Chunks, noteTexts, history, ctx.LastReadOrd, ct2), + logger); + } + try { - var k = request.K is > 0 ? request.K.Value : IRagService.DefaultK; var answer = await ask.AskAsync( - userId.Value, siteId, editionId, request.Question, k, request.CurrentChapterId, ct); - - var citations = answer.Citations.Select(c => new AskCitation( - c.Marker, c.Chunk.ChunkId, c.Chunk.ChapterId, c.Chunk.ChapterOrd, - c.Chunk.CharStart, c.Chunk.CharEnd, Preview(c.Chunk.Text))).ToList(); - - return Results.Ok(new AskResponse(answer.Answer, citations, answer.LastReadOrd, answer.Insufficient)); + userId.Value, siteId, editionId, request.Question, k, request.CurrentChapterId, history, ct); + return Results.Ok(AskSse.ToResponse(answer)); } catch (OperationCanceledException) when (ct.IsCancellationRequested) { @@ -79,7 +91,4 @@ private static async Task Ask( return Results.Problem("Ask is temporarily unavailable.", statusCode: 503); } } - - private static string Preview(string text) => - text.Length <= PreviewChars ? text : text[..PreviewChars] + "…"; } diff --git a/backend/src/Api/Endpoints/AskSse.cs b/backend/src/Api/Endpoints/AskSse.cs new file mode 100644 index 00000000..87190d3c --- /dev/null +++ b/backend/src/Api/Endpoints/AskSse.cs @@ -0,0 +1,144 @@ +using System.Net.ServerSentEvents; +using System.Runtime.CompilerServices; +using System.Text.Json; +using Application.Rag; +using Contracts.Books; + +namespace Api.Endpoints; + +/// +/// Shared "Ask this book" plumbing for the catalog + user-book endpoints (AI-028): content negotiation, +/// the JSON-response projection, and the SSE event stream. Mirrors : an +/// Accept: text/event-stream request gets delta events (one per token fragment) then a +/// terminal done carrying { citations, lastReadOrd, insufficient }; a failure mid-stream +/// emits a terminal error event. Buffering is disabled so Cloudflare/nginx don't hold the stream. +/// +public static class AskSse +{ + private const int PreviewChars = 200; + + // Match ASP.NET Core's default camelCase JSON (Results.Ok) so SSE + JSON citations are identical. + private static readonly JsonSerializerOptions DoneJson = new(JsonSerializerDefaults.Web); + + /// True when the caller asked for an SSE token stream. + public static bool WantsSse(HttpContext httpContext) => + httpContext.Request.Headers.Accept.Any(v => + v is not null && v.Contains("text/event-stream", StringComparison.OrdinalIgnoreCase)); + + /// Projects the service answer onto the public JSON contract (citations → previews). + public static AskResponse ToResponse(AskAnswer answer) + { + var citations = answer.Citations.Select(ToCitation).ToList(); + return new AskResponse(answer.Answer, citations, answer.LastReadOrd, answer.Insufficient); + } + + /// + /// SSE response over a service stream. Disables proxy buffering, then + /// relays text deltas as delta events and the terminal item as done (or error). + /// + public static IResult Stream( + HttpContext httpContext, + Func> source, + ILogger logger) + { + // Cloudflare/nginx must not buffer the stream. + httpContext.Response.Headers["X-Accel-Buffering"] = "no"; + httpContext.Response.Headers.CacheControl = "no-cache"; + + return TypedResults.ServerSentEvents( + StreamEventsAsync(source, ex => logger.LogError(ex, "Ask stream failed"), httpContext.RequestAborted)); + } + + /// + /// Maps service stream items to SSE items: a delta per text fragment, a terminal done + /// (serialized citations + gating), or a terminal error. Provider resolution can throw before + /// the first item — the service surfaces that as an . Pure over its + /// delegate so it's unit-tested directly. + /// + public static async IAsyncEnumerable> StreamEventsAsync( + Func> source, + Action? onException = null, + [EnumeratorCancellation] CancellationToken ct = default) + { + IAsyncEnumerable? stream = null; + string? error = null; + try + { + stream = source(ct); + } + catch (Exception ex) + { + onException?.Invoke(ex); + error = "Ask is temporarily unavailable."; + } + + if (error is not null) + { + yield return new SseItem(error, "error"); + yield break; + } + + await using var e = stream!.GetAsyncEnumerator(ct); + while (true) + { + AskStreamItem item; + string? moveError = null; + try + { + if (!await e.MoveNextAsync()) + break; + item = e.Current; + } + catch (OperationCanceledException) when (ct.IsCancellationRequested) + { + throw; // client disconnected — nothing to emit + } + catch (Exception ex) + { + onException?.Invoke(ex); + moveError = "Ask is temporarily unavailable."; + item = new AskStreamItem(); + } + + if (moveError is not null) + { + yield return new SseItem(moveError, "error"); + yield break; + } + + if (item.Error is { } err) + { + yield return new SseItem(err, "error"); + yield break; + } + + if (item.TextDelta is { Length: > 0 } fragment) + { + yield return new SseItem(fragment, "delta"); + } + else if (item.Terminal is { } terminal) + { + yield return Done(terminal); + } + } + } + + private static SseItem Done(AskTerminal terminal) + { + var citations = terminal.Citations.Select(ToCitation).ToList(); + var payload = JsonSerializer.Serialize(new + { + citations, + lastReadOrd = terminal.LastReadOrd, + insufficient = terminal.Insufficient, + }, DoneJson); + return new SseItem(payload, "done"); + } + + private static AskCitation ToCitation(AskCitationSource c) => + new(c.Marker, c.Chunk.ChunkId, c.Chunk.ChapterId, c.Chunk.ChapterOrd, + c.Chunk.CharStart, c.Chunk.CharEnd, Preview(c.Chunk.Text)); + + private static string Preview(string text) => + text.Length <= PreviewChars ? text : text[..PreviewChars] + "…"; +} diff --git a/backend/src/Api/Endpoints/UserBookAskEndpoints.cs b/backend/src/Api/Endpoints/UserBookAskEndpoints.cs index 1c5c479c..45ff4ad7 100644 --- a/backend/src/Api/Endpoints/UserBookAskEndpoints.cs +++ b/backend/src/Api/Endpoints/UserBookAskEndpoints.cs @@ -1,4 +1,5 @@ using Api.Extensions; +using Application.Ai; using Application.Auth; using Application.Rag; using Contracts.Books; @@ -8,18 +9,16 @@ namespace Api.Endpoints; /// -/// "Ask this book" over a USER-uploaded book (Phase 2). Owner-scoped: the authenticated user asks a -/// question over their OWN uploaded document → per-user isolated retrieval -/// () → grounded answer with citations -/// (, shared with the catalog path). No spoiler gate — -/// it's the user's own book — so the full book is in scope. Returns the same -/// contract as the catalog endpoint. CurrentChapterId on the request -/// is accepted but ignored (no gate). 404 when the book isn't this user's. +/// "Ask this book" over a USER-uploaded book (Phase 2; conversational upgrade AI-028). Owner-scoped: +/// the authenticated user asks a question over their OWN uploaded document → per-user isolated retrieval +/// () → grounded, conversational answer with citations +/// (, shared with the catalog path). No spoiler gate — it's the user's own +/// book — so the full book is in scope. Content-negotiated SSE/JSON like the catalog endpoint; +/// multi-turn history on the request. CurrentChapterId is accepted but ignored (no gate). 404 +/// when the book isn't this user's. /// public static class UserBookAskEndpoints { - private const int PreviewChars = 200; - public static void MapUserBookAskEndpoints(this WebApplication app) { app.MapPost("/me/books/{id:guid}/ask", Ask) @@ -33,6 +32,7 @@ private static async Task Ask( HttpContext httpContext, AuthService authService, IServiceProvider services, + ILogger logger, CancellationToken ct) { var userId = httpContext.GetUserId(authService); @@ -54,24 +54,28 @@ private static async Task Ask( return Results.Problem("Ask is not configured (no OpenAI key).", statusCode: 503); } + var k = request.K is > 0 ? request.K.Value : IRagService.DefaultK; + var history = RagAskHistory.Clamp(request.History); + try { - var k = request.K is > 0 ? request.K.Value : IRagService.DefaultK; - // Ownership-scoped context build. Null => not this user's book (or taken down) → 404. var ctx = await context.BuildAsync(userId.Value, id, request.Question, k, ct); if (ctx is null) return Results.NotFound("Book not found"); // Full-book retrieval (no gate), no private-notes corpus. lastReadOrd is 0 (unused for - // user books). AskFromChunksAsync handles the empty-chunks case (book not indexed yet) by - // returning an "insufficient" answer without an LLM call. - var answer = await ask.AskFromChunksAsync(request.Question, ctx.Chunks, [], lastReadOrd: 0, ct); + // user books). The service handles the empty-chunks case (book not indexed yet) without an + // LLM call (JSON: "insufficient" answer; SSE: friendly delta + insufficient terminal). + if (AskSse.WantsSse(httpContext)) + { + return AskSse.Stream( + httpContext, + ct2 => ask.StreamAsync(request.Question, ctx.Chunks, [], history, lastReadOrd: 0, ct2), + logger); + } - var citations = answer.Citations.Select(c => new AskCitation( - c.Marker, c.Chunk.ChunkId, c.Chunk.ChapterId, c.Chunk.ChapterOrd, - c.Chunk.CharStart, c.Chunk.CharEnd, Preview(c.Chunk.Text))).ToList(); - - return Results.Ok(new AskResponse(answer.Answer, citations, answer.LastReadOrd, answer.Insufficient)); + var answer = await ask.AskFromChunksAsync(request.Question, ctx.Chunks, [], history, lastReadOrd: 0, ct); + return Results.Ok(AskSse.ToResponse(answer)); } catch (OperationCanceledException) when (ct.IsCancellationRequested) { @@ -82,7 +86,4 @@ private static async Task Ask( return Results.Problem("Ask is temporarily unavailable.", statusCode: 503); } } - - private static string Preview(string text) => - text.Length <= PreviewChars ? text : text[..PreviewChars] + "…"; } diff --git a/backend/src/Api/appsettings.json b/backend/src/Api/appsettings.json index 3f4025a6..354ba1e4 100644 --- a/backend/src/Api/appsettings.json +++ b/backend/src/Api/appsettings.json @@ -26,6 +26,9 @@ "Explain": { "Model": "gpt-4.1-mini" }, + "RagAsk": { + "Model": "gpt-4.1-mini" + }, "Embedding": { "Model": "text-embedding-3-small" }, @@ -97,7 +100,7 @@ "bookmeta": "ollama", "tagsuggestion": "ollama", "podcast.script": "openai", - "rag.ask": "openai" + "rag.ask": "openai-rag" }, "Tracing": { "Sampling": { diff --git a/backend/src/Application/Ai/RagAskHistory.cs b/backend/src/Application/Ai/RagAskHistory.cs new file mode 100644 index 00000000..78ac3288 --- /dev/null +++ b/backend/src/Application/Ai/RagAskHistory.cs @@ -0,0 +1,74 @@ +using Contracts.Books; +using TextStack.Ai.Core; + +namespace Application.Ai; + +/// +/// Multi-turn "Ask this book" history handling (AI-028). The server is stateless — the client sends +/// the prior conversation on each request — so this clamps the untrusted history (last N turns, each +/// content-capped) and assembles the final sequence: the grounded excerpts/ +/// context block, then the clamped prior turns, then the new question last. Pure so it's unit-tested. +/// +public static class RagAskHistory +{ + /// How many trailing turns of history survive the clamp (defense against unbounded prompts). + public const int MaxTurns = 6; + + /// Per-turn content cap in chars (a single pasted turn can't blow the prompt budget). + public const int MaxTurnChars = 4000; + + private const string UserRole = "user"; + private const string AssistantRole = "assistant"; + + /// + /// Defensive clamp over untrusted client history: drops blank/unknown-role turns, keeps only the + /// LAST turns, and truncates each turn's content to . + /// Roles are normalized to "user"/"assistant" (anything else is dropped). + /// + public static IReadOnlyList Clamp(IReadOnlyList? history) + { + if (history is null || history.Count == 0) + return []; + + var cleaned = new List(history.Count); + foreach (var turn in history) + { + if (turn is null || string.IsNullOrWhiteSpace(turn.Content)) + continue; + + var role = turn.Role?.Trim().ToLowerInvariant(); + if (role != UserRole && role != AssistantRole) + continue; + + var content = turn.Content.Length > MaxTurnChars ? turn.Content[..MaxTurnChars] : turn.Content; + cleaned.Add(new AskTurnDto(role, content)); + } + + if (cleaned.Count <= MaxTurns) + return cleaned; + + return cleaned.GetRange(cleaned.Count - MaxTurns, MaxTurns); + } + + /// + /// Assembles the chat messages for the gateway: a "user" message carrying the numbered excerpts + + /// the reader's notes (the grounding context block), then the clamped prior turns in order, then + /// the new question as the final "user" message. The system (companion) prompt is supplied + /// separately on the . is assumed already + /// clamped by . + /// + public static IReadOnlyList BuildMessages( + string contextBlock, IReadOnlyList history, string question) + { + var messages = new List(history.Count + 2) + { + new(UserRole, contextBlock), + }; + + foreach (var turn in history) + messages.Add(new LlmMessage(turn.Role, turn.Content)); + + messages.Add(new LlmMessage(UserRole, question)); + return messages; + } +} diff --git a/backend/src/Application/Ai/RagAskPrompt.cs b/backend/src/Application/Ai/RagAskPrompt.cs index 172a34ed..09188891 100644 --- a/backend/src/Application/Ai/RagAskPrompt.cs +++ b/backend/src/Application/Ai/RagAskPrompt.cs @@ -12,32 +12,46 @@ namespace Application.Ai; public static class RagAskPrompt { public static string BuildSystemPrompt() => - "You answer a reader's question about a book using ONLY the numbered excerpts provided. " + - "Write 2-4 sentences. Cite every claim with [n] referring to the excerpt numbers you used. " + - "If the excerpts do not contain the answer, say so plainly — do NOT use outside knowledge. " + - "No preface, no markdown."; + "You are a warm, thoughtful reading companion chatting with a reader about a book they are " + + "reading. Be friendly and conversational in tone. " + + "GROUNDING (strict, overrides everything below): the numbered excerpts below are your ONLY source " + + "of truth about this book. Every substantive statement you make about the book — its plot, " + + "characters, events, the author's words, AND any theme, symbolism, meaning, motive, or " + + "interpretation you offer — MUST be supported by those excerpts, and you MUST cite each such claim " + + "with [n] referring to the excerpt number(s) you used. Never use outside knowledge (the wider " + + "story, other works, the author's biography, plot you recall) to assert, explain, or interpret " + + "anything about this book, and never invent details that aren't in the excerpts. You may help the " + + "reader reflect, but only by reasoning from what the cited excerpts actually say — if a thought " + + "isn't supported there, don't present it as fact about the book. " + + "The ONLY messages you may answer without citing excerpts are a greeting or a question about what " + + "you can do (e.g. \"hi\", \"what can you do\"): respond warmly and invite a question about the book " + + "— do NOT cite anything and do NOT say the excerpts lack the answer. For anything else, if the " + + "excerpts don't cover it, say so gracefully (e.g. \"I don't see that in what you've read so far\") " + + "rather than guessing or filling the gap from memory. " + + "Keep replies short (a few sentences). No markdown, no preface."; /// - /// Numbered excerpts (1-based, matching the citation markers) + the reader's own notes, then the - /// question. order defines the [n] numbering. + /// The grounding context block: numbered excerpts (1-based, matching the citation markers) + the + /// reader's own notes. order defines the [n] numbering. The question is + /// NOT included — in the multi-turn assembly it's the final user message (see + /// ). /// - public static string BuildUserPrompt( - string question, IReadOnlyList chunks, IReadOnlyList notes) + public static string BuildContextBlock(IReadOnlyList chunks, IReadOnlyList notes) { var sb = new StringBuilder(); - sb.Append("Excerpts:\n"); + sb.Append("Numbered excerpts from the part of the book the reader has reached " + + "(cite these as [n] for any book fact):\n"); for (var i = 0; i < chunks.Count; i++) sb.Append('[').Append(i + 1).Append("] (ch.").Append(chunks[i].ChapterOrd).Append(") ") .Append(chunks[i].Text).Append('\n'); if (notes.Count > 0) { - sb.Append("\nYour notes:\n"); + sb.Append("\nThe reader's own notes:\n"); foreach (var note in notes) sb.Append("- ").Append(note).Append('\n'); } - sb.Append("\nQuestion: ").Append(question); return sb.ToString(); } diff --git a/backend/src/Application/DependencyInjection.cs b/backend/src/Application/DependencyInjection.cs index b55b6680..cea7d10d 100644 --- a/backend/src/Application/DependencyInjection.cs +++ b/backend/src/Application/DependencyInjection.cs @@ -106,8 +106,17 @@ public static IServiceCollection AddApplication(this IServiceCollection services sp.GetRequiredService>(), sp.GetRequiredService()["OpenAI:Explain:Model"] ?? "gpt-4.1-mini")); + // "Ask this book" (rag.ask) runs a STRONGER conversational model than the nano default + // (OpenAI:RagAsk:Model, default gpt-4.1-mini) for the warm reading-companion experience. + // Routed per-feature (Ai:Routes:rag.ask → openai-rag) so translate / podcast stay on nano. + services.AddKeyedSingleton("openai-rag-raw", (sp, key) => + new global::TextStack.Ai.Llm.OpenAiLlmClient( + sp.GetRequiredService(), + sp.GetRequiredService>(), + sp.GetRequiredService()["OpenAI:RagAsk:Model"] ?? "gpt-4.1-mini")); + // Decorated providers (keyed): TracingDecorator wraps each raw provider. - foreach (var providerKey in new[] { "openai", "ollama", "openai-judge", "openai-explain" }) + foreach (var providerKey in new[] { "openai", "ollama", "openai-judge", "openai-explain", "openai-rag" }) { services.AddKeyedSingleton(providerKey, (sp, key) => new global::TextStack.Ai.Llm.TracingDecorator( diff --git a/backend/src/Application/Rag/RagAskService.cs b/backend/src/Application/Rag/RagAskService.cs index 9fe63d82..e91c4008 100644 --- a/backend/src/Application/Rag/RagAskService.cs +++ b/backend/src/Application/Rag/RagAskService.cs @@ -1,4 +1,7 @@ +using System.Runtime.CompilerServices; +using System.Text; using Application.Ai; +using Contracts.Books; using TextStack.Ai.Core; using TextStack.Ai.Rag; @@ -17,6 +20,23 @@ public record AskAnswer( int LastReadOrd, bool Insufficient); +/// +/// A streaming chunk of an "Ask this book" answer (AI-028). Exactly one field is set per item: a +/// partial text fragment as it arrives, or the terminal +/// (citations + lastReadOrd + insufficient) that closes the stream. An item is the +/// terminal item on failure. +/// +public record AskStreamItem( + string? TextDelta = null, + AskTerminal? Terminal = null, + string? Error = null); + +/// The terminal payload of a streamed answer: resolved citations + gating metadata. +public record AskTerminal( + IReadOnlyList Citations, + int LastReadOrd, + bool Insufficient); + /// /// "Ask this book" (Phase 4 RAG, AI-025). Interface so the AI-027 citation eval can drive generation /// against pre-retrieved chunks (no reading user) and tests can fake it. @@ -27,61 +47,159 @@ public interface IRagAskService /// Spoiler-safe ask for a real reader: gates context by their reading progress. Pass /// (the chapter open in the reader) so it counts as read even /// before the debounced progress-save persists — resolved server-side, ignored if not this edition. + /// carries prior conversation turns (multi-turn memory); retrieval still + /// runs on only. /// Task AskAsync( Guid userId, Guid siteId, Guid editionId, string question, int k, - Guid? currentChapterId, CancellationToken ct); + Guid? currentChapterId, IReadOnlyList history, CancellationToken ct); /// /// Generate a grounded, cited answer from an already-retrieved chunk set — bypasses user-progress /// gating (the caller supplies the chunks). Used by the AI-027 citation eval over the golden set. + /// is the prior conversation (pass [] for a single-shot grounding eval). /// Task AskFromChunksAsync( string question, IReadOnlyList chunks, IReadOnlyList noteTexts, - int lastReadOrd, CancellationToken ct); + IReadOnlyList history, int lastReadOrd, CancellationToken ct); } /// /// "Ask this book" (Phase 4 RAG, AI-025): retrieves spoiler-safe context via -/// , generates a grounded 2-4 sentence answer with citations via the -/// LLM gateway (FeatureTag rag.ask), and resolves the cited excerpts. Reused by the AI-027 eval. +/// , generates a grounded, conversational answer with citations via the +/// LLM gateway (FeatureTag rag.ask), and resolves the cited excerpts. Multi-turn (AI-028): +/// prior conversation turns are clamped + threaded into the chat. Streaming via . +/// Reused by the AI-027 eval. /// public sealed class RagAskService(RagContextService context, ILlmService llm) : IRagAskService { public const string FeatureTag = "rag.ask"; - private const int MaxOutputTokens = 320; + + // Raised for conversational length (AI-028: companion tone, multi-sentence replies). + private const int MaxOutputTokens = 400; private const string InsufficientMessage = - "You haven't read enough of this book yet for me to answer from it. Keep reading and ask again."; + "I don't see enough of this book in what you've read so far to answer that yet. " + + "Keep reading and ask me again — I'll be here."; public async Task AskAsync( Guid userId, Guid siteId, Guid editionId, string question, int k, - Guid? currentChapterId, CancellationToken ct) + Guid? currentChapterId, IReadOnlyList history, CancellationToken ct) { var ctx = await context.BuildAsync(userId, siteId, editionId, question, k, currentChapterId, ct); var noteTexts = ctx.Notes.Select(n => n.Text).ToList(); - return await AskFromChunksAsync(question, ctx.Chunks, noteTexts, ctx.LastReadOrd, ct); + return await AskFromChunksAsync(question, ctx.Chunks, noteTexts, history, ctx.LastReadOrd, ct); } public async Task AskFromChunksAsync( string question, IReadOnlyList chunks, IReadOnlyList noteTexts, - int lastReadOrd, CancellationToken ct) + IReadOnlyList history, int lastReadOrd, CancellationToken ct) { // No readable context → answer plainly without spending an LLM call. if (chunks.Count == 0) return new AskAnswer(InsufficientMessage, [], lastReadOrd, Insufficient: true); - var request = new LlmRequest( + var request = BuildRequest(question, chunks, noteTexts, history); + var response = await llm.CompleteAsync(request, ct); + + var citations = ResolveCitations(response.Text, chunks); + return new AskAnswer(response.Text, citations, lastReadOrd, Insufficient: false); + } + + /// + /// Streaming "Ask this book" (AI-028): same context build + gate as , + /// then streams the answer as text deltas, accumulating server-side; on + /// completion runs the EXISTING [n] citation parse over the full text and emits a single + /// terminal item with resolved citations. Empty chunks → one friendly delta + an insufficient + /// terminal (no model call). A mid-stream failure emits an terminal. + /// + public async IAsyncEnumerable StreamAsync( + string question, IReadOnlyList chunks, IReadOnlyList noteTexts, + IReadOnlyList history, int lastReadOrd, + [EnumeratorCancellation] CancellationToken ct = default) + { + // No readable context → friendly message + insufficient terminal, no LLM call. + if (chunks.Count == 0) + { + yield return new AskStreamItem(TextDelta: InsufficientMessage); + yield return new AskStreamItem(Terminal: new AskTerminal([], lastReadOrd, Insufficient: true)); + yield break; + } + + var request = BuildRequest(question, chunks, noteTexts, history); + + var text = new StringBuilder(); + string? error = null; + + // Provider resolution happens inside StreamAsync (gateway → keyed provider ctor) and can throw + // on a keyless host — surface that as an error terminal, not a broken stream. + IAsyncEnumerable? stream = null; + try + { + stream = llm.StreamAsync(request, ct); + } + catch (Exception) + { + error = "Ask is temporarily unavailable."; + } + + if (error is null) + { + await using var e = stream!.GetAsyncEnumerator(ct); + while (true) + { + LlmDelta delta; + try + { + if (!await e.MoveNextAsync()) + break; + delta = e.Current; + } + catch (OperationCanceledException) when (ct.IsCancellationRequested) + { + throw; // client disconnected — nothing to emit + } + catch (Exception) + { + error = "Ask is temporarily unavailable."; + break; + } + + if (delta.TextDelta is { Length: > 0 } fragment) + { + text.Append(fragment); + yield return new AskStreamItem(TextDelta: fragment); + } + } + } + + if (error is not null) + { + yield return new AskStreamItem(Error: error); + yield break; + } + + var citations = ResolveCitations(text.ToString(), chunks); + yield return new AskStreamItem(Terminal: new AskTerminal(citations, lastReadOrd, Insufficient: false)); + } + + private static LlmRequest BuildRequest( + string question, IReadOnlyList chunks, IReadOnlyList noteTexts, + IReadOnlyList history) + { + var contextBlock = RagAskPrompt.BuildContextBlock(chunks, noteTexts); + var messages = RagAskHistory.BuildMessages(contextBlock, history, question); + return new LlmRequest( SystemPrompt: RagAskPrompt.BuildSystemPrompt(), - Messages: [new LlmMessage("user", RagAskPrompt.BuildUserPrompt(question, chunks, noteTexts))], + Messages: messages, MaxOutputTokens: MaxOutputTokens, FeatureTag: FeatureTag); + } - var response = await llm.CompleteAsync(request, ct); - - var markers = RagAskPrompt.ParseCitations(response.Text, chunks.Count); - var citations = markers.Select(n => new AskCitationSource(n, chunks[n - 1])).ToList(); - - return new AskAnswer(response.Text, citations, lastReadOrd, Insufficient: false); + private static IReadOnlyList ResolveCitations( + string answer, IReadOnlyList chunks) + { + var markers = RagAskPrompt.ParseCitations(answer, chunks.Count); + return markers.Select(n => new AskCitationSource(n, chunks[n - 1])).ToList(); } } diff --git a/backend/src/Contracts/Books/AskDtos.cs b/backend/src/Contracts/Books/AskDtos.cs index 0d4e0c9c..b7850837 100644 --- a/backend/src/Contracts/Books/AskDtos.cs +++ b/backend/src/Contracts/Books/AskDtos.cs @@ -1,12 +1,25 @@ namespace Contracts.Books; +/// +/// One prior turn in a multi-turn "Ask this book" conversation. is "user" or +/// "assistant". History is sent by the client (the server is stateless); it's clamped server-side +/// (last few turns, each content-capped) before reaching the LLM. +/// +public record AskTurnDto(string Role, string Content); + /// /// "Ask this book" request (AI-025). overrides the default retrieval count. /// is the chapter the reader has open right now; the spoiler gate /// counts it as read (max with persisted progress) so "ask about what I'm reading" works before the /// debounced progress-save fires. Resolved server-side and ignored if it isn't part of this edition. +/// carries the prior conversation turns for multi-turn memory (AI-028); +/// retrieval still runs on only. /// -public record AskRequest(string Question, int? K = null, Guid? CurrentChapterId = null); +public record AskRequest( + string Question, + int? K = null, + Guid? CurrentChapterId = null, + IReadOnlyList? History = null); /// /// A cited source for an answer. is the [n] number in the answer text; diff --git a/backend/src/Infrastructure/Persistence/ModelRegistrySeeder.cs b/backend/src/Infrastructure/Persistence/ModelRegistrySeeder.cs index fd948ed4..18dfefde 100644 --- a/backend/src/Infrastructure/Persistence/ModelRegistrySeeder.cs +++ b/backend/src/Infrastructure/Persistence/ModelRegistrySeeder.cs @@ -23,7 +23,7 @@ private static readonly (string Feature, string Provider, string Model)[] Primar ("bookmeta", "ollama", "gemma4:e2b"), ("tagsuggestion", "ollama", "gemma4:e2b"), ("podcast.script", "openai", "gpt-4.1-nano"), - ("rag.ask", "openai", "gpt-4.1-nano"), + ("rag.ask", "openai-rag", "gpt-4.1-mini"), ]; public static async Task SeedAsync(AppDbContext db, CancellationToken ct = default) diff --git a/packages/shared/src/types/api.ts b/packages/shared/src/types/api.ts index 6e8e932a..6e47e33c 100644 --- a/packages/shared/src/types/api.ts +++ b/packages/shared/src/types/api.ts @@ -441,3 +441,12 @@ export interface AskResponse { lastReadOrd: number insufficient: boolean } + +/** + * One prior turn of the conversation, sent back to the server for multi-turn "Ask this book" + * (AI-026e). The client bounds the history (last 6 turns) before sending. + */ +export interface AskTurnDto { + role: 'user' | 'assistant' + content: string +} diff --git a/tests/TextStack.AiEvals/RagEvalRunnerTests.cs b/tests/TextStack.AiEvals/RagEvalRunnerTests.cs index e27dabde..16a1bef4 100644 --- a/tests/TextStack.AiEvals/RagEvalRunnerTests.cs +++ b/tests/TextStack.AiEvals/RagEvalRunnerTests.cs @@ -64,11 +64,11 @@ public Task> RetrieveUserBookAsync( /// Echoes back a one-citation answer pointing at the first supplied chunk. private sealed class FakeAsk : IRagAskService { - public Task AskAsync(Guid u, Guid s, Guid e, string q, int k, Guid? currentChapterId, CancellationToken ct) => + public Task AskAsync(Guid u, Guid s, Guid e, string q, int k, Guid? currentChapterId, IReadOnlyList history, CancellationToken ct) => throw new NotSupportedException(); public Task AskFromChunksAsync( - string question, IReadOnlyList chunks, IReadOnlyList notes, int lastReadOrd, CancellationToken ct) + string question, IReadOnlyList chunks, IReadOnlyList notes, IReadOnlyList history, int lastReadOrd, CancellationToken ct) { var citations = chunks.Count == 0 ? Array.Empty() diff --git a/tests/TextStack.IntegrationTests/AskEndpointTests.cs b/tests/TextStack.IntegrationTests/AskEndpointTests.cs index a7928400..9499949f 100644 --- a/tests/TextStack.IntegrationTests/AskEndpointTests.cs +++ b/tests/TextStack.IntegrationTests/AskEndpointTests.cs @@ -1,4 +1,5 @@ using System.Net; +using System.Net.Http.Headers; using System.Net.Http.Json; namespace TextStack.IntegrationTests; @@ -46,4 +47,57 @@ public async Task Ask_WithAuth_Returns200OrSkips() "endpoint not reachable (no edition / key / corpus)"); Assert.Equal(HttpStatusCode.OK, response.StatusCode); } + + [Fact] + public async Task Ask_WithHistory_JsonPathReturnsAskResponse() + { + // AI-028: the JSON contract is unchanged when multi-turn history is supplied. + Assert.SkipUnless(_fixture.IsAuthenticated, "auth unavailable"); + + var request = _fixture.CreateRequest(HttpMethod.Post, $"/books/{SomeEdition}/ask"); + request.Content = JsonContent.Create(new + { + question = "and what happens next?", + history = new[] + { + new { role = "user", content = "who is the main character?" }, + new { role = "assistant", content = "The protagonist is introduced early [1]." }, + }, + }); + var response = await _fixture.Client.SendAsync(request, TestContext.Current.CancellationToken); + + Assert.SkipWhen( + IntegrationSkip.Unavailable(response) || response.StatusCode == HttpStatusCode.ServiceUnavailable, + "endpoint not reachable (no edition / key / corpus)"); + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + Assert.Equal("application/json", response.Content.Headers.ContentType?.MediaType); + } + + [Fact] + public async Task Ask_AcceptEventStream_ReturnsSseWithDeltas() + { + // AI-028: content-negotiated SSE — Accept: text/event-stream → text/event-stream body with + // at least one `delta` event (and a terminal `done`). Skips when key/edition/corpus missing. + Assert.SkipUnless(_fixture.IsAuthenticated, "auth unavailable"); + + var request = _fixture.CreateRequest(HttpMethod.Post, $"/books/{SomeEdition}/ask"); + request.Content = JsonContent.Create(new { question = "what happens at the start?" }); + request.Headers.Accept.Clear(); + request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("text/event-stream")); + + var response = await _fixture.Client.SendAsync( + request, HttpCompletionOption.ResponseHeadersRead, TestContext.Current.CancellationToken); + + Assert.SkipWhen( + IntegrationSkip.Unavailable(response) || response.StatusCode == HttpStatusCode.ServiceUnavailable, + "endpoint not reachable (no edition / key / corpus)"); + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + Assert.Equal("text/event-stream", response.Content.Headers.ContentType?.MediaType); + + var body = await response.Content.ReadAsStringAsync(TestContext.Current.CancellationToken); + // The spoiler gate / model may yield the friendly "insufficient" message, but it still streams + // as a `delta` then `done` — assert the SSE framing, not the content. + Assert.Contains("event: delta", body); + Assert.Contains("event: done", body); + } } diff --git a/tests/TextStack.UnitTests/AskSseTests.cs b/tests/TextStack.UnitTests/AskSseTests.cs new file mode 100644 index 00000000..b12da794 --- /dev/null +++ b/tests/TextStack.UnitTests/AskSseTests.cs @@ -0,0 +1,103 @@ +using System.Net.ServerSentEvents; +using Api.Endpoints; +using Application.Rag; +using TextStack.Ai.Rag; + +namespace TextStack.UnitTests; + +/// +/// AI-028 — the Ask SSE event stream (), driven through a fake +/// source: text deltas then a terminal done-with-citations; the empty-chunks +/// case (friendly delta + insufficient done); a service-level error item; and a mid-stream throw → error. +/// +public class AskSseTests +{ + private static RetrievedChunk Chunk() => + new(Guid.NewGuid(), Guid.NewGuid(), 2, 0, "excerpt text", 0, 12, 0.9); + + private static async IAsyncEnumerable Items(params AskStreamItem[] items) + { + foreach (var i in items) + { + await Task.Yield(); + yield return i; + } + } + + private static async IAsyncEnumerable Boom() + { + await Task.Yield(); + yield return new AskStreamItem(TextDelta: "par"); + throw new InvalidOperationException("provider died"); + } + + private static async Task>> Collect( + Func> source) + { + var list = new List>(); + await foreach (var item in AskSse.StreamEventsAsync(source, onException: null, ct: TestContext.Current.CancellationToken)) + list.Add(item); + return list; + } + + [Fact] + public async Task Stream_DeltasThenTerminalDoneWithCitations() + { + var chunk = Chunk(); + var events = await Collect(_ => Items( + new AskStreamItem(TextDelta: "Hel"), + new AskStreamItem(TextDelta: "lo [1]"), + new AskStreamItem(Terminal: new AskTerminal([new AskCitationSource(1, chunk)], LastReadOrd: 4, Insufficient: false)))); + + Assert.Equal(["delta", "delta", "done"], events.Select(e => e.EventType)); + Assert.Equal("Hel", events[0].Data); + Assert.Equal("lo [1]", events[1].Data); + + var done = events[^1].Data; + Assert.Contains("\"insufficient\":false", done); + Assert.Contains("\"lastReadOrd\":4", done); + Assert.Contains("\"marker\":1", done.Replace(" ", "")); + Assert.Contains(chunk.ChunkId.ToString(), done); + } + + [Fact] + public async Task Stream_EmptyChunks_FriendlyDeltaThenInsufficientDone() + { + // Mirrors RagAskService.StreamAsync's no-chunks path: one delta + insufficient terminal, no citations. + var events = await Collect(_ => Items( + new AskStreamItem(TextDelta: "I don't see enough of this book yet."), + new AskStreamItem(Terminal: new AskTerminal([], LastReadOrd: 0, Insufficient: true)))); + + Assert.Equal(["delta", "done"], events.Select(e => e.EventType)); + Assert.Contains("\"insufficient\":true", events[^1].Data); + Assert.Contains("\"citations\":[]", events[^1].Data.Replace(" ", "")); + } + + [Fact] + public async Task Stream_ServiceErrorItem_EmitsTerminalError() + { + var events = await Collect(_ => Items( + new AskStreamItem(TextDelta: "partial"), + new AskStreamItem(Error: "Ask is temporarily unavailable."))); + + Assert.Equal(["delta", "error"], events.Select(e => e.EventType)); + Assert.Equal("Ask is temporarily unavailable.", events[^1].Data); + } + + [Fact] + public async Task Stream_MidStreamThrow_EmitsErrorAsTerminal() + { + var events = await Collect(_ => Boom()); + + Assert.Equal(["delta", "error"], events.Select(e => e.EventType)); + } + + [Fact] + public async Task Stream_SourceFactoryThrows_EmitsError() + { + var events = await Collect(_ => throw new InvalidOperationException("no key")); + + var item = Assert.Single(events); + Assert.Equal("error", item.EventType); + } +} diff --git a/tests/TextStack.UnitTests/RagAskHistoryTests.cs b/tests/TextStack.UnitTests/RagAskHistoryTests.cs new file mode 100644 index 00000000..ab0b0c82 --- /dev/null +++ b/tests/TextStack.UnitTests/RagAskHistoryTests.cs @@ -0,0 +1,127 @@ +using Application.Ai; +using Contracts.Books; + +namespace TextStack.UnitTests; + +/// +/// AI-028 — multi-turn "Ask this book" history: the defensive clamp (last-6 + per-turn content cap + +/// role normalization) and the chat-message assembly order (system supplied separately → context → +/// prior turns → new question last). +/// +public class RagAskHistoryTests +{ + private static AskTurnDto User(string c) => new("user", c); + private static AskTurnDto Assistant(string c) => new("assistant", c); + + [Fact] + public void Clamp_Null_ReturnsEmpty() => Assert.Empty(RagAskHistory.Clamp(null)); + + [Fact] + public void Clamp_KeepsOnlyLastSixTurns() + { + var history = Enumerable.Range(0, 10).Select(i => User($"turn {i}")).ToList(); + + var clamped = RagAskHistory.Clamp(history); + + Assert.Equal(RagAskHistory.MaxTurns, clamped.Count); + // The LAST six survive (turns 4..9), most recent context retained. + Assert.Equal("turn 4", clamped[0].Content); + Assert.Equal("turn 9", clamped[^1].Content); + } + + [Fact] + public void Clamp_TruncatesPerTurnContent() + { + var huge = new string('x', RagAskHistory.MaxTurnChars + 500); + + var clamped = RagAskHistory.Clamp([User(huge)]); + + Assert.Equal(RagAskHistory.MaxTurnChars, Assert.Single(clamped).Content.Length); + } + + [Fact] + public void Clamp_DropsBlankAndUnknownRoles_NormalizesRole() + { + var clamped = RagAskHistory.Clamp( + [ + new AskTurnDto("USER", "keep me"), + new AskTurnDto("system", "drop: bad role"), + new AskTurnDto("assistant", " "), + new AskTurnDto("Assistant", "keep assistant"), + ]); + + Assert.Equal(2, clamped.Count); + Assert.Equal("user", clamped[0].Role); + Assert.Equal("keep me", clamped[0].Content); + Assert.Equal("assistant", clamped[1].Role); + Assert.Equal("keep assistant", clamped[1].Content); + } + + [Fact] + public void BuildMessages_Order_ContextThenHistoryThenQuestion() + { + var messages = RagAskHistory.BuildMessages( + "CONTEXT BLOCK", + [User("earlier question"), Assistant("earlier answer [1]")], + "the new question"); + + // [0] = context (user role), [1..n] = prior turns in order, [last] = new question (user role). + Assert.Equal(4, messages.Count); + + Assert.Equal("user", messages[0].Role); + Assert.Equal("CONTEXT BLOCK", messages[0].Content); + + Assert.Equal("user", messages[1].Role); + Assert.Equal("earlier question", messages[1].Content); + + Assert.Equal("assistant", messages[2].Role); + Assert.Equal("earlier answer [1]", messages[2].Content); + + Assert.Equal("user", messages[^1].Role); + Assert.Equal("the new question", messages[^1].Content); + } + + [Fact] + public void BuildMessages_EmptyHistory_ContextThenQuestionOnly() + { + var messages = RagAskHistory.BuildMessages("CTX", [], "Q?"); + + Assert.Equal(2, messages.Count); + Assert.Equal("CTX", messages[0].Content); + Assert.Equal("Q?", messages[1].Content); + } + + [Fact] + public void Clamp_AbusiveFlood_BoundedToLastSix_EachContentCapped() + { + // Malicious client: 1000 turns, each 10MB. Server-side clamp must bound both axes regardless of + // what the client claims to send (cost / context-window defense — NOT just a UI slice). + var huge = new string('z', 10 * 1024 * 1024); + var flood = Enumerable.Range(0, 1000) + .Select(i => i % 2 == 0 ? User(huge) : Assistant(huge)) + .ToList(); + + var clamped = RagAskHistory.Clamp(flood); + + Assert.Equal(RagAskHistory.MaxTurns, clamped.Count); + Assert.All(clamped, t => Assert.Equal(RagAskHistory.MaxTurnChars, t.Content.Length)); + } + + [Fact] + public void BuildMessages_MalformedHistory_TwoUsersInARow_DoesNotThrow_PreservesOrder() + { + // A crafted history that doesn't alternate (two user turns, assistant-first elsewhere) must not + // break assembly — we pass it through verbatim after the context block; the LLM tolerates it. + var messages = RagAskHistory.BuildMessages( + "CTX", + [User("first"), User("second-in-a-row"), Assistant("reply")], + "now what?"); + + Assert.Equal(5, messages.Count); + Assert.Equal("CTX", messages[0].Content); + Assert.Equal("first", messages[1].Content); + Assert.Equal("second-in-a-row", messages[2].Content); + Assert.Equal("reply", messages[3].Content); + Assert.Equal("now what?", messages[^1].Content); + } +} diff --git a/tests/TextStack.UnitTests/RagAskPromptTests.cs b/tests/TextStack.UnitTests/RagAskPromptTests.cs index c03e7b34..b7f64f65 100644 --- a/tests/TextStack.UnitTests/RagAskPromptTests.cs +++ b/tests/TextStack.UnitTests/RagAskPromptTests.cs @@ -9,28 +9,82 @@ private static RetrievedChunk Chunk(int chapterOrd, string text) => new(Guid.NewGuid(), Guid.NewGuid(), chapterOrd, 0, text, 0, text.Length, 0.9); [Fact] - public void BuildUserPrompt_NumbersExcerptsAndAppendsQuestion() + public void BuildContextBlock_NumbersExcerpts_NoQuestionEmbedded() { - var prompt = RagAskPrompt.BuildUserPrompt( - "How does it work?", + // The question is the final user message in the multi-turn assembly — it must NOT be baked + // into the context block. + var block = RagAskPrompt.BuildContextBlock( [Chunk(3, "first excerpt"), Chunk(5, "second excerpt")], []); - Assert.Contains("[1] (ch.3) first excerpt", prompt); - Assert.Contains("[2] (ch.5) second excerpt", prompt); - Assert.Contains("Question: How does it work?", prompt); - Assert.DoesNotContain("Your notes:", prompt); + Assert.Contains("[1] (ch.3) first excerpt", block); + Assert.Contains("[2] (ch.5) second excerpt", block); + Assert.DoesNotContain("Question:", block); + Assert.DoesNotContain("reader's own notes", block); } [Fact] - public void BuildUserPrompt_WithNotes_IncludesNotesBlock() + public void BuildContextBlock_WithNotes_IncludesNotesBlock() { - var prompt = RagAskPrompt.BuildUserPrompt( - "Q?", [Chunk(1, "x")], ["my highlight", "another note"]); + var block = RagAskPrompt.BuildContextBlock([Chunk(1, "x")], ["my highlight", "another note"]); - Assert.Contains("Your notes:", prompt); - Assert.Contains("- my highlight", prompt); - Assert.Contains("- another note", prompt); + Assert.Contains("reader's own notes", block); + Assert.Contains("- my highlight", block); + Assert.Contains("- another note", block); + } + + [Fact] + public void BuildSystemPrompt_Companion_GreetingBranchHasNoForcedCitation() + { + // Structural: the warm-companion prompt steers greeting/meta to NOT cite and NOT refuse, while + // still REQUIRING [n] citations for any book fact (grounding contract preserved). + var system = RagAskPrompt.BuildSystemPrompt(); + + // Greeting/meta branch: warm + invite, explicitly no citation, no "excerpts lack the answer". + Assert.Contains("greet", system, StringComparison.OrdinalIgnoreCase); + Assert.Contains("do NOT cite", system); + + // Grounding contract still present: book facts must come from excerpts and cite [n]. + Assert.Contains("[n]", system); + Assert.Contains("MUST", system); + Assert.Contains("outside knowledge", system); + } + + [Fact] + public void BuildSystemPrompt_Grounding_DominatesAndCoversInterpretation() + { + // The #1 hallucination gate: the companion may be conversational, but grounding must dominate + // and must cover NOT just literal plot facts but theme / meaning / interpretation — otherwise the + // model "explains the symbolism" from world knowledge with no citation. Lock that wording in. + var system = RagAskPrompt.BuildSystemPrompt(); + + Assert.Contains("ONLY source", system, StringComparison.OrdinalIgnoreCase); + Assert.Contains("interpret", system, StringComparison.OrdinalIgnoreCase); + Assert.Contains("theme", system, StringComparison.OrdinalIgnoreCase); + // Grounding is declared as overriding the conversational tone, not co-equal with it. + Assert.Contains("overrides", system, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public void BuildSystemPrompt_GreetingExceptionIsNarrow_NotABlanketNoCiteLicense() + { + // The no-cite exception must be scoped to greeting / "what can you do" ONLY — never a blanket + // "you may answer without excerpts". If this assertion ever fails because the exception widened, + // the spoiler/hallucination gate has a hole. + var system = RagAskPrompt.BuildSystemPrompt(); + + Assert.Contains("ONLY messages you may answer without citing", system, StringComparison.OrdinalIgnoreCase); + Assert.DoesNotContain("you may answer without excerpts", system, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public void BuildSystemPrompt_Companion_GracefulMissForContentQuestions() + { + // Genuine content question with no relevant excerpt → graceful "I don't see that", never invent. + var system = RagAskPrompt.BuildSystemPrompt(); + + Assert.Contains("don't see", system, StringComparison.OrdinalIgnoreCase); + Assert.Contains("invent", system, StringComparison.OrdinalIgnoreCase); } [Fact] diff --git a/tests/TextStack.UnitTests/RagAskServiceTests.cs b/tests/TextStack.UnitTests/RagAskServiceTests.cs new file mode 100644 index 00000000..fc702d1c --- /dev/null +++ b/tests/TextStack.UnitTests/RagAskServiceTests.cs @@ -0,0 +1,178 @@ +using System.Runtime.CompilerServices; +using Application.Rag; +using Contracts.Books; +using TextStack.Ai.Core; +using TextStack.Ai.Rag; + +namespace TextStack.UnitTests; + +/// +/// AI-028 — service-level guards on driven by a deterministic, CALL-COUNTING +/// fake (no key, no network). The #1 invariant: when retrieval returns 0 chunks +/// (un-read / beyond progress / unindexed), BOTH the JSON (AskFromChunksAsync) and streaming +/// (StreamAsync) paths short-circuit to the friendly insufficient message WITHOUT calling the LLM — +/// a single model call on empty chunks would be a spoiler/hallucination leak. Also: the non-empty path +/// calls the model exactly once, threads the clamped multi-turn history into the request, and resolves +/// citations from the ACCUMULATED streamed text (not per-delta). +/// +public class RagAskServiceTests +{ + private static RetrievedChunk Chunk(int ord, string text) => + new(Guid.NewGuid(), Guid.NewGuid(), ord, 0, text, 0, text.Length, 0.9); + + /// Counts CompleteAsync/StreamAsync calls and captures the last request; emits scripted output. + private sealed class CountingLlm(string completeText, string[] streamFragments) : ILlmService + { + public int CompleteCalls { get; private set; } + public int StreamCalls { get; private set; } + public LlmRequest? LastRequest { get; private set; } + + public Task CompleteAsync(LlmRequest request, CancellationToken ct) + { + CompleteCalls++; + LastRequest = request; + return Task.FromResult(new LlmResponse( + completeText, [], new LlmUsage(0, 0, 0m), "fake-model", Guid.NewGuid())); + } + + public async IAsyncEnumerable StreamAsync( + LlmRequest request, [EnumeratorCancellation] CancellationToken ct) + { + StreamCalls++; + LastRequest = request; + foreach (var f in streamFragments) + { + await Task.Yield(); + yield return new LlmDelta(TextDelta: f); + } + } + } + + /// An ILlmService that throws if EITHER method is ever invoked — proves no model call happened. + private sealed class ThrowingLlm : ILlmService + { + public Task CompleteAsync(LlmRequest request, CancellationToken ct) => + throw new InvalidOperationException("LLM must NOT be called on empty chunks"); + + public IAsyncEnumerable StreamAsync(LlmRequest request, CancellationToken ct) => + throw new InvalidOperationException("LLM must NOT be called on empty chunks"); + } + + // RagContextService is only used by AskAsync (the progress-gated entry). These guard tests target + // AskFromChunksAsync / StreamAsync directly, so context is never touched — pass null safely. + private static RagAskService Service(ILlmService llm) => new(context: null!, llm); + + private static readonly CancellationToken Ct = TestContext.Current.CancellationToken; + + // ---- Empty-chunk short-circuit (spoiler / hallucination gate) ------------------------------------ + + [Fact] + public async Task AskFromChunksAsync_EmptyChunks_DoesNotCallLlm_ReturnsInsufficient() + { + var svc = Service(new ThrowingLlm()); // throws if the model is touched + + var answer = await svc.AskFromChunksAsync( + "what happens at the end?", chunks: [], noteTexts: [], history: [], lastReadOrd: 3, Ct); + + Assert.True(answer.Insufficient); + Assert.Empty(answer.Citations); + Assert.Equal(3, answer.LastReadOrd); + Assert.Contains("don't see enough", answer.Answer); + } + + [Fact] + public async Task StreamAsync_EmptyChunks_DoesNotCallLlm_FriendlyDeltaThenInsufficientTerminal() + { + var svc = Service(new ThrowingLlm()); // throws if the model is touched + + var items = new List(); + await foreach (var i in svc.StreamAsync( + "what happens at the end?", chunks: [], noteTexts: [], history: [], lastReadOrd: 0, Ct)) + items.Add(i); + + Assert.Equal(2, items.Count); + Assert.False(string.IsNullOrEmpty(items[0].TextDelta)); + Assert.NotNull(items[1].Terminal); + Assert.True(items[1].Terminal!.Insufficient); + Assert.Empty(items[1].Terminal!.Citations); + } + + // ---- Non-empty path: exactly one model call, history threaded, citations resolved --------------- + + [Fact] + public async Task AskFromChunksAsync_WithChunks_CallsLlmOnce_ResolvesCitations() + { + var llm = new CountingLlm("The hero leaves home [1] and meets a guide [2].", []); + var svc = Service(llm); + + var answer = await svc.AskFromChunksAsync( + "what happens?", [Chunk(1, "left home"), Chunk(2, "the guide")], noteTexts: [], + history: [], lastReadOrd: 5, Ct); + + Assert.Equal(1, llm.CompleteCalls); + Assert.False(answer.Insufficient); + Assert.Equal(new[] { 1, 2 }, answer.Citations.Select(c => c.Marker)); + } + + [Fact] + public async Task AskFromChunksAsync_ThreadsClampedHistoryIntoRequest() + { + var llm = new CountingLlm("ok", []); + var svc = Service(llm); + + var history = new AskTurnDto[] + { + new("user", "who is she?"), + new("assistant", "She is introduced early [1]."), + }; + + await svc.AskFromChunksAsync("and then?", [Chunk(1, "excerpt")], [], history, lastReadOrd: 0, Ct); + + // Assembly order: context block (user) → prior turns → new question last. System prompt separate. + var msgs = llm.LastRequest!.Messages; + Assert.Equal("user", msgs[0].Role); // context block + Assert.Equal("who is she?", msgs[1].Content); // prior user turn + Assert.Equal("She is introduced early [1].", msgs[2].Content); // prior assistant turn + Assert.Equal("and then?", msgs[^1].Content); // new question last + Assert.Equal(RagAskService.FeatureTag, llm.LastRequest!.FeatureTag); + Assert.False(string.IsNullOrWhiteSpace(llm.LastRequest!.SystemPrompt)); + } + + [Fact] + public async Task StreamAsync_WithChunks_CitationsFromAccumulatedText_NotPerDelta() + { + // The "[1]" marker is split across two deltas; citations must come from the FULL accumulated text. + var llm = new CountingLlm("", ["The hero leaves home [", "1] for adventure."]); + var svc = Service(llm); + + var items = new List(); + await foreach (var i in svc.StreamAsync( + "what happens?", [Chunk(1, "left home")], [], history: [], lastReadOrd: 7, Ct)) + items.Add(i); + + Assert.Equal(1, llm.StreamCalls); + var terminal = items[^1].Terminal; + Assert.NotNull(terminal); + Assert.False(terminal!.Insufficient); + Assert.Equal(7, terminal.LastReadOrd); + // Single citation resolved from the reassembled "[1]" that no single delta contained. + Assert.Equal(new[] { 1 }, terminal.Citations.Select(c => c.Marker)); + } + + [Fact] + public async Task StreamAsync_ProviderResolutionThrows_EmitsErrorTerminal_NoThrow() + { + // A keyless host throws when the gateway resolves the provider inside StreamAsync — the service + // must surface that as an Error item, not let the stream blow up. + var svc = Service(new ThrowingLlm()); + + var items = new List(); + await foreach (var i in svc.StreamAsync( + "q?", [Chunk(1, "x")], [], history: [], lastReadOrd: 0, Ct)) + items.Add(i); + + var last = Assert.Single(items); + Assert.False(string.IsNullOrEmpty(last.Error)); + Assert.Null(last.Terminal); + } +}