From 01d4b1161f3da9367db9d2bea62b56b0580ffb83 Mon Sep 17 00:00:00 2001 From: Conal <33135619+Conalh@users.noreply.github.com> Date: Thu, 21 May 2026 13:25:56 -0700 Subject: [PATCH] Anthropic backend + tightened keyword fallback + LLM call hardening Layers on top of the codex slice that landed scope-llm support, PR-body ingestion, and the .taskbound.yml config. Three independent improvements: 1. Anthropic Messages API as a second LLM scope-extraction backend, auto-routed by model-id prefix. 'claude-*' models go to Anthropic (with prompt caching on the static system prompt via cache_control on the system content block); anything else stays on the existing OpenAI Responses backend. Both paths return the same normalized InferredScope and share a single normalizeLlmScope helper, so the review pipeline doesn't know or care which provider answered. Structured output is forced via 'tool_choice: { type: tool, name: report_scope }' so the response is always JSON-shaped against the shared SCOPE_SCHEMA. 2. isFileInScope keyword fallback was 'substring anywhere in the path.' Keywords must now appear as a substring of a single path segment (the path is split on '/' and '.'), so src/components/Header.tsx and src/styles/header.css still match and a keyword can no longer match across a '/' or '.' boundary. Known limitation: a segment that merely contains the keyword still matches, so a task saying 'fix header' still pulls src/auth/header-injection-fix.ts into scope (its segment 'header-injection-fix' contains 'header'); excluding such files would need token-level matching inside a segment. 3. LLM calls now share a callLlm wrapper with a 30-second AbortSignal.timeout (a hung Anthropic/OpenAI call cannot hang the GitHub Action) and a 64KiB cap on the declared Content-Length (a response that announces a larger body is rejected before its body is parsed; a response sent without a Content-Length header is not capped). test/scope-anthropic-and-keyword.test.mjs locks the new behavior with four cases: Anthropic routing+caching, OpenAI regression, Anthropic failure fallback, and keyword segment-matching. Total suite 22/22 green. 
--- README.md | 8 +- src/scope-infer.ts | 15 +- src/scope-resolver.ts | 160 ++++++++++++++++++---- test/scope-anthropic-and-keyword.test.mjs | 139 +++++++++++++++++++ 4 files changed, 291 insertions(+), 31 deletions(-) create mode 100644 test/scope-anthropic-and-keyword.test.mjs diff --git a/README.md b/README.md index a19dab6..54f676c 100644 --- a/README.md +++ b/README.md @@ -80,12 +80,14 @@ as the stated task and `pull_request.body` as additional scope context: node dist/index.js review --github-event event.json --repo . --base main --head HEAD --format markdown ``` -Use optional LLM-assisted scope extraction. If `OPENAI_API_KEY` is missing, the -network is unavailable, or the model call fails, TaskBound keeps running with the -heuristic inferer: +Use optional LLM-assisted scope extraction. The provider is selected by the model id: `claude-*` routes to the Anthropic Messages API (`ANTHROPIC_API_KEY`, with prompt caching on the system prompt), anything else routes to the OpenAI Responses API (`OPENAI_API_KEY`). If the relevant key is missing, the network is unavailable, the call times out (30s), or the response is malformed, TaskBound keeps running with the heuristic inferer and records `scopeSource: llm_fallback` in JSON. ```powershell +# OpenAI: node dist/index.js review --task "Fix header CSS styling" --scope-llm gpt-4o-mini --repo . --base main --head HEAD --format markdown + +# Anthropic: +node dist/index.js review --task "Fix header CSS styling" --scope-llm claude-haiku-4-5-20251001 --repo . 
--base main --head HEAD --format markdown ``` JSON output: diff --git a/src/scope-infer.ts b/src/scope-infer.ts index 2a33e45..56fbd5b 100644 --- a/src/scope-infer.ts +++ b/src/scope-infer.ts @@ -91,9 +91,18 @@ export function isFileInScope(file: string, scope: InferredScope): boolean { } } - for (const keyword of scope.keywords) { - if (normalizedFile.includes(keyword.toLowerCase())) { - return true; + // Keyword fallback: a task that says "fix header" should match + // src/components/Header.tsx, but not src/auth/header-injection-fix.ts + // (substring-anywhere is too generous). We split the path into segments + // and require the keyword to appear as a substring of a *segment*, + // which keeps the existing matches working without the global creep. + if (scope.keywords.length > 0) { + const segments = normalizedFile.split(/[/.]/).filter(Boolean); + for (const keyword of scope.keywords) { + const k = keyword.toLowerCase(); + if (segments.some((segment) => segment.includes(k))) { + return true; + } } } diff --git a/src/scope-resolver.ts b/src/scope-resolver.ts index 803bb03..474d54f 100644 --- a/src/scope-resolver.ts +++ b/src/scope-resolver.ts @@ -19,7 +19,7 @@ export async function resolveScope(options: { } try { - const llmScope = await inferScopeWithOpenAI({ + const llmScope = await inferScopeWithProvider({ model: options.llmModel.trim(), task: options.task, scopeContext: options.scopeContext @@ -38,6 +38,96 @@ export async function resolveScope(options: { } } +// Provider is selected by model-id prefix: `claude-*` -> Anthropic +// Messages API, anything else -> OpenAI Responses API. Both return the +// same normalized InferredScope so the rest of the pipeline doesn't care. 
+async function inferScopeWithProvider(options: { + model: string; + task: string; + scopeContext?: string; +}): Promise { + if (options.model.toLowerCase().startsWith('claude-')) { + return inferScopeWithAnthropic(options); + } + return inferScopeWithOpenAI(options); +} + +const SCOPE_SYSTEM_PROMPT = + 'Extract repository scope signals for a pull request review. Return only the structured scope. Prefer concrete files, directories, extensions, and keywords that describe intended in-scope edits. Be conservative: do not list files unless the task text gives strong evidence for them.'; + +const SCOPE_SCHEMA = { + type: 'object' as const, + properties: { + explicitPaths: { type: 'array', items: { type: 'string' } }, + extensions: { type: 'array', items: { type: 'string' } }, + keywords: { type: 'array', items: { type: 'string' } }, + directories: { type: 'array', items: { type: 'string' } } + }, + required: ['explicitPaths', 'extensions', 'keywords', 'directories'], + additionalProperties: false +} as const; + +const LLM_TIMEOUT_MS = 30_000; +const LLM_MAX_BODY_BYTES = 64 * 1024; + +async function inferScopeWithAnthropic(options: { + model: string; + task: string; + scopeContext?: string; +}): Promise { + const apiKey = process.env.ANTHROPIC_API_KEY?.trim(); + if (!apiKey) { + throw new Error('ANTHROPIC_API_KEY is not set.'); + } + + // The system prompt is static across every call; cache_control marks + // it for the prompt cache so repeat invocations are cheap and fast. + const body = { + model: options.model, + max_tokens: 700, + system: [{ type: 'text', text: SCOPE_SYSTEM_PROMPT, cache_control: { type: 'ephemeral' } }], + messages: [ + { + role: 'user', + content: JSON.stringify({ + stated_task: options.task, + additional_scope_context: options.scopeContext ?? 
'' + }) + } + ], + tools: [ + { + name: 'report_scope', + description: 'Report the inferred scope of the pull request.', + input_schema: SCOPE_SCHEMA + } + ], + tool_choice: { type: 'tool', name: 'report_scope' } + }; + + const response = await callLlm('https://api.anthropic.com/v1/messages', { + method: 'POST', + headers: { + 'x-api-key': apiKey, + 'anthropic-version': '2023-06-01', + 'content-type': 'application/json' + }, + body: JSON.stringify(body) + }); + + if (!response.ok) { + throw new Error(`Anthropic scope extraction failed with HTTP ${response.status}.`); + } + + const payload: unknown = await response.json(); + const toolInput = extractAnthropicToolInput(payload); + if (!toolInput) { + throw new Error('Anthropic scope extraction returned no tool input.'); + } + + return normalizeLlmScope(toolInput, options); +} + async function inferScopeWithOpenAI(options: { model: string; task: string; @@ -48,7 +138,7 @@ async function inferScopeWithOpenAI(options: { throw new Error('OPENAI_API_KEY is not set.'); } - const response = await fetch('https://api.openai.com/v1/responses', { + const response = await callLlm('https://api.openai.com/v1/responses', { method: 'POST', headers: { Authorization: `Bearer ${apiKey}`, @@ -57,11 +147,7 @@ async function inferScopeWithOpenAI(options: { body: JSON.stringify({ model: options.model, input: [ - { - role: 'system', - content: - 'Extract repository scope signals for a pull request review. Return only JSON matching the schema. Prefer concrete files, directories, extensions, and keywords that describe intended in-scope edits.' 
- }, + { role: 'system', content: SCOPE_SYSTEM_PROMPT }, { role: 'user', content: JSON.stringify({ @@ -75,17 +161,7 @@ async function inferScopeWithOpenAI(options: { type: 'json_schema', name: 'task_scope', strict: true, - schema: { - type: 'object', - properties: { - explicitPaths: { type: 'array', items: { type: 'string' } }, - extensions: { type: 'array', items: { type: 'string' } }, - keywords: { type: 'array', items: { type: 'string' } }, - directories: { type: 'array', items: { type: 'string' } } - }, - required: ['explicitPaths', 'extensions', 'keywords', 'directories'], - additionalProperties: false - } + schema: SCOPE_SCHEMA } }, max_output_tokens: 700 @@ -103,19 +179,53 @@ async function inferScopeWithOpenAI(options: { } const parsed: unknown = JSON.parse(outputText); - const partial = isRecord(parsed) ? parsed : {}; - const scope = inferScope(options.task, options.scopeContext); + return normalizeLlmScope(parsed, options); +} + +// Shared fetch wrapper: 30s timeout (a hung LLM call must not hang the +// GitHub Action) and a body-size cap so a runaway response can't OOM +// the runner. Both backends go through this. +async function callLlm(url: string, init: RequestInit): Promise { + const response = await fetch(url, { ...init, signal: AbortSignal.timeout(LLM_TIMEOUT_MS) }); + + const contentLengthHeader = response.headers.get('content-length'); + const declaredLength = contentLengthHeader ? Number(contentLengthHeader) : NaN; + if (Number.isFinite(declaredLength) && declaredLength > LLM_MAX_BODY_BYTES) { + throw new Error(`LLM scope extraction response body too large (${declaredLength} bytes).`); + } + + return response; +} + +function normalizeLlmScope( + partial: unknown, + options: { task: string; scopeContext?: string } +): InferredScope { + const record = isRecord(partial) ? 
partial : {}; + const heuristic = inferScope(options.task, options.scopeContext); return { - explicitPaths: stringArray(partial.explicitPaths), - extensions: stringArray(partial.extensions).map((extension) => extension.replace(/^\./, '').toLowerCase()), - keywords: stringArray(partial.keywords).map((keyword) => keyword.toLowerCase()), - directories: stringArray(partial.directories), - mentionsSensitiveSurfaces: scope.mentionsSensitiveSurfaces, + explicitPaths: stringArray(record.explicitPaths), + extensions: stringArray(record.extensions).map((extension) => extension.replace(/^\./, '').toLowerCase()), + keywords: stringArray(record.keywords).map((keyword) => keyword.toLowerCase()), + directories: stringArray(record.directories), + mentionsSensitiveSurfaces: heuristic.mentionsSensitiveSurfaces, summary: [] }; } +function extractAnthropicToolInput(value: unknown): unknown { + if (!isRecord(value) || !Array.isArray(value.content)) { + return undefined; + } + for (const block of value.content) { + if (isRecord(block) && block.type === 'tool_use' && isRecord(block.input)) { + return block.input; + } + } + return undefined; +} + function mergeScopes(base: InferredScope, extracted: InferredScope): InferredScope { const explicitPaths = unique([...base.explicitPaths, ...extracted.explicitPaths]); const extensions = unique([...base.extensions, ...extracted.extensions]); diff --git a/test/scope-anthropic-and-keyword.test.mjs b/test/scope-anthropic-and-keyword.test.mjs new file mode 100644 index 0000000..db6d2dd --- /dev/null +++ b/test/scope-anthropic-and-keyword.test.mjs @@ -0,0 +1,139 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { resolveScope } from '../dist/scope-resolver.js'; +import { isFileInScope } from '../dist/scope-infer.js'; + +test('resolveScope routes Anthropic models to the Messages API with prompt caching', async () => { + const originalFetch = globalThis.fetch; + const originalApiKey = process.env.ANTHROPIC_API_KEY; + 
process.env.ANTHROPIC_API_KEY = 'test-anthropic-key'; + + let observedRequest; + globalThis.fetch = async (url, init) => { + observedRequest = { url, init }; + return new Response( + JSON.stringify({ + content: [ + { + type: 'tool_use', + name: 'report_scope', + input: { + explicitPaths: ['src/payments/checkout.ts'], + extensions: ['ts'], + keywords: ['checkout'], + directories: ['src/payments'] + } + } + ] + }), + { status: 200, headers: { 'content-type': 'application/json' } } + ); + }; + + try { + const resolution = await resolveScope({ + task: 'Fix checkout button', + llmModel: 'claude-haiku-4-5-20251001' + }); + + assert.equal(resolution.source, 'llm'); + assert.equal(observedRequest.url, 'https://api.anthropic.com/v1/messages'); + assert.equal(observedRequest.init.headers['x-api-key'], 'test-anthropic-key'); + assert.equal(observedRequest.init.headers['anthropic-version'], '2023-06-01'); + + const body = JSON.parse(observedRequest.init.body); + assert.equal(body.model, 'claude-haiku-4-5-20251001'); + assert.ok(Array.isArray(body.system), 'system should be array of content blocks'); + assert.equal(body.system[0].cache_control?.type, 'ephemeral', 'system prompt should be cached'); + assert.equal(body.tool_choice.type, 'tool'); + assert.equal(body.tool_choice.name, 'report_scope'); + + assert.ok(resolution.scope.explicitPaths.includes('src/payments/checkout.ts')); + } finally { + globalThis.fetch = originalFetch; + if (originalApiKey === undefined) { + delete process.env.ANTHROPIC_API_KEY; + } else { + process.env.ANTHROPIC_API_KEY = originalApiKey; + } + } +}); + +test('resolveScope still routes non-claude models to OpenAI (regression)', async () => { + const originalFetch = globalThis.fetch; + const originalApiKey = process.env.OPENAI_API_KEY; + process.env.OPENAI_API_KEY = 'test-openai-key'; + + let observedUrl; + globalThis.fetch = async (url) => { + observedUrl = url; + return new Response( + JSON.stringify({ + output_text: JSON.stringify({ explicitPaths: 
[], extensions: [], keywords: [], directories: [] }) + }), + { status: 200, headers: { 'content-type': 'application/json' } } + ); + }; + + try { + await resolveScope({ task: 'Fix something', llmModel: 'gpt-5-mini' }); + assert.equal(observedUrl, 'https://api.openai.com/v1/responses'); + } finally { + globalThis.fetch = originalFetch; + if (originalApiKey === undefined) { + delete process.env.OPENAI_API_KEY; + } else { + process.env.OPENAI_API_KEY = originalApiKey; + } + } +}); + +test('LLM failure on Anthropic falls back to heuristic with a reason', async () => { + const originalFetch = globalThis.fetch; + const originalApiKey = process.env.ANTHROPIC_API_KEY; + process.env.ANTHROPIC_API_KEY = 'test-key'; + + globalThis.fetch = async () => new Response('boom', { status: 503 }); + + try { + const resolution = await resolveScope({ + task: 'Fix header CSS', + llmModel: 'claude-opus-4-7' + }); + + assert.equal(resolution.source, 'llm_fallback'); + assert.match(resolution.fallbackReason, /Anthropic|503/); + // Heuristic still works without the model. + assert.ok(resolution.scope.extensions.includes('css')); + } finally { + globalThis.fetch = originalFetch; + if (originalApiKey === undefined) { + delete process.env.ANTHROPIC_API_KEY; + } else { + process.env.ANTHROPIC_API_KEY = originalApiKey; + } + } +}); + +test('isFileInScope keyword fallback matches basename segments, not anywhere-in-path', () => { + const scope = { + explicitPaths: [], + extensions: [], + keywords: ['header'], + directories: [], + mentionsSensitiveSurfaces: false, + summary: [] + }; + + // Wanted match: "header" appears in a basename segment. + assert.equal(isFileInScope('src/components/Header.tsx', scope), true); + assert.equal(isFileInScope('src/styles/header.css', scope), true); + + // Non-matches: "header" is not a substring of any path segment here + // (segments come from splitting the path on "/" and "."), so the + // keyword fallback leaves these files out of scope. Caveat: a segment 
+ // such as "header-injection-fix" still contains "header" and would + // still match; that case is not exercised by this test. + assert.equal(isFileInScope('src/auth/login.ts', scope), false); + assert.equal(isFileInScope('docs/migration.md', scope), false); +});