From 01d4b1161f3da9367db9d2bea62b56b0580ffb83 Mon Sep 17 00:00:00 2001
From: Conal <33135619+Conalh@users.noreply.github.com>
Date: Thu, 21 May 2026 13:25:56 -0700
Subject: [PATCH] Anthropic backend + tightened keyword fallback + LLM call
 hardening

Layers on top of the codex slice that landed scope-llm support, PR-body
ingestion, and the .taskbound.yml config. Three independent improvements:

1. Anthropic Messages API as a second LLM scope-extraction backend,
   auto-routed by model-id prefix. 'claude-*' models go to Anthropic
   (with prompt caching on the static system prompt via cache_control
   on the system content block); anything else stays on the existing
   OpenAI Responses backend. Both paths return the same normalized
   InferredScope and share a single normalizeLlmScope helper, so the
   review pipeline doesn't know or care which provider answered.
   Structured output is forced via 'tool_choice: { type: tool, name:
   report_scope }' so the response is always JSON-shaped against the
   shared SCOPE_SCHEMA.

2. isFileInScope keyword fallback was 'substring anywhere in the path'.
   Keywords must now appear as a substring of a single path segment
   (split on '/' and '.'), so src/components/Header.tsx and
   src/styles/header.css still match, and a keyword can no longer match
   across a separator. Caveat: a task saying 'fix header' still pulls
   src/auth/header-injection-fix.ts into scope, because 'header' is a
   substring of the 'header-injection-fix' segment; excluding it would
   need whole-token matching.

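3. LLM calls now share a callLlm wrapper with a 30-second
   AbortSignal.timeout (a hung Anthropic/OpenAI call cannot hang the
   GitHub Action) and a 64KiB cap on the declared Content-Length (a
   response announcing a larger body is rejected before it is read;
   responses sent without a Content-Length header are not size-checked).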

test/scope-anthropic-and-keyword.test.mjs locks the new behavior with
four cases: Anthropic routing+caching, OpenAI regression, Anthropic
failure fallback, and keyword segment-matching. Total suite 22/22 green.
---
 README.md                                 |   8 +-
 src/scope-infer.ts                        |  15 +-
 src/scope-resolver.ts                     | 160 ++++++++++++++++++----
 test/scope-anthropic-and-keyword.test.mjs | 139 +++++++++++++++++++
 4 files changed, 291 insertions(+), 31 deletions(-)
 create mode 100644 test/scope-anthropic-and-keyword.test.mjs

diff --git a/README.md b/README.md
index a19dab6..54f676c 100644
--- a/README.md
+++ b/README.md
@@ -80,12 +80,14 @@ as the stated task and `pull_request.body` as additional scope context:
 node dist/index.js review --github-event event.json --repo . --base main --head HEAD --format markdown
 ```
 
-Use optional LLM-assisted scope extraction. If `OPENAI_API_KEY` is missing, the
-network is unavailable, or the model call fails, TaskBound keeps running with the
-heuristic inferer:
+Use optional LLM-assisted scope extraction. The provider is selected by the model id: `claude-*` routes to the Anthropic Messages API (`ANTHROPIC_API_KEY`, with prompt caching on the system prompt), anything else routes to the OpenAI Responses API (`OPENAI_API_KEY`). If the relevant key is missing, the network is unavailable, the call times out (30s), or the response is malformed, TaskBound keeps running with the heuristic inferer and records `scopeSource: llm_fallback` in JSON.
 
 ```powershell
+# OpenAI:
 node dist/index.js review --task "Fix header CSS styling" --scope-llm gpt-4o-mini --repo . --base main --head HEAD --format markdown
+
+# Anthropic:
+node dist/index.js review --task "Fix header CSS styling" --scope-llm claude-haiku-4-5-20251001 --repo . --base main --head HEAD --format markdown
 ```
 
 JSON output:
diff --git a/src/scope-infer.ts b/src/scope-infer.ts
index 2a33e45..56fbd5b 100644
--- a/src/scope-infer.ts
+++ b/src/scope-infer.ts
@@ -91,9 +91,18 @@ export function isFileInScope(file: string, scope: InferredScope): boolean {
     }
   }
 
-  for (const keyword of scope.keywords) {
-    if (normalizedFile.includes(keyword.toLowerCase())) {
-      return true;
+  // Keyword fallback: a task that says "fix header" should match
+  // src/components/Header.tsx. The path is split on '/' and '.' and the
+  // keyword must be a substring of a single *segment*, so it can no longer
+  // match across a separator. Note: src/auth/header-injection-fix.ts still
+  // matches "header", since this is a substring (not whole-token) check.
+  if (scope.keywords.length > 0) {
+    const segments = normalizedFile.split(/[/.]/).filter(Boolean);
+    for (const keyword of scope.keywords) {
+      const k = keyword.toLowerCase();
+      if (segments.some((segment) => segment.includes(k))) {
+        return true;
+      }
     }
   }
 
diff --git a/src/scope-resolver.ts b/src/scope-resolver.ts
index 803bb03..474d54f 100644
--- a/src/scope-resolver.ts
+++ b/src/scope-resolver.ts
@@ -19,7 +19,7 @@ export async function resolveScope(options: {
   }
 
   try {
-    const llmScope = await inferScopeWithOpenAI({
+    const llmScope = await inferScopeWithProvider({
       model: options.llmModel.trim(),
       task: options.task,
       scopeContext: options.scopeContext
@@ -38,6 +38,96 @@ export async function resolveScope(options: {
   }
 }
 
+// Provider is selected by model-id prefix: `claude-*` -> Anthropic
+// Messages API, anything else -> OpenAI Responses API. Both return the
+// same normalized InferredScope so the rest of the pipeline doesn't care.
+async function inferScopeWithProvider(options: {
+  model: string;
+  task: string;
+  scopeContext?: string;
+}): Promise<InferredScope> {
+  if (options.model.toLowerCase().startsWith('claude-')) {
+    return inferScopeWithAnthropic(options);
+  }
+  return inferScopeWithOpenAI(options);
+}
+
+const SCOPE_SYSTEM_PROMPT =
+  'Extract repository scope signals for a pull request review. Return only the structured scope. Prefer concrete files, directories, extensions, and keywords that describe intended in-scope edits. Be conservative: do not list files unless the task text gives strong evidence for them.';
+
+const SCOPE_SCHEMA = {
+  type: 'object' as const,
+  properties: {
+    explicitPaths: { type: 'array', items: { type: 'string' } },
+    extensions: { type: 'array', items: { type: 'string' } },
+    keywords: { type: 'array', items: { type: 'string' } },
+    directories: { type: 'array', items: { type: 'string' } }
+  },
+  required: ['explicitPaths', 'extensions', 'keywords', 'directories'],
+  additionalProperties: false
+} as const;
+
+const LLM_TIMEOUT_MS = 30_000;
+const LLM_MAX_BODY_BYTES = 64 * 1024;
+
+async function inferScopeWithAnthropic(options: {
+  model: string;
+  task: string;
+  scopeContext?: string;
+}): Promise<InferredScope> {
+  const apiKey = process.env.ANTHROPIC_API_KEY?.trim();
+  if (!apiKey) {
+    throw new Error('ANTHROPIC_API_KEY is not set.');
+  }
+
+  // Static system prompt, marked with cache_control as a cache breakpoint.
+  // NOTE(review): confirm it meets the provider's minimum cacheable length.
+  const body = {
+    model: options.model,
+    max_tokens: 700,
+    system: [{ type: 'text', text: SCOPE_SYSTEM_PROMPT, cache_control: { type: 'ephemeral' } }],
+    messages: [
+      {
+        role: 'user',
+        content: JSON.stringify({
+          stated_task: options.task,
+          additional_scope_context: options.scopeContext ?? ''
+        })
+      }
+    ],
+    tools: [
+      {
+        name: 'report_scope',
+        description: 'Report the inferred scope of the pull request.',
+        input_schema: SCOPE_SCHEMA
+      }
+    ],
+    tool_choice: { type: 'tool', name: 'report_scope' }
+  };
+
+  const response = await callLlm('https://api.anthropic.com/v1/messages', {
+    method: 'POST',
+    headers: {
+      'x-api-key': apiKey,
+      'anthropic-version': '2023-06-01',
+      'content-type': 'application/json'
+    },
+    body: JSON.stringify(body)
+  });
+
+  if (!response.ok) {
+    throw new Error(`Anthropic scope extraction failed with HTTP ${response.status}.`);
+  }
+
+  const payload: unknown = await response.json();
+  const toolInput = extractAnthropicToolInput(payload);
+  if (!toolInput) {
+    throw new Error('Anthropic scope extraction returned no tool input.');
+  }
+
+  return normalizeLlmScope(toolInput, options);
+}
+
 async function inferScopeWithOpenAI(options: {
   model: string;
   task: string;
@@ -48,7 +138,7 @@ async function inferScopeWithOpenAI(options: {
     throw new Error('OPENAI_API_KEY is not set.');
   }
 
-  const response = await fetch('https://api.openai.com/v1/responses', {
+  const response = await callLlm('https://api.openai.com/v1/responses', {
     method: 'POST',
     headers: {
       Authorization: `Bearer ${apiKey}`,
@@ -57,11 +147,7 @@ async function inferScopeWithOpenAI(options: {
     body: JSON.stringify({
       model: options.model,
       input: [
-        {
-          role: 'system',
-          content:
-            'Extract repository scope signals for a pull request review. Return only JSON matching the schema. Prefer concrete files, directories, extensions, and keywords that describe intended in-scope edits.'
-        },
+        { role: 'system', content: SCOPE_SYSTEM_PROMPT },
         {
           role: 'user',
           content: JSON.stringify({
@@ -75,17 +161,7 @@ async function inferScopeWithOpenAI(options: {
           type: 'json_schema',
           name: 'task_scope',
           strict: true,
-          schema: {
-            type: 'object',
-            properties: {
-              explicitPaths: { type: 'array', items: { type: 'string' } },
-              extensions: { type: 'array', items: { type: 'string' } },
-              keywords: { type: 'array', items: { type: 'string' } },
-              directories: { type: 'array', items: { type: 'string' } }
-            },
-            required: ['explicitPaths', 'extensions', 'keywords', 'directories'],
-            additionalProperties: false
-          }
+          schema: SCOPE_SCHEMA
         }
       },
       max_output_tokens: 700
@@ -103,19 +179,53 @@ async function inferScopeWithOpenAI(options: {
   }
 
   const parsed: unknown = JSON.parse(outputText);
-  const partial = isRecord(parsed) ? parsed : {};
-  const scope = inferScope(options.task, options.scopeContext);
+  return normalizeLlmScope(parsed, options);
+}
+
+// Shared fetch wrapper: 30s timeout (a hung LLM call must not hang the
+// GitHub Action) and a cap on the declared Content-Length header; bodies
+// sent without that header are not size-checked. Both backends use this.
+async function callLlm(url: string, init: RequestInit): Promise<Response> {
+  const response = await fetch(url, { ...init, signal: AbortSignal.timeout(LLM_TIMEOUT_MS) });
+
+  const contentLengthHeader = response.headers.get('content-length');
+  const declaredLength = contentLengthHeader ? Number(contentLengthHeader) : NaN;
+  if (Number.isFinite(declaredLength) && declaredLength > LLM_MAX_BODY_BYTES) {
+    throw new Error(`LLM scope extraction response body too large (${declaredLength} bytes).`);
+  }
+
+  return response;
+}
+
+function normalizeLlmScope(
+  partial: unknown,
+  options: { task: string; scopeContext?: string }
+): InferredScope {
+  const record = isRecord(partial) ? partial : {};
+  const heuristic = inferScope(options.task, options.scopeContext);
 
   return {
-    explicitPaths: stringArray(partial.explicitPaths),
-    extensions: stringArray(partial.extensions).map((extension) => extension.replace(/^\./, '').toLowerCase()),
-    keywords: stringArray(partial.keywords).map((keyword) => keyword.toLowerCase()),
-    directories: stringArray(partial.directories),
-    mentionsSensitiveSurfaces: scope.mentionsSensitiveSurfaces,
+    explicitPaths: stringArray(record.explicitPaths),
+    extensions: stringArray(record.extensions).map((extension) => extension.replace(/^\./, '').toLowerCase()),
+    keywords: stringArray(record.keywords).map((keyword) => keyword.toLowerCase()),
+    directories: stringArray(record.directories),
+    mentionsSensitiveSurfaces: heuristic.mentionsSensitiveSurfaces,
     summary: []
   };
 }
 
+function extractAnthropicToolInput(value: unknown): unknown {
+  if (!isRecord(value) || !Array.isArray(value.content)) {
+    return undefined;
+  }
+  for (const block of value.content) {
+    if (isRecord(block) && block.type === 'tool_use' && isRecord(block.input)) {
+      return block.input;
+    }
+  }
+  return undefined;
+}
+
 function mergeScopes(base: InferredScope, extracted: InferredScope): InferredScope {
   const explicitPaths = unique([...base.explicitPaths, ...extracted.explicitPaths]);
   const extensions = unique([...base.extensions, ...extracted.extensions]);
diff --git a/test/scope-anthropic-and-keyword.test.mjs b/test/scope-anthropic-and-keyword.test.mjs
new file mode 100644
index 0000000..db6d2dd
--- /dev/null
+++ b/test/scope-anthropic-and-keyword.test.mjs
@@ -0,0 +1,139 @@
+import test from 'node:test';
+import assert from 'node:assert/strict';
+import { resolveScope } from '../dist/scope-resolver.js';
+import { isFileInScope } from '../dist/scope-infer.js';
+
+test('resolveScope routes Anthropic models to the Messages API with prompt caching', async () => {
+  const originalFetch = globalThis.fetch;
+  const originalApiKey = process.env.ANTHROPIC_API_KEY;
+  process.env.ANTHROPIC_API_KEY = 'test-anthropic-key';
+
+  let observedRequest;
+  globalThis.fetch = async (url, init) => {
+    observedRequest = { url, init };
+    return new Response(
+      JSON.stringify({
+        content: [
+          {
+            type: 'tool_use',
+            name: 'report_scope',
+            input: {
+              explicitPaths: ['src/payments/checkout.ts'],
+              extensions: ['ts'],
+              keywords: ['checkout'],
+              directories: ['src/payments']
+            }
+          }
+        ]
+      }),
+      { status: 200, headers: { 'content-type': 'application/json' } }
+    );
+  };
+
+  try {
+    const resolution = await resolveScope({
+      task: 'Fix checkout button',
+      llmModel: 'claude-haiku-4-5-20251001'
+    });
+
+    assert.equal(resolution.source, 'llm');
+    assert.equal(observedRequest.url, 'https://api.anthropic.com/v1/messages');
+    assert.equal(observedRequest.init.headers['x-api-key'], 'test-anthropic-key');
+    assert.equal(observedRequest.init.headers['anthropic-version'], '2023-06-01');
+
+    const body = JSON.parse(observedRequest.init.body);
+    assert.equal(body.model, 'claude-haiku-4-5-20251001');
+    assert.ok(Array.isArray(body.system), 'system should be array of content blocks');
+    assert.equal(body.system[0].cache_control?.type, 'ephemeral', 'system prompt should be cached');
+    assert.equal(body.tool_choice.type, 'tool');
+    assert.equal(body.tool_choice.name, 'report_scope');
+
+    assert.ok(resolution.scope.explicitPaths.includes('src/payments/checkout.ts'));
+  } finally {
+    globalThis.fetch = originalFetch;
+    if (originalApiKey === undefined) {
+      delete process.env.ANTHROPIC_API_KEY;
+    } else {
+      process.env.ANTHROPIC_API_KEY = originalApiKey;
+    }
+  }
+});
+
+test('resolveScope still routes non-claude models to OpenAI (regression)', async () => {
+  const originalFetch = globalThis.fetch;
+  const originalApiKey = process.env.OPENAI_API_KEY;
+  process.env.OPENAI_API_KEY = 'test-openai-key';
+
+  let observedUrl;
+  globalThis.fetch = async (url) => {
+    observedUrl = url;
+    return new Response(
+      JSON.stringify({
+        output_text: JSON.stringify({ explicitPaths: [], extensions: [], keywords: [], directories: [] })
+      }),
+      { status: 200, headers: { 'content-type': 'application/json' } }
+    );
+  };
+
+  try {
+    await resolveScope({ task: 'Fix something', llmModel: 'gpt-5-mini' });
+    assert.equal(observedUrl, 'https://api.openai.com/v1/responses');
+  } finally {
+    globalThis.fetch = originalFetch;
+    if (originalApiKey === undefined) {
+      delete process.env.OPENAI_API_KEY;
+    } else {
+      process.env.OPENAI_API_KEY = originalApiKey;
+    }
+  }
+});
+
+test('LLM failure on Anthropic falls back to heuristic with a reason', async () => {
+  const originalFetch = globalThis.fetch;
+  const originalApiKey = process.env.ANTHROPIC_API_KEY;
+  process.env.ANTHROPIC_API_KEY = 'test-key';
+
+  globalThis.fetch = async () => new Response('boom', { status: 503 });
+
+  try {
+    const resolution = await resolveScope({
+      task: 'Fix header CSS',
+      llmModel: 'claude-opus-4-7'
+    });
+
+    assert.equal(resolution.source, 'llm_fallback');
+    assert.match(resolution.fallbackReason, /Anthropic|503/);
+    // Heuristic still works without the model.
+    assert.ok(resolution.scope.extensions.includes('css'));
+  } finally {
+    globalThis.fetch = originalFetch;
+    if (originalApiKey === undefined) {
+      delete process.env.ANTHROPIC_API_KEY;
+    } else {
+      process.env.ANTHROPIC_API_KEY = originalApiKey;
+    }
+  }
+});
+
+test('isFileInScope keyword fallback matches basename segments, not anywhere-in-path', () => {
+  const scope = {
+    explicitPaths: [],
+    extensions: [],
+    keywords: ['header'],
+    directories: [],
+    mentionsSensitiveSurfaces: false,
+    summary: []
+  };
+
+  // Wanted match: "header" appears in a basename segment.
+  assert.equal(isFileInScope('src/components/Header.tsx', scope), true);
+  assert.equal(isFileInScope('src/styles/header.css', scope), true);
+
+  // Paths with no segment containing "header" stay out of scope.
+  // Caveat: this does not exercise a path such as
+  // src/auth/header-injection-fix.ts, which still matches because
+  // "header" is a substring of the "header-injection-fix" segment;
+  // the segment check only rules out matches that span '/' or '.'.
+  assert.equal(isFileInScope('src/auth/login.ts', scope), false);
+  assert.equal(isFileInScope('docs/migration.md', scope), false);
+});