Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# TaskBound

[![CI](https://github.com/Conalh/TaskBound/actions/workflows/ci.yml/badge.svg)](https://github.com/Conalh/TaskBound/actions/workflows/ci.yml)
Expand Down Expand Up @@ -80,12 +80,14 @@
node dist/index.js review --github-event event.json --repo . --base main --head HEAD --format markdown
```

Use optional LLM-assisted scope extraction. If `OPENAI_API_KEY` is missing, the
network is unavailable, or the model call fails, TaskBound keeps running with the
heuristic inferer:
Use optional LLM-assisted scope extraction. The provider is selected by the model id: `claude-*` routes to the Anthropic Messages API (`ANTHROPIC_API_KEY`, with prompt caching on the system prompt), anything else routes to the OpenAI Responses API (`OPENAI_API_KEY`). If the relevant key is missing, the network is unavailable, the call times out (30s), or the response is malformed, TaskBound keeps running with the heuristic inferer and records `scopeSource: llm_fallback` in JSON.

```powershell
# OpenAI:
node dist/index.js review --task "Fix header CSS styling" --scope-llm gpt-4o-mini --repo . --base main --head HEAD --format markdown

# Anthropic:
node dist/index.js review --task "Fix header CSS styling" --scope-llm claude-haiku-4-5-20251001 --repo . --base main --head HEAD --format markdown
```

JSON output:
Expand Down
15 changes: 12 additions & 3 deletions src/scope-infer.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
import type { InferredScope } from './types.js';
import { taskMentionsSensitiveSurface } from './paths.js';

Expand Down Expand Up @@ -91,9 +91,18 @@
}
}

for (const keyword of scope.keywords) {
if (normalizedFile.includes(keyword.toLowerCase())) {
return true;
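// Keyword fallback: a task that says "fix header" should match
// src/components/Header.tsx. We split the path on "/" and "." and require
// the keyword to appear as a substring of a single *segment*.
// NOTE(review): this still matches src/auth/header-injection-fix.ts, because
// the segment "header-injection-fix" contains "header". For keywords that
// contain neither "/" nor "." the result is the same as a substring-anywhere
// check; only keywords that span a separator (e.g. "components/header" or
// "header.tsx") stop matching.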
if (scope.keywords.length > 0) {
const segments = normalizedFile.split(/[/.]/).filter(Boolean);
for (const keyword of scope.keywords) {
const k = keyword.toLowerCase();
if (segments.some((segment) => segment.includes(k))) {
return true;
}
}
}

Expand Down
160 changes: 135 additions & 25 deletions src/scope-resolver.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
import { inferScope } from './scope-infer.js';
import type { InferredScope, ScopeSource } from './types.js';

Expand All @@ -19,7 +19,7 @@
}

try {
const llmScope = await inferScopeWithOpenAI({
const llmScope = await inferScopeWithProvider({
model: options.llmModel.trim(),
task: options.task,
scopeContext: options.scopeContext
Expand All @@ -38,6 +38,96 @@
}
}

// Chooses the LLM backend from the model id. Ids that start with `claude-`
// (compared case-insensitively) go to the Anthropic Messages API; every
// other id goes to the OpenAI Responses API. Both backends resolve to an
// InferredScope, so callers never need to know which one ran.
async function inferScopeWithProvider(options: {
  model: string;
  task: string;
  scopeContext?: string;
}): Promise<InferredScope> {
  const isAnthropicModel = options.model.toLowerCase().startsWith('claude-');
  const backend = isAnthropicModel ? inferScopeWithAnthropic : inferScopeWithOpenAI;
  return backend(options);
}

// System prompt shared by both backends: it is sent as the Anthropic
// `system` block and as the OpenAI `system` input message.
const SCOPE_SYSTEM_PROMPT =
'Extract repository scope signals for a pull request review. Return only the structured scope. Prefer concrete files, directories, extensions, and keywords that describe intended in-scope edits. Be conservative: do not list files unless the task text gives strong evidence for them.';

// JSON Schema for the structured scope the model must return. It is used as
// the Anthropic tool `input_schema` and as the OpenAI `json_schema` format.
// All four fields are required arrays of strings and no other properties
// are allowed.
const SCOPE_SCHEMA = {
type: 'object' as const,
properties: {
explicitPaths: { type: 'array', items: { type: 'string' } },
extensions: { type: 'array', items: { type: 'string' } },
keywords: { type: 'array', items: { type: 'string' } },
directories: { type: 'array', items: { type: 'string' } }
},
required: ['explicitPaths', 'extensions', 'keywords', 'directories'],
additionalProperties: false
} as const;

// Abort an LLM request after 30 seconds (passed to AbortSignal.timeout in callLlm).
const LLM_TIMEOUT_MS = 30_000;
// Largest response body accepted from an LLM backend: 64 KiB. callLlm
// compares this against the response's declared Content-Length.
const LLM_MAX_BODY_BYTES = 64 * 1024;

/**
 * Ask an Anthropic model for the task scope through the Messages API.
 *
 * The request forces a single `report_scope` tool call whose input must
 * follow SCOPE_SCHEMA; the tool input is then normalized into an
 * InferredScope.
 *
 * @param options.model Anthropic model id, sent unchanged.
 * @param options.task Stated task text, sent as `stated_task`.
 * @param options.scopeContext Optional extra context; sent as an empty
 *   string when absent.
 * @returns The normalized scope built from the model's tool input.
 * @throws Error when ANTHROPIC_API_KEY is unset or blank, when the HTTP
 *   status is not OK, or when the response has no usable tool input.
 */
async function inferScopeWithAnthropic(options: {
  model: string;
  task: string;
  scopeContext?: string;
}): Promise<InferredScope> {
  const anthropicKey = process.env.ANTHROPIC_API_KEY?.trim();
  if (!anthropicKey) {
    throw new Error('ANTHROPIC_API_KEY is not set.');
  }

  // The system prompt never changes between calls, so it carries a
  // cache_control marker to make it eligible for prompt caching.
  // NOTE(review): Anthropic only caches prefixes above a minimum token
  // length; a prompt this short may never actually be cached - confirm.
  const systemBlocks = [
    { type: 'text', text: SCOPE_SYSTEM_PROMPT, cache_control: { type: 'ephemeral' } }
  ];

  const userPayload = JSON.stringify({
    stated_task: options.task,
    additional_scope_context: options.scopeContext ?? ''
  });

  const reportScopeTool = {
    name: 'report_scope',
    description: 'Report the inferred scope of the pull request.',
    input_schema: SCOPE_SCHEMA
  };

  const requestBody = JSON.stringify({
    model: options.model,
    max_tokens: 700,
    system: systemBlocks,
    messages: [{ role: 'user', content: userPayload }],
    tools: [reportScopeTool],
    // Force the model to answer through the tool so the output is structured.
    tool_choice: { type: 'tool', name: 'report_scope' }
  });

  const response = await callLlm('https://api.anthropic.com/v1/messages', {
    method: 'POST',
    headers: {
      'x-api-key': anthropicKey,
      'anthropic-version': '2023-06-01',
      'content-type': 'application/json'
    },
    body: requestBody
  });

  if (!response.ok) {
    throw new Error(`Anthropic scope extraction failed with HTTP ${response.status}.`);
  }

  const payload: unknown = await response.json();
  const toolInput = extractAnthropicToolInput(payload);
  if (!toolInput) {
    throw new Error('Anthropic scope extraction returned no tool input.');
  }

  return normalizeLlmScope(toolInput, options);
}

async function inferScopeWithOpenAI(options: {
model: string;
task: string;
Expand All @@ -48,7 +138,7 @@
throw new Error('OPENAI_API_KEY is not set.');
}

const response = await fetch('https://api.openai.com/v1/responses', {
const response = await callLlm('https://api.openai.com/v1/responses', {
method: 'POST',
headers: {
Authorization: `Bearer ${apiKey}`,
Expand All @@ -57,11 +147,7 @@
body: JSON.stringify({
model: options.model,
input: [
{
role: 'system',
content:
'Extract repository scope signals for a pull request review. Return only JSON matching the schema. Prefer concrete files, directories, extensions, and keywords that describe intended in-scope edits.'
},
{ role: 'system', content: SCOPE_SYSTEM_PROMPT },
{
role: 'user',
content: JSON.stringify({
Expand All @@ -75,17 +161,7 @@
type: 'json_schema',
name: 'task_scope',
strict: true,
schema: {
type: 'object',
properties: {
explicitPaths: { type: 'array', items: { type: 'string' } },
extensions: { type: 'array', items: { type: 'string' } },
keywords: { type: 'array', items: { type: 'string' } },
directories: { type: 'array', items: { type: 'string' } }
},
required: ['explicitPaths', 'extensions', 'keywords', 'directories'],
additionalProperties: false
}
schema: SCOPE_SCHEMA
}
},
max_output_tokens: 700
Expand All @@ -103,19 +179,53 @@
}

const parsed: unknown = JSON.parse(outputText);
const partial = isRecord(parsed) ? parsed : {};
const scope = inferScope(options.task, options.scopeContext);
return normalizeLlmScope(parsed, options);
}

/**
 * Shared fetch wrapper used by both LLM backends.
 *
 * - Applies a timeout of LLM_TIMEOUT_MS so a hung LLM call cannot hang the
 *   GitHub Action. The body is read inside this function, so the timeout
 *   covers the download as well as the response headers.
 * - Enforces a hard cap of LLM_MAX_BODY_BYTES on the response body. The
 *   declared Content-Length is checked first as a fast path, but the cap is
 *   also enforced while streaming, because chunked responses carry no
 *   Content-Length and the header cannot be trusted on its own.
 *
 * @param url Endpoint to call.
 * @param init Request options; any `signal` in it is replaced by the timeout signal.
 * @returns A Response with the same status, status text and headers whose
 *   body is fully buffered in memory (at most LLM_MAX_BODY_BYTES bytes).
 * @throws Error when the body exceeds the cap; fetch/abort errors propagate unchanged.
 */
async function callLlm(url: string, init: RequestInit): Promise<Response> {
  const response = await fetch(url, { ...init, signal: AbortSignal.timeout(LLM_TIMEOUT_MS) });

  const contentLengthHeader = response.headers.get('content-length');
  const declaredLength = contentLengthHeader ? Number(contentLengthHeader) : NaN;
  if (Number.isFinite(declaredLength) && declaredLength > LLM_MAX_BODY_BYTES) {
    // Release the connection instead of leaving the body unread.
    await response.body?.cancel().catch(() => undefined);
    throw new Error(`LLM scope extraction response body too large (${declaredLength} bytes).`);
  }

  // Null-body responses (e.g. 204) have nothing to cap or buffer.
  if (!response.body) {
    return response;
  }

  const reader = response.body.getReader();
  const chunks: Uint8Array[] = [];
  let receivedBytes = 0;
  for (;;) {
    const { done, value } = await reader.read();
    if (done) {
      break;
    }
    receivedBytes += value.byteLength;
    if (receivedBytes > LLM_MAX_BODY_BYTES) {
      // Stop downloading as soon as the cap is crossed.
      await reader.cancel().catch(() => undefined);
      throw new Error(
        `LLM scope extraction response body too large (more than ${LLM_MAX_BODY_BYTES} bytes).`
      );
    }
    chunks.push(value);
  }

  const buffered = new Uint8Array(receivedBytes);
  let offset = 0;
  for (const chunk of chunks) {
    buffered.set(chunk, offset);
    offset += chunk.byteLength;
  }

  // The buffered bytes are already decoded, so the transfer-level headers
  // no longer describe the body.
  const headers = new Headers(response.headers);
  headers.delete('content-encoding');
  headers.delete('content-length');

  return new Response(buffered, {
    status: response.status,
    statusText: response.statusText,
    headers
  });
}

/**
 * Turn an untrusted LLM payload into a well-formed InferredScope.
 *
 * Anything that is not a record is treated as an empty object, and each
 * list is read through `stringArray`. Entries are trimmed and blank ones
 * are dropped: an empty keyword would otherwise put every file in scope,
 * because the keyword fallback in isFileInScope tests
 * `segment.includes(keyword)` and `includes('')` is always true.
 *
 * @param partial Raw value returned by the model (parsed JSON or tool input).
 * @param options Task text and optional context, used only to compute the
 *   heuristic `mentionsSensitiveSurfaces` flag via `inferScope`.
 * @returns The normalized scope. Extensions are lower-cased with one leading
 *   dot removed, keywords are lower-cased, and `summary` is always empty.
 */
function normalizeLlmScope(
  partial: unknown,
  options: { task: string; scopeContext?: string }
): InferredScope {
  const record = isRecord(partial) ? partial : {};
  // The sensitive-surface flag comes from the heuristic, never from the model.
  const heuristic = inferScope(options.task, options.scopeContext);

  const nonBlank = (value: unknown): string[] =>
    stringArray(value)
      .map((entry) => entry.trim())
      .filter((entry) => entry.length > 0);

  return {
    explicitPaths: nonBlank(record.explicitPaths),
    extensions: nonBlank(record.extensions)
      .map((extension) => extension.replace(/^\./, '').toLowerCase())
      // A bare "." becomes empty after the dot is stripped; drop it too.
      .filter((extension) => extension.length > 0),
    keywords: nonBlank(record.keywords).map((keyword) => keyword.toLowerCase()),
    directories: nonBlank(record.directories),
    mentionsSensitiveSurfaces: heuristic.mentionsSensitiveSurfaces,
    summary: []
  };
}

/**
 * Pull the tool input out of an Anthropic Messages API response.
 *
 * Returns the `input` of the first `content` block that is a record with
 * `type === 'tool_use'` and a record-valued `input`. Returns `undefined`
 * when the payload is not a record, has no `content` array, or contains no
 * such block.
 */
function extractAnthropicToolInput(value: unknown): unknown {
  if (!isRecord(value)) {
    return undefined;
  }

  const blocks = value.content;
  if (!Array.isArray(blocks)) {
    return undefined;
  }

  const toolUse = blocks.find(
    (block) => isRecord(block) && block.type === 'tool_use' && isRecord(block.input)
  );
  return isRecord(toolUse) ? toolUse.input : undefined;
}

function mergeScopes(base: InferredScope, extracted: InferredScope): InferredScope {
const explicitPaths = unique([...base.explicitPaths, ...extracted.explicitPaths]);
const extensions = unique([...base.extensions, ...extracted.extensions]);
Expand Down
139 changes: 139 additions & 0 deletions test/scope-anthropic-and-keyword.test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import { resolveScope } from '../dist/scope-resolver.js';
import { isFileInScope } from '../dist/scope-infer.js';

// Verifies that a `claude-*` model id is sent to the Anthropic Messages API
// with the expected headers, a cache-marked system prompt and a forced
// `report_scope` tool call, and that the tool input ends up in the scope.
test('resolveScope routes Anthropic models to the Messages API with prompt caching', async () => {
// Save the globals this test patches so the finally block can restore them.
const originalFetch = globalThis.fetch;
const originalApiKey = process.env.ANTHROPIC_API_KEY;
process.env.ANTHROPIC_API_KEY = 'test-anthropic-key';

// Stub fetch: capture the outgoing request and reply with one tool_use block.
let observedRequest;
globalThis.fetch = async (url, init) => {
observedRequest = { url, init };
return new Response(
JSON.stringify({
content: [
{
type: 'tool_use',
name: 'report_scope',
input: {
explicitPaths: ['src/payments/checkout.ts'],
extensions: ['ts'],
keywords: ['checkout'],
directories: ['src/payments']
}
}
]
}),
{ status: 200, headers: { 'content-type': 'application/json' } }
);
};

try {
const resolution = await resolveScope({
task: 'Fix checkout button',
llmModel: 'claude-haiku-4-5-20251001'
});

// Routing and authentication headers.
assert.equal(resolution.source, 'llm');
assert.equal(observedRequest.url, 'https://api.anthropic.com/v1/messages');
assert.equal(observedRequest.init.headers['x-api-key'], 'test-anthropic-key');
assert.equal(observedRequest.init.headers['anthropic-version'], '2023-06-01');

// Request body shape: model id, cache-marked system block, forced tool.
const body = JSON.parse(observedRequest.init.body);
assert.equal(body.model, 'claude-haiku-4-5-20251001');
assert.ok(Array.isArray(body.system), 'system should be array of content blocks');
assert.equal(body.system[0].cache_control?.type, 'ephemeral', 'system prompt should be cached');
assert.equal(body.tool_choice.type, 'tool');
assert.equal(body.tool_choice.name, 'report_scope');

// The path reported by the model is present in the resolved scope.
assert.ok(resolution.scope.explicitPaths.includes('src/payments/checkout.ts'));
} finally {
// Restore fetch and the API key exactly as they were before the test.
globalThis.fetch = originalFetch;
if (originalApiKey === undefined) {
delete process.env.ANTHROPIC_API_KEY;
} else {
process.env.ANTHROPIC_API_KEY = originalApiKey;
}
}
});

// Regression guard: a model id that does not start with `claude-` must
// still be sent to the OpenAI Responses API endpoint.
test('resolveScope still routes non-claude models to OpenAI (regression)', async () => {
// Save the globals this test patches so the finally block can restore them.
const originalFetch = globalThis.fetch;
const originalApiKey = process.env.OPENAI_API_KEY;
process.env.OPENAI_API_KEY = 'test-openai-key';

// Stub fetch: record the URL and reply with an empty but well-formed scope.
let observedUrl;
globalThis.fetch = async (url) => {
observedUrl = url;
return new Response(
JSON.stringify({
output_text: JSON.stringify({ explicitPaths: [], extensions: [], keywords: [], directories: [] })
}),
{ status: 200, headers: { 'content-type': 'application/json' } }
);
};

try {
await resolveScope({ task: 'Fix something', llmModel: 'gpt-5-mini' });
assert.equal(observedUrl, 'https://api.openai.com/v1/responses');
} finally {
// Restore fetch and the API key exactly as they were before the test.
globalThis.fetch = originalFetch;
if (originalApiKey === undefined) {
delete process.env.OPENAI_API_KEY;
} else {
process.env.OPENAI_API_KEY = originalApiKey;
}
}
});

// When the Anthropic call fails (HTTP 503 here), resolveScope must not
// throw: it reports `llm_fallback`, gives a reason, and still returns the
// heuristic scope.
test('LLM failure on Anthropic falls back to heuristic with a reason', async () => {
// Save the globals this test patches so the finally block can restore them.
const originalFetch = globalThis.fetch;
const originalApiKey = process.env.ANTHROPIC_API_KEY;
process.env.ANTHROPIC_API_KEY = 'test-key';

// Stub fetch so every request fails with a 503.
globalThis.fetch = async () => new Response('boom', { status: 503 });

try {
const resolution = await resolveScope({
task: 'Fix header CSS',
llmModel: 'claude-opus-4-7'
});

assert.equal(resolution.source, 'llm_fallback');
// The reason should name the provider or the HTTP status.
assert.match(resolution.fallbackReason, /Anthropic|503/);
// Heuristic still works without the model.
assert.ok(resolution.scope.extensions.includes('css'));
} finally {
// Restore fetch and the API key exactly as they were before the test.
globalThis.fetch = originalFetch;
if (originalApiKey === undefined) {
delete process.env.ANTHROPIC_API_KEY;
} else {
process.env.ANTHROPIC_API_KEY = originalApiKey;
}
}
});

// Exercises the keyword fallback of isFileInScope with a scope that has a
// single keyword and no paths, extensions or directories.
// NOTE(review): no assertion here separates segment matching from the
// previous substring-anywhere check; every positive case has "header" in
// the basename and every negative case lacks it entirely.
test('isFileInScope keyword fallback matches basename segments, not anywhere-in-path', () => {
const scope = {
explicitPaths: [],
extensions: [],
keywords: ['header'],
directories: [],
mentionsSensitiveSurfaces: false,
summary: []
};

// Wanted match: "header" appears in a basename segment.
assert.equal(isFileInScope('src/components/Header.tsx', scope), true);
assert.equal(isFileInScope('src/styles/header.css', scope), true);

// Wanted non-match: "header" does not appear in any path segment, so
// these files must stay out of scope.
assert.equal(isFileInScope('src/auth/login.ts', scope), false);
assert.equal(isFileInScope('docs/migration.md', scope), false);
});