diff --git a/src/app/api/chat/route.ts b/src/app/api/chat/route.ts index dc819c5c..51eb06b2 100644 --- a/src/app/api/chat/route.ts +++ b/src/app/api/chat/route.ts @@ -1,6 +1,6 @@ import { NextRequest } from 'next/server'; import { streamClaude } from '@/lib/claude-client'; -import { addMessage, getMessages, getSession, getSessionSummary, updateSessionTitle, updateSdkSessionId, updateSessionModel, updateSessionProvider, updateSessionProviderId, getSetting, acquireSessionLock, renewSessionLock, releaseSessionLock, setSessionRuntimeStatus, syncSdkTasks } from '@/lib/db'; +import { addMessage, getMessages, getSession, getSessionSummary, updateSessionTitle, updateSdkSessionId, updateSessionModel, updateSessionProvider, updateSessionProviderId, getSetting, acquireSessionLock, renewSessionLock, releaseSessionLock, setSessionRuntimeStatus, syncSdkTasks, cleanupStaleLocks } from '@/lib/db'; import { resolveProvider as resolveProviderUnified } from '@/lib/provider-resolver'; import { notifySessionStart, notifySessionComplete, notifySessionError } from '@/lib/telegram-bot'; import { extractCompletion } from '@/lib/onboarding-completion'; @@ -12,6 +12,8 @@ import { ensureSchedulerRunning } from '@/lib/task-scheduler'; // Start the task scheduler on first API call ensureSchedulerRunning(); +// Clean up any session locks left behind by crashes +try { const cleaned = cleanupStaleLocks(); if (cleaned > 0) console.log(`[chat API] Cleaned up ${cleaned} stale session lock(s)`); } catch { /* best effort */ } import crypto from 'crypto'; import fs from 'fs'; import path from 'path'; @@ -351,13 +353,25 @@ export async function POST(request: NextRequest) { systemPromptLength: finalSystemPrompt?.length || 0, systemPromptFirst200: finalSystemPrompt?.slice(0, 200) || 'none', }); + // Validate working directory exists — stale sessions may reference deleted directories + const sessionCwd = session.sdk_cwd || session.working_directory || undefined; + if (sessionCwd && !fs.existsSync(sessionCwd)) { + console.warn(`[chat API] Working directory does not exist: ${sessionCwd}`); + releaseSessionLock(session_id, lockId); + setSessionRuntimeStatus(session_id, 'idle'); + return new Response( + JSON.stringify({ error: `Working directory no longer exists: ${sessionCwd}`, code: 'INVALID_CWD' }), + { status: 400, headers: { 'Content-Type': 'application/json' } }, + ); + } + const stream = streamClaude({ prompt: content, sessionId: session_id, sdkSessionId: session.sdk_session_id || undefined, model: resolved.upstreamModel || resolved.model || effectiveModel, systemPrompt: finalSystemPrompt, - workingDirectory: session.sdk_cwd || session.working_directory || undefined, + workingDirectory: sessionCwd, abortController, permissionMode, files: fileAttachments, diff --git a/src/app/api/chat/sessions/[id]/route.ts b/src/app/api/chat/sessions/[id]/route.ts index d9322255..7564d721 100644 --- a/src/app/api/chat/sessions/[id]/route.ts +++ b/src/app/api/chat/sessions/[id]/route.ts @@ -1,5 +1,5 @@ import { NextRequest } from 'next/server'; -import { deleteSession, getSession, updateSessionWorkingDirectory, updateSessionTitle, updateSessionMode, updateSessionModel, updateSessionProviderId, clearSessionMessages, updateSdkSessionId, updateSessionPermissionProfile } from '@/lib/db'; +import { deleteSession, getSession, updateSessionWorkingDirectory, updateSessionTitle, updateSessionMode, updateSessionModel, updateSessionProviderId, clearSessionMessages, updateSdkSessionId, updateSessionPermissionProfile, forceReleaseSessionLock } from '@/lib/db'; import { autoApprovePendingForSession } from '@/lib/bridge/permission-broker'; export async function GET( @@ -88,6 +88,9 @@ export async function PATCH( if (body.clear_messages) { clearSessionMessages(id); } + if (body.force_unlock) { + forceReleaseSessionLock(id); + } const updated = getSession(id); return Response.json({ session: updated }); diff --git a/src/lib/agent-loop.ts b/src/lib/agent-loop.ts index 24f689bc..e60bd3cc 100644 --- a/src/lib/agent-loop.ts +++ b/src/lib/agent-loop.ts @@ -15,7 +15,7 @@ import type { SSEEvent, TokenUsage } from '@/types'; import { createModel } from './ai-provider'; import { assembleTools, READ_ONLY_TOOLS } from './agent-tools'; import { reportNativeError } from './error-classifier'; -import { pruneOldToolResults } from './context-pruner'; +import { pruneOldToolResults, estimateTokens } from './context-pruner'; import { emit as emitEvent } from './runtime/event-bus'; import { createCheckpoint } from './file-checkpoint'; import type { PermissionMode } from './permission-checker'; @@ -168,9 +168,25 @@ export function runAgentLoop(options: AgentLoopOptions): ReadableStream sessionModel, }); - // 2. Load conversation history from DB - const { messages: dbMessages } = getMessages(sessionId, { limit: 200, excludeHeartbeatAck: true }); - const historyMessages = buildCoreMessages(dbMessages); + // 2. Load conversation history from DB with token-aware limiting. + // Start with 200 messages, then reduce if the context exceeds the model's window. + // This prevents context overflow that causes the model to degrade and output + // tool calls as text instead of using the tool API. + const contextWindowTokens = context1m ? 1_000_000 : 200_000; + const maxContextTokens = Math.floor(contextWindowTokens * 0.75); // Reserve 25% for response + let msgLimit = 200; + const { messages: dbMessages } = getMessages(sessionId, { limit: msgLimit, excludeHeartbeatAck: true }); + let historyMessages = buildCoreMessages(dbMessages); + { + let estimatedTokens = estimateTokens(historyMessages); + while (estimatedTokens > maxContextTokens && msgLimit > 20) { + msgLimit = Math.max(20, Math.floor(msgLimit * 0.6)); + console.log(`[agent-loop] Context too large (${estimatedTokens} tokens est.), reducing to ${msgLimit} messages`); + const { messages: reducedMsgs } = getMessages(sessionId, { limit: msgLimit, excludeHeartbeatAck: true }); + historyMessages = buildCoreMessages(reducedMsgs); + estimatedTokens = estimateTokens(historyMessages); + } + } // The chat route persists the user message to DB BEFORE calling us, // so for normal messages it's already the last entry in historyMessages. @@ -219,7 +235,8 @@ export function runAgentLoop(options: AgentLoopOptions): ReadableStream onRuntimeStatusChange?.('streaming'); let step = 0; const totalUsage: TokenUsage = { input_tokens: 0, output_tokens: 0 }; - let lastToolNames: string[] = []; // for doom loop detection + let lastToolKey = ''; // for doom loop detection + let doomLoopCount = 0; let messages = historyMessages; while (step < maxSteps) { @@ -419,15 +436,25 @@ export function runAgentLoop(options: AgentLoopOptions): ReadableStream break; } - // Doom loop detection: same tool(s) called 3 times in a row + // Doom loop detection: same tool(s) called N times in a row → break const toolKey = stepToolNames.sort().join(','); - const lastKey = lastToolNames.sort().join(','); - if (toolKey === lastKey) { - const repeatCount = (step > 1) ? DOOM_LOOP_THRESHOLD : 1; - // Simple heuristic: track repeats via a counter we'd need to add - // For now, just detect immediate repeats and break after threshold + if (toolKey === lastToolKey) { + doomLoopCount++; + if (doomLoopCount >= DOOM_LOOP_THRESHOLD) { + console.warn(`[agent-loop] Doom loop detected: "${toolKey}" called ${doomLoopCount + 1} times in a row — breaking`); + controller.enqueue(formatSSE({ + type: 'error', + data: JSON.stringify({ + category: 'DOOM_LOOP', + userMessage: `Agent stopped: tool "${toolKey}" was called ${doomLoopCount + 1} times in a row with no progress.`, + }), + })); + break; + } + } else { + doomLoopCount = 0; } - lastToolNames = stepToolNames; + lastToolKey = toolKey; // Update messages for next iteration. // streamText returns the full message list including our input + model response. diff --git a/src/lib/claude-client.ts b/src/lib/claude-client.ts index 3ae4d048..cb7573da 100644 --- a/src/lib/claude-client.ts +++ b/src/lib/claude-client.ts @@ -300,7 +300,7 @@ function buildFallbackContext(params: { } lines.push(''); - lines.push('(This is a summary of earlier conversation turns for context. Tool calls shown here were already executed — do not repeat them or output their markers as text.)'); + lines.push('(This is a summary of earlier conversation turns for context. Tool calls shown here were already executed — do not repeat them or output their markers as text. NEVER output "[Tool call: ...]" brackets as text. Use the actual tool API to execute tools.)'); for (const msg of selected) { lines.push(`${msg.role === 'user' ? 'Human' : 'Assistant'}: ${msg.content}`); } @@ -1181,9 +1181,9 @@ export function streamClaudeSdk(options: ClaudeStreamOptions): ReadableStream c.type === 'text') - .map((c: { text?: string }) => c.text) + ? (block.content as Array<{ type: string; text?: string }>) + .filter((c) => c.type === 'text') + .map((c) => c.text) .join('\n') : String(block.content ?? ''); diff --git a/src/lib/context-pruner.ts b/src/lib/context-pruner.ts index d3b82e02..b7717873 100644 --- a/src/lib/context-pruner.ts +++ b/src/lib/context-pruner.ts @@ -9,8 +9,7 @@ import type { ModelMessage } from 'ai'; -const RECENT_TURNS_TO_KEEP = 6; // Keep last N messages fully intact -const TRUNCATED_RESULT_MARKER = '[Tool result truncated — see earlier in conversation]'; +const RECENT_TURNS_TO_KEEP = 16; // Keep last N messages fully intact (~8 exchanges) /** * Prune old tool results from message history to reduce token usage. @@ -37,9 +36,14 @@ export function pruneOldToolResults(messages: ModelMessage[]): ModelMessage[] { ...msg, content: (msg.content as Array<{ type: string; [k: string]: unknown }>).map((part) => { if (part.type === 'tool-result') { + const toolName = ('toolName' in part && typeof part.toolName === 'string') ? part.toolName : 'unknown'; + const original = ('output' in part && part.output && typeof part.output === 'object' && 'value' in (part.output as Record)) + ? String((part.output as Record).value) : ''; + const excerpt = original.slice(0, 200); + const marker = `[Pruned ${toolName} result${excerpt ? ': ' + excerpt + (original.length > 200 ? '...' : '') : ''}]`; return { ...part, - output: { type: 'text' as const, value: TRUNCATED_RESULT_MARKER }, + output: { type: 'text' as const, value: marker }, }; } return part; diff --git a/src/lib/db.ts b/src/lib/db.ts index e9bebf40..ce1af918 100644 --- a/src/lib/db.ts +++ b/src/lib/db.ts @@ -1925,6 +1925,38 @@ export function releaseSessionLock(sessionId: string, lockId: string): boolean { return result.changes > 0; } +/** + * Clean up all expired or stale session locks. + * Call on startup to recover from crashes that left locks behind. + */ +export function cleanupStaleLocks(): number { + const db = getDb(); + const now = new Date().toISOString().replace('T', ' ').split('.')[0]; + const result = db.prepare('DELETE FROM session_runtime_locks WHERE expires_at < ?').run(now); + if (result.changes > 0) { + // Reset runtime status for sessions that were left in 'running' state + db.prepare( + "UPDATE chat_sessions SET runtime_status = 'idle', runtime_error = 'Recovered from stale lock' WHERE runtime_status = 'running'" + ).run(); + } + return result.changes; +} + +/** + * Force-release a session lock regardless of lock_id. + * Use when the UI needs to break a stuck session. + */ +export function forceReleaseSessionLock(sessionId: string): boolean { + const db = getDb(); + const result = db.prepare( + 'DELETE FROM session_runtime_locks WHERE session_id = ?' + ).run(sessionId); + if (result.changes > 0) { + setSessionRuntimeStatus(sessionId, 'idle', 'Force-released by user'); + } + return result.changes > 0; +} + /** * Update the runtime status of a session. */ diff --git a/src/lib/message-builder.ts b/src/lib/message-builder.ts index ac6cdb84..1f5ca20e 100644 --- a/src/lib/message-builder.ts +++ b/src/lib/message-builder.ts @@ -67,7 +67,8 @@ export function buildCoreMessages(dbMessages: Message[]): ModelMessage[] { /** * Ensure messages alternate between user and assistant/tool roles. - * Consecutive user messages are merged. Consecutive assistant messages keep the last. + * Consecutive user messages are merged. Consecutive assistant messages are merged + * (preserving all content parts) to avoid silently dropping tool call history. */ function enforceAlternation(messages: ModelMessage[]): ModelMessage[] { if (messages.length <= 1) return messages; @@ -79,11 +80,12 @@ function enforceAlternation(messages: ModelMessage[]): ModelMessage[] { const curr = messages[i]; if (curr.role === prev.role && curr.role === 'user') { - // Merge consecutive user messages, preserving multi-part content result[result.length - 1] = { role: 'user', content: mergeUserContent(prev.content, curr.content) }; } else if (curr.role === prev.role && curr.role === 'assistant') { - // Keep the later assistant message (more recent) - result[result.length - 1] = curr; + result[result.length - 1] = { + role: 'assistant', + content: mergeAssistantContent(prev.content, curr.content), + } as AssistantModelMessage; } else { result.push(curr); } @@ -92,6 +94,51 @@ function enforceAlternation(messages: ModelMessage[]): ModelMessage[] { return result; } +// ── Sanitization ──────────────────────────────────────────────── + +/** + * Detect and clean assistant text blocks that contain fake tool call syntax. + * When the model outputs "(used Bash: {...})" as text instead of making a real + * tool_use API call, those patterns pollute the conversation history and cause + * the model to imitate them on future turns (feedback loop). + * + * Also strips leaked thinking tags like "(antml:thinking>..." which indicate + * protocol confusion from context overload. + */ +function sanitizeAssistantText(text: string): string { + // Strip fake tool call patterns: (used ToolName: {json...}) and [Tool call: Name — ...] + let cleaned = text.replace(/\(used\s+\w+:\s*\{[^}]*\}[^)]*\)/g, ''); + cleaned = cleaned.replace(/\[Tool call:\s+\w+\s*—[^\]]*\]/g, ''); + // Strip leaked thinking tags: (antml:thinking>...) or + cleaned = cleaned.replace(/\(antml:thinking>[\s\S]*?(?:<\/antml:thinking>|\))/g, ''); + cleaned = cleaned.replace(/<\/?antml:thinking>/g, ''); + // Collapse multiple blank lines left by stripping + cleaned = cleaned.replace(/\n{3,}/g, '\n\n').trim(); + return cleaned; +} + +/** + * Check if a text block consists entirely (or almost entirely) of fake tool calls. + * If so, replace with a brief marker instead of sending garbage to the model. + * Only sanitize if >80% of the content appears to be fake tool calls. + */ +function cleanAssistantTextBlock(text: string): string { + const originalLength = text.trim().length; + if (originalLength < 50) return text; // Don't sanitize short texts + + const sanitized = sanitizeAssistantText(text); + const removedLength = originalLength - sanitized.length; + const removalRatio = removedLength / originalLength; + + // Only apply sanitization if >80% was fake tool calls + if (removalRatio > 0.8) { + return sanitized || '[Tool calls in this turn were not executed]'; + } + + // Otherwise return original text untouched + return text; +} + // ── Internal ──────────────────────────────────────────────────── /** @@ -111,6 +158,19 @@ function mergeUserContent(a: any, b: any): any { return merged; } +/** + * Merge two assistant message contents, preserving all content parts (text, tool-call, etc.). + */ +// eslint-disable-next-line @typescript-eslint/no-explicit-any +function mergeAssistantContent(a: any, b: any): Exclude { + const toArray = (c: unknown): Exclude => { + if (typeof c === 'string') return c.trim() ? [{ type: 'text' as const, text: c }] : []; + if (Array.isArray(c)) return c as Exclude; + return []; + }; + return [...toArray(a), ...toArray(b)]; +} + /** * Parse user message content, rebuilding file attachments as multi-modal content parts. * File metadata is stored as `text`. @@ -229,7 +289,8 @@ function convertAssistantBlocks(blocks: MessageContentBlock[]): ModelMessage[] { flushToolResults(); } if (block.text.trim()) { - assistantParts.push({ type: 'text', text: block.text }); + const cleaned = cleanAssistantTextBlock(block.text); + assistantParts.push({ type: 'text', text: cleaned }); } break; diff --git a/src/lib/message-normalizer.ts b/src/lib/message-normalizer.ts index 00a113a3..215c9da1 100644 --- a/src/lib/message-normalizer.ts +++ b/src/lib/message-normalizer.ts @@ -43,11 +43,10 @@ export function normalizeMessageContent(role: string, raw: string): string { } else if (b.type === 'text' && b.text) { parts.push(b.text); } else if (b.type === 'tool_use') { - // Keep a brief summary of tool usage (name + truncated input) const name = b.name || 'unknown_tool'; const inputStr = typeof b.input === 'object' ? JSON.stringify(b.input) : String(b.input || ''); const truncated = inputStr.length > 80 ? inputStr.slice(0, 80) + '...' : inputStr; - parts.push(`(used ${name}: ${truncated})`); + parts.push(`[Tool call: ${name} — ${truncated}]`); } // tool_result blocks are skipped — the summary above captures intent } diff --git a/src/lib/tools/bash.ts b/src/lib/tools/bash.ts index 5c3f9b52..3044bc4a 100644 --- a/src/lib/tools/bash.ts +++ b/src/lib/tools/bash.ts @@ -5,6 +5,7 @@ import { tool } from 'ai'; import { z } from 'zod'; import { spawn } from 'child_process'; +import fs from 'fs'; import type { ToolContext } from './index'; const MAX_OUTPUT_BYTES = 1024 * 1024; // 1MB @@ -24,14 +25,21 @@ export function createBashTool(ctx: ToolContext) { }), execute: async ({ command, timeout }, { abortSignal }) => { const timeoutMs = timeout ?? DEFAULT_TIMEOUT_MS; + const cwd = ctx.workingDirectory; + + // Validate working directory exists before spawning + if (!fs.existsSync(cwd)) { + return `Error: working directory does not exist: ${cwd}`; + } return new Promise((resolve) => { const chunks: Buffer[] = []; let totalBytes = 0; let truncated = false; + let killTimer: ReturnType | null = null; const proc = spawn('bash', ['-c', command], { - cwd: ctx.workingDirectory, + cwd, env: { ...process.env, TERM: 'dumb' }, stdio: ['ignore', 'pipe', 'pipe'], timeout: timeoutMs, @@ -48,22 +56,41 @@ export function createBashTool(ctx: ToolContext) { } }; - proc.stdout?.on('data', collect); - proc.stderr?.on('data', collect); + // Stream tool output to the client as it arrives + const streamOutput = (data: Buffer) => { + if (ctx.emitSSE) { + try { + ctx.emitSSE({ type: 'tool_output', data: data.toString('utf-8') }); + } catch { /* stream closed */ } + } + }; + + proc.stdout?.on('data', (data: Buffer) => { + collect(data); + streamOutput(data); + }); + proc.stderr?.on('data', (data: Buffer) => { + collect(data); + streamOutput(data); + }); // Handle abort const onAbort = () => { proc.kill('SIGTERM'); - setTimeout(() => proc.kill('SIGKILL'), 3000); + killTimer = setTimeout(() => { + try { proc.kill('SIGKILL'); } catch { /* already dead */ } + }, 3000); }; abortSignal?.addEventListener('abort', onAbort, { once: true }); proc.on('close', (code, signal) => { abortSignal?.removeEventListener('abort', onAbort); + if (killTimer) clearTimeout(killTimer); let output = Buffer.concat(chunks).toString('utf-8'); if (truncated) { - output += '\n\n[Output truncated — exceeded 1MB limit]'; + const droppedBytes = totalBytes - MAX_OUTPUT_BYTES; + output += `\n\n[Output truncated at 1MB — ${droppedBytes} bytes dropped]`; } if (signal === 'SIGTERM' || signal === 'SIGKILL') { @@ -78,6 +105,7 @@ export function createBashTool(ctx: ToolContext) { }); proc.on('error', (err) => { + if (killTimer) clearTimeout(killTimer); resolve(`Error executing command: ${err.message}`); }); });