Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions src/app/api/chat/route.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { NextRequest } from 'next/server';
import { streamClaude } from '@/lib/claude-client';
import { addMessage, getMessages, getSession, getSessionSummary, updateSessionTitle, updateSdkSessionId, updateSessionModel, updateSessionProvider, updateSessionProviderId, getSetting, acquireSessionLock, renewSessionLock, releaseSessionLock, setSessionRuntimeStatus, syncSdkTasks } from '@/lib/db';
import { addMessage, getMessages, getSession, getSessionSummary, updateSessionTitle, updateSdkSessionId, updateSessionModel, updateSessionProvider, updateSessionProviderId, getSetting, acquireSessionLock, renewSessionLock, releaseSessionLock, setSessionRuntimeStatus, syncSdkTasks, cleanupStaleLocks } from '@/lib/db';
import { resolveProvider as resolveProviderUnified } from '@/lib/provider-resolver';
import { notifySessionStart, notifySessionComplete, notifySessionError } from '@/lib/telegram-bot';
import { extractCompletion } from '@/lib/onboarding-completion';
Expand All @@ -12,6 +12,8 @@ import { ensureSchedulerRunning } from '@/lib/task-scheduler';

// Start the task scheduler on first API call
ensureSchedulerRunning();
// Clean up any session locks left behind by crashes
try { const cleaned = cleanupStaleLocks(); if (cleaned > 0) console.log(`[chat API] Cleaned up ${cleaned} stale session lock(s)`); } catch { /* best effort */ }
import crypto from 'crypto';
import fs from 'fs';
import path from 'path';
Expand Down Expand Up @@ -351,13 +353,25 @@ export async function POST(request: NextRequest) {
systemPromptLength: finalSystemPrompt?.length || 0,
systemPromptFirst200: finalSystemPrompt?.slice(0, 200) || 'none',
});
// Validate working directory exists — stale sessions may reference deleted directories
const sessionCwd = session.sdk_cwd || session.working_directory || undefined;
if (sessionCwd && !fs.existsSync(sessionCwd)) {
console.warn(`[chat API] Working directory does not exist: ${sessionCwd}`);
releaseSessionLock(session_id, lockId);
setSessionRuntimeStatus(session_id, 'idle');
return new Response(
JSON.stringify({ error: `Working directory no longer exists: ${sessionCwd}`, code: 'INVALID_CWD' }),
{ status: 400, headers: { 'Content-Type': 'application/json' } },
);
}

const stream = streamClaude({
prompt: content,
sessionId: session_id,
sdkSessionId: session.sdk_session_id || undefined,
model: resolved.upstreamModel || resolved.model || effectiveModel,
systemPrompt: finalSystemPrompt,
workingDirectory: session.sdk_cwd || session.working_directory || undefined,
workingDirectory: sessionCwd,
abortController,
permissionMode,
files: fileAttachments,
Expand Down
5 changes: 4 additions & 1 deletion src/app/api/chat/sessions/[id]/route.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { NextRequest } from 'next/server';
import { deleteSession, getSession, updateSessionWorkingDirectory, updateSessionTitle, updateSessionMode, updateSessionModel, updateSessionProviderId, clearSessionMessages, updateSdkSessionId, updateSessionPermissionProfile } from '@/lib/db';
import { deleteSession, getSession, updateSessionWorkingDirectory, updateSessionTitle, updateSessionMode, updateSessionModel, updateSessionProviderId, clearSessionMessages, updateSdkSessionId, updateSessionPermissionProfile, forceReleaseSessionLock } from '@/lib/db';
import { autoApprovePendingForSession } from '@/lib/bridge/permission-broker';

export async function GET(
Expand Down Expand Up @@ -88,6 +88,9 @@ export async function PATCH(
if (body.clear_messages) {
clearSessionMessages(id);
}
if (body.force_unlock) {
forceReleaseSessionLock(id);
}

const updated = getSession(id);
return Response.json({ session: updated });
Expand Down
51 changes: 39 additions & 12 deletions src/lib/agent-loop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import type { SSEEvent, TokenUsage } from '@/types';
import { createModel } from './ai-provider';
import { assembleTools, READ_ONLY_TOOLS } from './agent-tools';
import { reportNativeError } from './error-classifier';
import { pruneOldToolResults } from './context-pruner';
import { pruneOldToolResults, estimateTokens } from './context-pruner';
import { emit as emitEvent } from './runtime/event-bus';
import { createCheckpoint } from './file-checkpoint';
import type { PermissionMode } from './permission-checker';
Expand Down Expand Up @@ -168,9 +168,25 @@ export function runAgentLoop(options: AgentLoopOptions): ReadableStream<string>
sessionModel,
});

// 2. Load conversation history from DB
const { messages: dbMessages } = getMessages(sessionId, { limit: 200, excludeHeartbeatAck: true });
const historyMessages = buildCoreMessages(dbMessages);
// 2. Load conversation history from DB with token-aware limiting.
// Start with 200 messages, then reduce if the context exceeds the model's window.
// This prevents context overflow that causes the model to degrade and output
// tool calls as text instead of using the tool API.
const contextWindowTokens = context1m ? 1_000_000 : 200_000;
const maxContextTokens = Math.floor(contextWindowTokens * 0.75); // Reserve 25% for response
let msgLimit = 200;
const { messages: dbMessages } = getMessages(sessionId, { limit: msgLimit, excludeHeartbeatAck: true });
let historyMessages = buildCoreMessages(dbMessages);
{
let estimatedTokens = estimateTokens(historyMessages);
while (estimatedTokens > maxContextTokens && msgLimit > 20) {
msgLimit = Math.max(20, Math.floor(msgLimit * 0.6));
console.log(`[agent-loop] Context too large (${estimatedTokens} tokens est.), reducing to ${msgLimit} messages`);
const { messages: reducedMsgs } = getMessages(sessionId, { limit: msgLimit, excludeHeartbeatAck: true });
historyMessages = buildCoreMessages(reducedMsgs);
estimatedTokens = estimateTokens(historyMessages);
}
}

// The chat route persists the user message to DB BEFORE calling us,
// so for normal messages it's already the last entry in historyMessages.
Expand Down Expand Up @@ -219,7 +235,8 @@ export function runAgentLoop(options: AgentLoopOptions): ReadableStream<string>
onRuntimeStatusChange?.('streaming');
let step = 0;
const totalUsage: TokenUsage = { input_tokens: 0, output_tokens: 0 };
let lastToolNames: string[] = []; // for doom loop detection
let lastToolKey = ''; // for doom loop detection
let doomLoopCount = 0;
let messages = historyMessages;

while (step < maxSteps) {
Expand Down Expand Up @@ -419,15 +436,25 @@ export function runAgentLoop(options: AgentLoopOptions): ReadableStream<string>
break;
}

// Doom loop detection: same tool(s) called 3 times in a row
// Doom loop detection: same tool(s) called N times in a row → break
const toolKey = stepToolNames.sort().join(',');
const lastKey = lastToolNames.sort().join(',');
if (toolKey === lastKey) {
const repeatCount = (step > 1) ? DOOM_LOOP_THRESHOLD : 1;
// Simple heuristic: track repeats via a counter we'd need to add
// For now, just detect immediate repeats and break after threshold
if (toolKey === lastToolKey) {
doomLoopCount++;
if (doomLoopCount >= DOOM_LOOP_THRESHOLD) {
console.warn(`[agent-loop] Doom loop detected: "${toolKey}" called ${doomLoopCount + 1} times in a row — breaking`);
controller.enqueue(formatSSE({
type: 'error',
data: JSON.stringify({
category: 'DOOM_LOOP',
userMessage: `Agent stopped: tool "${toolKey}" was called ${doomLoopCount + 1} times in a row with no progress.`,
}),
}));
break;
}
} else {
doomLoopCount = 0;
}
lastToolNames = stepToolNames;
lastToolKey = toolKey;

// Update messages for next iteration.
// streamText returns the full message list including our input + model response.
Expand Down
8 changes: 4 additions & 4 deletions src/lib/claude-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ function buildFallbackContext(params: {
}

lines.push('<conversation_history>');
lines.push('(This is a summary of earlier conversation turns for context. Tool calls shown here were already executed — do not repeat them or output their markers as text.)');
lines.push('(This is a summary of earlier conversation turns for context. Tool calls shown here were already executed — do not repeat them or output their markers as text. NEVER output "[Tool call: ...]" brackets as text. Use the actual tool API to execute tools.)');
for (const msg of selected) {
lines.push(`${msg.role === 'user' ? 'Human' : 'Assistant'}: ${msg.content}`);
}
Expand Down Expand Up @@ -1181,9 +1181,9 @@ export function streamClaudeSdk(options: ClaudeStreamOptions): ReadableStream<st
let resultContent = typeof block.content === 'string'
? block.content
: Array.isArray(block.content)
? block.content
.filter((c: { type: string }) => c.type === 'text')
.map((c: { text?: string }) => c.text)
? (block.content as Array<{ type: string; text?: string }>)
.filter((c) => c.type === 'text')
.map((c) => c.text)
.join('\n')
: String(block.content ?? '');

Expand Down
10 changes: 7 additions & 3 deletions src/lib/context-pruner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@

import type { ModelMessage } from 'ai';

const RECENT_TURNS_TO_KEEP = 6; // Keep last N messages fully intact
const TRUNCATED_RESULT_MARKER = '[Tool result truncated — see earlier in conversation]';
const RECENT_TURNS_TO_KEEP = 16; // Keep last N messages fully intact (~8 exchanges)

/**
* Prune old tool results from message history to reduce token usage.
Expand All @@ -37,9 +36,14 @@ export function pruneOldToolResults(messages: ModelMessage[]): ModelMessage[] {
...msg,
content: (msg.content as Array<{ type: string; [k: string]: unknown }>).map((part) => {
if (part.type === 'tool-result') {
const toolName = ('toolName' in part && typeof part.toolName === 'string') ? part.toolName : 'unknown';
const original = ('output' in part && part.output && typeof part.output === 'object' && 'value' in (part.output as Record<string, unknown>))
? String((part.output as Record<string, string>).value) : '';
const excerpt = original.slice(0, 200);
const marker = `[Pruned ${toolName} result${excerpt ? ': ' + excerpt + (original.length > 200 ? '...' : '') : ''}]`;
return {
...part,
output: { type: 'text' as const, value: TRUNCATED_RESULT_MARKER },
output: { type: 'text' as const, value: marker },
};
}
return part;
Expand Down
32 changes: 32 additions & 0 deletions src/lib/db.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1925,6 +1925,38 @@ export function releaseSessionLock(sessionId: string, lockId: string): boolean {
return result.changes > 0;
}

/**
* Clean up all expired or stale session locks.
* Call on startup to recover from crashes that left locks behind.
*/
export function cleanupStaleLocks(): number {
const db = getDb();
const now = new Date().toISOString().replace('T', ' ').split('.')[0];
const result = db.prepare('DELETE FROM session_runtime_locks WHERE expires_at < ?').run(now);
if (result.changes > 0) {
// Reset runtime status for sessions that were left in 'running' state
db.prepare(
"UPDATE chat_sessions SET runtime_status = 'idle', runtime_error = 'Recovered from stale lock' WHERE runtime_status = 'running'"
).run();
}
return result.changes;
}

/**
 * Force-release a session lock regardless of lock_id.
 * Use when the UI needs to break a stuck session.
 *
 * @param sessionId - the session whose lock should be broken.
 * @returns true when a lock row was actually removed.
 */
export function forceReleaseSessionLock(sessionId: string): boolean {
  const db = getDb();
  const { changes } = db
    .prepare('DELETE FROM session_runtime_locks WHERE session_id = ?')
    .run(sessionId);
  const released = changes > 0;
  // Only flip the runtime status when a lock was actually broken; otherwise
  // leave the session's state untouched.
  if (released) {
    setSessionRuntimeStatus(sessionId, 'idle', 'Force-released by user');
  }
  return released;
}

/**
* Update the runtime status of a session.
*/
Expand Down
71 changes: 66 additions & 5 deletions src/lib/message-builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ export function buildCoreMessages(dbMessages: Message[]): ModelMessage[] {

/**
* Ensure messages alternate between user and assistant/tool roles.
* Consecutive user messages are merged. Consecutive assistant messages keep the last.
* Consecutive user messages are merged. Consecutive assistant messages are merged
* (preserving all content parts) to avoid silently dropping tool call history.
*/
function enforceAlternation(messages: ModelMessage[]): ModelMessage[] {
if (messages.length <= 1) return messages;
Expand All @@ -79,11 +80,12 @@ function enforceAlternation(messages: ModelMessage[]): ModelMessage[] {
const curr = messages[i];

if (curr.role === prev.role && curr.role === 'user') {
// Merge consecutive user messages, preserving multi-part content
result[result.length - 1] = { role: 'user', content: mergeUserContent(prev.content, curr.content) };
} else if (curr.role === prev.role && curr.role === 'assistant') {
// Keep the later assistant message (more recent)
result[result.length - 1] = curr;
result[result.length - 1] = {
role: 'assistant',
content: mergeAssistantContent(prev.content, curr.content),
} as AssistantModelMessage;
} else {
result.push(curr);
}
Expand All @@ -92,6 +94,51 @@ function enforceAlternation(messages: ModelMessage[]): ModelMessage[] {
return result;
}

// ── Sanitization ────────────────────────────────────────────────

/**
 * Detect and clean assistant text blocks that contain fake tool call syntax.
 * When the model outputs "(used Bash: {...})" as text instead of making a real
 * tool_use API call, those patterns pollute the conversation history and cause
 * the model to imitate them on future turns (feedback loop).
 *
 * Also strips leaked thinking tags like "(antml:thinking>..." which indicate
 * protocol confusion from context overload.
 *
 * @param text - raw assistant text block.
 * @returns the text with fake tool-call markers removed and blank runs collapsed.
 */
function sanitizeAssistantText(text: string): string {
  // Strip "(used ToolName: ...)" markers. The normalizer truncates tool input
  // at 80 chars, so the JSON may be cut mid-object with no closing brace —
  // consume up to the marker's closing paren instead of requiring "{...}".
  let cleaned = text.replace(/\(used\s+\w+:[^)]*\)/g, '');
  // Strip "[Tool call: Name...]" markers. The normalizer emits the name and
  // truncated input with no separator ("[Tool call: Bash{...}]"), so do not
  // require the em-dash; still matches the "[Tool call: Name — ...]" form.
  cleaned = cleaned.replace(/\[Tool call:\s*\w+[^\]]*\]/g, '');
  // Strip leaked thinking tags: (antml:thinking>...) or </thinking>
  cleaned = cleaned.replace(/\(antml:thinking>[\s\S]*?(?:<\/antml:thinking>|\))/g, '');
  cleaned = cleaned.replace(/<\/?antml:thinking>/g, '');
  // Collapse multiple blank lines left by stripping
  cleaned = cleaned.replace(/\n{3,}/g, '\n\n').trim();
  return cleaned;
}

/**
 * Decide whether an assistant text block should be replaced by its sanitized
 * form. Sanitization is applied only when the block is almost entirely
 * (>80% by length) fake tool-call markup; otherwise the original text passes
 * through untouched so legitimate prose is never mangled.
 *
 * @param text - raw assistant text block.
 * @returns the original text, the sanitized remainder, or a brief marker.
 */
function cleanAssistantTextBlock(text: string): string {
  const trimmedLength = text.trim().length;
  // Short texts are left alone — too little signal to judge safely.
  if (trimmedLength < 50) return text;

  const stripped = sanitizeAssistantText(text);
  const fractionRemoved = (trimmedLength - stripped.length) / trimmedLength;

  // Mostly real content: keep the original byte-for-byte.
  if (fractionRemoved <= 0.8) return text;

  // Nearly everything was fake tool-call syntax: substitute the cleaned
  // remainder, or a brief marker when nothing survives.
  return stripped || '[Tool calls in this turn were not executed]';
}

// ── Internal ────────────────────────────────────────────────────

/**
Expand All @@ -111,6 +158,19 @@ function mergeUserContent(a: any, b: any): any {
return merged;
}

/**
 * Merge two assistant message contents into a single parts array, preserving
 * every content part (text, tool-call, etc.) from both inputs in order.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function mergeAssistantContent(a: any, b: any): Exclude<AssistantContent, string> {
  // Normalize either side to a parts array: a plain string becomes a single
  // text part (dropped when blank), an array passes through unchanged, and
  // anything else contributes nothing.
  const asParts = (content: unknown): Exclude<AssistantContent, string> => {
    if (typeof content === 'string') {
      return content.trim() === '' ? [] : [{ type: 'text' as const, text: content }];
    }
    return Array.isArray(content) ? (content as Exclude<AssistantContent, string>) : [];
  };
  return [...asParts(a), ...asParts(b)];
}

/**
* Parse user message content, rebuilding file attachments as multi-modal content parts.
* File metadata is stored as `<!--files:[{id,name,type,size,filePath}]-->text`.
Expand Down Expand Up @@ -229,7 +289,8 @@ function convertAssistantBlocks(blocks: MessageContentBlock[]): ModelMessage[] {
flushToolResults();
}
if (block.text.trim()) {
assistantParts.push({ type: 'text', text: block.text });
const cleaned = cleanAssistantTextBlock(block.text);
assistantParts.push({ type: 'text', text: cleaned });
}
break;

Expand Down
3 changes: 1 addition & 2 deletions src/lib/message-normalizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,10 @@ export function normalizeMessageContent(role: string, raw: string): string {
} else if (b.type === 'text' && b.text) {
parts.push(b.text);
} else if (b.type === 'tool_use') {
// Keep a brief summary of tool usage (name + truncated input)
const name = b.name || 'unknown_tool';
const inputStr = typeof b.input === 'object' ? JSON.stringify(b.input) : String(b.input || '');
const truncated = inputStr.length > 80 ? inputStr.slice(0, 80) + '...' : inputStr;
parts.push(`(used ${name}: ${truncated})`);
parts.push(`[Tool call: ${name}${truncated}]`);
}
// tool_result blocks are skipped — the summary above captures intent
}
Expand Down
Loading