From 08da137ddc6ea94e31f47b9eb71d6c1cfc75a1f8 Mon Sep 17 00:00:00 2001
From: best <hczhang@linux.com>
Date: Mon, 23 Feb 2026 12:21:17 +0800
Subject: [PATCH] feat(ai): auto-strip <think> reasoning tags in OpenAI adapter

Models like DeepSeek emit <think>...</think> blocks containing
chain-of-thought reasoning. This commit strips those tags from
response text and exposes the reasoning content separately:

- Add stripThinkTags() utility for non-streaming responses
- Add optional `reasoning` field to ModelResponse type
- Add ReasoningDelta stream event type for streaming responses
- Handle edge cases: multiple blocks, unclosed tags, case-insensitive tags
- 20 new unit tests covering utility, generate, and stream paths

Closes #45
---
 .../ai/__tests__/strip-think-tags.test.ts     | 363 ++++++++++++++++++
 packages/ai/src/adapters/openai-adapter.ts    | 163 +++++++-
 packages/ai/src/index.ts                      |   3 +
 packages/ai/src/stream.ts                     |   8 +-
 packages/ai/src/types.ts                      |   2 +
 5 files changed, 535 insertions(+), 4 deletions(-)
 create mode 100644 packages/ai/__tests__/strip-think-tags.test.ts
diff --git a/packages/ai/__tests__/strip-think-tags.test.ts b/packages/ai/__tests__/strip-think-tags.test.ts
new file mode 100644
index 0000000..9cbf19e
--- /dev/null
+++ b/packages/ai/__tests__/strip-think-tags.test.ts
@@ -0,0 +1,363 @@
+/**
+ * Tests for <think> tag stripping in the OpenAI adapter.
+ *
+ * Covers:
+ * - stripThinkTags() utility (single/multiple blocks, unclosed tags, edge cases)
+ * - doGenerate() integration (response.text stripped, response.reasoning populated)
+ * - parseSSEStream() integration (reasoning_delta events emitted for think content)
+ */
+
+import { describe, it, expect, vi, afterEach } from "vitest";
+import {
+  stripThinkTags,
+  OpenAIAdapter,
+} from "../src/adapters/openai-adapter.js";
+import type { ModelCapabilities } from "../src/types.js";
+import type { StreamEvent } from "../src/stream.js";
+
+// ---------------------------------------------------------------------------
+// stripThinkTags unit tests
+// ---------------------------------------------------------------------------
+
+describe("stripThinkTags", () => {
+  it("returns text unchanged when no think tags present", () => {
+    const result = stripThinkTags("Hello, world!");
+    expect(result.text).toBe("Hello, world!");
+    expect(result.reasoning).toBeNull();
+  });
+
+  it("strips a single <think> block", () => {
+    const input = "<think>Let me reason about this.</think>The answer is 42.";
+    const result = stripThinkTags(input);
+    expect(result.text).toBe("The answer is 42.");
+    expect(result.reasoning).toBe("Let me reason about this.");
+  });
+
+  it("strips multiple <think> blocks", () => {
+    const input =
+      "<think>First thought.</think>Hello <think>Second thought.</think>world!";
+    const result = stripThinkTags(input);
+    expect(result.text).toBe("Hello world!");
+    expect(result.reasoning).toBe("First thought.\n\nSecond thought.");
+  });
+
+  it("handles unclosed <think> tag", () => {
+    const input = "Some text<think>unclosed reasoning";
+    const result = stripThinkTags(input);
+    expect(result.text).toBe("Some text");
+    expect(result.reasoning).toBe("unclosed reasoning");
+  });
+
+  it("handles case-insensitive tags", () => {
+    const input = "<THINK>Reasoning here.</THINK>The answer.";
+    const result = stripThinkTags(input);
+    expect(result.text).toBe("The answer.");
+    expect(result.reasoning).toBe("Reasoning here.");
+  });
+
+  it("handles mixed case tags", () => {
+    const input = "<Think>Reasoning.</Think>Output.";
+    const result = stripThinkTags(input);
+    expect(result.text).toBe("Output.");
+    expect(result.reasoning).toBe("Reasoning.");
+  });
+
+  it("handles multiline think content", () => {
+    const input = "<think>\nStep 1: Do X\nStep 2: Do Y\n</think>\nFinal answer.";
+    const result = stripThinkTags(input);
+    expect(result.text).toBe("Final answer.");
+    expect(result.reasoning).toBe("Step 1: Do X\nStep 2: Do Y");
+  });
+
+  it("handles empty think block", () => {
+    const input = "<think></think>Just text.";
+    const result = stripThinkTags(input);
+    expect(result.text).toBe("Just text.");
+    expect(result.reasoning).toBeNull();
+  });
+
+  it("handles think block with only whitespace", () => {
+    const input = "<think>   \n  </think>Answer.";
+    const result = stripThinkTags(input);
+    expect(result.text).toBe("Answer.");
+    expect(result.reasoning).toBeNull();
+  });
+
+  it("returns empty text when everything is inside think tags", () => {
+    const input = "<think>All reasoning, no output.</think>";
+    const result = stripThinkTags(input);
+    expect(result.text).toBe("");
+    expect(result.reasoning).toBe("All reasoning, no output.");
+  });
+
+  it("handles think block at the end of text", () => {
+    const input = "The answer is 42.<think>I computed this.</think>";
+    const result = stripThinkTags(input);
+    expect(result.text).toBe("The answer is 42.");
+    expect(result.reasoning).toBe("I computed this.");
+  });
+
+  it("handles empty string input", () => {
+    const result = stripThinkTags("");
+    expect(result.text).toBe("");
+    expect(result.reasoning).toBeNull();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// doGenerate integration — think tags stripped from response
+// ---------------------------------------------------------------------------
+
+/** Minimal concrete OpenAIAdapter for testing. */
+class TestOpenAIProvider extends OpenAIAdapter {
+  readonly name = "test-openai";
+  readonly capabilities: ModelCapabilities = {
+    streaming: true,
+    toolCalling: true,
+    structuredOutput: false,
+    systemMessages: true,
+    vision: false,
+  };
+
+  protected getDefaultBaseURL(): string {
+    return "https://api.test.com/v1";
+  }
+
+  protected requiresApiKey(): boolean {
+    return false;
+  }
+}
+
+describe("OpenAIAdapter.generate — think tag stripping", () => {
+  let fetchSpy: ReturnType<typeof vi.spyOn>;
+
+  afterEach(() => {
+    fetchSpy?.mockRestore();
+  });
+
+  function mockResponse(content: string | null) {
+    fetchSpy = vi.spyOn(globalThis, "fetch").mockResolvedValue(
+      new Response(
+        JSON.stringify({
+          id: "test",
+          choices: [
+            {
+              message: { role: "assistant", content },
+              finish_reason: "stop",
+            },
+          ],
+          usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
+        }),
+      ),
+    );
+  }
+
+  it("strips think tags and populates reasoning field", async () => {
+    const provider = new TestOpenAIProvider();
+    mockResponse("<think>Internal reasoning.</think>The answer is 42.");
+
+    const result = await provider.generate(
+      [{ role: "user", content: "test" }],
+      { modelName: "test-model" },
+    );
+
+    expect(result.text).toBe("The answer is 42.");
+    expect(result.reasoning).toBe("Internal reasoning.");
+  });
+
+  it("does not include reasoning field when no think tags", async () => {
+    const provider = new TestOpenAIProvider();
+    mockResponse("No reasoning here.");
+
+    const result = await provider.generate(
+      [{ role: "user", content: "test" }],
+      { modelName: "test-model" },
+    );
+
+    expect(result.text).toBe("No reasoning here.");
+    expect(result.reasoning).toBeUndefined();
+  });
+
+  it("handles null content from API", async () => {
+    const provider = new TestOpenAIProvider();
+    mockResponse(null);
+
+    const result = await provider.generate(
+      [{ role: "user", content: "test" }],
+      { modelName: "test-model" },
+    );
+
+    expect(result.text).toBeNull();
+    expect(result.reasoning).toBeUndefined();
+  });
+
+  it("sets text to null when all content is in think tags", async () => {
+    const provider = new TestOpenAIProvider();
+    mockResponse("<think>Only reasoning.</think>");
+
+    const result = await provider.generate(
+      [{ role: "user", content: "test" }],
+      { modelName: "test-model" },
+    );
+
+    expect(result.text).toBeNull();
+    expect(result.reasoning).toBe("Only reasoning.");
+  });
+
+  it("strips think tags in generateWithTools too", async () => {
+    const provider = new TestOpenAIProvider();
+    mockResponse("<think>Thinking about tools.</think>Use the calculator.");
+
+    const result = await provider.generateWithTools(
+      [{ role: "user", content: "test" }],
+      [
+        {
+          name: "calc",
+          description: "Calculator",
+          parameters: { type: "object" },
+        },
+      ],
+      { modelName: "test-model" },
+    );
+
+    expect(result.text).toBe("Use the calculator.");
+    expect(result.reasoning).toBe("Thinking about tools.");
+  });
+});
+
+// ---------------------------------------------------------------------------
+// parseSSEStream integration — think tags routed to reasoning_delta
+// ---------------------------------------------------------------------------
+
+describe("OpenAIAdapter.stream — think tag stripping", () => {
+  let fetchSpy: ReturnType<typeof vi.spyOn>;
+
+  afterEach(() => {
+    fetchSpy?.mockRestore();
+  });
+
+  function makeSSEChunk(content: string) {
+    return `data: ${JSON.stringify({
+      choices: [{ delta: { content }, finish_reason: null }],
+    })}\n\n`;
+  }
+
+  function makeSSEDone() {
+    return "data: [DONE]\n\n";
+  }
+
+  function mockStreamResponse(chunks: string[]) {
+    const encoder = new TextEncoder();
+    const stream = new ReadableStream({
+      start(controller) {
+        for (const chunk of chunks) {
+          controller.enqueue(encoder.encode(chunk));
+        }
+        controller.close();
+      },
+    });
+
+    fetchSpy = vi.spyOn(globalThis, "fetch").mockResolvedValue(
+      new Response(stream, {
+        status: 200,
+        headers: { "Content-Type": "text/event-stream" },
+      }),
+    );
+  }
+
+  it("emits reasoning_delta for think tag content in stream", async () => {
+    const provider = new TestOpenAIProvider();
+    mockStreamResponse([
+      makeSSEChunk("<think>"),
+      makeSSEChunk("reasoning content"),
+      makeSSEChunk("</think>"),
+      makeSSEChunk("visible text"),
+      makeSSEDone(),
+    ]);
+
+    const result = await provider.stream(
+      [{ role: "user", content: "test" }],
+      { modelName: "test-model" },
+    );
+
+    const events: StreamEvent[] = [];
+    for await (const event of result) {
+      events.push(event);
+    }
+
+    const textDeltas = events
+      .filter((e) => e.type === "text_delta")
+      .map((e) => (e as { text: string }).text)
+      .join("");
+    const reasoningDeltas = events
+      .filter((e) => e.type === "reasoning_delta")
+      .map((e) => (e as { text: string }).text)
+      .join("");
+
+    expect(textDeltas).toBe("visible text");
+    expect(reasoningDeltas).toBe("reasoning content");
+  });
+
+  it("handles stream with no think tags", async () => {
+    const provider = new TestOpenAIProvider();
+    mockStreamResponse([
+      makeSSEChunk("Hello "),
+      makeSSEChunk("world"),
+      makeSSEDone(),
+    ]);
+
+    const result = await provider.stream(
+      [{ role: "user", content: "test" }],
+      { modelName: "test-model" },
+    );
+
+    const events: StreamEvent[] = [];
+    for await (const event of result) {
+      events.push(event);
+    }
+
+    const textDeltas = events
+      .filter((e) => e.type === "text_delta")
+      .map((e) => (e as { text: string }).text)
+      .join("");
+    const reasoningDeltas = events.filter((e) => e.type === "reasoning_delta");
+
+    expect(textDeltas).toBe("Hello world");
+    expect(reasoningDeltas).toHaveLength(0);
+  });
+
+  it("handles think tag split across chunks", async () => {
+    const provider = new TestOpenAIProvider();
+    mockStreamResponse([
+      makeSSEChunk("<thi"),
+      makeSSEChunk("nk>"),
+      makeSSEChunk("reasoning"),
+      makeSSEChunk("</thi"),
+      makeSSEChunk("nk>"),
+      makeSSEChunk("output"),
+      makeSSEDone(),
+    ]);
+
+    const result = await provider.stream(
+      [{ role: "user", content: "test" }],
+      { modelName: "test-model" },
+    );
+
+    const events: StreamEvent[] = [];
+    for await (const event of result) {
+      events.push(event);
+    }
+
+    const textDeltas = events
+      .filter((e) => e.type === "text_delta")
+      .map((e) => (e as { text: string }).text)
+      .join("");
+    const reasoningDeltas = events
+      .filter((e) => e.type === "reasoning_delta")
+      .map((e) => (e as { text: string }).text)
+      .join("");
+
+    expect(textDeltas).toBe("output");
+    expect(reasoningDeltas).toBe("reasoning");
+  });
+});
diff --git a/packages/ai/src/adapters/openai-adapter.ts b/packages/ai/src/adapters/openai-adapter.ts
index 57d895f..78f3d6f 100644
--- a/packages/ai/src/adapters/openai-adapter.ts
+++ b/packages/ai/src/adapters/openai-adapter.ts
@@ -25,6 +25,54 @@ import {
   TimeoutError,
 } from "../errors.js";
 
+// ---------------------------------------------------------------------------
+// Think-tag stripping
+// ---------------------------------------------------------------------------
+
+/**
+ * Result of stripping `<think>` tags from model output.
+ */
+export interface StripThinkTagsResult {
+  /** The text with all `<think>...</think>` blocks removed. */
+  text: string;
+  /** Concatenated reasoning content extracted from the blocks (null if none found). */
+  reasoning: string | null;
+}
+
+/**
+ * Strip `<think>...</think>` reasoning blocks from model output.
+ *
+ * Handles:
+ * - Multiple `<think>` blocks (non-empty contents are joined with a blank line)
+ * - Unclosed `<think>` tags (treats everything after the tag as reasoning)
+ * - Case-insensitive matching
+ * - Trimming of leading/trailing whitespace (interior whitespace is kept as-is)
+ */
+export function stripThinkTags(input: string): StripThinkTagsResult {
+  const reasoningParts: string[] = [];
+
+  // Match closed <think>...</think> blocks (case-insensitive; [\s\S] also spans newlines)
+  const closedPattern = /<think>([\s\S]*?)<\/think>/gi;
+  let stripped = input.replace(closedPattern, (_match, content: string) => {
+    const trimmed = content.trim();
+    if (trimmed) reasoningParts.push(trimmed);
+    return "";
+  });
+
+  // Match a remaining unclosed <think> tag — treat the rest of the string as reasoning
+  const unclosedPattern = /<think>([\s\S]*)$/i;
+  stripped = stripped.replace(unclosedPattern, (_match, content: string) => {
+    const trimmed = content.trim();
+    if (trimmed) reasoningParts.push(trimmed);
+    return "";
+  });
+
+  return {
+    text: stripped.trim(),
+    reasoning: reasoningParts.length > 0 ? reasoningParts.join("\n\n") : null,
+  };
+}
+
 // ---------------------------------------------------------------------------
 // OpenAI API types (minimal subset)
 // ---------------------------------------------------------------------------
@@ -415,16 +463,28 @@ export abstract class OpenAIAdapter implements ModelProvider {
       ? parseToolCalls(choice.message.tool_calls)
       : parseFunctionCall(choice.message.function_call);
 
+    // Strip <think> reasoning tags from response text
+    const rawText = choice.message.content;
+    let text: string | null = rawText;
+    let reasoning: string | null = null;
+    if (rawText) {
+      const result = stripThinkTags(rawText);
+      text = result.text || null;
+      reasoning = result.reasoning;
+    }
+
     return {
-      text: choice.message.content,
+      text,
       toolCalls,
       usage: parseUsage(data.usage),
       finishReason: parseFinishReason(choice.finish_reason),
+      ...(reasoning != null ? { reasoning } : {}),
     };
   }
 
   /**
-   * Parse SSE stream from the API.
+   * Parse SSE stream from the API, stripping `<think>` tags and emitting
+   * their contents as `reasoning_delta` events.
    */
   protected async *parseSSEStream(
     reader: ReadableStreamDefaultReader<Uint8Array>,
@@ -435,6 +495,76 @@ export abstract class OpenAIAdapter implements ModelProvider {
     const streamTimeoutMs = 30_000;
     let lastDataTime = Date.now();
 
+    // Think-tag streaming state
+    let insideThink = false;
+    // Pending text that might contain a partial "<think" or "</think" tag
+    let pendingText = "";
+
+    /**
+     * Process accumulated text, splitting it into text_delta and
+     * reasoning_delta events based on <think>...</think> boundaries.
+     */
+    function* flushText(text: string): Generator<StreamEvent> {
+      pendingText += text;
+
+      while (pendingText.length > 0) {
+        if (insideThink) {
+          // Look for closing </think> tag
+          const closeIdx = pendingText.toLowerCase().indexOf("</think>");
+          if (closeIdx !== -1) {
+            // Emit everything before the close tag as reasoning
+            const reasoning = pendingText.slice(0, closeIdx);
+            if (reasoning) {
+              yield { type: "reasoning_delta", text: reasoning };
+            }
+            pendingText = pendingText.slice(closeIdx + "</think>".length);
+            insideThink = false;
+          } else {
+            // Might have a partial "</think" at the end — keep it pending
+            const partialClose = findPartialTag(pendingText, "</think>");
+            if (partialClose > 0) {
+              const safe = pendingText.slice(0, pendingText.length - partialClose);
+              if (safe) {
+                yield { type: "reasoning_delta", text: safe };
+              }
+              pendingText = pendingText.slice(pendingText.length - partialClose);
+            } else {
+              // All content is reasoning
+              yield { type: "reasoning_delta", text: pendingText };
+              pendingText = "";
+            }
+            break;
+          }
+        } else {
+          // Look for opening <think> tag
+          const openIdx = pendingText.toLowerCase().indexOf("<think>");
+          if (openIdx !== -1) {
+            // Emit everything before the open tag as text
+            const before = pendingText.slice(0, openIdx);
+            if (before) {
+              yield { type: "text_delta", text: before };
+            }
+            pendingText = pendingText.slice(openIdx + "<think>".length);
+            insideThink = true;
+          } else {
+            // Might have a partial "<think" at the end — keep it pending
+            const partialOpen = findPartialTag(pendingText, "<think>");
+            if (partialOpen > 0) {
+              const safe = pendingText.slice(0, pendingText.length - partialOpen);
+              if (safe) {
+                yield { type: "text_delta", text: safe };
+              }
+              pendingText = pendingText.slice(pendingText.length - partialOpen);
+            } else {
+              yield { type: "text_delta", text: pendingText };
+              pendingText = "";
+            }
+            break;
+          }
+        }
+      }
+    }
+
     try {
       while (true) {
         // Check abort signal
@@ -477,7 +607,7 @@ export abstract class OpenAIAdapter implements ModelProvider {
 
           const delta = chunk.choices?.[0]?.delta;
           if (delta?.content) {
-            yield { type: "text_delta", text: delta.content };
+            yield* flushText(delta.content);
           }
 
           if (delta?.tool_calls) {
@@ -501,6 +631,12 @@ export abstract class OpenAIAdapter implements ModelProvider {
           // Handle content_filter finish reason in stream
           const finishReason = chunk.choices?.[0]?.finish_reason;
           if (finishReason === "content_filter") {
+            // Flush any remaining pending text
+            if (pendingText) {
+              const eventType = insideThink ? "reasoning_delta" : "text_delta";
+              yield { type: eventType, text: pendingText } as StreamEvent;
+              pendingText = "";
+            }
             yield { type: "done" };
             return;
           }
@@ -510,6 +646,27 @@ export abstract class OpenAIAdapter implements ModelProvider {
       reader.releaseLock();
     }
 
+    // Flush any remaining pending text at end of stream
+    if (pendingText) {
+      const eventType = insideThink ? "reasoning_delta" : "text_delta";
+      yield { type: eventType, text: pendingText } as StreamEvent;
+    }
+
     yield { type: "done" };
   }
 }
+
+/**
+ * Check whether `text` ends with a proper prefix of `tag` (compared
+ * case-insensitively). Returns the longest such prefix's length, or 0.
+ */
+function findPartialTag(text: string, tag: string): number {
+  const lower = text.toLowerCase();
+  // Try proper prefixes of the tag, longest first, against the end of the text
+  for (let len = tag.length - 1; len >= 1; len--) {
+    if (lower.endsWith(tag.slice(0, len).toLowerCase())) {
+      return len;
+    }
+  }
+  return 0;
+}
diff --git a/packages/ai/src/index.ts b/packages/ai/src/index.ts
index b9860e1..b4379b6 100644
--- a/packages/ai/src/index.ts
+++ b/packages/ai/src/index.ts
@@ -44,6 +44,7 @@ export {
 // --- Streaming ---
 export type {
   TextDelta,
+  ReasoningDelta,
   ToolCallDelta,
   UsageDelta,
   StreamDone,
@@ -120,6 +121,8 @@ export {
   parseFunctionCall,
   parseFinishReason,
   parseUsage,
+  stripThinkTags,
+  type StripThinkTagsResult,
   type OpenAIMessage,
   type OpenAIToolCall,
   type OpenAITool,
diff --git a/packages/ai/src/stream.ts b/packages/ai/src/stream.ts
index 26aa3ec..fff1af3 100644
--- a/packages/ai/src/stream.ts
+++ b/packages/ai/src/stream.ts
@@ -16,6 +16,12 @@ export interface TextDelta {
   text: string;
 }
 
+/** A chunk of reasoning content extracted from <think> tags. */
+export interface ReasoningDelta {
+  type: "reasoning_delta";
+  text: string;
+}
+
 /** A tool call chunk (may arrive incrementally). */
 export interface ToolCallDelta {
   type: "tool_call_delta";
@@ -33,7 +39,7 @@ export interface StreamDone {
   type: "done";
 }
 
-export type StreamEvent = TextDelta | ToolCallDelta | UsageDelta | StreamDone;
+export type StreamEvent = TextDelta | ReasoningDelta | ToolCallDelta | UsageDelta | StreamDone;
 
 // ---------------------------------------------------------------------------
 // StreamResult
diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts
index 10275ad..ad600d2 100644
--- a/packages/ai/src/types.ts
+++ b/packages/ai/src/types.ts
@@ -145,6 +145,8 @@ export interface ModelResponse {
   usage: Usage;
   /** The finish reason reported by the provider. */
   finishReason: FinishReason;
+  /** Extracted reasoning content from <think> tags (if present). */
+  reasoning?: string | null;
 }
 
 export type FinishReason = "stop" | "length" | "tool_calls" | "content_filter" | "error" | "unknown";