feat(plugin): project reasoning bytes from thinking-only assistant messages on tool drops

ualtinok · ualtinok · commit 90106518e7cc · 2026-04-01T01:31:05.000+02:00
About 31% of assistant messages have reasoning parts but no text part.
Those reasoning bytes are now stored on tool tags (via
precedingThinkingParts) and included in the heuristic tool-drop
projection, closing the undercount that previously made the historian
fire sooner than necessary.
diff --git a/packages/plugin/src/hooks/magic-context/compartment-trigger.test.ts b/packages/plugin/src/hooks/magic-context/compartment-trigger.test.ts
@@ -11,6 +11,7 @@ import {
     openDatabase,
     queuePendingOp,
 } from "../../features/magic-context/storage";
+import type { SessionMeta } from "../../features/magic-context/types";
 import { checkCompartmentTrigger } from "./compartment-trigger";
 
 const tempDirs: string[] = [];
@@ -88,7 +89,7 @@ function createOpenCodeDb(
     }
 }
 
-function makeSessionMeta(sessionId: string, lastContextPercentage: number) {
+function makeSessionMeta(sessionId: string, lastContextPercentage: number): SessionMeta {
     return {
         sessionId,
         counter: 0,
@@ -102,6 +103,8 @@ function makeSessionMeta(sessionId: string, lastContextPercentage: number) {
         timesExecuteThresholdReached: 0,
         compartmentInProgress: false,
         lastTransformError: null,
+        systemPromptHash: "",
+        clearedReasoningThroughTag: 0,
     };
 }
 
@@ -161,6 +164,38 @@ describe("checkCompartmentTrigger", () => {
         expect(result).toEqual({ shouldFire: false });
     });
 
+    it("does not fire proactively when auto-droppable tool reasoning brings projected usage below target", () => {
+        useTempDataHome("compartment-trigger-tool-reasoning-");
+        createOpenCodeDb("ses-tool-reasoning", [
+            { id: "m-1", role: "user", text: "setup" },
+            { id: "m-2", role: "assistant", text: "done" },
+            { id: "m-3", role: "user", text: "a ".repeat(7000) },
+            { id: "m-4", role: "assistant", text: "b ".repeat(7000) },
+            { id: "m-5", role: "user", text: "protected tail 1" },
+            { id: "m-6", role: "user", text: "protected tail 2" },
+            { id: "m-7", role: "user", text: "protected tail 3" },
+            { id: "m-8", role: "user", text: "protected tail 4" },
+            { id: "m-9", role: "user", text: "protected tail 5" },
+        ]);
+        const db = openDatabase();
+        insertTag(db, "ses-tool-reasoning", "call-1", "tool", 100, 1, 900);
+        insertTag(db, "ses-tool-reasoning", "m-2", "message", 100, 2);
+
+        const result = checkCompartmentTrigger(
+            db,
+            "ses-tool-reasoning",
+            makeSessionMeta("ses-tool-reasoning", 62),
+            { percentage: 63, inputTokens: 126_000 },
+            62,
+            65,
+            undefined,
+            0,
+            0,
+        );
+
+        expect(result).toEqual({ shouldFire: false });
+    });
+
     it("does not force-fire at 80% when pending drops are enough to bring usage below target", () => {
         useTempDataHome("compartment-trigger-force-skip-");
         createOpenCodeDb("ses-force-skip", [
diff --git a/packages/plugin/src/hooks/magic-context/compartment-trigger.ts b/packages/plugin/src/hooks/magic-context/compartment-trigger.ts
@@ -70,11 +70,6 @@ function estimateProjectedPostDropPercentage(
 
     // 2. Heuristic auto-drop: old tool outputs outside protected tail
     // 3. Reasoning clearing: reasoning bytes on message tags between watermark and age cutoff
-    // Note: reasoning on thinking-only assistant messages (no text part) is attributed to
-    // subsequent tool tags at runtime via precedingThinkingParts, but not reflected in any
-    // tag's reasoningByteSize. This is a known conservative undercount (~31% of assistant
-    // messages are thinking-only). Those bytes are freed when the tool tag is dropped, but
-    // the projection doesn't account for them.
     const maxTag = activeTags.reduce((max, t) => Math.max(max, t.tagNumber), 0);
     if (autoDropToolAge !== undefined && protectedTags !== undefined) {
         const toolAgeCutoff = maxTag - autoDropToolAge;
@@ -85,7 +80,7 @@ function estimateProjectedPostDropPercentage(
             if (pendingDropTagIds.has(tag.tagNumber)) continue;
             if (tag.tagNumber > protectedCutoff) continue;
             if (tag.type === "tool" && tag.tagNumber <= toolAgeCutoff) {
-                droppableBytes += tag.byteSize;
+                droppableBytes += tag.byteSize + tag.reasoningByteSize;
             }
         }
     }
diff --git a/packages/plugin/src/hooks/magic-context/tag-messages.ts b/packages/plugin/src/hooks/magic-context/tag-messages.ts
@@ -68,6 +68,19 @@ function collectRelevantSourceTagIds(
     return Array.from(relevantTagIds);
 }
 
+function getReasoningByteSize(parts: ThinkingLikePart[]): number {
+    let reasoningBytes = 0;
+
+    for (const part of parts) {
+        const content = part.thinking ?? part.text ?? "";
+        if (content && content !== "[cleared]") {
+            reasoningBytes += byteSize(content);
+        }
+    }
+
+    return reasoningBytes;
+}
+
 export function tagMessages(
     sessionId: string,
     messages: MessageLike[],
@@ -159,17 +172,7 @@ export function tagMessages(
                         contentId,
                         textOrdinal,
                     );
-                    // Compute reasoning byte size from thinking parts associated with this message
-                    let reasoningBytes = 0;
-                    if (textOrdinal === 0) {
-                        // Attribute reasoning to the first text part of the message
-                        for (const tp of thinkingParts) {
-                            const content = tp.thinking ?? tp.text ?? "";
-                            if (content && content !== "[cleared]") {
-                                reasoningBytes += byteSize(content);
-                            }
-                        }
-                    }
+                    const reasoningBytes = textOrdinal === 0 ? getReasoningByteSize(thinkingParts) : 0;
                     const tagId = tagger.assignTag(
                         sessionId,
                         contentId,
@@ -214,13 +217,15 @@ export function tagMessages(
                 if (isToolPartWithOutput(part)) {
                     const toolPart = part;
                     const thinkingParts = precedingThinkingParts;
+                    const reasoningBytes = getReasoningByteSize(thinkingParts);
 
                     const tagId = tagger.assignTag(
                         sessionId,
                         toolPart.callID,
                         "tool",
                         byteSize(toolPart.state.output),
                         db,
+                        reasoningBytes,
                     );
                     messageTagNumbers.set(
                         message,
diff --git a/packages/plugin/src/hooks/magic-context/transform-operations.test.ts b/packages/plugin/src/hooks/magic-context/transform-operations.test.ts
@@ -4,9 +4,10 @@ import { afterEach, describe, expect, it } from "bun:test";
 import { mkdtempSync, rmSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
-import { closeDatabase, openDatabase } from "../../features/magic-context/storage";
+import { closeDatabase, getTagById, openDatabase } from "../../features/magic-context/storage";
 import { createTagger } from "../../features/magic-context/tagger";
 import { clearOldReasoning, tagMessages } from "./transform-operations";
+import { byteSize } from "./tag-content-primitives";
 
 type TextPart = { type: "text"; text: string };
 type ToolPart = { type: "tool"; callID: string; state: { output: string } };
@@ -40,6 +41,47 @@ function useTempDataHome(prefix: string): void {
 
 describe("tagMessages", () => {
     describe("#given assistant message with thinking + tool_use but no text", () => {
+        it("#then stores preceding thinking bytes on the tool tag", () => {
+            useTempDataHome("tag-tool-reasoning-bytes-");
+            const db = openDatabase();
+            const tagger = createTagger();
+
+            const thinkingPart: ThinkingPart = {
+                type: "thinking",
+                thinking: "long reasoning about tool use",
+            };
+            const reasoningPart: ReasoningPart = {
+                type: "reasoning",
+                text: "structured reasoning payload",
+            };
+            const messages: TestMessage[] = [
+                {
+                    info: { id: "m-user", role: "user", sessionID: "ses-1" },
+                    parts: [{ type: "text", text: "run the command" }],
+                },
+                {
+                    info: { id: "m-assistant", role: "assistant" },
+                    parts: [
+                        thinkingPart,
+                        reasoningPart,
+                        { type: "tool-invocation", callID: "call-1" },
+                    ],
+                },
+                {
+                    info: { id: "m-tool", role: "tool" },
+                    parts: [{ type: "tool", callID: "call-1", state: { output: "command output" } }],
+                },
+            ];
+
+            tagMessages("ses-1", messages, tagger, db);
+
+            const toolTagId = tagger.getTag("ses-1", "call-1");
+            expect(toolTagId).toBeDefined();
+            expect(getTagById(db, "ses-1", toolTagId!)?.reasoningByteSize).toBe(
+                byteSize(thinkingPart.thinking) + byteSize(reasoningPart.text),
+            );
+        });
+
         describe("#when tool output is dropped", () => {
             it("#then clears thinking in the preceding assistant message", () => {
                 useTempDataHome("tag-cross-msg-clear-");

Original file line number	Diff line number	Diff line change
`@@ -70,11 +70,6 @@ function estimateProjectedPostDropPercentage(`
`70`	`70`
`71`	`71`	`// 2. Heuristic auto-drop: old tool outputs outside protected tail`
`72`	`72`	`// 3. Reasoning clearing: reasoning bytes on message tags between watermark and age cutoff`
`73`		`- // Note: reasoning on thinking-only assistant messages (no text part) is attributed to`
`74`		`- // subsequent tool tags at runtime via precedingThinkingParts, but not reflected in any`
`75`		`- // tag's reasoningByteSize. This is a known conservative undercount (~31% of assistant`
`76`		`- // messages are thinking-only). Those bytes are freed when the tool tag is dropped, but`
`77`		`- // the projection doesn't account for them.`
`78`	`73`	`const maxTag = activeTags.reduce((max, t) => Math.max(max, t.tagNumber), 0);`
`79`	`74`	`if (autoDropToolAge !== undefined && protectedTags !== undefined) {`
`80`	`75`	`const toolAgeCutoff = maxTag - autoDropToolAge;`
`@@ -85,7 +80,7 @@ function estimateProjectedPostDropPercentage(`
`85`	`80`	`if (pendingDropTagIds.has(tag.tagNumber)) continue;`
`86`	`81`	`if (tag.tagNumber > protectedCutoff) continue;`
`87`	`82`	`if (tag.type === "tool" && tag.tagNumber <= toolAgeCutoff) {`
`88`		`- droppableBytes += tag.byteSize;`
	`83`	`+ droppableBytes += tag.byteSize + tag.reasoningByteSize;`
`89`	`84`	`}`
`90`	`85`	`}`
`91`	`86`	`}`