Skip to content

Commit 9010651

Browse files
committed
feat(plugin): project reasoning bytes from thinking-only assistant messages on tool drops
About 31% of assistant messages have reasoning parts but no text part. The bytes of those reasoning parts are now stored on the tool tags (collected via precedingThinkingParts) and included in the heuristic tool-drop projection, closing the undercount that previously made the historian fire sooner than necessary.
1 parent 071972e commit 9010651

4 files changed

Lines changed: 96 additions & 19 deletions

File tree

packages/plugin/src/hooks/magic-context/compartment-trigger.test.ts

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import {
1111
openDatabase,
1212
queuePendingOp,
1313
} from "../../features/magic-context/storage";
14+
import type { SessionMeta } from "../../features/magic-context/types";
1415
import { checkCompartmentTrigger } from "./compartment-trigger";
1516

1617
const tempDirs: string[] = [];
@@ -88,7 +89,7 @@ function createOpenCodeDb(
8889
}
8990
}
9091

91-
function makeSessionMeta(sessionId: string, lastContextPercentage: number) {
92+
function makeSessionMeta(sessionId: string, lastContextPercentage: number): SessionMeta {
9293
return {
9394
sessionId,
9495
counter: 0,
@@ -102,6 +103,8 @@ function makeSessionMeta(sessionId: string, lastContextPercentage: number) {
102103
timesExecuteThresholdReached: 0,
103104
compartmentInProgress: false,
104105
lastTransformError: null,
106+
systemPromptHash: "",
107+
clearedReasoningThroughTag: 0,
105108
};
106109
}
107110

@@ -161,6 +164,38 @@ describe("checkCompartmentTrigger", () => {
161164
expect(result).toEqual({ shouldFire: false });
162165
});
163166

167+
it("does not fire proactively when auto-droppable tool reasoning brings projected usage below target", () => {
168+
useTempDataHome("compartment-trigger-tool-reasoning-");
169+
createOpenCodeDb("ses-tool-reasoning", [
170+
{ id: "m-1", role: "user", text: "setup" },
171+
{ id: "m-2", role: "assistant", text: "done" },
172+
{ id: "m-3", role: "user", text: "a ".repeat(7000) },
173+
{ id: "m-4", role: "assistant", text: "b ".repeat(7000) },
174+
{ id: "m-5", role: "user", text: "protected tail 1" },
175+
{ id: "m-6", role: "user", text: "protected tail 2" },
176+
{ id: "m-7", role: "user", text: "protected tail 3" },
177+
{ id: "m-8", role: "user", text: "protected tail 4" },
178+
{ id: "m-9", role: "user", text: "protected tail 5" },
179+
]);
180+
const db = openDatabase();
181+
insertTag(db, "ses-tool-reasoning", "call-1", "tool", 100, 1, 900);
182+
insertTag(db, "ses-tool-reasoning", "m-2", "message", 100, 2);
183+
184+
const result = checkCompartmentTrigger(
185+
db,
186+
"ses-tool-reasoning",
187+
makeSessionMeta("ses-tool-reasoning", 62),
188+
{ percentage: 63, inputTokens: 126_000 },
189+
62,
190+
65,
191+
undefined,
192+
0,
193+
0,
194+
);
195+
196+
expect(result).toEqual({ shouldFire: false });
197+
});
198+
164199
it("does not force-fire at 80% when pending drops are enough to bring usage below target", () => {
165200
useTempDataHome("compartment-trigger-force-skip-");
166201
createOpenCodeDb("ses-force-skip", [

packages/plugin/src/hooks/magic-context/compartment-trigger.ts

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,6 @@ function estimateProjectedPostDropPercentage(
7070

7171
// 2. Heuristic auto-drop: old tool outputs outside protected tail
7272
// 3. Reasoning clearing: reasoning bytes on message tags between watermark and age cutoff
73-
// Note: reasoning on thinking-only assistant messages (no text part) is attributed to
74-
// subsequent tool tags at runtime via precedingThinkingParts, but not reflected in any
75-
// tag's reasoningByteSize. This is a known conservative undercount (~31% of assistant
76-
// messages are thinking-only). Those bytes are freed when the tool tag is dropped, but
77-
// the projection doesn't account for them.
7873
const maxTag = activeTags.reduce((max, t) => Math.max(max, t.tagNumber), 0);
7974
if (autoDropToolAge !== undefined && protectedTags !== undefined) {
8075
const toolAgeCutoff = maxTag - autoDropToolAge;
@@ -85,7 +80,7 @@ function estimateProjectedPostDropPercentage(
8580
if (pendingDropTagIds.has(tag.tagNumber)) continue;
8681
if (tag.tagNumber > protectedCutoff) continue;
8782
if (tag.type === "tool" && tag.tagNumber <= toolAgeCutoff) {
88-
droppableBytes += tag.byteSize;
83+
droppableBytes += tag.byteSize + tag.reasoningByteSize;
8984
}
9085
}
9186
}

packages/plugin/src/hooks/magic-context/tag-messages.ts

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,19 @@ function collectRelevantSourceTagIds(
6868
return Array.from(relevantTagIds);
6969
}
7070

71+
function getReasoningByteSize(parts: ThinkingLikePart[]): number {
72+
let reasoningBytes = 0;
73+
74+
for (const part of parts) {
75+
const content = part.thinking ?? part.text ?? "";
76+
if (content && content !== "[cleared]") {
77+
reasoningBytes += byteSize(content);
78+
}
79+
}
80+
81+
return reasoningBytes;
82+
}
83+
7184
export function tagMessages(
7285
sessionId: string,
7386
messages: MessageLike[],
@@ -159,17 +172,7 @@ export function tagMessages(
159172
contentId,
160173
textOrdinal,
161174
);
162-
// Compute reasoning byte size from thinking parts associated with this message
163-
let reasoningBytes = 0;
164-
if (textOrdinal === 0) {
165-
// Attribute reasoning to the first text part of the message
166-
for (const tp of thinkingParts) {
167-
const content = tp.thinking ?? tp.text ?? "";
168-
if (content && content !== "[cleared]") {
169-
reasoningBytes += byteSize(content);
170-
}
171-
}
172-
}
175+
const reasoningBytes = textOrdinal === 0 ? getReasoningByteSize(thinkingParts) : 0;
173176
const tagId = tagger.assignTag(
174177
sessionId,
175178
contentId,
@@ -214,13 +217,15 @@ export function tagMessages(
214217
if (isToolPartWithOutput(part)) {
215218
const toolPart = part;
216219
const thinkingParts = precedingThinkingParts;
220+
const reasoningBytes = getReasoningByteSize(thinkingParts);
217221

218222
const tagId = tagger.assignTag(
219223
sessionId,
220224
toolPart.callID,
221225
"tool",
222226
byteSize(toolPart.state.output),
223227
db,
228+
reasoningBytes,
224229
);
225230
messageTagNumbers.set(
226231
message,

packages/plugin/src/hooks/magic-context/transform-operations.test.ts

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@ import { afterEach, describe, expect, it } from "bun:test";
44
import { mkdtempSync, rmSync } from "node:fs";
55
import { tmpdir } from "node:os";
66
import { join } from "node:path";
7-
import { closeDatabase, openDatabase } from "../../features/magic-context/storage";
7+
import { closeDatabase, getTagById, openDatabase } from "../../features/magic-context/storage";
88
import { createTagger } from "../../features/magic-context/tagger";
99
import { clearOldReasoning, tagMessages } from "./transform-operations";
10+
import { byteSize } from "./tag-content-primitives";
1011

1112
type TextPart = { type: "text"; text: string };
1213
type ToolPart = { type: "tool"; callID: string; state: { output: string } };
@@ -40,6 +41,47 @@ function useTempDataHome(prefix: string): void {
4041

4142
describe("tagMessages", () => {
4243
describe("#given assistant message with thinking + tool_use but no text", () => {
44+
it("#then stores preceding thinking bytes on the tool tag", () => {
45+
useTempDataHome("tag-tool-reasoning-bytes-");
46+
const db = openDatabase();
47+
const tagger = createTagger();
48+
49+
const thinkingPart: ThinkingPart = {
50+
type: "thinking",
51+
thinking: "long reasoning about tool use",
52+
};
53+
const reasoningPart: ReasoningPart = {
54+
type: "reasoning",
55+
text: "structured reasoning payload",
56+
};
57+
const messages: TestMessage[] = [
58+
{
59+
info: { id: "m-user", role: "user", sessionID: "ses-1" },
60+
parts: [{ type: "text", text: "run the command" }],
61+
},
62+
{
63+
info: { id: "m-assistant", role: "assistant" },
64+
parts: [
65+
thinkingPart,
66+
reasoningPart,
67+
{ type: "tool-invocation", callID: "call-1" },
68+
],
69+
},
70+
{
71+
info: { id: "m-tool", role: "tool" },
72+
parts: [{ type: "tool", callID: "call-1", state: { output: "command output" } }],
73+
},
74+
];
75+
76+
tagMessages("ses-1", messages, tagger, db);
77+
78+
const toolTagId = tagger.getTag("ses-1", "call-1");
79+
expect(toolTagId).toBeDefined();
80+
expect(getTagById(db, "ses-1", toolTagId!)?.reasoningByteSize).toBe(
81+
byteSize(thinkingPart.thinking) + byteSize(reasoningPart.text),
82+
);
83+
});
84+
4385
describe("#when tool output is dropped", () => {
4486
it("#then clears thinking in the preceding assistant message", () => {
4587
useTempDataHome("tag-cross-msg-clear-");

0 commit comments

Comments
 (0)