From 90d69ef4b0404a1ef046cc387726ed37a4bccc46 Mon Sep 17 00:00:00 2001 From: Colin B Date: Thu, 16 Apr 2026 13:52:37 -0700 Subject: [PATCH] fix(trace-opencode): capture system prompt in LLM span input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenCode emits the resolved system prompt (AGENTS.md / CLAUDE.md content) via experimental.chat.system.transform — a hook the plugin did not subscribe to. LLM spans therefore rendered with only the user turn, making it impossible to debug prompt behavior in Braintrust. Subscribe to the hook, store the joined system prompt on SessionState, and prepend it as the first message when building LLM span input. The hook's output.system is string[] (not string as sometimes assumed); parts are joined with a blank line so the trace shows one system entry. Mirror the capture in the EventProcessor replay path, the FileLogger, and replay.ts so recorded fixtures reconstruct system prompts too. Tests cover: present, absent (regression guard), multi-part joining, across-turn updates, empty arrays, and the production hook wired end-to-end through createTracingHooks. 
--- src/event-processor.ts | 21 +++++ src/file-logger.ts | 14 +++ src/replay.ts | 26 ++++++ src/test-helpers.ts | 26 ++++++ src/tracing.test.ts | 196 +++++++++++++++++++++++++++++++++++++++++ src/tracing.ts | 36 ++++++++ 6 files changed, 319 insertions(+) diff --git a/src/event-processor.ts b/src/event-processor.ts index a56dc4b..a94b9e3 100644 --- a/src/event-processor.ts +++ b/src/event-processor.ts @@ -28,6 +28,8 @@ interface SessionState { currentInput?: string currentOutput?: string currentMessageId?: string + // Joined system prompt captured from experimental.chat.system.transform + systemPrompt?: string // Parent-child session tracking (for subagents) parentSessionId?: string // If this is a child session, the parent's session ID parentRootSpanId?: string // The parent's root span ID (child spans link to this as root) @@ -188,6 +190,22 @@ export class EventProcessor { return this.processChatMessage(sessionID, userMessage, model) } + /** + * Capture the system prompt from experimental.chat.system.transform. + * Joins the array with \n\n so the LLM span shows one system message. + */ + async processSystemTransform(sessionID: string, system: string[]): Promise { + const state = this.sessionStates.get(sessionID) + if (state && Array.isArray(system) && system.length > 0) { + state.systemPrompt = system.join("\n\n") + this.log("Captured system prompt", { + sessionID, + parts: system.length, + length: state.systemPrompt.length, + }) + } + } + /** * Lazily initialize session state and emit a root span for API-created sessions. 
* Called from processChatMessage when no state exists for a session — this happens @@ -623,6 +641,9 @@ export class EventProcessor { // Build input/output in Braintrust's expected format const llmInput: Array> = [] + if (state.systemPrompt) { + llmInput.push({ role: "system", content: state.systemPrompt }) + } if (state.currentInput) { llmInput.push({ role: "user", content: state.currentInput }) } diff --git a/src/file-logger.ts b/src/file-logger.ts index 46c3266..16ac6f7 100644 --- a/src/file-logger.ts +++ b/src/file-logger.ts @@ -21,6 +21,8 @@ export type LogKind = | "event" // raw OpenCode event received by the event hook | "chat.message.input" // messageInput arg to chat.message hook | "chat.message.output" // output arg to chat.message hook + | "chat.system.input" // input arg to experimental.chat.system.transform + | "chat.system.output" // output arg to experimental.chat.system.transform | "tool.before.input" // toolInput arg to tool.execute.before | "tool.before.output" // output arg to tool.execute.before | "tool.after.input" // toolInput arg to tool.execute.after @@ -117,6 +119,18 @@ export class FileLogger { this.write("chat.message.output", output, { hook: "chat.message", session_id: sessionId }) } + /** Convenience: log an experimental.chat.system.transform hook input + output pair */ + logChatSystem(input: unknown, output: unknown, sessionId?: string): void { + this.write("chat.system.input", input, { + hook: "experimental.chat.system.transform", + session_id: sessionId, + }) + this.write("chat.system.output", output, { + hook: "experimental.chat.system.transform", + session_id: sessionId, + }) + } + /** Convenience: log a tool.execute.before hook args */ logToolBefore(toolInput: unknown, output: unknown, sessionId?: string): void { this.write("tool.before.input", toolInput, { diff --git a/src/replay.ts b/src/replay.ts index 4948081..bd7b9a2 100644 --- a/src/replay.ts +++ b/src/replay.ts @@ -38,6 +38,14 @@ interface RawChatMessageOutput { parts?: 
Array<{ type: string; text?: string }> } +interface RawChatSystemInput { + sessionID?: string +} + +interface RawChatSystemOutput { + system?: string[] +} + interface RawToolInput { tool?: string sessionID?: string @@ -108,6 +116,7 @@ export async function replayLogFile( sessionID: string model?: { providerID?: string; modelID?: string } } | null = null + let pendingChatSystemSessionID: string | null = null let pendingToolBeforeInput: RawToolInput | null = null let pendingToolAfterInput: RawToolInput | null = null @@ -142,6 +151,23 @@ export async function replayLogFile( break } + case "chat.system.input": { + const input = record.data as RawChatSystemInput + pendingChatSystemSessionID = input.sessionID ?? record.session_id ?? null + break + } + + case "chat.system.output": { + const sessionID = pendingChatSystemSessionID + pendingChatSystemSessionID = null + if (!sessionID) break + const output = record.data as RawChatSystemOutput + if (Array.isArray(output.system) && output.system.length > 0) { + await processor.processSystemTransform(sessionID, output.system) + } + break + } + case "tool.before.input": { // Buffer: args may arrive in the paired tool.before.output record pendingToolBeforeInput = record.data as RawToolInput diff --git a/src/test-helpers.ts b/src/test-helpers.ts index a771fa8..d367149 100644 --- a/src/test-helpers.ts +++ b/src/test-helpers.ts @@ -52,6 +52,11 @@ export type TestItem = input: Record output: string | undefined | unknown } + | { + _hook: "experimental.chat.system.transform" + system: string[] + sessionID?: string // Optional: target session (defaults to main session) + } export interface TestToolCall { id: string @@ -337,6 +342,14 @@ export function toolExecute( return { _hook: "tool.execute", callID, tool, title, input, output } } +/** + * experimental.chat.system.transform hook call (not an event). + * Carries the resolved system prompt parts that OpenCode passes to the LLM. 
+ */ +export function systemTransform(system: string[], options?: { sessionID?: string }): TestItem { + return { _hook: "experimental.chat.system.transform", system, sessionID: options?.sessionID } +} + /** * Helper to build a tool call object for use with toolCallPart */ @@ -390,6 +403,11 @@ type HookItem = input: Record output: string | undefined | unknown } + | { + _hook: "experimental.chat.system.transform" + system: string[] + sessionID?: string + } function isHook(item: TestItem): item is HookItem { return typeof item === "object" && "_hook" in item @@ -446,6 +464,14 @@ export async function eventsToTree( hook.output, hook.input, ) + } else if (item._hook === "experimental.chat.system.transform") { + const hook = item as { + _hook: "experimental.chat.system.transform" + system: string[] + sessionID?: string + } + const targetSessionID = hook.sessionID || sessionID + await processor.processSystemTransform(targetSessionID, hook.system) } } else { // It's a real Event - patch timestamps to use clock time for deterministic ordering diff --git a/src/tracing.test.ts b/src/tracing.test.ts index 6250010..d75a759 100644 --- a/src/tracing.test.ts +++ b/src/tracing.test.ts @@ -25,6 +25,7 @@ import { sessionDeleted, sessionError, sessionIdle, + systemTransform, textPart, toolCallCompletedPart, toolCallPart, @@ -544,6 +545,201 @@ describe("Subagents (Child Sessions)", () => { }) }) +describe("System prompt capture", () => { + it("prepends system message to LLM span input when hook fires before message completes", async () => { + const sessionId = "ses_sys_1" + const messageId = "msg_sys_1" + const systemContent = "You are a helpful coding assistant. Follow AGENTS.md rules." 
+ + const tree = await eventsToTree( + session( + sessionId, + sessionCreated(sessionId), + chatMessage("Hi"), + // OpenCode fires this right before the LLM call with the resolved system prompt + systemTransform([systemContent]), + textPart(sessionId, messageId, "Hello!"), + messageCompleted(sessionId, messageId, { tokens: { input: 10, output: 5 } }), + sessionIdle(sessionId), + ), + ) + + const llmSpan = tree?.children[0]?.children.find((c) => c.type === "llm") + expect(llmSpan).toBeDefined() + const input = llmSpan?.input as Array<{ role: string; content: string }> + expect(input).toEqual([ + { role: "system", content: systemContent }, + { role: "user", content: "Hi" }, + ]) + }) + + it("omits system message when the hook never fires (existing behavior preserved)", async () => { + const sessionId = "ses_sys_absent" + const messageId = "msg_sys_absent" + + const tree = await eventsToTree( + session( + sessionId, + sessionCreated(sessionId), + chatMessage("Hi"), + // No systemTransform — e.g. 
an OpenCode version that does not emit it + textPart(sessionId, messageId, "Hello!"), + messageCompleted(sessionId, messageId, { tokens: { input: 10, output: 5 } }), + sessionIdle(sessionId), + ), + ) + + const llmSpan = tree?.children[0]?.children.find((c) => c.type === "llm") + expect(llmSpan).toBeDefined() + const input = llmSpan?.input as Array<{ role: string; content: string }> + expect(input).toEqual([{ role: "user", content: "Hi" }]) + // And definitely no system entry snuck in + expect(input.some((m) => m.role === "system")).toBe(false) + }) + + it("joins multi-part system arrays with a blank line between parts", async () => { + const sessionId = "ses_sys_multi" + const messageId = "msg_sys_multi" + const parts = ["Base instructions from AGENTS.md", "Additional context from CLAUDE.md"] + + const tree = await eventsToTree( + session( + sessionId, + sessionCreated(sessionId), + chatMessage("Hi"), + systemTransform(parts), + textPart(sessionId, messageId, "Hello!"), + messageCompleted(sessionId, messageId, { tokens: { input: 10, output: 5 } }), + sessionIdle(sessionId), + ), + ) + + const llmSpan = tree?.children[0]?.children.find((c) => c.type === "llm") + const input = llmSpan?.input as Array<{ role: string; content: string }> + expect(input[0]?.role).toBe("system") + expect(input[0]?.content).toBe(parts.join("\n\n")) + }) + + it("updates system prompt across turns and reflects latest in each LLM span", async () => { + const sessionId = "ses_sys_turns" + const firstSystem = "Instructions v1" + const secondSystem = "Instructions v2 with new context" + + const tree = await eventsToTree( + session( + sessionId, + sessionCreated(sessionId), + // Turn 1 + chatMessage("First"), + systemTransform([firstSystem]), + textPart(sessionId, "msg_1", "Reply 1"), + messageCompleted(sessionId, "msg_1", { tokens: { input: 5, output: 3 } }), + sessionIdle(sessionId), + // Turn 2: OpenCode fires the hook again with an updated prompt + chatMessage("Second"), + 
systemTransform([secondSystem]), + textPart(sessionId, "msg_2", "Reply 2"), + messageCompleted(sessionId, "msg_2", { tokens: { input: 6, output: 4 } }), + sessionIdle(sessionId), + ), + ) + + const turn1Llm = tree?.children[0]?.children.find((c) => c.type === "llm") + const turn2Llm = tree?.children[1]?.children.find((c) => c.type === "llm") + + const turn1Input = turn1Llm?.input as Array<{ role: string; content: string }> + const turn2Input = turn2Llm?.input as Array<{ role: string; content: string }> + + expect(turn1Input[0]).toEqual({ role: "system", content: firstSystem }) + expect(turn2Input[0]).toEqual({ role: "system", content: secondSystem }) + }) + + it("ignores empty system arrays and leaves the LLM span input unchanged", async () => { + const sessionId = "ses_sys_empty" + const messageId = "msg_sys_empty" + + const tree = await eventsToTree( + session( + sessionId, + sessionCreated(sessionId), + chatMessage("Hi"), + systemTransform([]), + textPart(sessionId, messageId, "Hello!"), + messageCompleted(sessionId, messageId, { tokens: { input: 10, output: 5 } }), + sessionIdle(sessionId), + ), + ) + + const llmSpan = tree?.children[0]?.children.find((c) => c.type === "llm") + const input = llmSpan?.input as Array<{ role: string; content: string }> + expect(input).toEqual([{ role: "user", content: "Hi" }]) + }) +}) + +describe("System prompt capture: production hooks", () => { + it("createTracingHooks captures system prompt via experimental.chat.system.transform", async () => { + const sessionId = "ses_hooks_sys" + const messageId = "msg_hooks_sys" + const systemContent = "System instructions from AGENTS.md" + + const collector = new TestSpanCollector() + const hooks = createTracingHooks( + collector, + { + client: { app: { log: async () => undefined } }, + worktree: "/tmp/test-project", + directory: "/tmp/test-project", + } as any, + { + apiKey: "", + apiUrl: "https://api.braintrust.dev", + appUrl: "https://www.braintrust.dev", + projectName: "test-project", 
+ tracingEnabled: true, + debug: false, + }, + ) + + const eventHook = hooks.event as (args: { event: unknown }) => Promise + const chatMessageHook = hooks["chat.message"] as ( + input: unknown, + output: unknown, + ) => Promise + const systemTransformHook = hooks["experimental.chat.system.transform"] as ( + input: unknown, + output: unknown, + ) => Promise + + expect(systemTransformHook).toBeDefined() + + await eventHook({ event: sessionCreated(sessionId) }) + await chatMessageHook( + { + sessionID: sessionId, + agent: "assistant", + model: { providerID: "anthropic", modelID: "claude-3-haiku" }, + }, + { parts: [{ type: "text", text: "Hi" }] }, + ) + // Plugin hook signature: (input: { sessionID }, output: { system: string[] }) + await systemTransformHook({ sessionID: sessionId }, { system: [systemContent] }) + await eventHook({ event: textPart(sessionId, messageId, "Hello!") }) + await eventHook({ + event: messageCompleted(sessionId, messageId, { tokens: { input: 10, output: 5 } }), + }) + await eventHook({ event: sessionIdle(sessionId) }) + await eventHook({ event: sessionDeleted(sessionId) }) + + const tree = spansToTree(collector.getSpans()) + const llmSpan = tree?.children[0]?.children.find((c) => c.type === "llm") + const input = llmSpan?.input as Array<{ role: string; content: string }> + expect(input).toEqual([ + { role: "system", content: systemContent }, + { role: "user", content: "Hi" }, + ]) + }) +}) + describe("Reasoning/Thinking Content", () => { it("LLM span includes reasoning content in output", async () => { const sessionId = "ses_reasoning" diff --git a/src/tracing.ts b/src/tracing.ts index 237d19a..7d33c40 100644 --- a/src/tracing.ts +++ b/src/tracing.ts @@ -34,6 +34,8 @@ interface SessionState { currentInput?: string currentOutput?: string currentMessageId?: string + // Joined system prompt captured from experimental.chat.system.transform + systemPrompt?: string // Parent-child session tracking (for subagents) parentSessionId?: string // If 
this is a child session, the parent's session ID parentRootSpanId?: string // The parent's root span ID (child spans link to this as root) @@ -500,6 +502,9 @@ export function createTracingHooks( // Build output as single-element array with the assistant response // This is the format Braintrust's LLM view expects const llmInput: Array> = [] + if (state.systemPrompt) { + llmInput.push({ role: "system", content: state.systemPrompt }) + } if (state.currentInput) { llmInput.push({ role: "user", content: state.currentInput }) } @@ -898,6 +903,37 @@ export function createTracingHooks( } }, + // Capture the resolved system prompt (AGENTS.md / CLAUDE.md content). + // OpenCode fires this before each LLM call; we join parts with \n\n and + // prepend them to the LLM span input so traces show the real instructions. + "experimental.chat.system.transform": async (hookInput, hookOutput) => { + fileLogger?.logChatSystem(hookInput, hookOutput, hookInput.sessionID) + try { + const { sessionID } = hookInput + const { system } = hookOutput + const state = sessionStates.get(sessionID) + if (state && Array.isArray(system) && system.length > 0) { + state.systemPrompt = system.join("\n\n") + log("Captured system prompt", { + sessionID, + parts: system.length, + length: state.systemPrompt.length, + }) + } + } catch (error) { + log("Error in experimental.chat.system.transform hook", { error: String(error) }) + client.app + .log({ + body: { + service: "braintrust-trace", + level: "error", + message: `experimental.chat.system.transform hook error: ${error}`, + }, + }) + .catch(() => {}) + } + }, + // Track tool executions "tool.execute.before": async (toolInput, output) => { fileLogger?.logToolBefore(toolInput, output, toolInput.sessionID)