/**
 * Record the system prompt delivered by the experimental.chat.system.transform hook.
 *
 * The parts are concatenated with a blank line ("\n\n") between them and stored on the
 * session's state as `systemPrompt`, so the LLM span shows a single system message.
 * Nothing is stored (and nothing is logged) when the session has no state, or when
 * `system` is not a non-empty array.
 *
 * @param sessionID - Session whose state receives the prompt.
 * @param system - System prompt parts as passed to the hook.
 */
async processSystemTransform(sessionID: string, system: string[]): Promise<void> {
  const session = this.sessionStates.get(sessionID)
  if (!session) return
  if (!Array.isArray(system) || system.length === 0) return

  const joined = system.join("\n\n")
  session.systemPrompt = joined
  this.log("Captured system prompt", {
    sessionID,
    parts: system.length,
    length: joined.length,
  })
}
/**
 * Convenience: log both arguments of an experimental.chat.system.transform hook call.
 * Writes a "chat.system.input" record followed by a "chat.system.output" record, each
 * tagged with the hook name and the given session id.
 */
logChatSystem(input: unknown, output: unknown, sessionId?: string): void {
  const records: Array<[LogKind, unknown]> = [
    ["chat.system.input", input],
    ["chat.system.output", output],
  ]
  for (const [kind, payload] of records) {
    // A fresh metadata object per record, exactly as two separate write calls would pass.
    this.write(kind, payload, {
      hook: "experimental.chat.system.transform",
      session_id: sessionId,
    })
  }
}
/**
 * `data` payload of a "chat.system.input" log record, i.e. the input argument of the
 * experimental.chat.system.transform hook, limited to the field replay reads.
 */
interface RawChatSystemInput {
  // Session the hook fired for. Replay falls back to the record's own session_id
  // when this is missing, and skips the paired output record if neither is set.
  sessionID?: string
}

/**
 * `data` payload of a "chat.system.output" log record, i.e. the output argument of the
 * experimental.chat.system.transform hook, limited to the field replay reads.
 */
interface RawChatSystemOutput {
  // System prompt parts. Replay forwards them to processSystemTransform only when
  // this is a non-empty array.
  system?: string[]
}
/**
 * Build a test item representing an experimental.chat.system.transform hook call
 * (a hook invocation, not an event).
 *
 * @param system - System prompt parts to hand to the hook.
 * @param options - Optional target session; when omitted the item's `sessionID`
 *   is `undefined` and the consumer picks the session.
 * @returns A hook item tagged with `_hook: "experimental.chat.system.transform"`.
 */
export function systemTransform(system: string[], options?: { sessionID?: string }): TestItem {
  // The sessionID key is always present on the item, even when its value is undefined.
  const sessionID = options == null ? undefined : options.sessionID
  return {
    _hook: "experimental.chat.system.transform",
    system,
    sessionID,
  }
}
+ + const tree = await eventsToTree( + session( + sessionId, + sessionCreated(sessionId), + chatMessage("Hi"), + // OpenCode fires this right before the LLM call with the resolved system prompt + systemTransform([systemContent]), + textPart(sessionId, messageId, "Hello!"), + messageCompleted(sessionId, messageId, { tokens: { input: 10, output: 5 } }), + sessionIdle(sessionId), + ), + ) + + const llmSpan = tree?.children[0]?.children.find((c) => c.type === "llm") + expect(llmSpan).toBeDefined() + const input = llmSpan?.input as Array<{ role: string; content: string }> + expect(input).toEqual([ + { role: "system", content: systemContent }, + { role: "user", content: "Hi" }, + ]) + }) + + it("omits system message when the hook never fires (existing behavior preserved)", async () => { + const sessionId = "ses_sys_absent" + const messageId = "msg_sys_absent" + + const tree = await eventsToTree( + session( + sessionId, + sessionCreated(sessionId), + chatMessage("Hi"), + // No systemTransform — e.g. 
it("joins multi-part system arrays with a blank line between parts", async () => {
  const sessionId = "ses_sys_multi"
  const messageId = "msg_sys_multi"
  const parts = ["Base instructions from AGENTS.md", "Additional context from CLAUDE.md"]

  const tree = await eventsToTree(
    session(
      sessionId,
      sessionCreated(sessionId),
      chatMessage("Hi"),
      systemTransform(parts),
      textPart(sessionId, messageId, "Hello!"),
      messageCompleted(sessionId, messageId, { tokens: { input: 10, output: 5 } }),
      sessionIdle(sessionId),
    ),
  )

  const llmSpan = tree?.children[0]?.children.find((c) => c.type === "llm")
  // Fail with a clear assertion, as the sibling tests do, rather than a TypeError
  // on `input[0]` when no LLM span was produced.
  expect(llmSpan).toBeDefined()
  const input = llmSpan?.input as Array<{ role: string; content: string }>
  // The expected text is spelled out literally so the test does not simply repeat
  // the implementation's own join expression.
  expect(input).toEqual([
    {
      role: "system",
      content: "Base instructions from AGENTS.md\n\nAdditional context from CLAUDE.md",
    },
    { role: "user", content: "Hi" },
  ])
})
it("ignores empty system arrays and leaves the LLM span input unchanged", async () => {
  const sid = "ses_sys_empty"
  const mid = "msg_sys_empty"

  // One turn in which the hook fires with no system parts at all.
  const events = session(
    sid,
    sessionCreated(sid),
    chatMessage("Hi"),
    systemTransform([]),
    textPart(sid, mid, "Hello!"),
    messageCompleted(sid, mid, { tokens: { input: 10, output: 5 } }),
    sessionIdle(sid),
  )
  const tree = await eventsToTree(events)

  const firstTurn = tree?.children[0]
  const llmSpan = firstTurn?.children.find((span) => span.type === "llm")
  const input = llmSpan?.input as Array<{ role: string; content: string }>

  // Only the user message is present: no system entry was added.
  expect(input).toEqual([{ role: "user", content: "Hi" }])
})
+ tracingEnabled: true, + debug: false, + }, + ) + + const eventHook = hooks.event as (args: { event: unknown }) => Promise + const chatMessageHook = hooks["chat.message"] as ( + input: unknown, + output: unknown, + ) => Promise + const systemTransformHook = hooks["experimental.chat.system.transform"] as ( + input: unknown, + output: unknown, + ) => Promise + + expect(systemTransformHook).toBeDefined() + + await eventHook({ event: sessionCreated(sessionId) }) + await chatMessageHook( + { + sessionID: sessionId, + agent: "assistant", + model: { providerID: "anthropic", modelID: "claude-3-haiku" }, + }, + { parts: [{ type: "text", text: "Hi" }] }, + ) + // Plugin hook signature: (input: { sessionID }, output: { system: string[] }) + await systemTransformHook({ sessionID: sessionId }, { system: [systemContent] }) + await eventHook({ event: textPart(sessionId, messageId, "Hello!") }) + await eventHook({ + event: messageCompleted(sessionId, messageId, { tokens: { input: 10, output: 5 } }), + }) + await eventHook({ event: sessionIdle(sessionId) }) + await eventHook({ event: sessionDeleted(sessionId) }) + + const tree = spansToTree(collector.getSpans()) + const llmSpan = tree?.children[0]?.children.find((c) => c.type === "llm") + const input = llmSpan?.input as Array<{ role: string; content: string }> + expect(input).toEqual([ + { role: "system", content: systemContent }, + { role: "user", content: "Hi" }, + ]) + }) +}) + describe("Reasoning/Thinking Content", () => { it("LLM span includes reasoning content in output", async () => { const sessionId = "ses_reasoning" diff --git a/src/tracing.ts b/src/tracing.ts index 237d19a..7d33c40 100644 --- a/src/tracing.ts +++ b/src/tracing.ts @@ -34,6 +34,8 @@ interface SessionState { currentInput?: string currentOutput?: string currentMessageId?: string + // Joined system prompt captured from experimental.chat.system.transform + systemPrompt?: string // Parent-child session tracking (for subagents) parentSessionId?: string // If 
// Capture the resolved system prompt handed to experimental.chat.system.transform.
// The parts are joined with "\n\n" and kept on the session state as `systemPrompt`,
// which the LLM span builder prepends as a system message. Nothing is captured when
// the session has no state or the hook output carries no non-empty `system` array.
"experimental.chat.system.transform": async (hookInput, hookOutput) => {
  fileLogger?.logChatSystem(hookInput, hookOutput, hookInput.sessionID)
  try {
    const sessionID = hookInput.sessionID
    const parts = hookOutput.system
    const state = sessionStates.get(sessionID)
    if (!state || !Array.isArray(parts) || parts.length === 0) {
      return
    }

    const joined = parts.join("\n\n")
    state.systemPrompt = joined
    log("Captured system prompt", {
      sessionID,
      parts: parts.length,
      length: joined.length,
    })
  } catch (error) {
    log("Error in experimental.chat.system.transform hook", { error: String(error) })
    // Best-effort report to the host app log; a failure of that call is ignored.
    client.app
      .log({
        body: {
          service: "braintrust-trace",
          level: "error",
          message: `experimental.chat.system.transform hook error: ${error}`,
        },
      })
      .catch(() => {})
  }
},