Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions src/event-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ interface SessionState {
currentInput?: string
currentOutput?: string
currentMessageId?: string
// Joined system prompt captured from experimental.chat.system.transform
systemPrompt?: string
// Parent-child session tracking (for subagents)
parentSessionId?: string // If this is a child session, the parent's session ID
parentRootSpanId?: string // The parent's root span ID (child spans link to this as root)
Expand Down Expand Up @@ -188,6 +190,22 @@ export class EventProcessor {
return this.processChatMessage(sessionID, userMessage, model)
}

/**
 * Record the system prompt delivered by experimental.chat.system.transform.
 *
 * The parts are concatenated with a blank line ("\n\n") between them and stored
 * on the session state, so the LLM span can show a single system message.
 * Does nothing when the session has no tracked state or when `system` is not a
 * non-empty array; any previously stored prompt is left untouched in that case.
 *
 * @param sessionID - Session whose state receives the prompt
 * @param system - System prompt parts, in order
 */
async processSystemTransform(sessionID: string, system: string[]): Promise<void> {
  const sessionState = this.sessionStates.get(sessionID)
  if (!sessionState || !Array.isArray(system) || system.length === 0) {
    return
  }

  const joinedPrompt = system.join("\n\n")
  sessionState.systemPrompt = joinedPrompt
  this.log("Captured system prompt", {
    sessionID,
    parts: system.length,
    length: joinedPrompt.length,
  })
}

/**
* Lazily initialize session state and emit a root span for API-created sessions.
* Called from processChatMessage when no state exists for a session — this happens
Expand Down Expand Up @@ -623,6 +641,9 @@ export class EventProcessor {

// Build input/output in Braintrust's expected format
const llmInput: Array<Record<string, unknown>> = []
if (state.systemPrompt) {
llmInput.push({ role: "system", content: state.systemPrompt })
}
if (state.currentInput) {
llmInput.push({ role: "user", content: state.currentInput })
}
Expand Down
14 changes: 14 additions & 0 deletions src/file-logger.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ export type LogKind =
| "event" // raw OpenCode event received by the event hook
| "chat.message.input" // messageInput arg to chat.message hook
| "chat.message.output" // output arg to chat.message hook
| "chat.system.input" // input arg to experimental.chat.system.transform
| "chat.system.output" // output arg to experimental.chat.system.transform
| "tool.before.input" // toolInput arg to tool.execute.before
| "tool.before.output" // output arg to tool.execute.before
| "tool.after.input" // toolInput arg to tool.execute.after
Expand Down Expand Up @@ -117,6 +119,18 @@ export class FileLogger {
this.write("chat.message.output", output, { hook: "chat.message", session_id: sessionId })
}

/**
 * Convenience: log both arguments of an experimental.chat.system.transform hook call.
 * Writes a "chat.system.input" record followed by a "chat.system.output" record,
 * each tagged with the hook name and the (optional) session id.
 */
logChatSystem(input: unknown, output: unknown, sessionId?: string): void {
  const hook = "experimental.chat.system.transform"
  // A fresh metadata object per record, as each write call receives its own.
  this.write("chat.system.input", input, { hook, session_id: sessionId })
  this.write("chat.system.output", output, { hook, session_id: sessionId })
}

/** Convenience: log a tool.execute.before hook args */
logToolBefore(toolInput: unknown, output: unknown, sessionId?: string): void {
this.write("tool.before.input", toolInput, {
Expand Down
26 changes: 26 additions & 0 deletions src/replay.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ interface RawChatMessageOutput {
parts?: Array<{ type: string; text?: string }>
}

/** Data payload of a "chat.system.input" log record, as read back during replay. */
interface RawChatSystemInput {
// Session the system prompt belongs to; replay falls back to the record's session_id when absent
sessionID?: string
}

/** Data payload of a "chat.system.output" log record, as read back during replay. */
interface RawChatSystemOutput {
// System prompt parts; replay forwards them to the processor only when this is a non-empty array
system?: string[]
}

interface RawToolInput {
tool?: string
sessionID?: string
Expand Down Expand Up @@ -108,6 +116,7 @@ export async function replayLogFile(
sessionID: string
model?: { providerID?: string; modelID?: string }
} | null = null
let pendingChatSystemSessionID: string | null = null
let pendingToolBeforeInput: RawToolInput | null = null
let pendingToolAfterInput: RawToolInput | null = null

Expand Down Expand Up @@ -142,6 +151,23 @@ export async function replayLogFile(
break
}

case "chat.system.input": {
  // Buffer the session ID: the system prompt parts arrive in the paired
  // chat.system.output record. The cast on record.data is unchecked, so read
  // it defensively rather than dereferencing a possibly missing payload.
  const input = record.data as RawChatSystemInput | null | undefined
  pendingChatSystemSessionID = input?.sessionID ?? record.session_id ?? null
  break
}

case "chat.system.output": {
  // Consume the buffered session ID up front so it can never pair with a
  // later, unrelated output record.
  const sessionID = pendingChatSystemSessionID
  pendingChatSystemSessionID = null
  if (!sessionID) break
  const output = record.data as RawChatSystemOutput | null | undefined
  const system = output?.system
  // Skip records with no payload or an empty/malformed system array.
  if (Array.isArray(system) && system.length > 0) {
    await processor.processSystemTransform(sessionID, system)
  }
  break
}

case "tool.before.input": {
// Buffer: args may arrive in the paired tool.before.output record
pendingToolBeforeInput = record.data as RawToolInput
Expand Down
26 changes: 26 additions & 0 deletions src/test-helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ export type TestItem =
input: Record<string, unknown>
output: string | undefined | unknown
}
| {
_hook: "experimental.chat.system.transform"
system: string[]
sessionID?: string // Optional: target session (defaults to main session)
}

export interface TestToolCall {
id: string
Expand Down Expand Up @@ -337,6 +342,14 @@ export function toolExecute(
return { _hook: "tool.execute", callID, tool, title, input, output }
}

/**
 * Build a test item representing an experimental.chat.system.transform hook call
 * (a hook invocation, not an event).
 *
 * @param system - Resolved system prompt parts that OpenCode passes to the LLM
 * @param options - Optional target session; `sessionID` stays undefined when omitted
 */
export function systemTransform(system: string[], options?: { sessionID?: string }): TestItem {
  const sessionID = options?.sessionID
  return { _hook: "experimental.chat.system.transform", system, sessionID }
}

/**
* Helper to build a tool call object for use with toolCallPart
*/
Expand Down Expand Up @@ -390,6 +403,11 @@ type HookItem =
input: Record<string, unknown>
output: string | undefined | unknown
}
| {
_hook: "experimental.chat.system.transform"
system: string[]
sessionID?: string
}

function isHook(item: TestItem): item is HookItem {
return typeof item === "object" && "_hook" in item
Expand Down Expand Up @@ -446,6 +464,14 @@ export async function eventsToTree(
hook.output,
hook.input,
)
} else if (item._hook === "experimental.chat.system.transform") {
const hook = item as {
_hook: "experimental.chat.system.transform"
system: string[]
sessionID?: string
}
const targetSessionID = hook.sessionID || sessionID
await processor.processSystemTransform(targetSessionID, hook.system)
}
} else {
// It's a real Event - patch timestamps to use clock time for deterministic ordering
Expand Down
196 changes: 196 additions & 0 deletions src/tracing.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import {
sessionDeleted,
sessionError,
sessionIdle,
systemTransform,
textPart,
toolCallCompletedPart,
toolCallPart,
Expand Down Expand Up @@ -544,6 +545,201 @@ describe("Subagents (Child Sessions)", () => {
})
})

describe("System prompt capture", () => {
  // Shape of one entry in an LLM span's input array, as asserted below.
  type PromptMessage = { role: string; content: string }

  it("prepends system message to LLM span input when hook fires before message completes", async () => {
    const sid = "ses_sys_1"
    const msgId = "msg_sys_1"
    const prompt = "You are a helpful coding assistant. Follow AGENTS.md rules."

    const events = session(
      sid,
      sessionCreated(sid),
      chatMessage("Hi"),
      // The hook delivers the resolved system prompt right before the LLM call
      systemTransform([prompt]),
      textPart(sid, msgId, "Hello!"),
      messageCompleted(sid, msgId, { tokens: { input: 10, output: 5 } }),
      sessionIdle(sid),
    )
    const tree = await eventsToTree(events)

    const llm = tree?.children[0]?.children.find((span) => span.type === "llm")
    expect(llm).toBeDefined()
    const messages = llm?.input as PromptMessage[]
    expect(messages).toEqual([
      { role: "system", content: prompt },
      { role: "user", content: "Hi" },
    ])
  })

  it("omits system message when the hook never fires (existing behavior preserved)", async () => {
    const sid = "ses_sys_absent"
    const msgId = "msg_sys_absent"

    // Deliberately no systemTransform item, e.g. an OpenCode version that does not emit it
    const events = session(
      sid,
      sessionCreated(sid),
      chatMessage("Hi"),
      textPart(sid, msgId, "Hello!"),
      messageCompleted(sid, msgId, { tokens: { input: 10, output: 5 } }),
      sessionIdle(sid),
    )
    const tree = await eventsToTree(events)

    const llm = tree?.children[0]?.children.find((span) => span.type === "llm")
    expect(llm).toBeDefined()
    const messages = llm?.input as PromptMessage[]
    expect(messages).toEqual([{ role: "user", content: "Hi" }])
    // Belt and braces: no entry with the system role at any position
    expect(messages.some((message) => message.role === "system")).toBe(false)
  })

  it("joins multi-part system arrays with a blank line between parts", async () => {
    const sid = "ses_sys_multi"
    const msgId = "msg_sys_multi"
    const promptParts = ["Base instructions from AGENTS.md", "Additional context from CLAUDE.md"]

    const events = session(
      sid,
      sessionCreated(sid),
      chatMessage("Hi"),
      systemTransform(promptParts),
      textPart(sid, msgId, "Hello!"),
      messageCompleted(sid, msgId, { tokens: { input: 10, output: 5 } }),
      sessionIdle(sid),
    )
    const tree = await eventsToTree(events)

    const llm = tree?.children[0]?.children.find((span) => span.type === "llm")
    const messages = llm?.input as PromptMessage[]
    const leading = messages[0]
    expect(leading?.role).toBe("system")
    expect(leading?.content).toBe(promptParts.join("\n\n"))
  })

  it("updates system prompt across turns and reflects latest in each LLM span", async () => {
    const sid = "ses_sys_turns"
    const promptV1 = "Instructions v1"
    const promptV2 = "Instructions v2 with new context"

    const events = session(
      sid,
      sessionCreated(sid),
      // First turn
      chatMessage("First"),
      systemTransform([promptV1]),
      textPart(sid, "msg_1", "Reply 1"),
      messageCompleted(sid, "msg_1", { tokens: { input: 5, output: 3 } }),
      sessionIdle(sid),
      // Second turn: the hook fires again, this time with an updated prompt
      chatMessage("Second"),
      systemTransform([promptV2]),
      textPart(sid, "msg_2", "Reply 2"),
      messageCompleted(sid, "msg_2", { tokens: { input: 6, output: 4 } }),
      sessionIdle(sid),
    )
    const tree = await eventsToTree(events)

    const firstLlm = tree?.children[0]?.children.find((span) => span.type === "llm")
    const secondLlm = tree?.children[1]?.children.find((span) => span.type === "llm")

    const firstMessages = firstLlm?.input as PromptMessage[]
    const secondMessages = secondLlm?.input as PromptMessage[]

    expect(firstMessages[0]).toEqual({ role: "system", content: promptV1 })
    expect(secondMessages[0]).toEqual({ role: "system", content: promptV2 })
  })

  it("ignores empty system arrays and leaves the LLM span input unchanged", async () => {
    const sid = "ses_sys_empty"
    const msgId = "msg_sys_empty"

    const events = session(
      sid,
      sessionCreated(sid),
      chatMessage("Hi"),
      systemTransform([]),
      textPart(sid, msgId, "Hello!"),
      messageCompleted(sid, msgId, { tokens: { input: 10, output: 5 } }),
      sessionIdle(sid),
    )
    const tree = await eventsToTree(events)

    const llm = tree?.children[0]?.children.find((span) => span.type === "llm")
    const messages = llm?.input as PromptMessage[]
    expect(messages).toEqual([{ role: "user", content: "Hi" }])
  })
})

describe("System prompt capture: production hooks", () => {
  it("createTracingHooks captures system prompt via experimental.chat.system.transform", async () => {
    // Signature shared by the two-argument plugin hooks exercised here.
    type InputOutputHook = (input: unknown, output: unknown) => Promise<void>

    const sid = "ses_hooks_sys"
    const msgId = "msg_hooks_sys"
    const prompt = "System instructions from AGENTS.md"

    const collector = new TestSpanCollector()
    const hooks = createTracingHooks(
      collector,
      {
        client: { app: { log: async () => undefined } },
        worktree: "/tmp/test-project",
        directory: "/tmp/test-project",
      } as any,
      {
        apiKey: "",
        apiUrl: "https://api.braintrust.dev",
        appUrl: "https://www.braintrust.dev",
        projectName: "test-project",
        tracingEnabled: true,
        debug: false,
      },
    )

    const eventHook = hooks.event as (args: { event: unknown }) => Promise<void>
    const chatMessageHook = hooks["chat.message"] as InputOutputHook
    const systemTransformHook = hooks["experimental.chat.system.transform"] as InputOutputHook

    expect(systemTransformHook).toBeDefined()

    // Feed a single event through the production event hook.
    const emit = (event: unknown): Promise<void> => eventHook({ event })

    await emit(sessionCreated(sid))
    await chatMessageHook(
      {
        sessionID: sid,
        agent: "assistant",
        model: { providerID: "anthropic", modelID: "claude-3-haiku" },
      },
      { parts: [{ type: "text", text: "Hi" }] },
    )
    // The plugin calls this hook as (input: { sessionID }, output: { system: string[] })
    await systemTransformHook({ sessionID: sid }, { system: [prompt] })
    await emit(textPart(sid, msgId, "Hello!"))
    await emit(messageCompleted(sid, msgId, { tokens: { input: 10, output: 5 } }))
    await emit(sessionIdle(sid))
    await emit(sessionDeleted(sid))

    const tree = spansToTree(collector.getSpans())
    const llm = tree?.children[0]?.children.find((span) => span.type === "llm")
    const messages = llm?.input as Array<{ role: string; content: string }>
    expect(messages).toEqual([
      { role: "system", content: prompt },
      { role: "user", content: "Hi" },
    ])
  })
})

describe("Reasoning/Thinking Content", () => {
it("LLM span includes reasoning content in output", async () => {
const sessionId = "ses_reasoning"
Expand Down
Loading
Loading