From e7ef1f13ac274da67933585df5c37203b6f9450e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niccol=C3=B2=20Ferrari?= Date: Tue, 14 Apr 2026 01:21:23 +0200 Subject: [PATCH 1/2] feat: add Codex support via shared extraction core --- README.md | 110 +++-- SKILL.md | 250 +---------- agents/openai.yaml | 4 + references/provider-claude.md | 67 +++ references/provider-codex.md | 52 +++ references/workflow-core.md | 196 ++++++++ scripts/extract.py | 691 ++++++----------------------- scripts/providers/__init__.py | 2 + scripts/providers/claude.py | 471 ++++++++++++++++++++ scripts/providers/codex.py | 299 +++++++++++++ scripts/providers/common.py | 160 +++++++ tests/fixtures/codex_session.jsonl | 14 + tests/test_codex_extract.py | 96 ++++ tests/test_discovery.py | 52 +++ 14 files changed, 1619 insertions(+), 845 deletions(-) create mode 100644 agents/openai.yaml create mode 100644 references/provider-claude.md create mode 100644 references/provider-codex.md create mode 100644 references/workflow-core.md create mode 100644 scripts/providers/__init__.py create mode 100644 scripts/providers/claude.py create mode 100644 scripts/providers/codex.py create mode 100644 scripts/providers/common.py create mode 100644 tests/fixtures/codex_session.jsonl create mode 100644 tests/test_codex_extract.py create mode 100644 tests/test_discovery.py diff --git a/README.md b/README.md index 4e9b0d1..7daec0a 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # agent-retro -> **Compatibility: Claude Code only (for now).** This skill reads Claude Code's JSONL session transcripts. Support for other agents (Gemini CLI, Codex, Cursor, etc.) is on the [roadmap](#roadmap). +> **Compatibility: Claude Code and OpenAI Codex.** The shared retro workflow is runtime-agnostic; transcript discovery and parsing are handled by provider adapters. -A session retrospective skill for AI coding agents. 
Run `/agent-retro` at the end of a session to analyze what happened, identify friction, and get concrete suggestions for improving your skills, rules, and workflows. +A session retrospective skill for AI coding agents. Run `/agent-retro` at the end of a session to analyze what happened, identify friction, and get concrete suggestions for improving skills, rules, and workflows. Follows the [Agent Skills](https://agentskills.io) open standard. @@ -10,119 +10,113 @@ Follows the [Agent Skills](https://agentskills.io) open standard. `/agent-retro` reads your session transcript from disk and produces a structured analysis: -- **Full conversation arc** — every user message and assistant response, in order. No sampling, no skipping. -- **Token cost breakdown** — per-agent attribution so you can see where the budget went. -- **Tool result waste detection** — flags oversized tool results (a 45KB file read that was never used is money burned). -- **Friction analysis** — identifies user corrections, redirects, and abandoned approaches, then traces each to a root cause. -- **Actionable proposals** — specific edits to skills, rules, or config. Not vague "improve X" — the actual text to change. +- **Full conversation arc**: every user message and assistant response, in order +- **Token budget breakdown**: totals plus runtime-estimated cost when available +- **Tool result waste detection**: flags oversized tool results that were likely wasted +- **Friction analysis**: identifies corrections, redirects, and abandoned approaches +- **Actionable proposals**: concrete edits to skills, rules, or setup The output is a markdown retro file plus an interactive walkthrough where you approve or defer each proposed action. -## Why this exists - -Every agent framework has inline reflection (retry loops, critic agents). 
None of them do **post-session systemic reflection** — looking across an entire conversation to find patterns of failure and proposing configuration changes to prevent them next time. - -This is the equivalent of an agile sprint retrospective, but for a single AI session, producing machine-editable artifacts rather than sticky notes on a board. See [references/design.md](references/design.md) for the full design rationale and comparison with Reflexion, LangGraph, CrewAI, and others. - ## Install -**Recommended** (works with any Agent Skills-compatible tool): +**Recommended**: ```bash npx skills add giannimassi/agent-retro ``` -**Or clone manually** into your skills directory: +**Manual clone**: ```bash # Claude Code git clone https://github.com/giannimassi/agent-retro.git ~/.claude/skills/agent-retro -# Gemini CLI (when supported) -# git clone https://github.com/giannimassi/agent-retro.git ~/.gemini/skills/agent-retro +# OpenAI Codex +git clone https://github.com/giannimassi/agent-retro.git ~/.codex/skills/agent-retro ``` ## Usage At the end of any session: -``` +```text /agent-retro ``` The skill will: -1. Find your current session's transcript -2. Extract structured data (streaming, no full file load) -3. Analyze the conversation arc for friction patterns -4. Classify what the session produced -5. Propose concrete improvements -6. Walk you through each proposal for approval +1. Discover the current session transcript +2. Verify the provider and candidate file +3. Extract structured data +4. Analyze the conversation arc +5. Classify outcomes and friction +6. Propose concrete follow-up actions -### Extraction script standalone +## Extraction script -The Python extraction script can be used independently: +The bundled script supports both runtimes. 
```bash -# Quick session verification (reads only first/last 64KB) -python3 scripts/extract.py --metadata-only +# Discover and verify the current session +python3 scripts/extract.py --discover-current --provider auto --metadata-only -# Compact extraction (tool counts, no individual calls) -python3 scripts/extract.py --summary +# Compact extraction for the current session +python3 scripts/extract.py --discover-current --provider auto --summary -# Full extraction (includes every tool call detail) -python3 scripts/extract.py +# Extract a known transcript path +python3 scripts/extract.py <transcript_path> --provider auto ``` +Supported providers: +- `auto` +- `claude` +- `codex` + ## How it works -### Token-efficient extraction +### Shared core plus provider adapters -The extraction script (`scripts/extract.py`) minimizes token usage: +The repo uses one shared retro workflow plus provider-specific session adapters: -- **Streaming** — processes JSONL line-by-line, never loads the full file into memory -- **No tool result content** — tracks result **sizes** without including the content. A 50MB session transcript produces ~30KB of extraction output. -- **Head/tail metadata** — `--metadata-only` reads only the first and last 64KB of the file for session verification, borrowing a technique from Claude Code's internal `readSessionLite` function. -- **Full conversation arc** — all user messages and assistant text are preserved. The arc is the whole point of a retro — you can't analyze friction you can't see. 
+- Claude adapter: `~/.claude/sessions` and `~/.claude/projects/...` +- Codex adapter: `~/.codex/sessions/.../rollout-*.jsonl` +- Shared workflow: classification, friction analysis, action proposals, retro markdown format ### What the extraction captures | Field | What | Why | |---|---|---| -| `session` | ID, cwd, branch, duration | Context | -| `tokens` | Input/output/cache totals + USD cost | Budget analysis | -| `tools` | Call counts or full call list | Pattern detection | -| `agents` | Each dispatch with type, model, cost | Delegation efficiency | -| `skills` | Each invocation with args | Skill triggering analysis | -| `git` | Branches, commits, PR operations | What was shipped | -| `files` | Files read/written/edited | Scope tracking | -| `conversation_arc` | Full timeline of messages | Friction detection | +| `provider` | Runtime that produced the transcript | Adapter routing | +| `session` | ID, cwd, branch when available, duration | Context | +| `tokens` | Token totals and estimated USD cost when available | Budget analysis | +| `tools` | Tool counts or full call list | Pattern detection | +| `agents` | Agent dispatches with type/model when available | Delegation analysis | +| `skills` | Explicit skill invocations when exposed by the runtime | Skill triggering analysis | +| `git` | Branch, commit, and PR operations | What was shipped | +| `files` | Files read, written, or edited | Scope tracking | +| `conversation_arc` | Full message timeline | Friction detection | | `tool_result_sizes` | Per-tool total/avg/max bytes | Waste detection | ## Example output -See [examples/sample-retro.md](examples/sample-retro.md) for a retro from a real session. - -## Roadmap +See [examples/sample-retro.md](examples/sample-retro.md) for a Claude retro example. The markdown structure is shared across providers. -This skill currently only works with **Claude Code**. 
The analysis steps (friction detection, root cause tracing, action proposals) are agent-agnostic — only the transcript reading is Claude-specific. - -Planned support: +## Runtime status | Agent | Transcript format | Status | |---|---|---| | **Claude Code** | `~/.claude/projects//.jsonl` | Supported | -| **Gemini CLI** | `~/.gemini/sessions/` | Planned | -| **OpenAI Codex** | TBD | Planned | +| **OpenAI Codex** | `~/.codex/sessions/**/rollout-*.jsonl` | Supported | +| **Gemini CLI** | TBD | Planned | | **Cursor** | TBD | Planned | | **Roo Code** | TBD | Planned | -Contributions welcome — if you know where another agent stores its session data, open an issue. - ## Requirements -- Python 3.8+ (stdlib only, no dependencies) -- Currently: Claude Code v2.1.59+ +- Python 3.8+ +- Claude Code for Claude transcripts +- OpenAI Codex for Codex transcripts ## License diff --git a/SKILL.md b/SKILL.md index d756301..6f666d5 100644 --- a/SKILL.md +++ b/SKILL.md @@ -1,245 +1,43 @@ --- name: agent-retro -description: Run a conversation retrospective — analyze what happened in this session, what worked, what didn't, and propose concrete improvements. Use when the user says "retro", "retrospective", "what happened in this session", "session review", "what did we do", "analyze this conversation", or when wrapping up a long session and wanting to capture lessons. Especially useful after using a skill you're developing — identifies what can be improved about the skill, rules, setup, or process. -compatibility: Requires Claude Code. Reads session transcripts from ~/.claude/projects/ (JSONL format). Python 3.8+ for the extraction script (stdlib only). Other agents are on the roadmap. +description: Run a conversation retrospective — analyze what happened in this session, what worked, what didn't, and propose concrete improvements. Use when the user says "retro", "retrospective", "what happened in this session", "session review", "what did we do", or "analyze this conversation". 
Works in Claude Code and OpenAI Codex by reading the local session transcript and producing a structured retro plus follow-up actions. metadata: author: giannimassi - version: "0.1.0" + version: "0.2.0" --- -# /retro — Conversation Retrospective +# /agent-retro — Conversation Retrospective -Analyze the current session end-to-end: what happened, what it produced, what worked, what didn't, and what to improve. The output is a structured retro file plus a set of proposed actions you walk through with the user. +Analyze the current session end-to-end: what happened, what it produced, what worked, what did not, and what to improve. The output is a structured retro file plus a set of proposed actions you walk through with the user. -## Step 1: Extract Session Data +## Step 1: Discover The Runtime And Transcript -The full conversation lives in a JSONL file on disk — including tool calls, agent dispatches, and messages that may have been compacted from current context. Always use this source, not just what's in context. +Always work from the session transcript on disk, not just current context. -### Find the current session - -The session file links a running process to its transcript. But session files are keyed by PID and may be cleaned up when processes end, so use a two-step approach with verification. - -**Step A: Try the sessions directory first** -```bash -ls -t ~/.claude/sessions/*.json | head -5 -``` - -Read the most recent file(s). Match by `cwd` (should equal current working directory). The `sessionId` maps to: - -``` -~/.claude/projects//.jsonl -``` - -Where `` replaces `/` with `-` (e.g., `/Users/foo/dev/myproject` → `-Users-foo-dev-myproject`). 
- -**Step B: If no session file matches** (PID rotated, file cleaned up), fall back to the most recently modified `.jsonl` in the project directory: -```bash -ls -t ~/.claude/projects//*.jsonl | head -3 -``` - -**Step C: Verify you have the right file.** Use `--metadata-only` for cheap verification — it reads only the first/last 64KB of the file (no full parse): -```bash -python3 ${CLAUDE_SKILL_DIR}/scripts/extract.py --metadata-only -``` - -This returns session_id, cwd, git_branch, first_prompt, timestamps, and file size. Check `first_prompt` matches what was said at the start of the conversation. - -If it doesn't match, try the next most recent file. If none match, state this in the retro output — analyzing the wrong session is worse than no analysis. - -### Run extraction +Start with the bundled extractor: ```bash -python3 ${CLAUDE_SKILL_DIR}/scripts/extract.py --summary +python3 scripts/extract.py --discover-current --provider auto --metadata-only ``` -Use `--summary` to get compact output (tool counts only, no individual call listings). If you need to drill into specific tool calls later, re-run without `--summary`. 
- -The script outputs JSON with: -- **session**: id, cwd, branch, duration, branches seen -- **tokens**: input/output/cache totals + estimated USD cost -- **tools**: call counts by tool name -- **agents**: each dispatch with type, model, description, prompt preview, subagent tokens -- **skills**: each skill invocation with name and args -- **git**: branches, commits, PR operations -- **files**: files read/written/edited -- **conversation_arc**: complete timeline of user messages and assistant text responses (tells the full story of the session — all messages preserved, no sampling) -- **tool_result_sizes**: per-tool breakdown of total/avg/max result sizes in bytes — use this to identify token waste (e.g., a 45KB Read result that was never referenced again) - -Note: tool result **content** is not included in the extraction (it would dominate the output). Only the size is tracked. If you need to see what a specific tool returned, grep the JSONL directly for the tool_use_id. - -### Understanding the cost data - -The JSONL records cache-aware token usage: -- `input_tokens`: non-cached input (typically very low due to prompt caching) -- `cache_creation_input_tokens`: new cache writes (charged at 1.25x input rate) -- `cache_read_input_tokens`: cache hits (charged at 0.1x input rate — this is usually the biggest number) -- `output_tokens`: generated tokens (most expensive per-token) - -The script computes estimated USD cost using Opus pricing. For sessions with subagents, the subagent cost is separate and additive. - -### If extraction fails - -Fall back to analyzing current conversation context. Note this limitation — compacted content is lost. - -## Step 2: Read the Conversation Arc - -The `conversation_arc` field is the story of the session — user requests and assistant responses in chronological order. Read it to understand: - -1. **What the user actually asked for** (their words, not your interpretation) -2. **How the approach evolved** — did the plan change? 
Were there pivots? -3. **Where friction occurred** — look for user corrections, redirects, or repeated instructions - -This is the foundation for everything that follows. Don't skip it. - -## Step 3: Classify Outcomes - -What did this session actually produce? List all that apply. - -| Outcome | Detection signal | -|---|---| -| New code | Write/Edit to source files, git commits | -| Bug fix | Commits with "fix" prefix, debugging patterns | -| Communication | Slack/email MCP calls, PR comments | -| Local files | Writes to non-code files (plans, notes, docs) | -| Setup changes | Edits to config, CLAUDE.md, settings, skills | -| Spec / Design | Plan files, design discussion, no implementation | -| Process improvement | Rule updates, workflow changes, skill creation | -| Review | PR review calls, code-review agents | -| Research | Heavy Read/Grep with minimal Write/Edit | -| Skill development | Writes to `skills/` directories | - -## Step 4: Analyze What Worked - -Look for these concrete patterns — don't just say "things went well": - -- **First-try success**: a tool call or agent dispatch that produced the right result without retries. Name the specific call. -- **Efficient delegation**: a subagent that cost < $0.50 and produced a useful result. Compare to what it would have cost to do inline. -- **Good skill match**: a skill triggered at the right time. What was the trigger phrase? Did the skill's output align with what the user wanted? -- **Clean conversation flow**: stretches where the user didn't need to correct or redirect. What made those stretches smooth? -- **Smart tool choice**: using Grep instead of spawning an Explore agent, or vice versa — whichever was more efficient for the situation. +This returns: +- `provider` +- `transcript_path` +- `session_id` +- `cwd` +- `git_branch` when available +- `first_prompt` +- timestamps and file size -## Step 5: Analyze What Didn't Work - -This is the most valuable part. 
Go beyond listing problems — trace each one to its root cause. - -### How to identify friction - -Read the conversation arc. Look for these patterns in user messages: -- **Corrections**: "no", "not that", "wrong", "that's not what I meant" -- **Redirects**: "instead do X", "let's try a different approach" -- **Repetitions**: "I already said", "like I mentioned", "I asked for" -- **Stops**: "wait", "hold on", "stop", "undo", "revert" -- **Frustration**: short terse responses after previously being engaged - -For each friction point, trace the causal chain: - -``` -User correction -> What did Claude do wrong -> Why did Claude do that -> -Was it a wrong assumption? Missing context? Bad skill guidance? Wrong tool? -``` - -### Specific failure patterns to check - -**Wasted agent dispatches**: Compare each agent's token cost to the usefulness of its output. If an agent cost > $1 and its result was discarded or only partially used, that's a failure. Root cause: was the prompt too vague? Wrong agent type? Missing context in the dispatch? - -**Oversized tool results**: Check `tool_result_sizes` — if a tool (especially Read) returned huge results (>10KB avg) that weren't meaningfully used, that's token waste. Could Claude have used offset/limit to read just the relevant section? Could an Explore agent have answered the question without loading the full file into context? - -**Tool call retries**: Same tool called 3+ times in a row with different inputs. Root cause: was the first attempt a guess? Should Claude have read more context first? - -**Abandoned approaches**: Stretches of work (5+ tool calls) followed by a pivot to a completely different approach. Root cause: did Claude commit too early before understanding the problem? Should it have asked? - -**Over-engineering**: More tool calls or agent dispatches than the task warranted. Root cause: did a skill push Claude toward a heavyweight process when something simpler would have worked? 
- -**Under-specification**: Claude asked clarifying questions the user shouldn't have needed to answer (information was available in files, context, or memory). Root cause: missing research step? Skill didn't tell Claude where to look? - -### For skill-development retros - -When the session involved using a skill under development, go deeper: - -1. **Read the skill's SKILL.md** that was active during the session -2. For each friction point, identify which skill instruction (or missing instruction) caused it -3. Categorize skill issues: - - **Triggering**: skill should have triggered but didn't, or triggered when it shouldn't have - - **Missing guidance**: skill didn't cover an edge case the session encountered - - **Wrong guidance**: skill told Claude to do X but Y would have been better - - **Over-specification**: skill was too rigid, forced a workflow that didn't fit the situation - - **Under-specification**: skill left too much to Claude's judgment in an area where it consistently makes bad choices - - **Missing tool/script**: skill described a manual process that should have been automated -4. 
For each issue, draft the specific SKILL.md edit that would fix it (not vague "improve X" — write the actual text change) - -## Step 6: Propose Actions - -For every issue from Step 5, propose a concrete action: - -| Type | What it means | Must include | -|---|---|---| -| `skill-update` | Edit an existing skill | The specific text to change and why | -| `skill-create` | New skill needed | What it would do and when it triggers | -| `rule-update` | Edit CLAUDE.md or rules/ | The rule text and which file | -| `rule-create` | New rule file | The rule content | -| `setup-change` | Config, hooks, tools | What to change and where | -| `memory-update` | Save to auto-memory | The fact to remember | -| `investigate` | Needs more research | What question to answer | -| `acknowledge` | No systemic fix | Why this was one-off | - -**Priority order**: systemic fixes (skill/rule) > setup changes > one-offs. - -For skill-update actions specifically, include: -- Which section of SKILL.md to edit -- The before text (or "new section after X") -- The after text -- Which test case / friction point this addresses - -## Step 7: Write the Retro File - -Save to `~/.claude/worklog/retros/YYYY-MM-DD-.md` (create the directory if it doesn't exist). - -```markdown -# Retro: - -**Date**: YYYY-MM-DD -**Duration**: Xh Ym -**Session ID**: -**Branch**: -**Transcript**: `` -**Estimated cost**: $X.XX (main) + $Y.YY (subagents) = $Z.ZZ total - -## What Happened - - -## Outcomes -- : - -## Token Budget -| Component | Output tokens | Cache read | Cache write | Est. 
cost | -|---|---|---|---|---| -| Main context | X | X | X | $X.XX | -| Agent: () | X | X | X | $X.XX | -| **Total** | | | | **$X.XX** | - -## Tool Result Waste - - -## What Worked -- ****: - -## What Didn't Work -- ****: why -> systemic cause> - -## Actions -| # | Type | Action | Where | Status | -|---|------|--------|-------|--------| -| 1 | skill-update | | | proposed | -| 2 | rule-update | | | proposed | -``` +Check that `cwd` matches the current project and that `first_prompt` matches the start of the conversation. -## Step 8: Walk Through Actions +Then load the provider-specific guidance: +- If `provider` is `claude`, read [references/provider-claude.md](references/provider-claude.md) +- If `provider` is `codex`, read [references/provider-codex.md](references/provider-codex.md) -Present the retro summary in conversation, then walk through proposed actions one by one with the user. +Once the provider is confirmed, read [references/workflow-core.md](references/workflow-core.md) and follow it for the rest of the retro. -For actions the user approves: -- **skill-update / rule-update / rule-create**: make the edit, show the diff -- **memory-update**: save via the memory system -- **setup-change**: apply the config change +## Fallbacks -Update the retro file's action table as you go (proposed -> done / deferred / rejected). +- If `--discover-current` fails, use the manual discovery steps in the provider reference. +- If extraction fails, fall back to analyzing current conversation context and explicitly note that compacted or omitted transcript content may be missing. diff --git a/agents/openai.yaml b/agents/openai.yaml new file mode 100644 index 0000000..8f4b5a6 --- /dev/null +++ b/agents/openai.yaml @@ -0,0 +1,4 @@ +interface: + display_name: "Agent Retro" + short_description: "Review agent sessions and propose improvements" + default_prompt: "Run a retrospective on this session and tell me what worked, what did not, and what to improve." 
diff --git a/references/provider-claude.md b/references/provider-claude.md new file mode 100644 index 0000000..25315b5 --- /dev/null +++ b/references/provider-claude.md @@ -0,0 +1,67 @@ +# Claude Provider + +Use this reference when `python3 scripts/extract.py --discover-current --provider auto --metadata-only` reports `provider: claude`. + +## Discover the current session + +Preferred path: + +```bash +python3 scripts/extract.py --discover-current --provider claude --metadata-only +``` + +If discovery fails, use the manual fallback: + +1. Inspect the recent session index files: + +```bash +ls -t ~/.claude/sessions/*.json | head -5 +``` + +2. Read the most recent file whose `cwd` matches the current project. Its `sessionId` maps to: + +```text +~/.claude/projects/<encoded-cwd>/<sessionId>.jsonl +``` + +Where `<encoded-cwd>` is the working directory with `/` replaced by `-`. + +3. If no session index file matches, fall back to the newest transcripts in the project directory: + +```bash +ls -t ~/.claude/projects/<encoded-cwd>/*.jsonl | head -3 +``` + +4. Verify the candidate transcript: + +```bash +python3 scripts/extract.py <transcript_path> --provider claude --metadata-only +``` + +Confirm that `cwd` and `first_prompt` match the current session. + +## Run extraction + +```bash +python3 scripts/extract.py --discover-current --provider claude --summary +``` + +If you need raw call details: + +```bash +python3 scripts/extract.py <transcript_path> --provider claude +``` + +## Claude-specific notes + +- Claude transcripts expose cache-aware token fields directly. +- Estimated USD cost uses Claude Opus pricing. +- Subagent cost attribution is available when the sibling subagents directory exists or when you pass `--subagents-dir`. 
+ +## Retro file location + +Write retros to: + +```text +~/.claude/worklog/retros/YYYY-MM-DD-<slug>.md +``` diff --git a/references/provider-codex.md b/references/provider-codex.md new file mode 100644 index 0000000..c27d46f --- /dev/null +++ b/references/provider-codex.md @@ -0,0 +1,52 @@ +# Codex Provider + +Use this reference when `python3 scripts/extract.py --discover-current --provider auto --metadata-only` reports `provider: codex`. + +## Discover the current session + +Preferred path: + +```bash +python3 scripts/extract.py --discover-current --provider codex --metadata-only +``` + +If discovery fails, inspect recent rollout transcripts manually: + +```bash +find ~/.codex/sessions -type f -name 'rollout-*.jsonl' | sort | tail -10 +``` + +Then verify the likely candidate: + +```bash +python3 scripts/extract.py <transcript_path> --provider codex --metadata-only +``` + +Confirm that `cwd` matches the current project and `first_prompt` matches the start of the session. + +## Run extraction + +```bash +python3 scripts/extract.py --discover-current --provider codex --summary +``` + +If you need raw call details: + +```bash +python3 scripts/extract.py <transcript_path> --provider codex +``` + +## Codex-specific notes + +- Codex transcripts are event-based rather than message-block-based. +- Token totals come from the latest `token_count` event. +- Estimated USD cost is left unavailable by default because local Codex transcripts do not expose a stable pricing model in the same way as Claude. +- File edits are inferred from `apply_patch` calls when present. + +## Retro file location + +Write retros to: + +```text +~/.codex/worklog/retros/YYYY-MM-DD-<slug>.md +``` diff --git a/references/workflow-core.md b/references/workflow-core.md new file mode 100644 index 0000000..5082000 --- /dev/null +++ b/references/workflow-core.md @@ -0,0 +1,196 @@ +# Shared Retro Workflow + +Use this workflow after you have identified the runtime, verified the transcript, and run extraction. 
+ +## Run extraction + +Use the bundled script with the confirmed provider: + +```bash +python3 scripts/extract.py --discover-current --provider <provider> --summary +``` + +If you already know the transcript path: + +```bash +python3 scripts/extract.py <transcript_path> --provider <provider> --summary +``` + +Use `--summary` first so the tool output stays compact. Re-run without `--summary` only if you need to inspect individual call details. + +The extraction output includes: +- `provider`: runtime that produced the transcript +- `session`: id, cwd, branch when available, duration, version +- `tokens`: token totals and estimated USD cost when available +- `tools`: tool call counts, and optionally individual call listings +- `agents`: agent dispatches with type, model, and prompt preview when available +- `skills`: explicit skill invocations when the runtime exposes them +- `git`: branches, commits, and PR operations +- `files`: files read, written, or edited +- `conversation_arc`: chronological user and assistant messages +- `tool_result_sizes`: total, average, and max tool-result sizes in bytes + +Tool result content is intentionally excluded from the extraction. Only result sizes are tracked. + +## Step 2: Read the conversation arc + +The `conversation_arc` is the story of the session. Read it to understand: + +1. What the user actually asked for +2. How the approach evolved +3. Where friction occurred + +Do not skip this. Everything else depends on it. + +## Step 3: Classify outcomes + +List all outcomes that apply. 
+ +| Outcome | Detection signal | +|---|---| +| New code | Source-file writes or edits, git commits | +| Bug fix | Debugging patterns, fix commits | +| Communication | PR comments, chat/email tools | +| Local files | Writes to docs, plans, notes | +| Setup changes | Config or environment edits | +| Spec / Design | Design discussion without implementation | +| Process improvement | Rule, workflow, or skill updates | +| Review | Review tools, PR review operations | +| Research | Heavy reading/searching with little writing | +| Skill development | Writes to skill directories or skill docs | + +## Step 4: Analyze what worked + +Look for concrete patterns: + +- First-try success: a tool call or agent dispatch that worked without retries +- Efficient delegation: an agent whose output justified its cost or latency +- Good skill match: the right skill or workflow invoked at the right time +- Clean conversation flow: stretches without user correction or redirection +- Smart tool choice: a lightweight tool used where a heavyweight step was unnecessary, or vice versa + +## Step 5: Analyze what did not work + +This is the most valuable part. Trace each problem to a root cause. + +### How to identify friction + +Look for these patterns in user messages: +- Corrections: "no", "wrong", "that's not what I meant" +- Redirects: "instead do X", "try a different approach" +- Repetitions: "I already said", "like I mentioned" +- Stops: "wait", "hold on", "stop", "undo" +- Frustration: terse replies after a previously engaged flow + +For each friction point, trace the chain: + +```text +User correction -> What the agent did wrong -> Why -> +Wrong assumption? Missing context? Bad skill guidance? Wrong tool? +``` + +### Failure patterns to check + +**Wasted agent dispatches**: agent output was discarded, vague, or cost far more than it helped. + +**Oversized tool results**: a tool returned large content that was not meaningfully used. 
This is especially important for file reads and command output. + +**Tool call retries**: the same tool called multiple times with guesswork inputs instead of reading enough context first. + +**Abandoned approaches**: a stretch of work followed by a pivot to a completely different approach. + +**Over-engineering**: more tools or agents than the task warranted. + +**Under-specification**: the agent asked the user for information that could have been discovered in files, context, or local state. + +### For skill-development retros + +If the session involved a skill under development: + +1. Read the active skill's `SKILL.md` +2. Map each friction point to a specific instruction or missing instruction +3. Categorize the issue: + - Triggering + - Missing guidance + - Wrong guidance + - Over-specification + - Under-specification + - Missing tool or script +4. Draft the exact text change that would fix it + +## Step 6: Propose actions + +For each issue, propose one concrete action. + +| Type | Meaning | Must include | +|---|---|---| +| `skill-update` | Edit an existing skill | Exact text change and why | +| `skill-create` | New skill needed | What it does and when it triggers | +| `rule-update` | Edit a rule or local instructions file | Rule text and file | +| `rule-create` | New rule file | Rule content | +| `setup-change` | Config, hooks, scripts, tools | Exact change and where | +| `memory-update` | Save a durable lesson | Fact to remember | +| `investigate` | More research needed | Precise unresolved question | +| `acknowledge` | One-off, no systemic fix | Why it should not become process | + +Priority order: systemic fixes first, then setup changes, then one-offs. + +For `skill-update`, include: +- which section to edit +- before text or insertion point +- after text +- which friction point it addresses + +## Step 7: Write the retro file + +Save the file to the runtime-specific path from the provider reference. 
+ +```markdown +# Retro: <short session title> + +**Date**: YYYY-MM-DD +**Duration**: Xh Ym +**Provider**: <claude or codex> +**Session ID**: <session id> +**Branch**: <git branch> +**Transcript**: `<path to transcript>` +**Estimated cost**: $X.XX total, or `unavailable` if no runtime pricing is available + +## What Happened +<brief summary of the session> + +## Outcomes +- <outcome type>: <what was produced> + +## Token Budget +| Component | Output tokens | Cache read | Cache write | Est. cost | +|---|---|---|---|---| +| Main context | X | X | X | $X.XX or unavailable | +| Agent: <description> (<type>) | X | X | X | $X.XX or unavailable | +| **Total** | | | | **$X.XX or unavailable** | + +## Tool Result Waste +<oversized tool results that were not meaningfully used> + +## What Worked +- **<pattern>**: <evidence> + +## What Didn't Work +- **<problem>**: <root cause> + +## Actions +| # | Type | Action | Where | Status | +|---|------|--------|-------|--------| +| 1 | skill-update | <action> | <where> | proposed | +``` + +## Step 8: Walk through actions + +Present the retro summary in conversation, then walk through proposed actions one by one. + +For actions the user approves: +- `skill-update`, `rule-update`, `rule-create`: make the edit and show the diff +- `memory-update`: save it through the available memory system +- `setup-change`: apply the exact config change + +Update the retro file table as you go from `proposed` to `done`, `deferred`, or `rejected`. diff --git a/scripts/extract.py b/scripts/extract.py index 6a37bf7..25bb9a5 100644 --- a/scripts/extract.py +++ b/scripts/extract.py @@ -1,600 +1,169 @@ #!/usr/bin/env python3 """ -Extract structured data from a Claude Code session transcript (JSONL). +Extract structured data from supported agent session transcripts. + +Supported providers: +- Claude Code +- OpenAI Codex Usage: - python extract.py [--subagents-dir ] [--summary] [--metadata-only] + python extract.py <jsonl_path> [--provider auto|claude|codex] + python extract.py --discover-current [--provider auto|claude|codex] Outputs JSON to stdout. Use --summary for a compact version that omits -individual tool call details (just counts and key events). -Use --metadata-only for cheap session verification (head/tail read only). 
+individual tool call details. Use --metadata-only for cheap session verification. """ +from __future__ import annotations + +import argparse import json -import sys import os -import glob -from collections import Counter, defaultdict +import sys from pathlib import Path -from datetime import datetime -# Approximate pricing per million tokens (Claude Opus 4.6) -# Update these when Anthropic changes pricing and bump PRICING_LAST_VERIFIED. -PRICING_LAST_VERIFIED = "2026-04-06" -PRICE_PER_M = { - "input": 15.0, - "output": 75.0, - "cache_create": 18.75, # 1.25x input - "cache_read": 1.50, # 0.1x input +from providers import claude, codex +from providers.common import ( + LITE_READ_BUF_SIZE, + SCHEMA_VERSION, + extract_json_field, + parse_ts, + read_head_tail, + stream_jsonl, +) + +PROVIDERS = { + "claude": claude, + "codex": codex, } -SCHEMA_VERSION = "0.1.0" - -# Head/tail buffer size for lite reads (matches Claude Code's LITE_READ_BUF_SIZE) -LITE_READ_BUF_SIZE = 65536 - - -def stream_jsonl(path): - """Yield parsed records one at a time without loading the full file.""" - with open(path) as f: - for line in f: - line = line.strip() - if line: - try: - yield json.loads(line) - except json.JSONDecodeError: - continue - - -def read_head_tail(path): - """Read first and last 64KB of a file. Returns (head_str, tail_str, file_size).""" - size = os.path.getsize(path) - with open(path, "rb") as f: - head_bytes = f.read(LITE_READ_BUF_SIZE) - head = head_bytes.decode("utf-8", errors="replace") - - if size <= LITE_READ_BUF_SIZE: - return head, head, size - - f.seek(max(0, size - LITE_READ_BUF_SIZE)) - tail_bytes = f.read(LITE_READ_BUF_SIZE) - tail = tail_bytes.decode("utf-8", errors="replace") - - return head, tail, size - - -def extract_json_field(text, key): - """Extract a JSON string field value without full parsing (regex-free). 
- Matches '"key":"value"' or '"key": "value"' patterns.""" - for pattern in [f'"{key}":"', f'"{key}": "']: - idx = text.find(pattern) - if idx < 0: - continue - start = idx + len(pattern) - i = start - while i < len(text): - if text[i] == "\\": - i += 2 - continue - if text[i] == '"': - return text[start:i] - i += 1 - return None - - -def extract_metadata_lite(path): - """Extract session metadata from head/tail only — no full parse. - Used for session verification and discovery.""" - head, tail, size = read_head_tail(path) - - # Extract from head (start of session) - session_id = extract_json_field(head, "sessionId") - cwd = extract_json_field(head, "cwd") - git_branch = extract_json_field(head, "gitBranch") - version = extract_json_field(head, "version") - start_time = extract_json_field(head, "timestamp") - - # Extract from tail (end of session) - end_time = extract_json_field(tail, "timestamp") - # Scan tail backwards for the last timestamp - for line in reversed(tail.split("\n")): - ts = extract_json_field(line, "timestamp") - if ts: - end_time = ts - break - - # First user message for verification - first_prompt = None - for line in head.split("\n"): - if '"role":"user"' not in line and '"role": "user"' not in line: - continue - if '"tool_result"' in line: - continue - # Try to extract text content - text = extract_json_field(line, "text") - if text and not text.startswith(""): - first_prompt = text[:200] - break - - duration_seconds = None - if start_time and end_time: - start = parse_ts(start_time) - end = parse_ts(end_time) - if start and end: - duration_seconds = round((end - start).total_seconds()) - - return { - "session_id": session_id, - "cwd": cwd, - "git_branch": git_branch, - "version": version, - "start_time": start_time, - "end_time": end_time, - "duration_seconds": duration_seconds, - "file_size_bytes": size, - "first_prompt": first_prompt, - } - - -def parse_ts(ts_str): - """Parse ISO 8601 timestamp string to datetime.""" - if not ts_str: - 
return None - try: - return datetime.fromisoformat(ts_str.replace("Z", "+00:00")) - except (ValueError, TypeError): - return None - - -def extract_all_streaming(jsonl_path, subagents_dir=None, summary_mode=False): - """Main extraction pipeline using streaming — processes line-by-line.""" - - # Session metadata - session = { - "session_id": None, - "cwd": None, - "git_branch": None, - "version": None, - "start_time": None, - "end_time": None, - "duration_seconds": None, - "branches_seen": set(), - } - - # Token totals - tokens_total = { - "input_tokens": 0, - "output_tokens": 0, - "cache_creation_input_tokens": 0, - "cache_read_input_tokens": 0, - } - turn_count = 0 - - # Tool tracking - tool_calls = [] - tool_counts = Counter() - total_tool_calls = 0 - - # Tool result sizes (tool_use_id -> size in bytes) - tool_result_sizes = {} - - # Conversation arc - arc = [] - - # Git tracking - branches = set() - commits = [] - prs = [] - - # File tracking - files = defaultdict(set) - - for rec in stream_jsonl(jsonl_path): - # --- Session metadata --- - if rec.get("sessionId") and not session["session_id"]: - session["session_id"] = rec["sessionId"] - if rec.get("cwd") and not session["cwd"]: - session["cwd"] = rec["cwd"] - if rec.get("gitBranch"): - if not session["git_branch"]: - session["git_branch"] = rec["gitBranch"] - session["branches_seen"].add(rec["gitBranch"]) - branches.add(rec["gitBranch"]) - if rec.get("version") and not session["version"]: - session["version"] = rec["version"] - - ts = rec.get("timestamp") - if ts: - if not session["start_time"]: - session["start_time"] = ts - session["end_time"] = ts - - msg = rec.get("message", {}) - role = msg.get("role") - content = msg.get("content", "") - usage = msg.get("usage", {}) - - # --- Token usage (assistant messages only) --- - if usage and role == "assistant": - tokens_total["input_tokens"] += usage.get("input_tokens", 0) - tokens_total["output_tokens"] += usage.get("output_tokens", 0) - 
tokens_total["cache_creation_input_tokens"] += usage.get("cache_creation_input_tokens", 0) - tokens_total["cache_read_input_tokens"] += usage.get("cache_read_input_tokens", 0) - turn_count += 1 - - # --- Process content blocks --- - if isinstance(content, list): - for block in content: - if not isinstance(block, dict): - continue - - block_type = block.get("type") - - # Tool use blocks (assistant calling tools) - if block_type == "tool_use": - name = block.get("name", "unknown") - tool_input = block.get("input", {}) - tool_counts[name] += 1 - total_tool_calls += 1 +# Preserve the Claude pricing exports for downstream users and tests. +PRICING_LAST_VERIFIED = claude.PRICING_LAST_VERIFIED +PRICE_PER_M = claude.PRICE_PER_M - call_summary = { - "name": name, - "timestamp": ts, - "tool_use_id": block.get("id", ""), - } - if name == "Agent": - call_summary["agent_description"] = tool_input.get("description", "") - call_summary["agent_type"] = tool_input.get("subagent_type", "") - call_summary["agent_model"] = tool_input.get("model", "") - call_summary["agent_prompt_preview"] = tool_input.get("prompt", "")[:300] - call_summary["run_in_background"] = tool_input.get("run_in_background", False) - elif name == "Skill": - call_summary["skill_name"] = tool_input.get("skill", "") - call_summary["skill_args"] = tool_input.get("args", "") - elif name == "Bash": - call_summary["command"] = tool_input.get("command", "")[:300] - elif name in ("Read", "Write", "Edit"): - call_summary["file_path"] = tool_input.get("file_path", "") - elif name in ("Grep", "Glob"): - call_summary["pattern"] = tool_input.get("pattern", "") - elif name in ("TaskCreate", "TaskUpdate", "TaskList", "TaskOutput"): - call_summary["task_detail"] = { - k: v for k, v in tool_input.items() - if k in ("description", "status", "id") - } - elif name == "AskUserQuestion": - questions = tool_input.get("questions", []) - call_summary["questions"] = [q.get("question", "") for q in questions] - elif 
name.startswith("mcp__"): - call_summary["mcp_inputs_preview"] = json.dumps(tool_input)[:300] +def detect_provider(path): + """Detect the transcript provider from the file header.""" + head, _, _ = read_head_tail(path) + if claude.is_match(head): + return "claude" + if codex.is_match(head): + return "codex" + raise ValueError(f"Unable to detect transcript provider for {path}") - tool_calls.append(call_summary) - # Track files - fp = tool_input.get("file_path", "") - if fp: - if name == "Read": - files["read"].add(fp) - elif name == "Write": - files["written"].add(fp) - elif name == "Edit": - files["edited"].add(fp) +def resolve_provider(path, provider): + return detect_provider(path) if provider == "auto" else provider - # Track git activity from bash commands - if name == "Bash": - cmd = tool_input.get("command", "") - if "git commit" in cmd: - commits.append({"command": cmd[:200], "timestamp": ts}) - if "gh pr" in cmd: - prs.append({"command": cmd[:200], "timestamp": ts}) - # Tool result blocks — capture SIZE only, not content - elif block_type == "tool_result": - tool_use_id = block.get("tool_use_id", "") - result_content = block.get("content", "") - if isinstance(result_content, str): - size_bytes = len(result_content.encode("utf-8", errors="replace")) - elif isinstance(result_content, list): - # Multi-block results (e.g., images + text) - size_bytes = 0 - for rb in result_content: - if isinstance(rb, dict): - text = rb.get("text", "") - if text: - size_bytes += len(text.encode("utf-8", errors="replace")) - # Image/binary blocks — estimate from base64 if present - data = rb.get("data", "") - if data: - size_bytes += len(data) - elif isinstance(rb, str): - size_bytes += len(rb.encode("utf-8", errors="replace")) - else: - size_bytes = len(json.dumps(result_content).encode("utf-8")) +def discover_current_session(provider="auto", cwd=None): + """Locate the most likely current session transcript for the working directory.""" + cwd = cwd or os.getcwd() - if 
tool_use_id: - tool_result_sizes[tool_use_id] = size_bytes + if provider == "auto": + candidates = [] + for provider_name, module in PROVIDERS.items(): + path = module.discover_current_session(cwd) + if path: + candidates.append((provider_name, path)) + if not candidates: + raise FileNotFoundError(f"No supported session transcript found for cwd {cwd}") + return max(candidates, key=lambda item: os.path.getmtime(item[1])) - # Text blocks — conversation arc (both assistant AND user) - elif block_type == "text": - text = block.get("text", "").strip() - if role == "assistant" and text and len(text) > 20: - arc.append({ - "role": "assistant", - "text": text[:1000], - "timestamp": ts, - }) + module = PROVIDERS[provider] + path = module.discover_current_session(cwd) + if not path: + raise FileNotFoundError(f"No {provider} session transcript found for cwd {cwd}") + return provider, path - # After processing all blocks in a list-format user message, - # collect text blocks into the arc - if role == "user" and isinstance(content, list): - user_text = "" - for block in content: - if isinstance(block, dict) and block.get("type") == "text": - user_text += block.get("text", "") - elif isinstance(block, str): - user_text += block - user_text = user_text.strip() - if user_text and not user_text.startswith(""): - arc.append({ - "role": "user", - "text": user_text[:2000], - "timestamp": ts, - }) - # User messages with string content (simple format) - elif role == "user": - text = "" - if isinstance(content, str): - text = content - text = text.strip() - if text and not text.startswith(""): - arc.append({ - "role": "user", - "text": text[:2000], - "timestamp": ts, - }) - - # --- Post-processing --- +def extract_metadata_lite(path, provider="auto"): + resolved_provider = resolve_provider(path, provider) + result = PROVIDERS[resolved_provider].extract_metadata_lite(path) + result["provider"] = resolved_provider + result["transcript_path"] = str(Path(path)) + return result - # Compute 
duration - if session["start_time"] and session["end_time"]: - start = parse_ts(session["start_time"]) - end = parse_ts(session["end_time"]) - if start and end: - session["duration_seconds"] = round((end - start).total_seconds()) - session["branches_seen"] = sorted(session["branches_seen"]) - # Compute cost - cost = ( - tokens_total["input_tokens"] / 1_000_000 * PRICE_PER_M["input"] - + tokens_total["output_tokens"] / 1_000_000 * PRICE_PER_M["output"] - + tokens_total["cache_creation_input_tokens"] / 1_000_000 * PRICE_PER_M["cache_create"] - + tokens_total["cache_read_input_tokens"] / 1_000_000 * PRICE_PER_M["cache_read"] +def extract_all_streaming(jsonl_path, subagents_dir=None, summary_mode=False, provider="auto"): + resolved_provider = resolve_provider(jsonl_path, provider) + result = PROVIDERS[resolved_provider].extract_all_streaming( + jsonl_path, + subagents_dir=subagents_dir, + summary_mode=summary_mode, ) - - # Attach result sizes to tool calls - for call in tool_calls: - tid = call.get("tool_use_id", "") - if tid in tool_result_sizes: - call["result_size_bytes"] = tool_result_sizes[tid] - - # Compute tool result size stats - result_size_stats = {} - if tool_result_sizes: - sizes_by_tool = defaultdict(list) - for call in tool_calls: - if "result_size_bytes" in call: - sizes_by_tool[call["name"]].append(call["result_size_bytes"]) - - for tool_name, sizes in sorted(sizes_by_tool.items(), key=lambda x: -sum(x[1])): - result_size_stats[tool_name] = { - "count": len(sizes), - "total_bytes": sum(sizes), - "avg_bytes": round(sum(sizes) / len(sizes)), - "max_bytes": max(sizes), - } - - # Extract agents - agents = _extract_agents(tool_calls, subagents_dir) - - # Extract skills - skills = [ - {"name": c.get("skill_name", ""), "args": c.get("skill_args", ""), "timestamp": c.get("timestamp")} - for c in tool_calls if c["name"] == "Skill" - ] - - # Warn if agents exist but have no cost data (subagents_dir missing) - agents_without_cost = [a for a in agents if 
a.get("estimated_cost_usd") is None - and a.get("description") and not a.get("description", "").startswith("[unmatched")] - if agents_without_cost: - print(f"Warning: {len(agents_without_cost)} agent dispatch(es) have no subagent cost data. " - f"Pass --subagents-dir to attribute subagent costs.", - file=sys.stderr) - - # Build result - result = { - "schema_version": SCHEMA_VERSION, - "session": session, - "tokens": { - "total": tokens_total, - "turn_count": turn_count, - "estimated_cost_usd": round(cost, 4), - }, - "agents": agents, - "skills": skills, - "git": { - "branches": sorted(branches), - "commits": commits, - "pr_operations": prs, - }, - "files": {k: sorted(v) for k, v in files.items()}, - "conversation_arc": arc, - "tool_result_sizes": result_size_stats, - } - - if summary_mode: - result["tools"] = { - "counts": dict(tool_counts.most_common()), - "total_calls": total_tool_calls, - } - else: - result["tools"] = { - "calls": tool_calls, - "counts": dict(tool_counts.most_common()), - "total_calls": total_tool_calls, - } - + result["provider"] = resolved_provider + result["schema_version"] = SCHEMA_VERSION + result["transcript_path"] = str(Path(jsonl_path)) return result -def _extract_agents(tool_calls, subagents_dir=None): - """Extract agent dispatch details and match with subagent JSONL files.""" - agents = [] - for call in tool_calls: - if call["name"] == "Agent": - agent = { - "description": call.get("agent_description", ""), - "type": call.get("agent_type", "") or "general-purpose", - "model": call.get("agent_model", "") or "inherited", - "prompt_preview": call.get("agent_prompt_preview", ""), - "background": call.get("run_in_background", False), - "timestamp": call.get("timestamp"), - "tool_use_id": call.get("tool_use_id", ""), - "tokens": None, - "estimated_cost_usd": None, - } - if "result_size_bytes" in call: - agent["result_size_bytes"] = call["result_size_bytes"] - agents.append(agent) - - if subagents_dir and os.path.isdir(subagents_dir): - 
_match_subagent_files(agents, subagents_dir) - - return agents - +def build_parser(): + parser = argparse.ArgumentParser( + description="Extract structured data from Claude Code or Codex session transcripts.", + ) + parser.add_argument( + "jsonl_path", + nargs="?", + help="Path to the session JSONL transcript.", + ) + parser.add_argument( + "--provider", + choices=("auto", "claude", "codex"), + default="auto", + help="Transcript provider. Defaults to auto-detection.", + ) + parser.add_argument( + "--discover-current", + action="store_true", + help="Discover the most recent transcript for the current working directory.", + ) + parser.add_argument( + "--cwd", + help="Working directory to match when using --discover-current. Defaults to the current shell cwd.", + ) + parser.add_argument( + "--subagents-dir", + help="Claude-only subagents directory used for subagent cost attribution.", + ) + parser.add_argument( + "--summary", + action="store_true", + help="Omit individual tool call listings and keep only aggregate counts.", + ) + parser.add_argument( + "--metadata-only", + action="store_true", + help="Read only lightweight metadata instead of full transcript extraction.", + ) + return parser -def _match_subagent_files(agents, subagents_dir): - """Match subagent JSONL files to dispatches using timestamp proximity.""" - subagent_files = sorted(glob.glob(os.path.join(subagents_dir, "*.jsonl"))) - MAX_MATCH_WINDOW_S = 60 - subagent_info = [] - for sa_file in subagent_files: - sa_tokens = {"input_tokens": 0, "output_tokens": 0, - "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0} - sa_start = None - turn_count = 0 +def main(argv=None): + parser = build_parser() + args = parser.parse_args(argv) - for rec in stream_jsonl(sa_file): - msg = rec.get("message", {}) - usage = msg.get("usage", {}) - if usage and msg.get("role") == "assistant": - sa_tokens["input_tokens"] += usage.get("input_tokens", 0) - sa_tokens["output_tokens"] += usage.get("output_tokens", 0) - 
sa_tokens["cache_creation_input_tokens"] += usage.get("cache_creation_input_tokens", 0) - sa_tokens["cache_read_input_tokens"] += usage.get("cache_read_input_tokens", 0) - turn_count += 1 - if sa_start is None and "timestamp" in rec: - sa_start = parse_ts(rec["timestamp"]) + if args.discover_current: + provider_name, jsonl_path = discover_current_session(provider=args.provider, cwd=args.cwd) + else: + if not args.jsonl_path: + parser.error("jsonl_path is required unless --discover-current is used") + jsonl_path = args.jsonl_path + provider_name = resolve_provider(jsonl_path, args.provider) - sa_cost = ( - sa_tokens["input_tokens"] / 1_000_000 * PRICE_PER_M["input"] - + sa_tokens["output_tokens"] / 1_000_000 * PRICE_PER_M["output"] - + sa_tokens["cache_creation_input_tokens"] / 1_000_000 * PRICE_PER_M["cache_create"] - + sa_tokens["cache_read_input_tokens"] / 1_000_000 * PRICE_PER_M["cache_read"] + if args.metadata_only: + result = extract_metadata_lite(jsonl_path, provider=provider_name) + else: + result = extract_all_streaming( + jsonl_path, + subagents_dir=args.subagents_dir, + summary_mode=args.summary, + provider=provider_name, ) - # Load meta file if present - meta = None - meta_file = sa_file.replace(".jsonl", ".meta.json") - if os.path.exists(meta_file): - with open(meta_file) as f: - meta = json.load(f) - - subagent_info.append({ - "file": os.path.basename(sa_file), - "tokens": sa_tokens, - "cost": round(sa_cost, 4), - "start_time": sa_start, - "meta": meta, - }) - - # Match by timestamp proximity - matched_dispatches = set() - matched_subagents = set() - - for sa_idx, sa in enumerate(subagent_info): - if not sa["start_time"]: - continue - best_match = None - best_delta = None - - for ag_idx, agent in enumerate(agents): - if ag_idx in matched_dispatches: - continue - dispatch_time = parse_ts(agent["timestamp"]) - if not dispatch_time: - continue - delta = abs((sa["start_time"] - dispatch_time).total_seconds()) - if delta <= MAX_MATCH_WINDOW_S and 
(best_delta is None or delta < best_delta): - best_match = ag_idx - best_delta = delta - - if best_match is not None: - agents[best_match]["tokens"] = sa["tokens"] - agents[best_match]["estimated_cost_usd"] = sa["cost"] - agents[best_match]["subagent_file"] = sa["file"] - agents[best_match]["match_delta_s"] = round(best_delta, 1) - if sa["meta"]: - agents[best_match]["meta"] = sa["meta"] - matched_dispatches.add(best_match) - matched_subagents.add(sa_idx) - - # Report unmatched subagents - for sa_idx, sa in enumerate(subagent_info): - if sa_idx not in matched_subagents: - agents.append({ - "description": f"[unmatched subagent: {sa['file']}]", - "type": "unknown", - "model": "unknown", - "prompt_preview": "", - "background": False, - "timestamp": str(sa["start_time"]) if sa["start_time"] else None, - "tool_use_id": "", - "tokens": sa["tokens"], - "estimated_cost_usd": sa["cost"], - "subagent_file": sa["file"], - "match_confidence": "unmatched", - "meta": sa["meta"], - }) + print(json.dumps(result, indent=2, default=str)) + return 0 if __name__ == "__main__": - if len(sys.argv) < 2: - print("Usage: python extract.py [--subagents-dir ] [--summary] [--metadata-only]") - sys.exit(1) - - jsonl_path = sys.argv[1] - subagents_dir = None - summary_mode = "--summary" in sys.argv - metadata_only = "--metadata-only" in sys.argv - - if metadata_only: - result = extract_metadata_lite(jsonl_path) - print(json.dumps(result, indent=2, default=str)) - sys.exit(0) - - if "--subagents-dir" in sys.argv: - idx = sys.argv.index("--subagents-dir") - if idx + 1 < len(sys.argv): - subagents_dir = sys.argv[idx + 1] - else: - # Auto-detect: look for sibling directory with same name as the JSONL - stem = Path(jsonl_path).stem - candidate = Path(jsonl_path).parent / stem / "subagents" - if candidate.is_dir(): - subagents_dir = str(candidate) - - result = extract_all_streaming(jsonl_path, subagents_dir, summary_mode) - print(json.dumps(result, indent=2, default=str)) + sys.exit(main()) diff 
--git a/scripts/providers/__init__.py b/scripts/providers/__init__.py new file mode 100644 index 0000000..fc0dd0b --- /dev/null +++ b/scripts/providers/__init__.py @@ -0,0 +1,2 @@ +"""Provider adapters for supported session transcript formats.""" + diff --git a/scripts/providers/claude.py b/scripts/providers/claude.py new file mode 100644 index 0000000..9d32f89 --- /dev/null +++ b/scripts/providers/claude.py @@ -0,0 +1,471 @@ +#!/usr/bin/env python3 +"""Claude Code transcript extraction.""" + +from __future__ import annotations + +import glob +import json +import os +import sys +from collections import Counter, defaultdict +from pathlib import Path + +from providers.common import ( + classify_tool_name, + encoded_size, + extract_json_field, + iso_duration_seconds, + parse_ts, + read_head_tail, + stream_jsonl, +) + +PRICING_LAST_VERIFIED = "2026-04-06" +PRICE_PER_M = { + "input": 15.0, + "output": 75.0, + "cache_create": 18.75, + "cache_read": 1.50, +} + + +def is_match(head_text): + return ( + '"sessionId"' in head_text + or '"type":"session_start"' in head_text + or '"type": "session_start"' in head_text + or '"message":{"role"' in head_text + or '"message": {"role"' in head_text + ) + + +def discover_current_session(cwd): + sessions_dir = Path.home() / ".claude" / "sessions" + project_dir = Path.home() / ".claude" / "projects" / cwd.replace("/", "-") + + session_files = sorted(sessions_dir.glob("*.json"), key=os.path.getmtime, reverse=True) + for session_file in session_files[:10]: + try: + data = json.loads(session_file.read_text()) + except (OSError, json.JSONDecodeError): + continue + + if data.get("cwd") != cwd: + continue + + session_id = data.get("sessionId") + if not session_id: + continue + + transcript_path = project_dir / f"{session_id}.jsonl" + if transcript_path.is_file(): + return str(transcript_path) + + if project_dir.is_dir(): + candidates = sorted(project_dir.glob("*.jsonl"), key=os.path.getmtime, reverse=True) + if candidates: + return 
str(candidates[0]) + + return None + + +def extract_metadata_lite(path): + head, tail, size = read_head_tail(path) + + session_id = extract_json_field(head, "sessionId") + cwd = extract_json_field(head, "cwd") + git_branch = extract_json_field(head, "gitBranch") + version = extract_json_field(head, "version") + start_time = extract_json_field(head, "timestamp") + + end_time = extract_json_field(tail, "timestamp") + for line in reversed(tail.split("\n")): + ts = extract_json_field(line, "timestamp") + if ts: + end_time = ts + break + + first_prompt = None + for line in head.split("\n"): + if '"role":"user"' not in line and '"role": "user"' not in line: + continue + if '"tool_result"' in line: + continue + text = extract_json_field(line, "text") + if text and not text.startswith(""): + first_prompt = text[:200] + break + + return { + "session_id": session_id, + "cwd": cwd, + "git_branch": git_branch, + "version": version, + "start_time": start_time, + "end_time": end_time, + "duration_seconds": iso_duration_seconds(start_time, end_time), + "file_size_bytes": size, + "first_prompt": first_prompt, + } + + +def extract_all_streaming(jsonl_path, subagents_dir=None, summary_mode=False): + session = { + "session_id": None, + "cwd": None, + "git_branch": None, + "version": None, + "start_time": None, + "end_time": None, + "duration_seconds": None, + "branches_seen": set(), + } + tokens_total = { + "input_tokens": 0, + "output_tokens": 0, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + } + turn_count = 0 + tool_calls = [] + tool_counts = Counter() + total_tool_calls = 0 + tool_result_sizes = {} + arc = [] + branches = set() + commits = [] + prs = [] + files = defaultdict(set) + + for rec in stream_jsonl(jsonl_path): + if rec.get("sessionId") and not session["session_id"]: + session["session_id"] = rec["sessionId"] + if rec.get("cwd") and not session["cwd"]: + session["cwd"] = rec["cwd"] + if rec.get("gitBranch"): + if not session["git_branch"]: + 
session["git_branch"] = rec["gitBranch"] + session["branches_seen"].add(rec["gitBranch"]) + branches.add(rec["gitBranch"]) + if rec.get("version") and not session["version"]: + session["version"] = rec["version"] + + ts = rec.get("timestamp") + if ts: + if not session["start_time"]: + session["start_time"] = ts + session["end_time"] = ts + + msg = rec.get("message", {}) + role = msg.get("role") + content = msg.get("content", "") + usage = msg.get("usage", {}) + + if usage and role == "assistant": + tokens_total["input_tokens"] += usage.get("input_tokens", 0) + tokens_total["output_tokens"] += usage.get("output_tokens", 0) + tokens_total["cache_creation_input_tokens"] += usage.get("cache_creation_input_tokens", 0) + tokens_total["cache_read_input_tokens"] += usage.get("cache_read_input_tokens", 0) + turn_count += 1 + + if isinstance(content, list): + for block in content: + if not isinstance(block, dict): + continue + + block_type = block.get("type") + if block_type == "tool_use": + name = block.get("name", "unknown") + tool_input = block.get("input", {}) + tool_counts[name] += 1 + total_tool_calls += 1 + + call_summary = { + "name": name, + "category": classify_tool_name(name), + "timestamp": ts, + "tool_use_id": block.get("id", ""), + } + + if name == "Agent": + call_summary["agent_description"] = tool_input.get("description", "") + call_summary["agent_type"] = tool_input.get("subagent_type", "") + call_summary["agent_model"] = tool_input.get("model", "") + call_summary["agent_prompt_preview"] = tool_input.get("prompt", "")[:300] + call_summary["run_in_background"] = tool_input.get("run_in_background", False) + elif name == "Skill": + call_summary["skill_name"] = tool_input.get("skill", "") + call_summary["skill_args"] = tool_input.get("args", "") + elif name == "Bash": + call_summary["command"] = tool_input.get("command", "")[:300] + elif name in ("Read", "Write", "Edit"): + call_summary["file_path"] = tool_input.get("file_path", "") + elif name in ("Grep", 
"Glob"): + call_summary["pattern"] = tool_input.get("pattern", "") + elif name in ("TaskCreate", "TaskUpdate", "TaskList", "TaskOutput"): + call_summary["task_detail"] = { + key: value + for key, value in tool_input.items() + if key in ("description", "status", "id") + } + elif name == "AskUserQuestion": + questions = tool_input.get("questions", []) + call_summary["questions"] = [q.get("question", "") for q in questions] + elif name.startswith("mcp__"): + call_summary["mcp_inputs_preview"] = json.dumps(tool_input)[:300] + + tool_calls.append(call_summary) + + file_path = tool_input.get("file_path", "") + if file_path: + if name == "Read": + files["read"].add(file_path) + elif name == "Write": + files["written"].add(file_path) + elif name == "Edit": + files["edited"].add(file_path) + + if name == "Bash": + command = tool_input.get("command", "") + if "git commit" in command: + commits.append({"command": command[:200], "timestamp": ts}) + if "gh pr" in command: + prs.append({"command": command[:200], "timestamp": ts}) + + elif block_type == "tool_result": + tool_use_id = block.get("tool_use_id", "") + if tool_use_id: + tool_result_sizes[tool_use_id] = encoded_size(block.get("content", "")) + + elif block_type == "text": + text = block.get("text", "").strip() + if role == "assistant" and text and len(text) > 20: + arc.append({"role": "assistant", "text": text[:1000], "timestamp": ts}) + + if role == "user": + user_text = "" + for block in content: + if isinstance(block, dict) and block.get("type") == "text": + user_text += block.get("text", "") + elif isinstance(block, str): + user_text += block + user_text = user_text.strip() + if user_text and not user_text.startswith(""): + arc.append({"role": "user", "text": user_text[:2000], "timestamp": ts}) + + elif role == "user": + text = content.strip() if isinstance(content, str) else "" + if text and not text.startswith(""): + arc.append({"role": "user", "text": text[:2000], "timestamp": ts}) + + 
session["duration_seconds"] = iso_duration_seconds(session["start_time"], session["end_time"]) + session["branches_seen"] = sorted(session["branches_seen"]) + + cost = ( + tokens_total["input_tokens"] / 1_000_000 * PRICE_PER_M["input"] + + tokens_total["output_tokens"] / 1_000_000 * PRICE_PER_M["output"] + + tokens_total["cache_creation_input_tokens"] / 1_000_000 * PRICE_PER_M["cache_create"] + + tokens_total["cache_read_input_tokens"] / 1_000_000 * PRICE_PER_M["cache_read"] + ) + + for call in tool_calls: + tool_use_id = call.get("tool_use_id", "") + if tool_use_id in tool_result_sizes: + call["result_size_bytes"] = tool_result_sizes[tool_use_id] + + result_size_stats = {} + if tool_result_sizes: + sizes_by_tool = defaultdict(list) + for call in tool_calls: + if "result_size_bytes" in call: + sizes_by_tool[call["name"]].append(call["result_size_bytes"]) + + for tool_name, sizes in sorted(sizes_by_tool.items(), key=lambda item: -sum(item[1])): + result_size_stats[tool_name] = { + "count": len(sizes), + "total_bytes": sum(sizes), + "avg_bytes": round(sum(sizes) / len(sizes)), + "max_bytes": max(sizes), + } + + agents = _extract_agents(tool_calls, subagents_dir) + skills = [ + { + "name": call.get("skill_name", ""), + "args": call.get("skill_args", ""), + "timestamp": call.get("timestamp"), + } + for call in tool_calls + if call["name"] == "Skill" + ] + + agents_without_cost = [ + agent + for agent in agents + if agent.get("estimated_cost_usd") is None + and agent.get("description") + and not agent.get("description", "").startswith("[unmatched") + ] + if agents_without_cost: + print( + f"Warning: {len(agents_without_cost)} agent dispatch(es) have no subagent cost data. 
" + "Pass --subagents-dir to attribute subagent costs.", + file=sys.stderr, + ) + + result = { + "session": session, + "tokens": { + "total": tokens_total, + "turn_count": turn_count, + "estimated_cost_usd": round(cost, 4), + }, + "agents": agents, + "skills": skills, + "git": { + "branches": sorted(branches), + "commits": commits, + "pr_operations": prs, + }, + "files": {key: sorted(value) for key, value in files.items()}, + "conversation_arc": arc, + "tool_result_sizes": result_size_stats, + } + + if summary_mode: + result["tools"] = { + "counts": dict(tool_counts.most_common()), + "total_calls": total_tool_calls, + } + else: + result["tools"] = { + "calls": tool_calls, + "counts": dict(tool_counts.most_common()), + "total_calls": total_tool_calls, + } + + return result + + +def _extract_agents(tool_calls, subagents_dir=None): + agents = [] + for call in tool_calls: + if call["name"] != "Agent": + continue + agent = { + "description": call.get("agent_description", ""), + "type": call.get("agent_type", "") or "general-purpose", + "model": call.get("agent_model", "") or "inherited", + "prompt_preview": call.get("agent_prompt_preview", ""), + "background": call.get("run_in_background", False), + "timestamp": call.get("timestamp"), + "tool_use_id": call.get("tool_use_id", ""), + "tokens": None, + "estimated_cost_usd": None, + } + if "result_size_bytes" in call: + agent["result_size_bytes"] = call["result_size_bytes"] + agents.append(agent) + + if subagents_dir and os.path.isdir(subagents_dir): + _match_subagent_files(agents, subagents_dir) + + return agents + + +def _match_subagent_files(agents, subagents_dir): + subagent_files = sorted(glob.glob(os.path.join(subagents_dir, "*.jsonl"))) + max_match_window_s = 60 + subagent_info = [] + + for subagent_file in subagent_files: + subagent_tokens = { + "input_tokens": 0, + "output_tokens": 0, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + } + subagent_start = None + + for rec in 
stream_jsonl(subagent_file): + msg = rec.get("message", {}) + usage = msg.get("usage", {}) + if usage and msg.get("role") == "assistant": + subagent_tokens["input_tokens"] += usage.get("input_tokens", 0) + subagent_tokens["output_tokens"] += usage.get("output_tokens", 0) + subagent_tokens["cache_creation_input_tokens"] += usage.get("cache_creation_input_tokens", 0) + subagent_tokens["cache_read_input_tokens"] += usage.get("cache_read_input_tokens", 0) + if subagent_start is None and "timestamp" in rec: + subagent_start = parse_ts(rec["timestamp"]) + + cost = ( + subagent_tokens["input_tokens"] / 1_000_000 * PRICE_PER_M["input"] + + subagent_tokens["output_tokens"] / 1_000_000 * PRICE_PER_M["output"] + + subagent_tokens["cache_creation_input_tokens"] / 1_000_000 * PRICE_PER_M["cache_create"] + + subagent_tokens["cache_read_input_tokens"] / 1_000_000 * PRICE_PER_M["cache_read"] + ) + + meta = None + meta_file = subagent_file.replace(".jsonl", ".meta.json") + if os.path.exists(meta_file): + with open(meta_file) as handle: + meta = json.load(handle) + + subagent_info.append( + { + "file": os.path.basename(subagent_file), + "tokens": subagent_tokens, + "cost": round(cost, 4), + "start_time": subagent_start, + "meta": meta, + } + ) + + matched_dispatches = set() + matched_subagents = set() + for subagent_index, subagent in enumerate(subagent_info): + if not subagent["start_time"]: + continue + best_match = None + best_delta = None + + for agent_index, agent in enumerate(agents): + if agent_index in matched_dispatches: + continue + dispatch_time = parse_ts(agent["timestamp"]) + if not dispatch_time: + continue + delta = abs((subagent["start_time"] - dispatch_time).total_seconds()) + if delta <= max_match_window_s and (best_delta is None or delta < best_delta): + best_match = agent_index + best_delta = delta + + if best_match is not None: + agents[best_match]["tokens"] = subagent["tokens"] + agents[best_match]["estimated_cost_usd"] = subagent["cost"] + 
agents[best_match]["subagent_file"] = subagent["file"] + agents[best_match]["match_delta_s"] = round(best_delta, 1) + if subagent["meta"]: + agents[best_match]["meta"] = subagent["meta"] + matched_dispatches.add(best_match) + matched_subagents.add(subagent_index) + + for subagent_index, subagent in enumerate(subagent_info): + if subagent_index in matched_subagents: + continue + agents.append( + { + "description": f"[unmatched subagent: {subagent['file']}]", + "type": "unknown", + "model": "unknown", + "prompt_preview": "", + "background": False, + "timestamp": str(subagent["start_time"]) if subagent["start_time"] else None, + "tool_use_id": "", + "tokens": subagent["tokens"], + "estimated_cost_usd": subagent["cost"], + "subagent_file": subagent["file"], + "match_confidence": "unmatched", + "meta": subagent["meta"], + } + ) diff --git a/scripts/providers/codex.py b/scripts/providers/codex.py new file mode 100644 index 0000000..2125b50 --- /dev/null +++ b/scripts/providers/codex.py @@ -0,0 +1,299 @@ +#!/usr/bin/env python3 +"""Codex transcript extraction.""" + +from __future__ import annotations + +import json +import os +from collections import Counter, defaultdict +from pathlib import Path + +from providers.common import ( + classify_tool_name, + encoded_size, + extract_patch_file_changes, + iso_duration_seconds, + load_tool_arguments, + read_head_tail, + stream_jsonl, +) + + +def is_match(head_text): + return '"type":"session_meta"' in head_text or '"type": "session_meta"' in head_text + + +def discover_current_session(cwd): + sessions_root = Path.home() / ".codex" / "sessions" + if not sessions_root.is_dir(): + return None + + candidates = sorted(sessions_root.rglob("rollout-*.jsonl"), key=os.path.getmtime, reverse=True) + for candidate in candidates[:20]: + metadata = extract_metadata_lite(candidate) + if metadata.get("cwd") == cwd: + return str(candidate) + return None + + +def extract_metadata_lite(path): + head, tail, size = read_head_tail(path) + session_id 
= None + cwd = None + version = None + start_time = None + first_prompt = None + + for line in head.splitlines(): + try: + rec = json.loads(line) + except json.JSONDecodeError: + continue + + if rec.get("type") == "session_meta": + payload = rec.get("payload", {}) + session_id = payload.get("id") + cwd = payload.get("cwd") + version = payload.get("cli_version") + start_time = payload.get("timestamp") or rec.get("timestamp") + elif rec.get("type") == "event_msg": + payload = rec.get("payload", {}) + if payload.get("type") == "user_message" and payload.get("message"): + first_prompt = payload["message"][:200] + break + + end_time = None + for line in reversed(tail.splitlines()): + try: + rec = json.loads(line) + except json.JSONDecodeError: + continue + end_time = rec.get("timestamp") or end_time + if end_time: + break + + return { + "session_id": session_id, + "cwd": cwd, + "git_branch": None, + "version": version, + "start_time": start_time, + "end_time": end_time, + "duration_seconds": iso_duration_seconds(start_time, end_time), + "file_size_bytes": size, + "first_prompt": first_prompt, + } + + +def extract_all_streaming(jsonl_path, subagents_dir=None, summary_mode=False): + del subagents_dir + + session = { + "session_id": None, + "cwd": None, + "git_branch": None, + "version": None, + "start_time": None, + "end_time": None, + "duration_seconds": None, + "branches_seen": [], + } + tool_calls = [] + call_index = {} + tool_counts = Counter() + total_tool_calls = 0 + arc = [] + branches = set() + commits = [] + prs = [] + files = defaultdict(set) + latest_total_usage = {} + + for rec in stream_jsonl(jsonl_path): + ts = rec.get("timestamp") + rec_type = rec.get("type") + + if rec_type == "session_meta": + payload = rec.get("payload", {}) + session["session_id"] = session["session_id"] or payload.get("id") + session["cwd"] = session["cwd"] or payload.get("cwd") + session["version"] = session["version"] or payload.get("cli_version") + start_time = 
payload.get("timestamp") or ts + if start_time and not session["start_time"]: + session["start_time"] = start_time + if ts: + session["end_time"] = ts + continue + + if ts: + if not session["start_time"]: + session["start_time"] = ts + session["end_time"] = ts + + if rec_type == "event_msg": + payload = rec.get("payload", {}) + event_type = payload.get("type") + + if event_type == "user_message": + text = (payload.get("message") or "").strip() + if text: + arc.append({"role": "user", "text": text[:2000], "timestamp": ts}) + elif event_type == "agent_message": + text = (payload.get("message") or "").strip() + if text and len(text) > 20: + arc.append({"role": "assistant", "text": text[:1000], "timestamp": ts}) + elif event_type == "token_count": + info = payload.get("info") or {} + latest_total_usage = info.get("total_token_usage") or latest_total_usage + elif event_type == "exec_command_end": + call_id = payload.get("call_id", "") + call = call_index.get(call_id) + if not call: + continue + output = payload.get("aggregated_output", "") + call["result_size_bytes"] = encoded_size(output) + call["exit_code"] = payload.get("exit_code") + + elif rec_type == "response_item": + payload = rec.get("payload", {}) + payload_type = payload.get("type") + + if payload_type == "function_call": + name = payload.get("name", "unknown") + tool_input, raw_arguments = load_tool_arguments(payload.get("arguments", "")) + tool_counts[name] += 1 + total_tool_calls += 1 + + call_summary = { + "name": name, + "category": classify_tool_name(name), + "timestamp": ts, + "tool_use_id": payload.get("call_id", ""), + } + + if name == "exec_command": + command = tool_input.get("cmd", "") + call_summary["command"] = command[:300] + if tool_input.get("workdir"): + call_summary["workdir"] = tool_input["workdir"] + if "git commit" in command: + commits.append({"command": command[:200], "timestamp": ts}) + if "gh pr" in command: + prs.append({"command": command[:200], "timestamp": ts}) + elif name == 
"spawn_agent": + description = tool_input.get("message", "") + call_summary["agent_description"] = description[:200] + call_summary["agent_type"] = tool_input.get("agent_type", "") + call_summary["agent_model"] = tool_input.get("model", "") + call_summary["agent_prompt_preview"] = description[:300] + call_summary["run_in_background"] = False + elif name == "request_user_input": + questions = tool_input.get("questions", []) + call_summary["questions"] = [q.get("question", "") for q in questions] + elif name == "apply_patch": + patch_text = raw_arguments + if isinstance(tool_input, dict) and "_raw" in tool_input: + patch_text = tool_input["_raw"] + file_changes = extract_patch_file_changes(patch_text) + file_paths = sorted(file_changes["written"] | file_changes["edited"] | file_changes["deleted"]) + if file_paths: + call_summary["file_paths"] = file_paths + call_summary["file_path"] = file_paths[0] + files["written"].update(file_changes["written"]) + files["edited"].update(file_changes["edited"]) + elif name.startswith("mcp__"): + call_summary["mcp_inputs_preview"] = json.dumps(tool_input, default=str)[:300] + if name.endswith("github_create_pull_request"): + prs.append({"command": name, "timestamp": ts}) + + if name == "exec_command" and tool_input.get("cmd"): + command = tool_input["cmd"] + if "git checkout -b " in command or "git switch -c " in command: + branches.add(command[:200]) + + tool_calls.append(call_summary) + call_index[call_summary["tool_use_id"]] = call_summary + + elif payload_type == "function_call_output": + call_id = payload.get("call_id", "") + call = call_index.get(call_id) + if call and "result_size_bytes" not in call: + call["result_size_bytes"] = encoded_size(payload.get("output", "")) + + session["duration_seconds"] = iso_duration_seconds(session["start_time"], session["end_time"]) + + tokens_total = { + "input_tokens": latest_total_usage.get("input_tokens", 0), + "output_tokens": latest_total_usage.get("output_tokens", 0), + 
"cache_creation_input_tokens": 0, + "cache_read_input_tokens": latest_total_usage.get("cached_input_tokens", 0), + "reasoning_output_tokens": latest_total_usage.get("reasoning_output_tokens", 0), + "total_tokens": latest_total_usage.get("total_tokens", 0), + } + turn_count = sum(1 for message in arc if message["role"] == "assistant") + + result_size_stats = {} + sizes_by_tool = defaultdict(list) + for call in tool_calls: + if "result_size_bytes" in call: + sizes_by_tool[call["name"]].append(call["result_size_bytes"]) + + for tool_name, sizes in sorted(sizes_by_tool.items(), key=lambda item: -sum(item[1])): + result_size_stats[tool_name] = { + "count": len(sizes), + "total_bytes": sum(sizes), + "avg_bytes": round(sum(sizes) / len(sizes)), + "max_bytes": max(sizes), + } + + agents = [] + for call in tool_calls: + if call["name"] != "spawn_agent": + continue + agent = { + "description": call.get("agent_description", ""), + "type": call.get("agent_type", "") or "default", + "model": call.get("agent_model", "") or "inherited", + "prompt_preview": call.get("agent_prompt_preview", ""), + "background": call.get("run_in_background", False), + "timestamp": call.get("timestamp"), + "tool_use_id": call.get("tool_use_id", ""), + "tokens": None, + "estimated_cost_usd": None, + } + if "result_size_bytes" in call: + agent["result_size_bytes"] = call["result_size_bytes"] + agents.append(agent) + + result = { + "session": session, + "tokens": { + "total": tokens_total, + "turn_count": turn_count, + "estimated_cost_usd": None, + }, + "agents": agents, + "skills": [], + "git": { + "branches": sorted(branches), + "commits": commits, + "pr_operations": prs, + }, + "files": {key: sorted(value) for key, value in files.items()}, + "conversation_arc": arc, + "tool_result_sizes": result_size_stats, + } + + if summary_mode: + result["tools"] = { + "counts": dict(tool_counts.most_common()), + "total_calls": total_tool_calls, + } + else: + result["tools"] = { + "calls": tool_calls, + "counts": 
dict(tool_counts.most_common()), + "total_calls": total_tool_calls, + } + + return result diff --git a/scripts/providers/common.py b/scripts/providers/common.py new file mode 100644 index 0000000..15f5fc5 --- /dev/null +++ b/scripts/providers/common.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python3 +"""Shared helpers for provider-specific transcript extraction.""" + +from __future__ import annotations + +import json +import os +import re +from datetime import datetime + +SCHEMA_VERSION = "0.2.0" +LITE_READ_BUF_SIZE = 65536 + +PATCH_FILE_RE = re.compile(r"^\*\*\* (Add|Delete|Update) File: (.+)$") +PATCH_MOVE_RE = re.compile(r"^\*\*\* Move to: (.+)$") + + +def stream_jsonl(path): + """Yield parsed JSONL records one at a time without loading the file.""" + with open(path) as handle: + for line in handle: + line = line.strip() + if not line: + continue + try: + yield json.loads(line) + except json.JSONDecodeError: + continue + + +def read_head_tail(path): + """Read the first and last 64KB of a file.""" + size = os.path.getsize(path) + with open(path, "rb") as handle: + head_bytes = handle.read(LITE_READ_BUF_SIZE) + head = head_bytes.decode("utf-8", errors="replace") + + if size <= LITE_READ_BUF_SIZE: + return head, head, size + + handle.seek(max(0, size - LITE_READ_BUF_SIZE)) + tail_bytes = handle.read(LITE_READ_BUF_SIZE) + tail = tail_bytes.decode("utf-8", errors="replace") + + return head, tail, size + + +def extract_json_field(text, key): + """Extract a JSON string field value without full parsing.""" + for pattern in [f'"{key}":"', f'"{key}": "']: + idx = text.find(pattern) + if idx < 0: + continue + start = idx + len(pattern) + i = start + while i < len(text): + if text[i] == "\\": + i += 2 + continue + if text[i] == '"': + return text[start:i] + i += 1 + return None + + +def parse_ts(ts_str): + """Parse ISO 8601 timestamps used by transcript files.""" + if not ts_str: + return None + try: + return datetime.fromisoformat(ts_str.replace("Z", "+00:00")) + except 
(TypeError, ValueError): + return None + + +def encoded_size(value): + """Estimate the UTF-8 byte length of a structured value.""" + if isinstance(value, bytes): + return len(value) + if isinstance(value, str): + return len(value.encode("utf-8", errors="replace")) + return len(json.dumps(value, default=str).encode("utf-8")) + + +def load_tool_arguments(arguments): + """Parse a tool-call argument payload while preserving raw text.""" + if isinstance(arguments, dict): + return arguments, json.dumps(arguments) + if not isinstance(arguments, str): + return {}, "" + + stripped = arguments.strip() + if not stripped: + return {}, arguments + + if stripped.startswith("{") or stripped.startswith("["): + try: + return json.loads(stripped), arguments + except json.JSONDecodeError: + pass + + return {"_raw": arguments}, arguments + + +def extract_patch_file_changes(patch_text): + """Parse apply_patch file operations into written and edited file sets.""" + changes = { + "written": set(), + "edited": set(), + "deleted": set(), + } + pending_update_path = None + + for raw_line in patch_text.splitlines(): + match = PATCH_FILE_RE.match(raw_line) + if match: + op, file_path = match.groups() + pending_update_path = None + if op == "Add": + changes["written"].add(file_path) + elif op == "Update": + changes["edited"].add(file_path) + pending_update_path = file_path + elif op == "Delete": + changes["deleted"].add(file_path) + continue + + move_match = PATCH_MOVE_RE.match(raw_line) + if move_match and pending_update_path: + changes["edited"].discard(pending_update_path) + changes["written"].add(move_match.group(1)) + changes["deleted"].add(pending_update_path) + pending_update_path = move_match.group(1) + + return changes + + +def classify_tool_name(name): + """Map provider-specific tool names into coarse shared categories.""" + if name in {"Read", "Grep", "Glob"}: + return "read" + if name in {"Write", "Edit", "apply_patch"}: + return "write" + if name in {"Bash", "exec_command"}: + 
return "exec" + if name in {"Agent", "spawn_agent"}: + return "agent" + if name in {"Skill", "request_user_input"}: + return "skill" + if name.startswith("mcp__"): + return "mcp" + return "other" + + +def iso_duration_seconds(start_time, end_time): + start = parse_ts(start_time) + end = parse_ts(end_time) + if not start or not end: + return None + return round((end - start).total_seconds()) diff --git a/tests/fixtures/codex_session.jsonl b/tests/fixtures/codex_session.jsonl new file mode 100644 index 0000000..02d326a --- /dev/null +++ b/tests/fixtures/codex_session.jsonl @@ -0,0 +1,14 @@ +{"timestamp":"2026-04-13T18:30:56.823Z","type":"session_meta","payload":{"id":"019d881b-b082-7d91-9c06-25bcae39c50a","timestamp":"2026-04-13T18:30:09.293Z","cwd":"/home/test/dev/myproject","originator":"codex-tui","cli_version":"0.120.0","source":"cli","model_provider":"openai"}} +{"timestamp":"2026-04-13T18:30:56.824Z","type":"event_msg","payload":{"type":"task_started","turn_id":"019d881c-6a23-73b1-a4bb-e524042b92b6","started_at":1776105056,"model_context_window":258400,"collaboration_mode_kind":"default"}} +{"timestamp":"2026-04-13T18:30:57.000Z","type":"event_msg","payload":{"type":"user_message","message":"please review the repo and add a brief plan","images":[],"local_images":[],"text_elements":[]}} +{"timestamp":"2026-04-13T18:30:58.000Z","type":"event_msg","payload":{"type":"agent_message","message":"I’m inspecting the repository structure and checking the current branch before I plan the change.","phase":"commentary","memory_citation":null}} +{"timestamp":"2026-04-13T18:31:00.000Z","type":"response_item","payload":{"type":"function_call","name":"exec_command","arguments":"{\"cmd\":\"git status --short --branch\",\"workdir\":\"/home/test/dev/myproject\",\"max_output_tokens\":1200}","call_id":"call_exec_1"}} 
+{"timestamp":"2026-04-13T18:31:00.200Z","type":"event_msg","payload":{"type":"exec_command_end","call_id":"call_exec_1","process_id":"12345","turn_id":"019d881c-6a23-73b1-a4bb-e524042b92b6","command":["/bin/bash","-lc","git status --short --branch"],"cwd":"/home/test/dev/myproject","aggregated_output":"## main\n","exit_code":0,"duration":{"secs":0,"nanos":12345},"formatted_output":"","status":"completed"}} +{"timestamp":"2026-04-13T18:31:00.210Z","type":"response_item","payload":{"type":"function_call_output","call_id":"call_exec_1","output":"Chunk ID: abc123\nOutput:\n## main\n"}} +{"timestamp":"2026-04-13T18:31:01.000Z","type":"response_item","payload":{"type":"function_call","name":"spawn_agent","arguments":"{\"agent_type\":\"explorer\",\"message\":\"Inspect the tests directory and summarize what exists.\",\"model\":\"gpt-5.4-mini\"}","call_id":"call_agent_1"}} +{"timestamp":"2026-04-13T18:31:01.250Z","type":"response_item","payload":{"type":"function_call_output","call_id":"call_agent_1","output":"Found four tests and one fixtures directory."}} +{"timestamp":"2026-04-13T18:31:02.000Z","type":"response_item","payload":{"type":"function_call","name":"apply_patch","arguments":"*** Begin Patch\n*** Add File: docs/retro.md\n+Retro notes\n*** Update File: src/app.py\n@@\n-old\n+new\n*** End Patch\n","call_id":"call_patch_1"}} +{"timestamp":"2026-04-13T18:31:02.150Z","type":"response_item","payload":{"type":"function_call_output","call_id":"call_patch_1","output":"Success. 
Updated the following files:\nA docs/retro.md\nM src/app.py\n"}} +{"timestamp":"2026-04-13T18:31:03.000Z","type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"input_tokens":2400,"cached_input_tokens":1800,"output_tokens":320,"reasoning_output_tokens":40,"total_tokens":4560},"last_token_usage":{"input_tokens":300,"cached_input_tokens":500,"output_tokens":80,"reasoning_output_tokens":10,"total_tokens":880},"model_context_window":258400},"rate_limits":{"limit_id":"codex","plan_type":"pro"}}} +{"timestamp":"2026-04-13T18:31:04.000Z","type":"event_msg","payload":{"type":"agent_message","message":"I found the current repo state and prepared a short plan with the relevant files.","phase":"final_answer","memory_citation":null}} +{"timestamp":"2026-04-13T18:31:05.000Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"019d881c-6a23-73b1-a4bb-e524042b92b6","last_agent_message":"I found the current repo state and prepared a short plan with the relevant files.","completed_at":1776105065,"duration_ms":8000}} diff --git a/tests/test_codex_extract.py b/tests/test_codex_extract.py new file mode 100644 index 0000000..8fa4143 --- /dev/null +++ b/tests/test_codex_extract.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +"""Tests for the Codex transcript adapter.""" + +import os +import sys +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +REPO_ROOT = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(REPO_ROOT / "scripts")) + +import extract # noqa: E402 + + +FIXTURES_DIR = Path(__file__).resolve().parent / "fixtures" + + +class TestCodexMetadata(unittest.TestCase): + def setUp(self): + self.fixture = str(FIXTURES_DIR / "codex_session.jsonl") + self.meta = extract.extract_metadata_lite(self.fixture) + + def test_provider_detected(self): + self.assertEqual(self.meta["provider"], "codex") + + def test_session_id_extracted(self): + self.assertEqual(self.meta["session_id"], 
"019d881b-b082-7d91-9c06-25bcae39c50a") + + def test_first_prompt_extracted(self): + self.assertEqual(self.meta["first_prompt"], "please review the repo and add a brief plan") + + +class TestCodexExtraction(unittest.TestCase): + def setUp(self): + self.fixture = str(FIXTURES_DIR / "codex_session.jsonl") + self.result = extract.extract_all_streaming(self.fixture) + + def test_provider_set(self): + self.assertEqual(self.result["provider"], "codex") + + def test_tool_counts(self): + counts = self.result["tools"]["counts"] + self.assertEqual(counts.get("exec_command", 0), 1) + self.assertEqual(counts.get("spawn_agent", 0), 1) + self.assertEqual(counts.get("apply_patch", 0), 1) + + def test_files_tracked_from_apply_patch(self): + self.assertEqual(self.result["files"]["written"], ["docs/retro.md"]) + self.assertEqual(self.result["files"]["edited"], ["src/app.py"]) + + def test_conversation_arc_preserved(self): + roles = [message["role"] for message in self.result["conversation_arc"]] + self.assertIn("user", roles) + self.assertIn("assistant", roles) + + def test_token_mapping(self): + total = self.result["tokens"]["total"] + self.assertEqual(total["input_tokens"], 2400) + self.assertEqual(total["cache_read_input_tokens"], 1800) + self.assertEqual(total["output_tokens"], 320) + self.assertEqual(total["reasoning_output_tokens"], 40) + self.assertIsNone(self.result["tokens"]["estimated_cost_usd"]) + + def test_exec_command_result_sizes_prefer_raw_output(self): + exec_stats = self.result["tool_result_sizes"]["exec_command"] + self.assertEqual(exec_stats["total_bytes"], len("## main\n".encode("utf-8"))) + + def test_spawn_agent_recorded(self): + self.assertEqual(len(self.result["agents"]), 1) + self.assertEqual(self.result["agents"][0]["type"], "explorer") + + +class TestDiscovery(unittest.TestCase): + def test_codex_current_session_discovery(self): + fixture = FIXTURES_DIR / "codex_session.jsonl" + with tempfile.TemporaryDirectory() as tmpdir: + home = Path(tmpdir) + 
target_dir = home / ".codex" / "sessions" / "2026" / "04" / "13" + target_dir.mkdir(parents=True) + target_file = target_dir / "rollout-2026-04-13T18-30-09-test.jsonl" + target_file.write_text(fixture.read_text()) + + with patch.dict(os.environ, {"HOME": str(home)}): + provider, transcript_path = extract.discover_current_session( + provider="auto", + cwd="/home/test/dev/myproject", + ) + + self.assertEqual(provider, "codex") + self.assertEqual(transcript_path, str(target_file)) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_discovery.py b/tests/test_discovery.py new file mode 100644 index 0000000..1cfe981 --- /dev/null +++ b/tests/test_discovery.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +"""Cross-provider discovery tests.""" + +import json +import os +import sys +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +REPO_ROOT = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(REPO_ROOT / "scripts")) + +import extract # noqa: E402 + + +class TestDiscoveryPriority(unittest.TestCase): + def test_auto_prefers_newest_matching_provider(self): + cwd = "/tmp/project" + with tempfile.TemporaryDirectory() as tmpdir: + home = Path(tmpdir) + + claude_sessions = home / ".claude" / "sessions" + claude_projects = home / ".claude" / "projects" / cwd.replace("/", "-") + claude_sessions.mkdir(parents=True) + claude_projects.mkdir(parents=True) + (claude_sessions / "100.json").write_text(json.dumps({"pid": 100, "sessionId": "claude-session", "cwd": cwd})) + claude_transcript = claude_projects / "claude-session.jsonl" + claude_transcript.write_text( + '{"sessionId":"claude-session","cwd":"/tmp/project","timestamp":"2026-04-13T08:00:00Z","type":"session_start"}\n' + ) + + codex_dir = home / ".codex" / "sessions" / "2026" / "04" / "13" + codex_dir.mkdir(parents=True) + codex_transcript = codex_dir / "rollout-2026-04-13T08-00-01-codex.jsonl" + codex_transcript.write_text( + 
'{"timestamp":"2026-04-13T08:00:01Z","type":"session_meta","payload":{"id":"codex-session","timestamp":"2026-04-13T08:00:01Z","cwd":"/tmp/project","cli_version":"0.120.0"}}\n' + ) + + os.utime(claude_transcript, (1, 1)) + os.utime(codex_transcript, (2, 2)) + + with patch.dict(os.environ, {"HOME": str(home)}): + provider, transcript_path = extract.discover_current_session(provider="auto", cwd=cwd) + + self.assertEqual(provider, "codex") + self.assertEqual(transcript_path, str(codex_transcript)) + + +if __name__ == "__main__": + unittest.main(verbosity=2) From 4dd06821c8882717e3a52d8bf75e6d3cd4a21a12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niccol=C3=B2=20Ferrari?= Date: Tue, 14 Apr 2026 01:30:34 +0200 Subject: [PATCH 2/2] test: add portable Claude discovery regression checks --- tests/test_discovery.py | 79 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/tests/test_discovery.py b/tests/test_discovery.py index 1cfe981..ca88f14 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -48,5 +48,84 @@ def test_auto_prefers_newest_matching_provider(self): self.assertEqual(transcript_path, str(codex_transcript)) +class TestClaudeDiscovery(unittest.TestCase): + def test_claude_uses_session_index_for_matching_cwd(self): + with tempfile.TemporaryDirectory() as tmpdir: + home = Path(tmpdir) + target_cwd = str(home / "workspaces" / "target") + other_cwd = str(home / "workspaces" / "other") + + sessions_dir = home / ".claude" / "sessions" + project_dir = home / ".claude" / "projects" / target_cwd.replace("/", "-") + other_project_dir = home / ".claude" / "projects" / other_cwd.replace("/", "-") + sessions_dir.mkdir(parents=True) + project_dir.mkdir(parents=True) + other_project_dir.mkdir(parents=True) + + (sessions_dir / "200.json").write_text( + json.dumps({"pid": 200, "sessionId": "wrong-cwd", "cwd": other_cwd}) + ) + (sessions_dir / "201.json").write_text( + json.dumps({"pid": 201, "sessionId": "matching-cwd", "cwd": 
target_cwd}) + ) + + unrelated_transcript = other_project_dir / "wrong-cwd.jsonl" + unrelated_transcript.write_text( + '{"sessionId":"wrong-cwd","cwd":"' + + other_cwd + + '","timestamp":"2026-04-13T08:00:00Z","type":"session_start"}\n' + ) + matching_transcript = project_dir / "matching-cwd.jsonl" + matching_transcript.write_text( + '{"sessionId":"matching-cwd","cwd":"' + + target_cwd + + '","timestamp":"2026-04-13T08:00:01Z","type":"session_start"}\n' + ) + + os.utime(unrelated_transcript, (2, 2)) + os.utime(matching_transcript, (1, 1)) + + with patch.dict(os.environ, {"HOME": str(home)}): + provider, transcript_path = extract.discover_current_session( + provider="claude", + cwd=target_cwd, + ) + + self.assertEqual(provider, "claude") + self.assertEqual(transcript_path, str(matching_transcript)) + + def test_claude_falls_back_to_latest_project_transcript(self): + with tempfile.TemporaryDirectory() as tmpdir: + home = Path(tmpdir) + target_cwd = str(home / "scratch" / "portable-target") + project_dir = home / ".claude" / "projects" / target_cwd.replace("/", "-") + project_dir.mkdir(parents=True) + + older = project_dir / "older.jsonl" + newer = project_dir / "newer.jsonl" + older.write_text( + '{"sessionId":"older","cwd":"' + + target_cwd + + '","timestamp":"2026-04-13T08:00:00Z","type":"session_start"}\n' + ) + newer.write_text( + '{"sessionId":"newer","cwd":"' + + target_cwd + + '","timestamp":"2026-04-13T08:00:01Z","type":"session_start"}\n' + ) + + os.utime(older, (1, 1)) + os.utime(newer, (2, 2)) + + with patch.dict(os.environ, {"HOME": str(home)}): + provider, transcript_path = extract.discover_current_session( + provider="claude", + cwd=target_cwd, + ) + + self.assertEqual(provider, "claude") + self.assertEqual(transcript_path, str(newer)) + + if __name__ == "__main__": unittest.main(verbosity=2)