Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions internal/attractor/agents/tmux_env_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Unit tests for the tmux agent session env-building helper.
package agents

import (
"testing"

"github.com/danshapiro/kilroy/internal/attractor/agents/templates"
"github.com/danshapiro/kilroy/internal/attractor/engine"
)

// TestBuildTmuxAgentEnv_IncludesStageStatusContractAndRuntime verifies that the
// env built for a tmux agent session carries the template's own defaults, the
// run/node identifiers and worktree/logs paths taken from the execution, and
// non-empty stage status contract entries.
func TestBuildTmuxAgentEnv_IncludesStageStatusContractAndRuntime(t *testing.T) {
	worktree, logs := t.TempDir(), t.TempDir()

	toolTemplate := &templates.Template{
		BuildEnv: func() map[string]string {
			return map[string]string{"TOOL_DEFAULT": "present"}
		},
	}
	execution := &engine.Execution{
		WorktreeDir: worktree,
		LogsRoot:    logs,
		Engine: &engine.Engine{
			Options: engine.RunOptions{RunID: "run-001"},
		},
	}

	got := buildTmuxAgentEnv(toolTemplate, execution, "node-alpha")

	// Keys whose value is fully determined by the inputs above.
	exact := map[string]string{
		"TOOL_DEFAULT":        "present",
		"KILROY_RUN_ID":       "run-001",
		"KILROY_NODE_ID":      "node-alpha",
		"KILROY_WORKTREE_DIR": worktree,
		"KILROY_LOGS_ROOT":    logs,
	}
	for key, want := range exact {
		if have := got[key]; have != want {
			t.Errorf("%s = %q, want %q", key, have, want)
		}
	}

	// Keys that only have to be present and non-empty; the test does not pin
	// their exact values. A slice keeps the failure order stable.
	required := []struct {
		key     string
		failure string
	}{
		{"KILROY_STAGE_STATUS_PATH", "KILROY_STAGE_STATUS_PATH missing — agents cannot find where to write status.json"},
		{"KILROY_STAGE_STATUS_FALLBACK_PATH", "KILROY_STAGE_STATUS_FALLBACK_PATH missing"},
		{"KILROY_STAGE_LOGS_DIR", "KILROY_STAGE_LOGS_DIR missing"},
		{"KILROY_DATA_DIR", "KILROY_DATA_DIR missing"},
	}
	for _, req := range required {
		if got[req.key] == "" {
			t.Error(req.failure)
		}
	}
}

// TestBuildTmuxAgentEnv_NilTemplateStartsClean checks that a nil template is
// tolerated: the helper must still return a non-nil map that carries the run
// ID from the execution.
func TestBuildTmuxAgentEnv_NilTemplateStartsClean(t *testing.T) {
	const runID = "run-nil-tmpl"

	execution := &engine.Execution{
		WorktreeDir: t.TempDir(),
		LogsRoot:    t.TempDir(),
		Engine:      &engine.Engine{Options: engine.RunOptions{RunID: runID}},
	}

	got := buildTmuxAgentEnv(nil, execution, "node")
	if got == nil {
		t.Fatal("env must not be nil even when template is nil")
	}
	if have := got["KILROY_RUN_ID"]; have != runID {
		t.Errorf("KILROY_RUN_ID = %q, want run-nil-tmpl", have)
	}
}
40 changes: 26 additions & 14 deletions internal/attractor/agents/tmux_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,20 +69,7 @@ func (h *TmuxAgentHandler) Execute(ctx context.Context, exec *engine.Execution,
sessionName := buildSessionName(runID, node.ID)

// Build environment variables.
env := tmpl.BuildEnv()
if env == nil {
env = map[string]string{}
}
if runID != "" {
env["KILROY_RUN_ID"] = runID
}
env["KILROY_NODE_ID"] = node.ID
// Add input env vars if available.
if exec != nil && exec.Engine != nil {
for k, v := range engine.InputEnvVars(exec.Engine.Options.Inputs) {
env[k] = v
}
}
env := buildTmuxAgentEnv(tmpl, exec, node.ID)

// Resolve model from node attributes.
modelID := strings.TrimSpace(node.Attr("llm_model", ""))
Expand Down Expand Up @@ -348,6 +335,31 @@ func resolveToolName(node *model.Node) string {
return "claude" // default
}

// buildTmuxAgentEnv assembles the environment map handed to a tmux-run agent
// session. Three sources are layered, with later layers overwriting earlier
// ones when keys collide:
//
//  1. the tool template's defaults from tmpl.BuildEnv (skipped when tmpl is
//     nil; a nil result is replaced by an empty map),
//  2. the engine runtime invariants from engine.BuildStageRuntimeEnv
//     (run/node IDs, worktree/logs paths, input env),
//  3. the stage status contract env vars derived from exec.WorktreeDir
//     (skipped when exec is nil).
//
// The status contract entries make the engine-injected status-contract
// preamble actionable from inside the session; without them, agents spend
// tool calls hunting for KILROY_STAGE_STATUS_PATH.
//
// The returned map is never nil. When the template supplies a map, that same
// map is extended in place and returned.
func buildTmuxAgentEnv(tmpl *templates.Template, exec *engine.Execution, nodeID string) map[string]string {
	var merged map[string]string
	if tmpl != nil {
		merged = tmpl.BuildEnv()
	}
	if merged == nil {
		merged = make(map[string]string)
	}

	for name, value := range engine.BuildStageRuntimeEnv(exec, nodeID) {
		merged[name] = value
	}

	// No execution means no worktree to derive the status contract from.
	if exec == nil {
		return merged
	}
	for name, value := range engine.BuildStageStatusContract(exec.WorktreeDir).EnvVars {
		merged[name] = value
	}
	return merged
}

// buildSessionName creates a unique tmux session name for a node execution.
func buildSessionName(runID, nodeID string) string {
name := "kilroy"
Expand Down
47 changes: 47 additions & 0 deletions workflows/coding-loop/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# coding-loop

An iterative coding workflow that repeatedly chooses sub-tasks, implements them, reviews the results, and decides when the spec is complete.

## What it does

Runs up to 12 loops of:

1. **Task Chooser** — reads the spec + latest review, picks the highest-priority unimplemented sub-task, writes `.kilroy/task.md`.
2. **Implementer** — reads the task, writes code, commits.
3. **Reviewer** — inspects the implementer's latest commit (`git show HEAD`) against the spec, writes `.reviews/iter-NNN.md` and `.reviews/latest.md`, commits.
4. **Done Gate** — reads spec + latest review, writes `COMPLETE` or `CONTINUE` to `.kilroy/decision.md`.

When Done Gate writes `COMPLETE` (or `loop_max=12` is reached), the loop exits and a **Report** node writes `result.md`.

## How to launch

```bash
kilroy attractor run \
--package workflows/coding-loop/ \
--workspace /abs/path/to/target-repo \
--input '{"spec":"/abs/path/to/spec.md"}'
```

- `--workspace` — the repo being coded against (must already exist; the caller handles `git init` / `go mod init` etc.)
- `--input spec` — absolute path to the spec/requirements file (read in-place; not copied into the repo)

## Input contract

| Key | Required | Description |
|--------|----------|-------------|
| `spec` | yes | Absolute path to the spec/requirements markdown file |

## Output contract

| File | Description |
|------------------|-------------|
| `result.md` | Summary: what was implemented, iterations run, final status |
| `.reviews/iter-NNN.md` | Per-iteration reviewer feedback (committed to repo) |
| `.reviews/latest.md` | Rolling copy of the most recent review |

## Known limits

- `loop_max=12` — hard ceiling; if the done-gate never writes `COMPLETE` after 12 iterations, the run fails.
- Chooser, done-gate, and report use `claude-haiku-4.5` (cheap, API-based). Implementer and reviewer use `claude-sonnet-4.6` (Claude Code CLI via tmux, or API).
- Spec is NOT committed to the target repo — it is read in-place via the `spec` input path.
- No pre-flight scaffolding — the caller must initialize the repo before launching.
206 changes: 206 additions & 0 deletions workflows/coding-loop/graph.dot
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
// coding-loop: iterative coding agent with reviewer feedback and LLM done-gate.
//
// Inputs: spec (absolute path to a spec file).
// Output: result.md at the workspace root.
//
// Flow (up to loop_max=12 iterations):
// start → loop_begin → task_chooser → implementer → reviewer → done_gate → loop_end → report → done
//
// Termination: done_gate writes COMPLETE or CONTINUE to .kilroy/decision.md.
// loop_end checks for COMPLETE and either loops back to loop_begin or exits.

digraph coding_loop {
graph [
inputs="spec",
outputs="result.md",
model_stylesheet="
* { llm_provider: anthropic; llm_model: claude-haiku-4.5; }
.implementer { llm_model: claude-sonnet-4.6; }
.reviewer { llm_model: claude-sonnet-4.6; }
"
]

start [shape=Mdiamond, label="Start"]
done [shape=Msquare, label="Done"]

// Loop sentinel: marks the top of the iteration scope.
// The engine jumps back here when loop_end decides to continue.
loop_begin [
shape=trapezium,
label="Loop Begin",
loop_id="main",
loop_max=12,
loop_until_file_contains=".kilroy/decision.md:COMPLETE"
]

// Task chooser: lightweight model reads spec + latest review,
// decides the most impactful next sub-task, writes .kilroy/task.md.
task_chooser [
shape=box,
label="Task Chooser",
agent_mode="agent_loop",
prompt="You are the task chooser in an iterative coding workflow.

Your job: read the spec and latest review, then decide the single most important next sub-task for the implementer to work on this iteration.

Steps:
1. Read .kilroy/INPUT.md. Find the '## spec' section — it contains the absolute path to the spec file.
2. Read the spec file at that path.
3. Run: git log --oneline -10
4. Run: git status
5. If .reviews/latest.md exists, read it for prior feedback and what remains incomplete.
6. Choose EXACTLY ONE smallest-possible self-contained sub-task. Do NOT bundle multiple features into one iteration.
- If no code files exist yet, pick ONLY the foundation step (e.g. go.mod bootstrap, or the type + constructor) and STOP there. Do not also implement any features.
- If foundation exists, pick the SINGLE smallest unimplemented feature. Not two. Not three. Exactly one.
- If all features exist but the reviewer flagged a bug, pick JUST that bug and nothing else.
The explicit goal is to force multiple tight iterations. Each iteration should produce the minimum-viable increment — a single method, a single test, a single bugfix.
7. Write a concise task description to .kilroy/task.md. Include:
- What to implement: ONE thing, explicitly and narrowly scoped.
- Acceptance criteria for THIS sub-task only (2-4 bullets, focused solely on what's being added this iteration).
- Files likely affected (your best guess).
- Explicit guardrail line, quoted verbatim: Do NOT implement any other features in this iteration. Stop immediately after the scoped sub-task is complete, even if other unimplemented items are visible in the spec.
Keep .kilroy/task.md under 30 lines.

When finished, write {\"status\":\"success\"} to $KILROY_STAGE_STATUS_PATH or $KILROY_STAGE_STATUS_FALLBACK_PATH if that fails."
]

// Implementer: reads .kilroy/task.md, implements the task, commits.
implementer [
shape=box,
label="Implementer",
class="implementer",
agent_tool="claude",
prompt="You are the implementer in an iterative coding workflow.

Your job: read the task and implement it in the codebase.

Steps:
1. Read .kilroy/task.md for the current sub-task and its acceptance criteria.
2. Read .kilroy/INPUT.md to find the spec path ('## spec' section), then read the spec for broader context.
3. Implement ONLY what .kilroy/task.md asks for. Do NOT implement any other features you see in the spec or guess at next steps — the chooser is responsible for scheduling and will hand you the next sub-task on the next iteration. Stay strictly in your lane.
4. Write clean, idiomatic code. Make targeted changes — do not refactor unrelated code.
5. Run any applicable tests or build commands to verify your work does not break existing functionality.
6. Stage and commit ALL changes with a clear message describing what was done (e.g., 'feat: add X per spec').
If there is genuinely nothing to implement (task already done), write a note to .kilroy/implementer-note.md and commit that.

When finished, write {\"status\":\"success\"} to $KILROY_STAGE_STATUS_PATH or $KILROY_STAGE_STATUS_FALLBACK_PATH if that fails."
]

// Reviewer: reads task + diff, writes .reviews/iter-NNN.md and .reviews/latest.md, commits.
reviewer [
shape=box,
label="Reviewer",
class="reviewer",
agent_tool="claude",
prompt="You are the reviewer in an iterative coding workflow.

Your job: review the latest commit against the spec and record structured feedback.

Steps:
1. Read .kilroy/task.md to understand what was supposed to be implemented this iteration.
2. Read .kilroy/INPUT.md to find the spec path ('## spec' section), then read the spec for acceptance criteria.
3. Run: git log --oneline -5
4. Run: git show HEAD (shows the commit message + full diff of the implementer's latest commit; works regardless of commit depth)
5. Compute the iteration number:
mkdir -p .reviews
N=$(ls .reviews/iter-*.md 2>/dev/null | wc -l | tr -d ' ')
ITER=$((N + 1))
PADDED=$(printf '%03d' $ITER)
6. Write your review to both .reviews/iter-${PADDED}.md AND .reviews/latest.md (overwrite latest.md each time).
Review format:
## Iteration ${PADDED}
## What Changed
(summary of the diff — what files, what logic)
## Against Spec
- Done: (list items completed)
- Remaining: (list required items not yet implemented)
## Quality Notes
(code quality, edge cases, test coverage observations)
## Recommendation
CONTINUE — if required spec items remain unimplemented.
COMPLETE — ONLY if ALL required spec items are done and the implementation is correct.
7. Stage and commit the .reviews/ changes:
git add .reviews/
git commit -m \"review: iteration ${PADDED} feedback\"

When finished, write {\"status\":\"success\"} to $KILROY_STAGE_STATUS_PATH or $KILROY_STAGE_STATUS_FALLBACK_PATH if that fails."
]

// Done-gate: lightweight LLM decides COMPLETE vs CONTINUE, writes to .kilroy/decision.md.
done_gate [
shape=box,
label="Done Gate",
agent_mode="agent_loop",
prompt="You are the done-gate in an iterative coding workflow.

Your job: decide whether the spec is fully implemented based on the latest review.

Steps:
1. Read .kilroy/INPUT.md to find the spec path ('## spec' section), then read the spec.
2. Run: ls .reviews/
3. Read .reviews/latest.md (the most recent review).
4. Optionally read additional .reviews/iter-NNN.md files if you need more context.
5. Decide:
- COMPLETE: ALL required spec items are implemented and the latest review confirms it.
- CONTINUE: Required spec items remain unimplemented or the reviewer says CONTINUE.

CRITICAL: Write EXACTLY one of these two words to .kilroy/decision.md — nothing else, no whitespace, no newline:
echo -n COMPLETE > .kilroy/decision.md
OR
echo -n CONTINUE > .kilroy/decision.md

The engine reads this file byte-by-byte to decide loop termination. Any extra characters (spaces, newlines, punctuation) will prevent termination detection.

When finished, write {\"status\":\"success\"} to $KILROY_STAGE_STATUS_PATH or $KILROY_STAGE_STATUS_FALLBACK_PATH if that fails."
]

// Loop sentinel: marks the end of the iteration scope.
// Checks termination conditions; if not met, jumps back to loop_begin.
loop_end [
shape=invtrapezium,
label="Loop End",
loop_id="main",
loop_max=12,
loop_until_file_contains=".kilroy/decision.md:COMPLETE"
]

// Report: summarizes all work done, iterations run, final status.
report [
shape=box,
label="Report",
agent_mode="agent_loop",
prompt="You are the final reporter in an iterative coding workflow.

Your job: write a comprehensive summary of the completed work.

Steps:
1. Read .kilroy/INPUT.md to find the spec path ('## spec' section), then read the spec.
2. Read .reviews/latest.md for the final review and recommendation.
3. Count iterations: ls .reviews/iter-*.md 2>/dev/null | wc -l
4. Run: git log --oneline -20
5. Read .kilroy/decision.md to confirm the final decision (COMPLETE or CONTINUE/loop_max exceeded).
6. Write result.md at the workspace root with these sections:
## Summary
What was implemented and how it maps to the spec requirements.
## Iterations
How many loops ran and what changed in each (brief, one line per iteration).
## Final Review
Key points from .reviews/latest.md.
## Commits
List of commits made during this run (from git log).
## Status
COMPLETE — all required spec items implemented.
OR INCOMPLETE — what remains and why the loop stopped.

When finished, write {\"status\":\"success\"} to $KILROY_STAGE_STATUS_PATH or $KILROY_STAGE_STATUS_FALLBACK_PATH if that fails."
]

start -> loop_begin
loop_begin -> task_chooser
task_chooser -> implementer
implementer -> reviewer
reviewer -> done_gate
done_gate -> loop_end
loop_end -> report
report -> done
}
19 changes: 19 additions & 0 deletions workflows/coding-loop/workflow.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Coding-loop workflow: iterative LLM coding agent with reviewer feedback and done-gate.
# Runs up to 12 choose→implement→review→gate iterations against a user-supplied spec.
# Usage: kilroy attractor run --package workflows/coding-loop/ \
# --workspace <target-repo-dir> \
# --input '{"spec":"/abs/path/to/spec.md"}'

name = "coding-loop"
description = "Iterative coding workflow: a task chooser selects sub-tasks, an implementer codes them, a reviewer scores each iteration, and a done-gate decides when the spec is complete."
version = "0.1.0"

outputs = ["result.md"]

[[inputs]]
name = "spec"
description = "Absolute path to the feature/task spec file that defines what to build."
required = true

[defaults]
labels = { workflow = "coding-loop" }
Loading