diff --git a/.agent/harness/context_budget.py b/.agent/harness/context_budget.py index 1dc7045..466b2fe 100644 --- a/.agent/harness/context_budget.py +++ b/.agent/harness/context_budget.py @@ -132,57 +132,253 @@ def _top_lessons(query, lessons_md, char_budget=8000): return _lines_up_to_budget([l for _, _, l in relevant], char_budget) +_TRUNC_MARKER = "\n\n[truncated to fit budget]" +_OMIT_MARKER_FMT = "[{n} items omitted: budget exceeded]" + + +class _UsedTokens(int): + """int subclass that carries an `overflow` flag. + + Existing callers do `ctx, used = build_context(...)` and treat `used` as an + int — they still see the correct number. New callers can read + `used.overflow` to learn whether enforcement had to drop or truncate + content. This keeps the public 2-tuple signature compatible. + """ + + # int subclasses can't accept __slots__ for instance attrs (variable-size + # base type), so we override __new__ to stash overflow on the instance dict + # via plain assignment after relying on the default dict. + def __new__(cls, value, overflow=False): + obj = super().__new__(cls, value) + obj.overflow = overflow + return obj + + +def _truncate_to_tokens(text, max_tokens): + """Truncate text so its token estimate fits in max_tokens, with marker. + + Uses the same chars-to-tokens ratio as `_token_estimate` (4 chars/token). + Reserves room for the truncation marker so the post-truncation estimate + still fits the budget the caller passed in. + """ + if max_tokens <= 0: + return "" + if _token_estimate(text) <= max_tokens: + return text + marker_tokens = _token_estimate(_TRUNC_MARKER) + char_budget = max(0, (max_tokens - marker_tokens) * 4) + if char_budget <= 0: + # No room even for body. Emit just the marker so the section still + # signals presence (required sections must remain in the output). 
+ return _TRUNC_MARKER.lstrip() + return text[:char_budget] + _TRUNC_MARKER + + def build_context(user_input: str, budget: int = 88000): - """Returns (context_string, tokens_used). Lean and query-aware.""" + """Returns (context_string, used_tokens). Lean, query-aware, budget-enforced. + + Budget enforcement (P1 fix): + * Required sections (AGENTS map, active workspace, permissions) are + always present in the output. If they would overflow the budget, + their content is truncated to fit and tagged with a + `[truncated to fit budget]` marker — never dropped silently. + * Optional sections (lessons, episodes, matched skills) are skipped + entirely with an `[N items omitted: budget exceeded]` marker when + they would overflow. + * Every assembled context ends with a `[budget: used X / Y tokens]` + summary so callers can see the final accounting. + + Return shape is preserved: a 2-tuple `(context_string, used_tokens)`. + `used_tokens` is an int subclass that exposes an `overflow: bool` + attribute for new callers; existing callers that treat it as a plain + int are unaffected. + """ parts, used = [], 0 + overflow = False + + # Each appended block costs its own tokens *plus* the `\n\n---\n\n` + # separator that join() will add between it and the next block. We track + # separator overhead explicitly so the budget check matches what the + # caller actually receives. + SEPARATOR_TOKENS = _token_estimate("\n\n---\n\n") # 9 chars → 2 tokens + # Reserve room for the final `[budget: used X / Y tokens]` summary line + # plus its leading separator. Width here is a conservative upper bound. + SUMMARY_RESERVE_TOKENS = _token_estimate("[budget: used 99999999 / 99999999 tokens]") + SEPARATOR_TOKENS + # Per-block `len(s)//4` truncation undercounts vs the post-join estimate. + # Reserve small headroom so the final joined string still fits the budget. 
+ DRIFT_HEADROOM_TOKENS = 4 - # always load: personal preferences + live workspace + AGENTS map + DECISIONS - # AGENTS.md and DECISIONS.md were missing despite AGENTS.md specifying the - # read order — the standalone path was not faithful to its own contract. - for rel in ( + # Required sections are *mandatory* — their headers + omission markers + # are floor-cost overhead. To keep the joined output within budget, we + # pre-reserve that floor so early required sections don't eat budget + # that later required sections need just for their stub. + required_files = ( "AGENTS.md", "memory/personal/PREFERENCES.md", "memory/working/WORKSPACE.md", "memory/working/REVIEW_QUEUE.md", "memory/semantic/DECISIONS.md", - ): + ) + perms_path = "protocols/permissions.md" + + def _stub_cost(rel_or_label): + """Token cost of the minimum stub (header + omission marker + sep).""" + if rel_or_label == perms_path: + header = "# PERMISSIONS\n" + else: + header = f"# {rel_or_label}\n" + stub = header + _OMIT_MARKER_FMT.format(n=1) + return _token_estimate(stub) + SEPARATOR_TOKENS + + # Floor = stub cost for every required file that exists on disk + perms. + required_floor = 0 + for rel in required_files: + if _read(rel): + required_floor += _stub_cost(rel) + if _read(perms_path): + required_floor += _stub_cost(perms_path) + + def _append(block): + """Append a block, charging both its tokens and the join separator.""" + nonlocal used + parts.append(block) + # First block has no preceding separator; subsequent blocks do. + sep_cost = SEPARATOR_TOKENS if len(parts) > 1 else 0 + used += _token_estimate(block) + sep_cost + + def _block_cost(block): + """Token cost of appending `block` (block + separator if not first).""" + sep_cost = SEPARATOR_TOKENS if len(parts) >= 1 else 0 + return _token_estimate(block) + sep_cost + + # Track how much of `required_floor` we've already paid; the remainder + # is reserved out of `_room()` so we don't overspend on early sections. 
+ paid_floor = 0 + + def _room(): + # Remaining required_floor we haven't paid yet stays reserved. + remaining_floor = max(0, required_floor - paid_floor) + return budget - used - SUMMARY_RESERVE_TOKENS - DRIFT_HEADROOM_TOKENS - remaining_floor + + # ------------------------------------------------------------------ + # Required sections — must appear in output. Truncate if oversized. + # Preserves the original load order: AGENTS map first, then personal + # preferences, live workspace, review queue, semantic decisions. These + # are the sections agentic-stack treats as always-on context. + # ------------------------------------------------------------------ + for rel in required_files: text = _read(rel) - if text: - parts.append(f"# {rel}\n{text}") - used += _token_estimate(text) + if not text: + continue + header = f"# {rel}\n" + # Pay this section's floor first so _room() releases its reservation. + paid_floor += _stub_cost(rel) + # Room for the *body*, after subtracting header and separator overhead. + sep_cost = SEPARATOR_TOKENS if parts else 0 + body_room = _room() - _token_estimate(header) - sep_cost + body_tokens = _token_estimate(text) + if body_room <= 0: + # No room left at all. Emit header + omission marker so the + # caller still sees the section name in the assembled context. + block = header + _OMIT_MARKER_FMT.format(n=1) + _append(block) + overflow = True + continue + if body_tokens > body_room: + text = _truncate_to_tokens(text, body_room) + overflow = True + _append(header + text) - # query-aware lessons + # ------------------------------------------------------------------ + # Optional: query-aware lessons. Skip with marker on overflow. 
+ # ------------------------------------------------------------------ lessons_raw = _read("memory/semantic/LESSONS.md") if lessons_raw: lessons = _top_lessons(user_input, lessons_raw, char_budget=8000) if lessons: - parts.append(f"# LESSONS (query-relevant)\n{lessons}") - used += _token_estimate(lessons) + header = "# LESSONS (query-relevant)\n" + block = header + lessons + if _block_cost(block) <= _room(): + _append(block) + else: + n = sum(1 for ln in lessons.splitlines() if ln.strip().startswith("- ")) + marker_block = header + _OMIT_MARKER_FMT.format(n=max(n, 1)) + if _block_cost(marker_block) <= _room(): + _append(marker_block) + overflow = True - # query-aware top episodes + # ------------------------------------------------------------------ + # Optional: query-aware top episodes. Skip with marker on overflow. + # ------------------------------------------------------------------ episodes = _top_episodes(user_input, k=5) if episodes: - parts.append(f"# RECENT EPISODES (salience x relevance)\n{episodes}") - used += _token_estimate(episodes) + header = "# RECENT EPISODES (salience x relevance)\n" + block = header + episodes + if _block_cost(block) <= _room(): + _append(block) + else: + n = sum(1 for ln in episodes.splitlines() if ln.strip().startswith("- ")) + marker_block = header + _OMIT_MARKER_FMT.format(n=max(n, 1)) + if _block_cost(marker_block) <= _room(): + _append(marker_block) + overflow = True - # matched skills only (progressive_load is already input-matched). + # ------------------------------------------------------------------ + # Optional: matched skills (progressive_load is already input-matched). # Lazy import so a missing skill_loader doesn't kill context assembly. 
+ # ------------------------------------------------------------------ try: from skill_loader import progressive_load skills = progressive_load(user_input) except Exception: skills = [] + skipped_skills = 0 for s in skills: block = f"## Skill: {s['name']}\n{s['content']}" - t = _token_estimate(block) - if used + t < budget: - parts.append(block) - used += t + if _block_cost(block) <= _room(): + _append(block) + else: + skipped_skills += 1 + overflow = True + if skipped_skills: + marker_block = _OMIT_MARKER_FMT.format(n=skipped_skills) + " (skills)" + if _block_cost(marker_block) <= _room(): + _append(marker_block) - # permissions always last, small, safety-critical - perms = _read("protocols/permissions.md") + # ------------------------------------------------------------------ + # Required: permissions. Last and safety-critical — must appear, + # truncated if oversized. + # ------------------------------------------------------------------ + perms = _read(perms_path) if perms: - parts.append(f"# PERMISSIONS\n{perms}") - used += _token_estimate(perms) + header = "# PERMISSIONS\n" + # Pay the perms floor so _room() releases its reservation. + paid_floor += _stub_cost(perms_path) + sep_cost = SEPARATOR_TOKENS if parts else 0 + body_room = _room() - _token_estimate(header) - sep_cost + body_tokens = _token_estimate(perms) + if body_room <= 0: + block = header + _OMIT_MARKER_FMT.format(n=1) + _append(block) + overflow = True + else: + if body_tokens > body_room: + perms = _truncate_to_tokens(perms, body_room) + overflow = True + _append(header + perms) + + # ------------------------------------------------------------------ + # Final summary line. Always appended so callers can audit the + # assembled context's accounting at a glance. 
+ # ------------------------------------------------------------------ + summary = f"[budget: used {used} / {budget} tokens]" + _append(summary) - return "\n\n---\n\n".join(parts), used + # Reconcile the running tally against the actually joined string. Per-block + # `len(s) // 4` integer truncation undercounts vs the concatenated whole, + # so prefer the post-join estimate as the authoritative number returned. + final = "\n\n---\n\n".join(parts) + final_tokens = _token_estimate(final) + if final_tokens > budget: + overflow = True + return final, _UsedTokens(final_tokens, overflow) diff --git a/.agent/harness/control_plane.py b/.agent/harness/control_plane.py new file mode 100644 index 0000000..6fffa77 --- /dev/null +++ b/.agent/harness/control_plane.py @@ -0,0 +1,404 @@ +"""Lightweight runtime control plane for managed instances. + +This module intentionally stays file-backed and stdlib-only: + - per-instance job queues under `.agent/runtime/instances//jobs/` + - per-instance transcripts under `.agent/runtime/instances//` + - one adapter-facing active-instance projection at `.agent/ACTIVE_INSTANCE.md` +""" +import datetime +import json +import os +import signal +import sys +import time +import uuid + +MEMORY_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "memory")) +if MEMORY_ROOT not in sys.path: + sys.path.insert(0, MEMORY_ROOT) + +from lesson_store import load_accepted_lessons +from render_lessons import append_lesson, load_lessons, render_lessons +from runtime import ( + AGENT_ROOT, + UnknownInstanceError, + get_instance, + instance_root, + list_instances, + load_registry, + resolve_runtime, + save_registry, + shared_runtime_context, +) + +ACTIVE_INSTANCE_DOC = os.path.join(AGENT_ROOT, "ACTIVE_INSTANCE.md") + + +def _now(): + return datetime.datetime.now().isoformat() + + +def _atomic_write_json(path, payload): + os.makedirs(os.path.dirname(path), exist_ok=True) + tmp = f"{path}.tmp" + with open(tmp, "w") as f: + json.dump(payload, f, 
indent=2, sort_keys=True) + f.write("\n") + os.replace(tmp, path) + + +def _atomic_write_text(path, content): + os.makedirs(os.path.dirname(path), exist_ok=True) + tmp = f"{path}.tmp" + with open(tmp, "w") as f: + f.write(content) + os.replace(tmp, path) + + +def jobs_root(instance_id): + return os.path.join(instance_root(instance_id), "jobs") + + +def queued_jobs_dir(instance_id): + return os.path.join(jobs_root(instance_id), "queued") + + +def running_jobs_dir(instance_id): + return os.path.join(jobs_root(instance_id), "running") + + +def completed_jobs_dir(instance_id): + return os.path.join(jobs_root(instance_id), "completed") + + +def failed_jobs_dir(instance_id): + return os.path.join(jobs_root(instance_id), "failed") + + +def worker_stdout_log(instance_id): + return os.path.join(instance_root(instance_id), "worker.stdout.log") + + +def worker_stderr_log(instance_id): + return os.path.join(instance_root(instance_id), "worker.stderr.log") + + +def worker_stop_flag(instance_id): + return os.path.join(instance_root(instance_id), "STOP") + + +def transcript_path(instance_id): + return os.path.join(instance_root(instance_id), "TRANSCRIPT.jsonl") + + +def ensure_instance_control_dirs(instance_id): + for path in ( + queued_jobs_dir(instance_id), + running_jobs_dir(instance_id), + completed_jobs_dir(instance_id), + failed_jobs_dir(instance_id), + ): + os.makedirs(path, exist_ok=True) + + +def pid_is_alive(pid): + if not pid: + return False + try: + os.kill(int(pid), 0) + except (OSError, ValueError, TypeError): + return False + return True + + +def job_counts(instance_id): + def _count(path): + if not os.path.isdir(path): + return 0 + return sum( + 1 + for name in os.listdir(path) + if name.endswith(".json") and os.path.isfile(os.path.join(path, name)) + ) + + return { + "queued": _count(queued_jobs_dir(instance_id)), + "running": _count(running_jobs_dir(instance_id)), + "completed": _count(completed_jobs_dir(instance_id)), + "failed": 
_count(failed_jobs_dir(instance_id)), + } + + +def queue_job(instance_id, prompt, source="instances.py", metadata=None): + ensure_instance_control_dirs(instance_id) + job_id = f"{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}-{uuid.uuid4().hex[:8]}" + payload = { + "id": job_id, + "instance_id": instance_id, + "prompt": prompt, + "source": source, + "metadata": metadata or {}, + "created_at": _now(), + "status": "queued", + } + path = os.path.join(queued_jobs_dir(instance_id), f"{job_id}.json") + _atomic_write_json(path, payload) + return payload + + +def _job_path(search_dir, job_id): + return os.path.join(search_dir, f"{job_id}.json") + + +def claim_next_job(instance_id): + ensure_instance_control_dirs(instance_id) + queued = queued_jobs_dir(instance_id) + running = running_jobs_dir(instance_id) + for name in sorted(os.listdir(queued)): + if not name.endswith(".json"): + continue + src = os.path.join(queued, name) + if not os.path.isfile(src): + continue + dst = os.path.join(running, name) + try: + os.replace(src, dst) + except OSError: + continue + try: + with open(dst) as f: + job = json.load(f) + except (OSError, json.JSONDecodeError) as exc: + failed_dir = failed_jobs_dir(instance_id) + os.makedirs(failed_dir, exist_ok=True) + quarantined = os.path.join(failed_dir, name) + try: + os.replace(dst, quarantined) + except OSError as move_exc: + sys.stderr.write( + f"[control_plane] WARNING: failed to quarantine corrupt job " + f"{dst!r}: {move_exc}\n" + ) + continue + sidecar_path = os.path.join(failed_dir, f"{name}.error.json") + sidecar = { + "error": str(exc), + "quarantined_at": datetime.datetime.utcnow().isoformat() + "Z", + "original_path": src, + } + try: + _atomic_write_json(sidecar_path, sidecar) + except OSError as side_exc: + sys.stderr.write( + f"[control_plane] WARNING: failed to write quarantine sidecar " + f"{sidecar_path!r}: {side_exc}\n" + ) + sys.stderr.write( + f"[control_plane] WARNING: quarantined corrupt queued job " + f"{name!r} for 
instance {instance_id!r}: {exc}\n" + ) + continue + job["status"] = "running" + job["started_at"] = _now() + _atomic_write_json(dst, job) + return job + return None + + +def load_job(instance_id, job_id): + for folder, status in ( + (queued_jobs_dir(instance_id), "queued"), + (running_jobs_dir(instance_id), "running"), + (completed_jobs_dir(instance_id), "completed"), + (failed_jobs_dir(instance_id), "failed"), + ): + path = _job_path(folder, job_id) + if not os.path.exists(path): + continue + with open(path) as f: + job = json.load(f) + job.setdefault("status", status) + return job + return None + + +def _append_transcript(instance_id, payload): + os.makedirs(os.path.dirname(transcript_path(instance_id)), exist_ok=True) + with open(transcript_path(instance_id), "a") as f: + f.write(json.dumps(payload) + "\n") + + +def finish_job(instance_id, job, ok, result=None, error=None): + running_path = _job_path(running_jobs_dir(instance_id), job["id"]) + target_dir = completed_jobs_dir(instance_id) if ok else failed_jobs_dir(instance_id) + finished = dict(job) + finished["status"] = "completed" if ok else "failed" + finished["finished_at"] = _now() + if ok: + finished["result"] = result + else: + finished["error"] = error + target_path = _job_path(target_dir, job["id"]) + _atomic_write_json(target_path, finished) + try: + if os.path.exists(running_path): + os.remove(running_path) + except OSError: + pass + transcript = { + "timestamp": finished["finished_at"], + "job_id": job["id"], + "instance_id": instance_id, + "prompt": job.get("prompt", ""), + "status": finished["status"], + } + if ok: + transcript["result"] = result + else: + transcript["error"] = error + _append_transcript(instance_id, transcript) + return finished + + +def wait_for_job(instance_id, job_id, timeout_sec=60, poll_interval=0.2): + deadline = time.time() + timeout_sec + while time.time() < deadline: + job = load_job(instance_id, job_id) + if job and job.get("status") in {"completed", "failed"}: + return 
job + time.sleep(poll_interval) + return load_job(instance_id, job_id) + + +def request_worker_stop(instance_id): + os.makedirs(instance_root(instance_id), exist_ok=True) + with open(worker_stop_flag(instance_id), "w") as f: + f.write(_now() + "\n") + + +def clear_worker_stop_request(instance_id): + try: + os.remove(worker_stop_flag(instance_id)) + except OSError: + pass + + +def terminate_worker(instance_id, pid, grace_sec=5): + if not pid_is_alive(pid): + return False + request_worker_stop(instance_id) + try: + os.kill(int(pid), signal.SIGTERM) + except OSError: + return False + + deadline = time.time() + grace_sec + while time.time() < deadline: + if not pid_is_alive(pid): + return True + time.sleep(0.1) + + try: + os.kill(int(pid), signal.SIGKILL) + except OSError: + pass + return not pid_is_alive(pid) + + +def refresh_active_instance_doc(registry=None): + reg = registry or load_registry() + runtime_ctx = resolve_runtime(prefer_env=False, registry=reg) + if runtime_ctx.instance_id: + body = f"""# Active Instance + +Active instance: `{runtime_ctx.instance_id}` +Role: `{runtime_ctx.instance_role}` +State: `{runtime_ctx.state}` + +## Active paths +- workspace: `{runtime_ctx.workspace_path}` +- review_queue: `{runtime_ctx.review_queue_path}` +- episodic: `{runtime_ctx.episodic_path}` +- semantic: `{runtime_ctx.semantic_path}` +- candidates: `{runtime_ctx.candidates_path}` +- role_file: `{runtime_ctx.role_path or '(none)'}` + +## Adapter rule +If your harness does not have the standalone context builder, treat these +paths as authoritative for this session. 
Prefer: + +```bash +python3 .agent/tools/show.py --instance {runtime_ctx.instance_id} +python3 .agent/tools/recall.py --instance {runtime_ctx.instance_id} "" +python3 .agent/tools/learn.py --instance {runtime_ctx.instance_id} "" --rationale "" +python3 .agent/tools/list_candidates.py --instance {runtime_ctx.instance_id} +``` +""" + else: + body = """# Active Instance + +Active instance: `shared` + +No managed instance is currently selected. Use the shared `.agent/memory/...` +paths, or start/swap a managed instance with: + +```bash +python3 .agent/tools/instances.py list +python3 .agent/tools/instances.py swap +``` +""" + _atomic_write_text(ACTIVE_INSTANCE_DOC, body) + return ACTIVE_INSTANCE_DOC + + +def _norm_claim(text): + import re + + normalized = re.sub(r"[^\w\s]", " ", (text or "").lower()) + return re.sub(r"\s+", " ", normalized).strip() + + +def merge_instance_semantic(instance_id): + runtime_ctx = resolve_runtime(instance_override=instance_id) + local_rows = [ + row + for row in load_lessons(runtime_ctx.semantic_path) + if row.get("status") == "accepted" + ] + if not local_rows: + return {"merged": 0, "skipped": 0, "lesson_ids": []} + + shared_ctx = shared_runtime_context() + visible_shared, _ = load_accepted_lessons(runtime_ctx=shared_ctx) + shared_ids = {row.get("id") for row in visible_shared if row.get("id")} + shared_claims = {_norm_claim(row.get("claim", "")) for row in visible_shared} + + merged = [] + skipped = 0 + for row in local_rows: + lesson_id = row.get("id") + claim_key = _norm_claim(row.get("claim", "")) + if (lesson_id and lesson_id in shared_ids) or claim_key in shared_claims: + skipped += 1 + continue + merged_row = dict(row) + merged_row["merged_from_instance"] = instance_id + merged_row["merged_at"] = _now() + append_lesson(merged_row, shared_ctx.semantic_path) + merged.append(merged_row.get("id")) + if lesson_id: + shared_ids.add(lesson_id) + if claim_key: + shared_claims.add(claim_key) + + if merged: + 
render_lessons(shared_ctx.semantic_path) + return {"merged": len(merged), "skipped": skipped, "lesson_ids": merged} + + +def running_instances(): + out = [] + for entry in list_instances(): + if entry.get("state") == "running" and pid_is_alive(entry.get("worker_pid")): + out.append(entry) + return out diff --git a/.agent/harness/hooks/_provenance.py b/.agent/harness/hooks/_provenance.py index 65a0b00..4398376 100644 --- a/.agent/harness/hooks/_provenance.py +++ b/.agent/harness/hooks/_provenance.py @@ -1,7 +1,8 @@ """Shared provenance helpers for episodic entries. Cached per-process.""" -import os, subprocess +import fcntl, json, os, subprocess AGENT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +EPISODIC_PATH = os.path.join(AGENT_ROOT, "memory/episodic/AGENT_LEARNINGS.jsonl") _CACHED_COMMIT = None _CACHED_RUN_ID = None @@ -72,3 +73,31 @@ def build_source(skill): "run_id": run_id(), "commit_sha": commit_sha(), } + + +def append_episodic_entry(entry, path=None): + """Atomically append a single JSON line to the episodic log. + + Acquires an advisory exclusive lock (``fcntl.flock``) on the open file + descriptor for the duration of the write so concurrent hook invocations + serialise. Creates the parent directory if missing. + + Locking model caveat: ``auto_dream.py`` rewrites the same file via + temp-file + ``os.replace``, which does NOT honour ``flock`` (different + inode after rename). Append-side locks against itself; the rewrite path + is atomic by rename. Worst case: a "lost update" of a single entry + written between the dream-cycle's snapshot read and its rename -- + acceptable for a best-effort log. 
+ """ + target = path or EPISODIC_PATH + os.makedirs(os.path.dirname(target), exist_ok=True) + line = json.dumps(entry) + "\n" + with open(target, "a") as f: + fcntl.flock(f.fileno(), fcntl.LOCK_EX) + try: + f.write(line) + f.flush() + os.fsync(f.fileno()) + finally: + fcntl.flock(f.fileno(), fcntl.LOCK_UN) + return entry diff --git a/.agent/harness/hooks/pre_tool_call.py b/.agent/harness/hooks/pre_tool_call.py index a01376e..76ed6d7 100644 --- a/.agent/harness/hooks/pre_tool_call.py +++ b/.agent/harness/hooks/pre_tool_call.py @@ -1,5 +1,5 @@ """Runs before every tool call. Enforces permissions and tool schemas.""" -import json, os +import json, os, re, sys ROOT = os.path.join(os.path.dirname(__file__), "..", "..") @@ -16,6 +16,20 @@ def _perms_text(): return open(p).read() if os.path.exists(p) else "" +def _match_any(patterns, text): + """Returns the first pattern that matches text via re.search, or None. + Skips (with warning to stderr) any regex that fails to compile so a single + bad schema entry doesn't take down the hook.""" + for pat in patterns or []: + try: + if re.search(pat, text): + return pat + except re.error as e: + print(f"pre_tool_call: skipping invalid regex {pat!r}: {e}", file=sys.stderr) + continue + return None + + def check_tool_call(tool_name, operation, args): """Returns (allowed, reason). 
allowed may be True, False, or 'approval_needed'.""" schema = _schema(tool_name) @@ -26,6 +40,15 @@ def check_tool_call(tool_name, operation, args): if target and target in blocked: return False, f"BLOCKED: {operation} to '{target}' is forbidden" + command = args.get("command") or "" + if isinstance(command, str) and command: + hit = _match_any(op.get("blocked_patterns", []), command) + if hit: + return False, f"BLOCKED: command matches forbidden pattern '{hit}'" + hit = _match_any(op.get("requires_approval_patterns", []), command) + if hit: + return "approval_needed", f"command matches pattern '{hit}'; requires human approval" + if op.get("requires_approval", False): return "approval_needed", f"{operation} requires human approval" diff --git a/.agent/harness/lesson_store.py b/.agent/harness/lesson_store.py new file mode 100644 index 0000000..3fce595 --- /dev/null +++ b/.agent/harness/lesson_store.py @@ -0,0 +1,257 @@ +"""Helpers for reading accepted lessons across shared + branch-local stores.""" +import json +import os +import re + +from runtime import resolve_runtime, semantic_dirs + +_STATUS_RE = re.compile(r"status=(\w+)") + + +def _norm_claim(claim): + text = re.sub(r"[^\w\s]", " ", (claim or "").lower()) + return re.sub(r"\s+", " ", text).strip() + + +def _semantic_sources(runtime_ctx): + sources = [("shared", runtime_ctx.shared_semantic_path)] + if runtime_ctx.semantic_path != runtime_ctx.shared_semantic_path: + sources.append((runtime_ctx.instance_id or "instance", runtime_ctx.semantic_path)) + return sources + + +def _load_structured(semantic_dir, label): + path = os.path.join(semantic_dir, "lessons.jsonl") + if not os.path.exists(path): + return [] + out = [] + for line in open(path): + line = line.strip() + if not line: + continue + try: + lesson = json.loads(line) + except json.JSONDecodeError: + continue + if lesson.get("status") != "accepted": + continue + lesson.setdefault("_source", f"{label}:lessons.jsonl") + lesson.setdefault("_semantic_dir", 
semantic_dir) + out.append(lesson) + return out + + +def _load_structured_all(semantic_dir, label): + """Like _load_structured but returns ALL lessons regardless of status. + + Provisional, retired, and superseded lessons must still be visible to + duplicate detection — otherwise a provisional lesson can be staged or + graduated again as a "new" candidate because dedup can't see it. + """ + path = os.path.join(semantic_dir, "lessons.jsonl") + if not os.path.exists(path): + return [] + out = [] + for line in open(path): + line = line.strip() + if not line: + continue + try: + lesson = json.loads(line) + except json.JSONDecodeError: + continue + lesson.setdefault("_source", f"{label}:lessons.jsonl") + lesson.setdefault("_semantic_dir", semantic_dir) + out.append(lesson) + return out + + +def _load_markdown_fallback_all(semantic_dir, label): + """Like _load_markdown_fallback but keeps provisional + superseded bullets. + + Mirrors _load_markdown_fallback's parser, minus the status filter — every + bullet in LESSONS.md becomes a dedup candidate, including [PROVISIONAL] + and ~~superseded~~ entries that the visible-corpus loader skips. + """ + path = os.path.join(semantic_dir, "LESSONS.md") + if not os.path.exists(path): + return [] + text = open(path).read() + out = [] + for line in text.splitlines(): + s = line.strip() + if not s.startswith("- ") or len(s) <= 2: + continue + status = "accepted" + if "" + for lesson in lessons + if lesson.get("claim", "").strip() + ) + + +def _load_all_for_dedup(runtime_ctx=None): + """Return ALL lessons (any status) across shared + branch-local stores. + + Counterpart to load_accepted_lessons used exclusively for duplicate + detection. Provisional, retired, and superseded lessons must be visible + here so a candidate matching one of them is correctly flagged as a + duplicate instead of slipping through as a "new" lesson. 
+ """ + runtime_ctx = runtime_ctx or resolve_runtime() + merged = {} + order = [] + + for label, semantic_dir in _semantic_sources(runtime_ctx): + structured = _load_structured_all(semantic_dir, label) + structured_keys = {_norm_claim(item.get("claim", "")) for item in structured} + merged_source = list(structured) + for lesson in _load_markdown_fallback_all(semantic_dir, label): + if _norm_claim(lesson.get("claim", "")) not in structured_keys: + merged_source.append(lesson) + + for lesson in merged_source: + key = _norm_claim(lesson.get("claim", "")) + if not key: + continue + if key not in merged: + order.append(key) + merged[key] = lesson + + return [merged[key] for key in order] + + +def render_dedup_text(runtime_ctx=None): + """Render every lesson — accepted, provisional, retired, superseded — as + bullets formatted for the heuristic duplicate check. + + Each bullet is annotated with `status=accepted` regardless of the + underlying lesson's true status. This is intentional: validate.extract_lesson_lines + treats non-accepted bullets as non-terminal and skips them, which would + re-introduce the very bug this function exists to fix. From dedup's + perspective every lesson is "real enough to block a duplicate," so we + present them as accepted to the substring/extract-line checker. The + bullet body is just the claim text (no conditions appended) because + validate._normalize compares the full bullet body — adding extras would + de-normalize a real-claim duplicate and let it slip through. The HTML + annotation preserves the true status for any caller that wants to inspect + it. 
+ """ + lessons = _load_all_for_dedup(runtime_ctx=runtime_ctx) + lines = [] + for lesson in lessons: + claim = (lesson.get("claim") or "").strip() + if not claim: + continue + true_status = lesson.get("status", "accepted") + lid = lesson.get("id") or "" + id_part = f" id={lid}" if lid else "" + lines.append( + f"- {claim} " + f"" + ) + return "\n".join(lines) diff --git a/.agent/harness/runtime.py b/.agent/harness/runtime.py new file mode 100644 index 0000000..1969b7d --- /dev/null +++ b/.agent/harness/runtime.py @@ -0,0 +1,647 @@ +"""Instance registry and runtime path helpers for the standalone harness. + +Managed instances always share the base `personal/` memory. Shared semantic +memory remains the global base corpus, while each managed instance can carry a +branch-local semantic/candidate overlay under `.agent/runtime/instances//`. +""" +from dataclasses import dataclass +import datetime +import json +import os +import re +import shutil + +AGENT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +SHARED_MEMORY_ROOT = os.path.join(AGENT_ROOT, "memory") +RUNTIME_ROOT = os.path.join(AGENT_ROOT, "runtime") +INSTANCES_ROOT = os.path.join(RUNTIME_ROOT, "instances") +REGISTRY_PATH = os.path.join(RUNTIME_ROOT, "instances.json") + +DEFAULT_ROLE = "generalist" +REVIEW_QUEUE_TEMPLATE = "# Review Queue\n\n_No pending candidates._\n" +WORKSPACE_TEMPLATE = """# Workspace (live task state) + +> Replace this template on your first real task. The dream cycle auto-archives +> this file after 2 days of inactivity - don't keep long-lived notes here. 
+ +## Current task +_none_ + +## Open files +- _(none)_ + +## Active hypotheses +- _(none)_ + +## Checkpoints +- [ ] _(none)_ + +## Next step +_(what would you do if interrupted and resumed tomorrow?)_ +""" + +_SLUG_RE = re.compile(r"[^a-z0-9]+") + + +class UnknownInstanceError(ValueError): + """Raised when a managed instance id cannot be resolved.""" + + +@dataclass(frozen=True) +class RuntimeContext: + mode: str + instance_id: str | None + instance_name: str | None + instance_role: str + parent_instance_id: str | None + state: str + workspace_path: str + episodic_path: str + candidates_path: str + semantic_path: str + shared_semantic_path: str + lessons_jsonl_path: str + lessons_md_path: str + review_queue_path: str + role_path: str + + def as_metadata(self): + return { + "mode": self.mode, + "instance_id": self.instance_id, + "instance_name": self.instance_name, + "instance_role": self.instance_role, + "parent_instance_id": self.parent_instance_id, + "state": self.state, + "workspace_path": self.workspace_path, + "episodic_path": self.episodic_path, + "candidates_path": self.candidates_path, + "semantic_path": self.semantic_path, + "shared_semantic_path": self.shared_semantic_path, + "lessons_jsonl_path": self.lessons_jsonl_path, + "lessons_md_path": self.lessons_md_path, + "review_queue_path": self.review_queue_path, + "role_path": self.role_path, + } + + +def _now(): + return datetime.datetime.now().isoformat() + + +def _pid_is_alive(pid): + if not pid: + return False + try: + os.kill(int(pid), 0) + except (OSError, TypeError, ValueError): + return False + return True + + +def _default_registry(): + return {"version": 1, "active_instance": None, "instances": []} + + +def _ensure_runtime_dirs(): + os.makedirs(INSTANCES_ROOT, exist_ok=True) + + +def _atomic_write_json(path, payload): + os.makedirs(os.path.dirname(path), exist_ok=True) + tmp = f"{path}.tmp" + with open(tmp, "w") as f: + json.dump(payload, f, indent=2, sort_keys=True) + f.write("\n") + 
os.replace(tmp, path) + + +def _slug(value): + s = _SLUG_RE.sub("-", (value or "").strip().lower()).strip("-") + return s or "agent" + + +def _generate_instance_id(name, registry): + base = f"{_slug(name)}-{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}" + used = {entry.get("id") for entry in registry.get("instances", [])} + candidate = base + suffix = 2 + while candidate in used: + candidate = f"{base}-{suffix}" + suffix += 1 + return candidate + + +def _copy_text_file(src, dst, default_text=""): + os.makedirs(os.path.dirname(dst), exist_ok=True) + if src and os.path.exists(src): + shutil.copyfile(src, dst) + return + with open(dst, "w") as f: + f.write(default_text) + + +def _copy_tree(src, dst): + if src and os.path.isdir(src): + shutil.copytree(src, dst, dirs_exist_ok=True) + return + os.makedirs(dst, exist_ok=True) + + +def _latest_source(episodic_path): + if not os.path.exists(episodic_path): + return {} + latest = {} + with open(episodic_path) as f: + for line in f: + line = line.strip() + if not line: + continue + try: + latest = json.loads(line).get("source", {}) or {} + except json.JSONDecodeError: + continue + return latest + + +def load_registry(): + if not os.path.exists(REGISTRY_PATH): + return _default_registry() + try: + with open(REGISTRY_PATH) as f: + data = json.load(f) + except (OSError, json.JSONDecodeError): + return _default_registry() + if not isinstance(data, dict): + return _default_registry() + data.setdefault("version", 1) + data.setdefault("active_instance", None) + data.setdefault("instances", []) + for entry in data["instances"]: + entry.setdefault("state", "stopped") + entry.setdefault("worker_pid", None) + entry.setdefault("worker_started_at", None) + entry.setdefault("worker_heartbeat_at", None) + entry.setdefault("last_job_id", None) + entry.setdefault("last_job_at", None) + return data + + +def save_registry(registry): + _ensure_runtime_dirs() + _atomic_write_json(REGISTRY_PATH, registry) + + +def list_instances(): + return 
load_registry().get("instances", []) + + +def get_instance(instance_id, registry=None): + if not instance_id: + return None + reg = registry or load_registry() + for entry in reg.get("instances", []): + if entry.get("id") == instance_id: + return entry + return None + + +def active_instance_id(registry=None, prefer_env=True): + explicit = os.environ.get("AGENT_INSTANCE_ID", "").strip() + if prefer_env and explicit: + return explicit + reg = registry or load_registry() + return reg.get("active_instance") or None + + +def instance_root(instance_id): + return os.path.join(INSTANCES_ROOT, instance_id) + + +def instance_memory_root(instance_id): + return os.path.join(instance_root(instance_id), "memory") + + +def shared_workspace_path(): + return os.path.join(SHARED_MEMORY_ROOT, "working", "WORKSPACE.md") + + +def shared_review_queue_path(): + return os.path.join(SHARED_MEMORY_ROOT, "working", "REVIEW_QUEUE.md") + + +def shared_episodic_path(): + return os.path.join(SHARED_MEMORY_ROOT, "episodic", "AGENT_LEARNINGS.jsonl") + + +def shared_candidates_path(): + return os.path.join(SHARED_MEMORY_ROOT, "candidates") + + +def shared_semantic_path(): + return os.path.join(SHARED_MEMORY_ROOT, "semantic") + + +def shared_lessons_jsonl_path(): + return os.path.join(shared_semantic_path(), "lessons.jsonl") + + +def shared_lessons_md_path(): + return os.path.join(shared_semantic_path(), "LESSONS.md") + + +def instance_workspace_path(instance_id): + return os.path.join(instance_memory_root(instance_id), "working", "WORKSPACE.md") + + +def instance_review_queue_path(instance_id): + return os.path.join(instance_memory_root(instance_id), "working", "REVIEW_QUEUE.md") + + +def instance_episodic_path(instance_id): + return os.path.join( + instance_memory_root(instance_id), "episodic", "AGENT_LEARNINGS.jsonl" + ) + + +def instance_candidates_path(instance_id): + return os.path.join(instance_memory_root(instance_id), "candidates") + + +def instance_semantic_path(instance_id): + return 
os.path.join(instance_memory_root(instance_id), "semantic") + + +def instance_lessons_jsonl_path(instance_id): + return os.path.join(instance_semantic_path(instance_id), "lessons.jsonl") + + +def instance_lessons_md_path(instance_id): + return os.path.join(instance_semantic_path(instance_id), "LESSONS.md") + + +def instance_role_path(instance_id): + return os.path.join(instance_root(instance_id), "ROLE.md") + + +def _shared_runtime_context(): + shared_semantic = shared_semantic_path() + return RuntimeContext( + mode="shared", + instance_id=None, + instance_name=None, + instance_role=DEFAULT_ROLE, + parent_instance_id=None, + state="shared", + workspace_path=shared_workspace_path(), + episodic_path=shared_episodic_path(), + candidates_path=shared_candidates_path(), + semantic_path=shared_semantic, + shared_semantic_path=shared_semantic, + lessons_jsonl_path=shared_lessons_jsonl_path(), + lessons_md_path=shared_lessons_md_path(), + review_queue_path=shared_review_queue_path(), + role_path="", + ) + + +def shared_runtime_context(): + return _shared_runtime_context() + + +def _runtime_context_from_entry(entry): + instance_id = entry.get("id") + role_path = instance_role_path(instance_id) + return RuntimeContext( + mode="managed", + instance_id=instance_id, + instance_name=entry.get("name") or instance_id, + instance_role=entry.get("role") or DEFAULT_ROLE, + parent_instance_id=entry.get("parent_instance_id"), + state=entry.get("state") or "stopped", + workspace_path=instance_workspace_path(instance_id), + episodic_path=instance_episodic_path(instance_id), + candidates_path=instance_candidates_path(instance_id), + semantic_path=instance_semantic_path(instance_id), + shared_semantic_path=shared_semantic_path(), + lessons_jsonl_path=instance_lessons_jsonl_path(instance_id), + lessons_md_path=instance_lessons_md_path(instance_id), + review_queue_path=instance_review_queue_path(instance_id), + role_path=role_path if os.path.exists(role_path) else "", + ) + + +def 
resolve_runtime(instance_override=None, allow_unknown=False, prefer_env=True, + registry=None): + reg = registry or load_registry() + selected = "" + if instance_override: + selected = instance_override.strip() + else: + selected = active_instance_id(registry=reg, prefer_env=prefer_env) or "" + + if not selected: + return _shared_runtime_context() + + entry = get_instance(selected, reg) + if entry: + return _runtime_context_from_entry(entry) + + if allow_unknown: + role_path = instance_role_path(selected) + return RuntimeContext( + mode="ephemeral", + instance_id=selected, + instance_name=selected, + instance_role=os.environ.get("AGENT_INSTANCE_ROLE", DEFAULT_ROLE), + parent_instance_id=None, + state="ephemeral", + workspace_path=instance_workspace_path(selected), + episodic_path=instance_episodic_path(selected), + candidates_path=instance_candidates_path(selected), + semantic_path=instance_semantic_path(selected), + shared_semantic_path=shared_semantic_path(), + lessons_jsonl_path=instance_lessons_jsonl_path(selected), + lessons_md_path=instance_lessons_md_path(selected), + review_queue_path=instance_review_queue_path(selected), + role_path=role_path if os.path.exists(role_path) else "", + ) + raise UnknownInstanceError(f"unknown instance: {selected}") + + +def current_instance(runtime_ctx=None, prefer_env=True): + runtime_ctx = runtime_ctx or resolve_runtime(prefer_env=prefer_env) + if runtime_ctx.mode == "shared": + return None + return { + "id": runtime_ctx.instance_id, + "name": runtime_ctx.instance_name or runtime_ctx.instance_id, + "role": runtime_ctx.instance_role, + "state": runtime_ctx.state, + "parent_instance_id": runtime_ctx.parent_instance_id, + } + + +def _entry_snapshot(entry): + if not entry: + return None + worker_pid = entry.get("worker_pid") + if not _pid_is_alive(worker_pid): + worker_pid = None + return { + "id": entry.get("id"), + "name": entry.get("name") or entry.get("id"), + "role": entry.get("role", DEFAULT_ROLE), + "state": 
entry.get("state", "stopped"), + "parent_instance_id": entry.get("parent_instance_id"), + "worker_pid": worker_pid, + "worker_started_at": entry.get("worker_started_at"), + "worker_heartbeat_at": entry.get("worker_heartbeat_at"), + "last_job_id": entry.get("last_job_id"), + "last_job_at": entry.get("last_job_at"), + } + + +def instance_status(instance_id, registry=None): + reg = registry or load_registry() + return _entry_snapshot(get_instance(instance_id, reg)) + + +def current_workspace_path(runtime_ctx=None, prefer_env=True): + runtime_ctx = runtime_ctx or resolve_runtime(prefer_env=prefer_env) + return runtime_ctx.workspace_path + + +def current_review_queue_path(runtime_ctx=None, prefer_env=True): + runtime_ctx = runtime_ctx or resolve_runtime(prefer_env=prefer_env) + return runtime_ctx.review_queue_path + + +def current_candidates_path(runtime_ctx=None, prefer_env=True): + runtime_ctx = runtime_ctx or resolve_runtime(prefer_env=prefer_env) + return runtime_ctx.candidates_path + + +def current_semantic_path(runtime_ctx=None, prefer_env=True): + runtime_ctx = runtime_ctx or resolve_runtime(prefer_env=prefer_env) + return runtime_ctx.semantic_path + + +def current_lessons_jsonl_path(runtime_ctx=None, prefer_env=True): + runtime_ctx = runtime_ctx or resolve_runtime(prefer_env=prefer_env) + return runtime_ctx.lessons_jsonl_path + + +def current_lessons_md_path(runtime_ctx=None, prefer_env=True): + runtime_ctx = runtime_ctx or resolve_runtime(prefer_env=prefer_env) + return runtime_ctx.lessons_md_path + + +def current_episodic_path(runtime_ctx=None, prefer_env=True): + runtime_ctx = runtime_ctx or resolve_runtime(prefer_env=prefer_env) + return runtime_ctx.episodic_path + + +def current_role_path(runtime_ctx=None, prefer_env=True): + runtime_ctx = runtime_ctx or resolve_runtime(prefer_env=prefer_env) + return runtime_ctx.role_path + + +def semantic_dirs(runtime_ctx=None, prefer_env=True): + runtime_ctx = runtime_ctx or resolve_runtime(prefer_env=prefer_env) + dirs 
= [runtime_ctx.shared_semantic_path] + if runtime_ctx.semantic_path != runtime_ctx.shared_semantic_path: + dirs.append(runtime_ctx.semantic_path) + return dirs + + +def _write_role_file(path, instance_id, role, parent_instance_id=None): + parent_line = parent_instance_id or "shared-base" + content = f"""# Instance Role + +Role: `{role}` +Instance ID: `{instance_id}` +Forked from: `{parent_line}` + +> This file is loaded only for the active instance. Tighten the role here if +> this branch should behave differently from the shared base agent. + +## Focus +- Operate as `{role}`. +""" + _copy_text_file(None, path, default_text=content) + + +def _seed_review_queue(path, src=None): + _copy_text_file(src, path, REVIEW_QUEUE_TEMPLATE) + + +def _seed_instance_memory(instance_id, workspace_src, episodic_src, semantic_src, + candidates_src, review_queue_src, role, parent=None): + _copy_text_file( + workspace_src, instance_workspace_path(instance_id), WORKSPACE_TEMPLATE + ) + _copy_text_file(episodic_src, instance_episodic_path(instance_id), "") + _copy_tree(semantic_src, instance_semantic_path(instance_id)) + _copy_tree(candidates_src, instance_candidates_path(instance_id)) + _seed_review_queue(instance_review_queue_path(instance_id), review_queue_src) + _write_role_file(instance_role_path(instance_id), instance_id, role, parent) + + +def _source_paths(source_instance_id=None): + if source_instance_id: + if not get_instance(source_instance_id): + raise UnknownInstanceError(f"unknown instance: {source_instance_id}") + return ( + instance_workspace_path(source_instance_id), + instance_episodic_path(source_instance_id), + instance_semantic_path(source_instance_id), + instance_candidates_path(source_instance_id), + instance_review_queue_path(source_instance_id), + ) + return ( + shared_workspace_path(), + shared_episodic_path(), + "", + "", + "", + ) + + +def create_instance(name=None, role=None, source_instance_id=None, activate=False): + reg = load_registry() + source_entry = 
get_instance(source_instance_id, reg) if source_instance_id else None + inherited_role = source_entry.get("role") if source_entry else None + role = (role or inherited_role or DEFAULT_ROLE).strip() or DEFAULT_ROLE + label = name or role or "agent" + instance_id = _generate_instance_id(label, reg) + ( + workspace_src, + episodic_src, + semantic_src, + candidates_src, + review_queue_src, + ) = _source_paths(source_instance_id) + + _seed_instance_memory( + instance_id, + workspace_src=workspace_src, + episodic_src=episodic_src, + semantic_src=semantic_src, + candidates_src=candidates_src, + review_queue_src=review_queue_src, + role=role, + parent=source_instance_id, + ) + + entry = { + "id": instance_id, + "name": name or instance_id, + "role": role, + "state": "stopped", + "worker_pid": None, + "worker_started_at": None, + "worker_heartbeat_at": None, + "last_job_id": None, + "last_job_at": None, + "created_at": _now(), + "parent_instance_id": source_instance_id, + "seed": { + "workspace": workspace_src, + "episodic": episodic_src, + "semantic": semantic_src or shared_semantic_path(), + "candidates": candidates_src or "", + "source": _latest_source(episodic_src), + }, + } + reg.setdefault("instances", []).append(entry) + save_registry(reg) + if activate: + return start_instance(instance_id) + return entry + + +def start_instance(instance_id): + reg = load_registry() + entry = get_instance(instance_id, reg) + if not entry: + raise UnknownInstanceError(f"unknown instance: {instance_id}") + now = _now() + entry["state"] = "running" + entry["started_at"] = entry.get("started_at") or now + reg["active_instance"] = instance_id + save_registry(reg) + return entry + + +def stop_instance(instance_id=None): + reg = load_registry() + target_id = instance_id or reg.get("active_instance") + if not target_id: + raise ValueError("no active instance") + entry = get_instance(target_id, reg) + if not entry: + raise UnknownInstanceError(f"unknown instance: {target_id}") + 
entry["state"] = "stopped" + entry["worker_pid"] = None + entry["worker_heartbeat_at"] = _now() + entry["stopped_at"] = _now() + if reg.get("active_instance") == target_id: + reg["active_instance"] = None + save_registry(reg) + return entry + + +def swap_instance(instance_id): + reg = load_registry() + entry = get_instance(instance_id, reg) + if not entry: + raise UnknownInstanceError(f"unknown instance: {instance_id}") + reg["active_instance"] = instance_id + save_registry(reg) + return entry + + +def mark_worker_started(instance_id, pid): + reg = load_registry() + entry = get_instance(instance_id, reg) + if not entry: + raise UnknownInstanceError(f"unknown instance: {instance_id}") + now = _now() + entry["state"] = "running" + entry["worker_pid"] = pid + entry["worker_started_at"] = now + entry["worker_heartbeat_at"] = now + reg["active_instance"] = instance_id + save_registry(reg) + return entry + + +def mark_worker_heartbeat(instance_id, pid=None, job_id=None): + reg = load_registry() + entry = get_instance(instance_id, reg) + if not entry: + raise UnknownInstanceError(f"unknown instance: {instance_id}") + entry["worker_heartbeat_at"] = _now() + if pid is not None: + entry["worker_pid"] = pid + if job_id: + entry["last_job_id"] = job_id + entry["last_job_at"] = entry["worker_heartbeat_at"] + save_registry(reg) + return entry + + +def mark_worker_stopped(instance_id): + reg = load_registry() + entry = get_instance(instance_id, reg) + if not entry: + raise UnknownInstanceError(f"unknown instance: {instance_id}") + entry["state"] = "stopped" + entry["worker_pid"] = None + entry["worker_heartbeat_at"] = _now() + entry["stopped_at"] = entry["worker_heartbeat_at"] + if reg.get("active_instance") == instance_id: + reg["active_instance"] = None + save_registry(reg) + return entry diff --git a/.agent/memory/promote.py b/.agent/memory/promote.py index b225734..dac9306 100644 --- a/.agent/memory/promote.py +++ b/.agent/memory/promote.py @@ -14,6 +14,29 @@ from validate 
import extract_lesson_lines, check_exact_duplicate +def _atomic_write_json(path, payload, indent=2, sort_keys=False): + """Write JSON atomically: write to temp file in the same directory, fsync, + then os.replace into place. os.replace is atomic on POSIX (Linux/macOS) and + on Windows for same-filesystem renames. Cleans up the temp file on failure + so partially-written sidecars never linger. + """ + tmp_path = path + ".tmp" + try: + with open(tmp_path, "w") as f: + json.dump(payload, f, indent=indent, sort_keys=sort_keys) + f.flush() + os.fsync(f.fileno()) + os.replace(tmp_path, path) + except BaseException: + # Best-effort cleanup; swallow errors from cleanup itself so we don't + # mask the original exception. + try: + os.remove(tmp_path) + except OSError: + pass + raise + + def cluster_and_extract(entries, threshold=0.3): """Cluster entries by content similarity, extract a pattern per cluster.""" clusters = content_cluster(entries, threshold=threshold) @@ -162,8 +185,7 @@ def write_candidates(patterns, candidates_dir): } staged_path = os.path.join(candidates_dir, f"{slug}.json") - with open(staged_path, "w") as f: - json.dump(candidate, f, indent=2) + _atomic_write_json(staged_path, candidate, indent=2) # The slug must live in exactly one lifecycle location. Remove any # prior copy in rejected/ or graduated/ (the latter only for diff --git a/.agent/memory/render_lessons.py b/.agent/memory/render_lessons.py index 04e4414..0cbf6bc 100644 --- a/.agent/memory/render_lessons.py +++ b/.agent/memory/render_lessons.py @@ -98,7 +98,34 @@ def append_lesson(lesson, semantic_dir): return path -def load_lessons(semantic_dir): +def load_lessons(semantic_dir, *, fp=None): + """Load lessons from semantic/lessons.jsonl. + + If `fp` is provided, read from that already-open file pointer instead of + opening a fresh fd. 
The caller is responsible for any locking on `fp`; + this lets render_lessons() read THROUGH the same locked descriptor it + obtained via _locked_jsonl(), so the read is covered by the flock. + + The fp must be seekable: we seek(0) before reading and seek(0, SEEK_END) + after, so subsequent appends via _append_lesson_unlocked() land at EOF + and other readers using the same fp see a consistent position. + """ + if fp is not None: + fp.seek(0) + out = [] + for line in fp: + line = line.strip() + if not line: + continue + try: + out.append(json.loads(line)) + except json.JSONDecodeError: + continue + # Reset to EOF so a subsequent _append_lesson_unlocked on the same + # fd writes past existing rows rather than at the read cursor. + fp.seek(0, os.SEEK_END) + return out + path = os.path.join(semantic_dir, LESSONS_JSONL) if not os.path.exists(path): return [] @@ -271,8 +298,11 @@ def render_lessons(semantic_dir): os.makedirs(semantic_dir, exist_ok=True) - with _locked_jsonl(jsonl_path): - lessons = _dedupe_by_id(load_lessons(semantic_dir)) + with _locked_jsonl(jsonl_path) as locked_fp: + # Read THROUGH the locked fd so concurrent appenders (which would + # block on the flock) can't slip a partial line in between our read + # and our write. Opening the path again here would defeat the lock. + lessons = _dedupe_by_id(load_lessons(semantic_dir, fp=locked_fp)) auto_section = _build_auto_section(lessons) if os.path.exists(md_path): diff --git a/.agent/memory/review_state.py b/.agent/memory/review_state.py index ec0a389..e3dd9ae 100644 --- a/.agent/memory/review_state.py +++ b/.agent/memory/review_state.py @@ -9,7 +9,37 @@ history so a candidate that keeps reappearing is visibly churning rather than looking novel each time. """ -import os, json, datetime, hashlib +import os, json, datetime, hashlib, re + + +_CANDIDATE_ID_RE = re.compile(r"^[a-zA-Z0-9_-]{1,128}$") + + +def _validate_candidate_id(candidate_id): + """Reject ids that could escape the candidates directory. 
+ + Strict allowlist: alphanumeric, underscore, hyphen; 1-128 chars. + Refuses path separators, '..', spaces, and anything else by construction. + """ + if not isinstance(candidate_id, str) or not _CANDIDATE_ID_RE.match(candidate_id): + raise ValueError(f"invalid candidate_id: {candidate_id!r}") + return candidate_id + + +def _ensure_within(path, base_dir): + """Defense-in-depth: after symlink resolution the path must stay under base_dir. + + Catches the case where a valid-looking id maps to a candidate file that has + been replaced with a symlink pointing outside the candidates tree. + """ + real_path = os.path.realpath(path) + real_base = os.path.realpath(base_dir) + # Compare with a trailing sep so '/foo/barbaz' isn't treated as inside '/foo/bar'. + if real_path != real_base and not real_path.startswith(real_base + os.sep): + raise ValueError( + f"candidate path escapes base dir: {real_path!r} not under {real_base!r}" + ) + return real_path def _now(): @@ -104,9 +134,11 @@ def mark_graduated(candidate_id, reviewer, rationale, candidates_dir, the structured lesson entry to semantic/lessons.jsonl and re-rendering LESSONS.md — this function only handles the candidate side. 
""" + candidate_id = _validate_candidate_id(candidate_id) src = os.path.join(candidates_dir, f"{candidate_id}.json") if not os.path.exists(src): raise FileNotFoundError(f"candidate not found: {candidate_id}") + _ensure_within(src, candidates_dir) cand = load_candidate(src) cand["status"] = "provisional" if provisional else "accepted" cand["accepted_at"] = _now() @@ -119,6 +151,7 @@ def mark_graduated(candidate_id, reviewer, rationale, candidates_dir, graduated_dir = os.path.join(candidates_dir, "graduated") os.makedirs(graduated_dir, exist_ok=True) dst = os.path.join(graduated_dir, f"{candidate_id}.json") + _ensure_within(dst, graduated_dir) save_candidate(cand, dst) os.remove(src) _refresh_queue(candidates_dir) @@ -137,9 +170,11 @@ def mark_rejected(candidate_id, reviewer, reason, candidates_dir, **extra_stamp) specific lessons are still present before re-staging, so unrelated LESSONS edits don't cause the candidate to churn. """ + candidate_id = _validate_candidate_id(candidate_id) src = os.path.join(candidates_dir, f"{candidate_id}.json") if not os.path.exists(src): raise FileNotFoundError(f"candidate not found: {candidate_id}") + _ensure_within(src, candidates_dir) cand = load_candidate(src) cand["status"] = "rejected" cand["rejection_count"] = cand.get("rejection_count", 0) + 1 @@ -149,6 +184,7 @@ def mark_rejected(candidate_id, reviewer, reason, candidates_dir, **extra_stamp) rejected_dir = os.path.join(candidates_dir, "rejected") os.makedirs(rejected_dir, exist_ok=True) dst = os.path.join(rejected_dir, f"{candidate_id}.json") + _ensure_within(dst, rejected_dir) save_candidate(cand, dst) os.remove(src) _refresh_queue(candidates_dir) @@ -157,14 +193,18 @@ def mark_rejected(candidate_id, reviewer, reason, candidates_dir, **extra_stamp) def mark_reopened(candidate_id, reviewer, candidates_dir): """Move a rejected candidate back to the staged pool with history intact.""" - src = os.path.join(candidates_dir, "rejected", f"{candidate_id}.json") + candidate_id = 
_validate_candidate_id(candidate_id) + rejected_dir = os.path.join(candidates_dir, "rejected") + src = os.path.join(rejected_dir, f"{candidate_id}.json") if not os.path.exists(src): raise FileNotFoundError(f"rejected candidate not found: {candidate_id}") + _ensure_within(src, rejected_dir) cand = load_candidate(src) cand["status"] = "staged" _touch(cand, "reopened", reviewer) dst = os.path.join(candidates_dir, f"{candidate_id}.json") + _ensure_within(dst, candidates_dir) save_candidate(cand, dst) os.remove(src) _refresh_queue(candidates_dir) diff --git a/.agent/tools/graduate.py b/.agent/tools/graduate.py index edaa713..27006d5 100644 --- a/.agent/tools/graduate.py +++ b/.agent/tools/graduate.py @@ -6,18 +6,74 @@ caught. The rationale is REQUIRED — rubber-stamped promotions are the whole failure mode this layer is designed to prevent. """ -import os, sys, json, argparse, hashlib, datetime +import os, sys, json, argparse, hashlib, datetime, re +from pathlib import Path BASE = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) sys.path.insert(0, os.path.join(BASE, "memory")) +sys.path.insert(0, os.path.join(BASE, "harness")) from review_state import mark_graduated from validate import heuristic_check from render_lessons import append_lesson, render_lessons, load_lessons +from lesson_store import render_dedup_text CANDIDATES = os.path.join(BASE, "memory/candidates") SEMANTIC = os.path.join(BASE, "memory/semantic") +# Candidate IDs come from sys.argv (CLI-supplied, untrusted). Without +# validation, a value like "../../etc/passwd" or "../some/path" gets joined +# straight into a filesystem path, letting a caller read or move JSON files +# outside the candidate directory. Use the same regex the sister +# review_state._validate_candidate_id uses so the two layers stay in sync. +_CANDIDATE_ID_RE = re.compile(r"^[a-zA-Z0-9_-]{1,128}$") + + +def _local_validate_candidate_id(cid): + """Reject anything that isn't a plain candidate id token. 
+ + Path separators, traversal segments, NULs, and the like are all + excluded by the alphanumeric/underscore/hyphen character class. + """ + if not isinstance(cid, str) or not _CANDIDATE_ID_RE.match(cid): + raise ValueError( + f"invalid candidate_id: {cid!r} (must match " + f"{_CANDIDATE_ID_RE.pattern})" + ) + return cid + + +# Prefer the sister implementation if it's been landed in review_state, so +# the two stay aligned (same regex, identical semantics). Fall back to the +# local copy when the import doesn't resolve — the sister agent may still +# be in flight, and we don't want graduate.py to break if review_state has +# nothing exported yet. The import is a best-effort lookup, not a hard +# dependency on a public name. +try: + from review_state import _validate_candidate_id as _imported_validator + validate_candidate_id = _imported_validator +except ImportError: + validate_candidate_id = _local_validate_candidate_id + + +def _safe_candidate_path(candidates_dir, cid): + """Build the candidate JSON path and assert it stays inside the dir. + + Even after the regex check, run a realpath containment check so any + surprise (symlink in candidates_dir, weird OS handling) still can't + let the resolved path escape the candidates root. + """ + validate_candidate_id(cid) + root = Path(candidates_dir).resolve() + cand_path = (Path(candidates_dir) / f"{cid}.json").resolve() + try: + cand_path.relative_to(root) + except ValueError: + raise ValueError( + f"candidate path escapes candidates dir: {cand_path} not under {root}" + ) + return str(cand_path) + def _lesson_id(candidate): """1:1 with the candidate's own id (claim + conditions, stable). @@ -48,7 +104,24 @@ def main(): help="ID of an existing lesson this replaces.") args = p.parse_args() - cand_path = os.path.join(CANDIDATES, f"{args.candidate_id}.json") + # Validate the CLI-supplied id at the boundary — before it's used in + # any path. A bad id (e.g. 
"../some/path") otherwise gets joined into + # CANDIDATES and lets the caller read or move JSON outside the + # candidate directory. + try: + validate_candidate_id(args.candidate_id) + except ValueError as e: + print(f"ERROR: {e}", file=sys.stderr) + sys.exit(4) + + # Build the candidate path with a realpath containment check on top of + # the regex, in case symlinks or unusual filesystems shift the resolved + # location outside CANDIDATES. + try: + cand_path = _safe_candidate_path(CANDIDATES, args.candidate_id) + except ValueError as e: + print(f"ERROR: {e}", file=sys.stderr) + sys.exit(4) if not os.path.exists(cand_path): print(f"ERROR: candidate not found: {args.candidate_id}", file=sys.stderr) sys.exit(1) @@ -126,6 +199,11 @@ def main(): file=sys.stderr, ) + # Re-validate before lifecycle movement. Defense in depth — even + # though the entry-point check has already run, mark_graduated + # joins the id into a path inside review_state, and we don't want + # to depend on the sister module having its own check landed yet. + validate_candidate_id(args.candidate_id) mark_graduated( args.candidate_id, retry_reviewer, retry_rationale, CANDIDATES, provisional=retry_provisional, @@ -135,7 +213,7 @@ def main(): return lessons_md = os.path.join(SEMANTIC, "LESSONS.md") - existing = open(lessons_md).read() if os.path.exists(lessons_md) else "" + existing = render_dedup_text() # When superseding, exclude the target lesson from the duplicate check — # replacing a lesson with structurally-better content but same wording # is exactly what supersession is for. @@ -175,7 +253,10 @@ def main(): append_lesson(lesson, SEMANTIC) md_path = render_lessons(SEMANTIC) - # Semantic writes survived — now move the candidate file. + # Semantic writes survived — now move the candidate file. Re-validate + # the id at this second path-construction site so an inadvertent + # mutation between the entry check and here can't slip through. 
+ validate_candidate_id(args.candidate_id) mark_graduated( args.candidate_id, args.reviewer, args.rationale, CANDIDATES, provisional=args.provisional, diff --git a/.agent/tools/instances.py b/.agent/tools/instances.py new file mode 100644 index 0000000..63883cf --- /dev/null +++ b/.agent/tools/instances.py @@ -0,0 +1,815 @@ +"""Manage standalone agent instances, workers, and prompt fanout. + +Usage examples: + python3 .agent/tools/instances.py list + python3 .agent/tools/instances.py up reviewer + python3 .agent/tools/instances.py branch skeptic + python3 .agent/tools/instances.py ask "review the plan" + python3 .agent/tools/instances.py compare --roles reviewer skeptic "stress this migration" + python3 .agent/tools/instances.py down reviewer + python3 .agent/tools/instances.py create reviewer --role reviewer --activate + python3 .agent/tools/instances.py fork reviewer-20260422103000 --name qa-branch + python3 .agent/tools/instances.py start reviewer-20260422103000 + python3 .agent/tools/instances.py send reviewer-20260422103000 "review the plan" --wait + python3 .agent/tools/instances.py fanout --instances a b "stress this migration" + python3 .agent/tools/instances.py swap reviewer-20260422103000 + python3 .agent/tools/instances.py stop reviewer-20260422103000 +""" +import argparse +import json +import os +import subprocess +import sys + +# fcntl is POSIX-only. The agentic-stack harness assumes a POSIX environment +# (macOS / Linux); Windows is not a supported runtime for instance workers, +# so we intentionally do not provide an msvcrt fallback here. 
+import fcntl + +BASE = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +REPO_ROOT = os.path.abspath(os.path.join(BASE, "..")) +sys.path.insert(0, os.path.join(BASE, "harness")) + +from control_plane import ( # noqa: E402 + ensure_instance_control_dirs, + job_counts, + merge_instance_semantic, + pid_is_alive, + queue_job, + refresh_active_instance_doc, + running_instances, + terminate_worker, + wait_for_job, + worker_stderr_log, + worker_stdout_log, +) +from runtime import ( # noqa: E402 + DEFAULT_ROLE, + UnknownInstanceError, + active_instance_id, + create_instance, + current_instance, + get_instance, + instance_root, + instance_status, + list_instances, + load_registry, + mark_worker_started, + mark_worker_stopped, + start_instance, + stop_instance, + swap_instance, +) + + +def _worker_script(): + return os.path.join(BASE, "harness", "instance_worker.py") + + +def _entry_created_key(entry): + return (entry.get("created_at") or "", entry.get("id") or "") + + +def _entry_state(entry): + pid = entry.get("worker_pid") + if entry.get("state") == "running" and pid and not pid_is_alive(pid): + return "stale" + return entry.get("state", "stopped") + + +def _queue_suffix(entry): + counts = job_counts(entry.get("id")) + return ( + f"queued={counts['queued']} running={counts['running']} " + f"done={counts['completed']} failed={counts['failed']}" + ) + + +def _is_live_worker(entry): + return bool(entry and entry.get("worker_pid") and pid_is_alive(entry.get("worker_pid"))) + + +def _entries_for_role(role): + role_key = (role or "").strip().lower() + if not role_key: + return [] + matches = [ + entry for entry in list_instances() + if (entry.get("role") or DEFAULT_ROLE).strip().lower() == role_key + ] + return sorted(matches, key=_entry_created_key, reverse=True) + + +def _entries_for_name(name): + name_key = (name or "").strip().lower() + if not name_key: + return [] + matches = [ + entry for entry in list_instances() + if (entry.get("name") or 
"").strip().lower() == name_key + ] + return sorted(matches, key=_entry_created_key, reverse=True) + + +def _pick_preferred(entries): + if not entries: + return None + active_id = active_instance_id(prefer_env=False) + for entry in entries: + if entry.get("id") == active_id: + return entry + for entry in entries: + if _is_live_worker(entry): + return entry + return entries[0] + + +def _resolve_instance_ref(ref=None, allow_missing=False): + if not ref or ref == "active": + entry = _ensure_target_instance(None) + return entry + + entry = get_instance(ref) + if entry: + return entry + + matches = _entries_for_name(ref) + if not matches: + matches = _entries_for_role(ref) + entry = _pick_preferred(matches) + if entry: + return entry + if allow_missing: + return None + raise UnknownInstanceError(f"unknown instance or role: {ref}") + + +def _ensure_running(entry): + if _is_live_worker(entry): + swap_instance(entry.get("id")) + refresh_active_instance_doc() + return entry, False, False + started, created = _spawn_worker(entry.get("id")) + return started, created, True + + +def _queue_and_maybe_wait(entry, prompt, wait=True, timeout=60.0, fmt="human", + source="instances.py send", metadata=None): + if not prompt: + raise ValueError("prompt required") + job = queue_job( + entry.get("id"), + prompt, + source=source, + metadata=metadata or {}, + ) + if not wait: + if fmt == "json": + print(json.dumps(job, indent=2)) + else: + print(f"queued {job['id']} -> {entry.get('id')}") + return 0 + + result = wait_for_job(entry.get("id"), job["id"], timeout_sec=timeout) + if not result or result.get("status") not in {"completed", "failed"}: + raise ValueError(f"timed out waiting for job: {job['id']}") + if fmt == "json": + print(json.dumps(result, indent=2)) + elif result.get("status") == "completed": + print(result.get("result", "")) + else: + print(result.get("error", ""), file=sys.stderr) + return 1 + return 0 + + +def _print_instance(entry, active_id=None): + marker = "*" if 
active_id and entry.get("id") == active_id else " " + parent = entry.get("parent_instance_id") or "-" + state = _entry_state(entry) + pid = entry.get("worker_pid") + pid_text = f" pid={pid}" if pid and pid_is_alive(pid) else "" + print( + f"{marker} {entry.get('id')} " + f"state={state}{pid_text} " + f"role={entry.get('role', DEFAULT_ROLE)} " + f"parent={parent} " + f"{_queue_suffix(entry)}" + ) + + +def _spawn_lock_path(instance_id): + """Path to the per-instance spawn lock sentinel file.""" + return os.path.join(instance_root(instance_id), "spawn.lock") + + +def _open_spawn_lock(instance_id): + """Create (if needed) and open the per-instance spawn lock with 0o600 perms. + + Ensures the parent runtime dir exists, then opens (or creates) the sentinel + file with restrictive permissions. Returns an open file descriptor that the + caller is responsible for closing. + """ + lock_path = _spawn_lock_path(instance_id) + parent = os.path.dirname(lock_path) + os.makedirs(parent, exist_ok=True) + # O_CREAT with mode 0o600 establishes restrictive perms on first creation. + # If the file already exists, os.open won't widen perms — but enforce + # 0o600 after open in case a prior process created it with different mode. + fd = os.open(lock_path, os.O_RDWR | os.O_CREAT, 0o600) + try: + os.fchmod(fd, 0o600) + except OSError: + # Best-effort: chmod can fail on exotic filesystems; the lock still works. + pass + return fd + + +def _spawn_worker(instance_id): + entry = get_instance(instance_id) + if not entry: + raise UnknownInstanceError(f"unknown instance: {instance_id}") + # Fast path: already running. We re-check this inside the lock below to + # close the TOCTOU window, but doing it here avoids the lock cost for the + # common already-up case. + if entry.get("worker_pid") and pid_is_alive(entry.get("worker_pid")): + return entry, False + + # Serialize the spawn check + spawn + mark-started sequence with a per- + # instance file lock. 
Two concurrent `start` calls would otherwise both + # observe no live worker and both spawn a daemon for the same queue. + lock_fd = _open_spawn_lock(instance_id) + try: + try: + fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + except BlockingIOError: + # Another spawn is in flight for this instance. Bail out fast and + # return whatever the registry currently shows; the in-flight + # spawn will publish the live worker_pid shortly. + print( + f"another spawn in flight for {instance_id}; skipping", + file=sys.stderr, + ) + current = get_instance(instance_id) or entry + return current, False + + # Re-check liveness now that we hold the lock. Use os.kill(pid, 0) + # semantics via pid_is_alive: a stale-but-non-None pid (worker died + # without mark_worker_stopped) must be treated as not-running so we + # respawn rather than silently keeping the dead pid. + current = get_instance(instance_id) or entry + pid = current.get("worker_pid") + if pid and pid_is_alive(pid): + return current, False + + ensure_instance_control_dirs(instance_id) + stdout = open(worker_stdout_log(instance_id), "a") + stderr = open(worker_stderr_log(instance_id), "a") + try: + proc = subprocess.Popen( + [sys.executable, _worker_script(), instance_id], + cwd=REPO_ROOT, + stdout=stdout, + stderr=stderr, + start_new_session=True, + close_fds=True, + ) + finally: + stdout.close() + stderr.close() + entry = mark_worker_started(instance_id, proc.pid) + refresh_active_instance_doc() + return entry, True + finally: + try: + fcntl.flock(lock_fd, fcntl.LOCK_UN) + except OSError: + pass + os.close(lock_fd) + + +def _ensure_target_instance(instance_id=None): + target = instance_id or active_instance_id(prefer_env=False) + if not target: + raise ValueError("no active instance") + entry = get_instance(target) + if not entry: + raise UnknownInstanceError(f"unknown instance: {target}") + return entry + + +def cmd_list(_args): + refresh_active_instance_doc() + active_id = active_instance_id(prefer_env=False) + 
entries = list_instances() + if not entries: + print("no managed instances") + return 0 + for entry in entries: + _print_instance(entry, active_id=active_id) + return 0 + + +def cmd_status(_args): + refresh_active_instance_doc() + active = current_instance(prefer_env=False) + if not active: + print("active_instance: none") + else: + snap = instance_status(active.get("id")) + print(f"active_instance: {active.get('id')}") + print(f"role: {active.get('role', DEFAULT_ROLE)}") + print(f"state: {_entry_state(snap)}") + if snap.get("worker_pid") and pid_is_alive(snap.get("worker_pid")): + print(f"worker_pid: {snap.get('worker_pid')}") + parent = active.get("parent_instance_id") + if parent: + print(f"parent_instance_id: {parent}") + print("") + return cmd_list(_args) + + +def cmd_create(args): + entry = create_instance( + name=args.name, + role=args.role, + source_instance_id=None, + activate=False, + ) + print(f"created {entry.get('id')} role={entry.get('role')}") + if args.activate: + started, new = _spawn_worker(entry.get("id")) + print( + f"started {started.get('id')} pid={started.get('worker_pid')}" + + ("" if new else " (already running)") + ) + return 0 + + +def cmd_fork(args): + source = args.source or active_instance_id(prefer_env=False) + if not source: + raise ValueError("fork requires a source instance or an active instance") + entry = create_instance( + name=args.name, + role=args.role, + source_instance_id=source, + activate=False, + ) + print( + f"forked {entry.get('id')} from {source} " + f"role={entry.get('role')}" + ) + if args.activate: + started, new = _spawn_worker(entry.get("id")) + print( + f"started {started.get('id')} pid={started.get('worker_pid')}" + + ("" if new else " (already running)") + ) + return 0 + + +def cmd_start(args): + entry = _resolve_instance_ref(args.instance_id) + entry, created = _spawn_worker(entry.get("id")) + print( + f"started {entry.get('id')} pid={entry.get('worker_pid')}" + + ("" if created else " (already running)") + ) 
+ return 0 + + +def cmd_swap(args): + entry = _resolve_instance_ref(args.instance_id) + entry = swap_instance(entry.get("id")) + refresh_active_instance_doc() + state = _entry_state(entry) + print(f"active_instance: {entry.get('id')}") + print(f"state: {state}") + return 0 + + +def cmd_stop(args): + entry = _resolve_instance_ref(args.instance_id) if args.instance_id else _ensure_target_instance(None) + pid = entry.get("worker_pid") + was_running = bool(pid and pid_is_alive(pid)) + if was_running: + terminate_worker(entry.get("id"), pid) + else: + mark_worker_stopped(entry.get("id")) + stopped = stop_instance(entry.get("id")) + refresh_active_instance_doc() + + merge_summary = {"merged": 0, "skipped": 0, "lesson_ids": []} + if not args.no_merge: + merge_summary = merge_instance_semantic(entry.get("id")) + + print(f"stopped {stopped.get('id')}") + if not args.no_merge: + print( + f"merged_lessons: {merge_summary['merged']} " + f"(skipped={merge_summary['skipped']})" + ) + return 0 + + +def _prompt_from_args(args): + return " ".join(args.prompt).strip() or sys.stdin.read().strip() + + +def cmd_send(args): + entry = _resolve_instance_ref(args.instance) if args.instance else _ensure_target_instance(None) + if not _is_live_worker(entry): + raise ValueError(f"instance is not running: {entry.get('id')}") + prompt = _prompt_from_args(args) + return _queue_and_maybe_wait( + entry, + prompt, + wait=args.wait, + timeout=args.timeout, + fmt=args.format, + source="instances.py send", + metadata={"wait": args.wait}, + ) + + +def _resolve_fanout_targets(instance_ids): + if instance_ids: + targets = [_resolve_instance_ref(instance_id) for instance_id in instance_ids] + return targets + return running_instances() + + +def cmd_fanout(args): + targets = _resolve_fanout_targets(args.instances) + if not targets: + raise ValueError("fanout requires at least one target instance") + prompt = _prompt_from_args(args) + if not prompt: + raise ValueError("fanout requires a prompt") + + queued 
= [] + for entry in targets: + if not (entry.get("worker_pid") and pid_is_alive(entry.get("worker_pid"))): + raise ValueError(f"instance is not running: {entry.get('id')}") + job = queue_job( + entry.get("id"), + prompt, + source="instances.py fanout", + metadata={"fanout": True}, + ) + queued.append({"instance_id": entry.get("id"), "job_id": job["id"]}) + + if not args.wait: + if args.format == "json": + print(json.dumps({"queued": queued}, indent=2)) + else: + for item in queued: + print(f"queued {item['job_id']} -> {item['instance_id']}") + return 0 + + results = [] + exit_code = 0 + for item in queued: + result = wait_for_job( + item["instance_id"], item["job_id"], timeout_sec=args.timeout + ) + if not result or result.get("status") not in {"completed", "failed"}: + exit_code = 1 + results.append( + { + "instance_id": item["instance_id"], + "job_id": item["job_id"], + "status": "timeout", + } + ) + continue + results.append(result) + if result.get("status") != "completed": + exit_code = 1 + + if args.format == "json": + print(json.dumps({"results": results}, indent=2)) + else: + for result in results: + instance_id = result.get("instance_id", "?") + status = result.get("status", "?") + print(f"== {instance_id} [{status}] ==") + if status == "completed": + print(result.get("result", "")) + else: + print(result.get("error", "")) + print("") + return exit_code + + +def cmd_merge(args): + entry = _resolve_instance_ref(args.instance_id) if args.instance_id else _ensure_target_instance(None) + summary = merge_instance_semantic(entry.get("id")) + print( + f"merged_lessons: {summary['merged']} " + f"(skipped={summary['skipped']})" + ) + return 0 + + +def cmd_up(args): + role = (args.role or DEFAULT_ROLE).strip() or DEFAULT_ROLE + entry = _pick_preferred(_entries_for_role(role)) + created = False + worker_started = False + if not entry: + entry = create_instance(name=args.name or role, role=role, activate=False) + created = True + entry, _already_running, worker_started 
= _ensure_running(entry) + print(f"role: {role}") + print(f"instance: {entry.get('id')}") + if created: + print("action: created") + elif worker_started: + print("action: started") + else: + print("action: selected") + return 0 + + +def cmd_branch(args): + source = None + if args.source: + source = _resolve_instance_ref(args.source, allow_missing=False) + else: + try: + source = _ensure_target_instance(None) + except ValueError: + source = None + + role = (args.role or DEFAULT_ROLE).strip() or DEFAULT_ROLE + entry = create_instance( + name=args.name or role, + role=role, + source_instance_id=source.get("id") if source else None, + activate=False, + ) + entry, _created, _started = _ensure_running(entry) + parent = source.get("id") if source else "shared" + print(f"role: {role}") + print(f"instance: {entry.get('id')}") + print(f"forked_from: {parent}") + return 0 + + +def cmd_ask(args): + if args.role and args.instance: + raise ValueError("ask accepts either --role or --instance, not both") + if args.role: + entry = _pick_preferred(_entries_for_role(args.role)) + if not entry: + entry = create_instance(name=args.role, role=args.role, activate=False) + elif args.instance: + entry = _resolve_instance_ref(args.instance) + else: + entry = _ensure_target_instance(None) + entry, _created, _started = _ensure_running(entry) + prompt = _prompt_from_args(args) + return _queue_and_maybe_wait( + entry, + prompt, + wait=not args.no_wait, + timeout=args.timeout, + fmt=args.format, + source="instances.py ask", + metadata={"wait": not args.no_wait, "role": args.role or ""}, + ) + + +def _ensure_role_targets(roles): + targets = [] + try: + source = _ensure_target_instance(None) + except ValueError: + source = None + for role in roles: + entry = _pick_preferred(_entries_for_role(role)) + if not entry: + entry = create_instance( + name=role, + role=role, + source_instance_id=source.get("id") if source else None, + activate=False, + ) + entry, _created, _started = _ensure_running(entry) 
+ targets.append(entry) + return targets + + +def cmd_compare(args): + if not args.roles: + raise ValueError("compare requires at least one role") + prompt = _prompt_from_args(args) + if not prompt: + raise ValueError("compare requires a prompt") + targets = _ensure_role_targets(args.roles) + queued = [] + for entry in targets: + job = queue_job( + entry.get("id"), + prompt, + source="instances.py compare", + metadata={"compare": True, "roles": args.roles}, + ) + queued.append({"instance_id": entry.get("id"), "job_id": job["id"]}) + + results = [] + exit_code = 0 + for item in queued: + result = wait_for_job( + item["instance_id"], item["job_id"], timeout_sec=args.timeout + ) + if not result or result.get("status") not in {"completed", "failed"}: + exit_code = 1 + results.append( + { + "instance_id": item["instance_id"], + "job_id": item["job_id"], + "status": "timeout", + } + ) + continue + results.append(result) + if result.get("status") != "completed": + exit_code = 1 + + if args.format == "json": + print(json.dumps({"results": results}, indent=2)) + else: + for result in results: + print(f"== {result.get('instance_id', '?')} [{result.get('status', '?')}] ==") + if result.get("status") == "completed": + print(result.get("result", "")) + else: + print(result.get("error", "")) + print("") + return exit_code + + +def cmd_down(args): + args.instance_id = args.target + return cmd_stop(args) + + +def build_parser(): + parser = argparse.ArgumentParser( + description="Manage standalone agent instances." 
+ ) + sub = parser.add_subparsers(dest="command", required=True) + + p = sub.add_parser("list", help="List managed instances.") + p.set_defaults(func=cmd_list) + + p = sub.add_parser("status", help="Show the active instance and registry.") + p.set_defaults(func=cmd_status) + + p = sub.add_parser( + "up", + help="Role-first shortcut: create/start/select the latest instance for a role.", + ) + p.add_argument("role", help="Role label, for example reviewer or qa.") + p.add_argument("--name", help="Optional human-readable instance name.") + p.set_defaults(func=cmd_up) + + p = sub.add_parser( + "branch", + help="Role-first shortcut: fork from the active route into a new role branch and start it.", + ) + p.add_argument("role", help="Role label for the new branch.") + p.add_argument( + "--source", + help="Optional source instance id, name, or role. Defaults to the active route.", + ) + p.add_argument("--name", help="Optional human-readable instance name.") + p.set_defaults(func=cmd_branch) + + p = sub.add_parser( + "ask", + help="Role-first shortcut: send a prompt to the active or named role and wait.", + ) + p.add_argument("prompt", nargs="*") + p.add_argument("--role", help="Target a role and auto-start it if needed.") + p.add_argument( + "--instance", + help="Target an instance id, name, or role instead of the active route.", + ) + p.add_argument("--no-wait", action="store_true", help="Queue the job and return immediately.") + p.add_argument("--timeout", type=float, default=60.0) + p.add_argument("--format", default="human", choices=["human", "json"]) + p.set_defaults(func=cmd_ask) + + p = sub.add_parser( + "compare", + help="Role-first shortcut: ensure roles are running, then fan out one prompt to all of them.", + ) + p.add_argument("prompt", nargs="*") + p.add_argument("--roles", nargs="+", required=True, help="Role labels to compare.") + p.add_argument("--timeout", type=float, default=60.0) + p.add_argument("--format", default="human", choices=["human", "json"]) + 
p.set_defaults(func=cmd_compare) + + p = sub.add_parser( + "down", + help="Role-first shortcut: stop the active route or a named role/instance.", + ) + p.add_argument("target", nargs="?", help="Optional instance id, name, or role.") + p.add_argument( + "--no-merge", + action="store_true", + help="Do not merge accepted local lessons back into shared semantic memory.", + ) + p.set_defaults(func=cmd_down) + + p = sub.add_parser("create", help="Create a new instance from shared memory.") + p.add_argument("name", nargs="?", help="Human-readable instance name.") + p.add_argument("--role", default=DEFAULT_ROLE, help="Role label for the instance.") + p.add_argument( + "--activate", action="store_true", help="Start the instance worker immediately." + ) + p.set_defaults(func=cmd_create) + + p = sub.add_parser( + "fork", + help="Fork an instance's working/episodic state into a new instance.", + ) + p.add_argument( + "source", + nargs="?", + help="Source instance id. Defaults to the active instance.", + ) + p.add_argument("--name", help="Human-readable instance name.") + p.add_argument( + "--role", + default=None, + help="Role label for the fork. Defaults to the source instance's role.", + ) + p.add_argument( + "--activate", action="store_true", help="Start the new fork's worker immediately." 
+ ) + p.set_defaults(func=cmd_fork) + + p = sub.add_parser("start", help="Start an instance worker and select it as active.") + p.add_argument("instance_id") + p.set_defaults(func=cmd_start) + + p = sub.add_parser("swap", help="Swap the active route to a different instance.") + p.add_argument("instance_id") + p.set_defaults(func=cmd_swap) + + p = sub.add_parser("stop", help="Stop an instance worker.") + p.add_argument("instance_id", nargs="?") + p.add_argument( + "--no-merge", + action="store_true", + help="Do not merge accepted local lessons back into shared semantic memory.", + ) + p.set_defaults(func=cmd_stop) + + p = sub.add_parser("send", help="Queue a prompt to a running instance.") + p.add_argument("prompt", nargs="*") + p.add_argument( + "--instance", + help="Target a specific instance id. Defaults to the active route.", + ) + p.add_argument("--wait", action="store_true", help="Wait for the queued job to finish.") + p.add_argument("--timeout", type=float, default=60.0) + p.add_argument("--format", default="human", choices=["human", "json"]) + p.set_defaults(func=cmd_send) + + p = sub.add_parser("fanout", help="Queue the same prompt to multiple running instances.") + p.add_argument("prompt", nargs="*") + p.add_argument( + "--instances", + nargs="*", + default=None, + help="Explicit instance ids. 
Defaults to all running instances.", + ) + p.add_argument("--wait", action="store_true", help="Wait for all jobs to finish.") + p.add_argument("--timeout", type=float, default=60.0) + p.add_argument("--format", default="human", choices=["human", "json"]) + p.set_defaults(func=cmd_fanout) + + p = sub.add_parser("merge", help="Merge accepted local lessons into shared semantic memory.") + p.add_argument("instance_id", nargs="?") + p.set_defaults(func=cmd_merge) + + return parser + + +def main(argv=None): + parser = build_parser() + args = parser.parse_args(argv) + try: + return args.func(args) + except (UnknownInstanceError, ValueError) as e: + parser.exit(1, f"error: {e}\n") + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.agent/tools/learn.py b/.agent/tools/learn.py index 0a63b07..6524f1e 100644 --- a/.agent/tools/learn.py +++ b/.agent/tools/learn.py @@ -55,30 +55,113 @@ def _lesson_already_appended(cid): return False +def _find_prior(cid): + """Look up any prior record for this id across lifecycle subdirs. + + Returns (prev_dict, location) where location is one of + 'staged' | 'rejected' | 'graduated' | None. Mirrors promote._find_prior + so the manual path preserves history the same way auto_dream does. 
+ """ + staged_path = os.path.join(CANDIDATES, f"{cid}.json") + if os.path.isfile(staged_path): + try: + with open(staged_path) as f: + return json.load(f), "staged" + except (OSError, json.JSONDecodeError): + pass + for sub in ("rejected", "graduated"): + path = os.path.join(CANDIDATES, sub, f"{cid}.json") + if os.path.isfile(path): + try: + with open(path) as f: + return json.load(f), sub + except (OSError, json.JSONDecodeError): + pass + return {}, None + + def stage(claim, conditions, source="learn", importance=7): os.makedirs(CANDIDATES, exist_ok=True) cid = pattern_id(claim, conditions) now = datetime.datetime.now(datetime.timezone.utc).isoformat() - candidate = { - "id": cid, - "key": f"manual_{cid[:6]}", - "name": f"manual_{cid[:6]}", - "claim": claim, - "conditions": sorted(conditions), - "evidence_ids": [now], - "cluster_size": 1, - # Manual lessons skip the promotion threshold — they're author-attested, - # not pattern-extracted. Set salience high enough that retrieval ranks - # them alongside auto-promoted entries. - "canonical_salience": 8.0, - "staged_at": now, - "status": "staged", - "decisions": [{"ts": now, "action": "staged", "reviewer": source}], - "rejection_count": 0, - } + + # Look for a prior record so we don't erase rejection_count or decision + # history on re-teach. Same id (claim + conditions) must map to the same + # lifecycle record regardless of which dir it currently lives in. + prev, prev_loc = _find_prior(cid) + + # Already-graduated lessons are terminal from the manual path's POV — + # the lesson is already in lessons.jsonl. Re-staging would only create + # work the heuristic prefilter would reject on duplicate grounds, AND + # would silently overwrite the accepted record's decision history. + # Refuse loudly so the user knows to use a different tool to dispute. 
+ if prev_loc == "graduated" and prev.get("status") != "provisional": + print( + f"ERROR: candidate {cid} already graduated; use a different " + f"command to dispute (claim already accepted as a lesson).", + file=sys.stderr) + sys.exit(3) + + # Re-stage path: preserve decision history + rejection_count. New + # decision entry distinguishes the re-stage from the original. + if prev_loc in ("staged", "rejected"): + decisions = list(prev.get("decisions", [])) + action = "re-staged" if prev_loc == "rejected" else "staged" + decisions.append({ + "ts": now, + "action": action, + "reviewer": source, + "notes": f"re-staged at {now}", + }) + candidate = { + "id": cid, + "key": prev.get("key", f"manual_{cid[:6]}"), + "name": prev.get("name", f"manual_{cid[:6]}"), + "claim": claim, + "conditions": sorted(conditions), + # Append a fresh evidence id so re-teach is visible in the log. + "evidence_ids": list(prev.get("evidence_ids", [])) + [now], + "cluster_size": prev.get("cluster_size", 1), + "canonical_salience": prev.get("canonical_salience", 8.0), + # Preserve original staged_at so priority + backlog age signals + # stay meaningful across re-teaches. + "staged_at": prev.get("staged_at", now), + "status": "staged", + "decisions": decisions, + "rejection_count": prev.get("rejection_count", 0), + } + else: + candidate = { + "id": cid, + "key": f"manual_{cid[:6]}", + "name": f"manual_{cid[:6]}", + "claim": claim, + "conditions": sorted(conditions), + "evidence_ids": [now], + "cluster_size": 1, + # Manual lessons skip the promotion threshold — they're author-attested, + # not pattern-extracted. Set salience high enough that retrieval ranks + # them alongside auto-promoted entries. 
+ "canonical_salience": 8.0, + "staged_at": now, + "status": "staged", + "decisions": [{"ts": now, "action": "staged", "reviewer": source}], + "rejection_count": 0, + } + path = os.path.join(CANDIDATES, f"{cid}.json") with open(path, "w") as f: json.dump(candidate, f, indent=2) + + # The id must live in exactly one lifecycle location. If we just pulled + # the prior from rejected/ (or provisional graduated/), remove the old + # copy now that the staged file exists. + if prev_loc in ("rejected", "graduated"): + try: + os.remove(os.path.join(CANDIDATES, prev_loc, f"{cid}.json")) + except OSError: + pass + return cid, path diff --git a/.agent/tools/skill_loader.py b/.agent/tools/skill_loader.py index 2995335..2735c00 100644 --- a/.agent/tools/skill_loader.py +++ b/.agent/tools/skill_loader.py @@ -1,11 +1,31 @@ """Progressive disclosure: manifest always, full SKILL.md only when triggered.""" -import json, os +import json +import os +import re +import sys +from pathlib import Path ROOT = os.path.join(os.path.dirname(__file__), "..") SKILLS_DIR = os.path.join(ROOT, "skills") MANIFEST = os.path.join(SKILLS_DIR, "_manifest.jsonl") FEATURES_PATH = os.path.join(ROOT, "memory", ".features.json") +# Skill names: alphanumerics, underscore, hyphen only. No path separators or dots. +_SAFE_NAME_RE = re.compile(r"[a-zA-Z0-9_-]+") + + +def _within(root, candidate): + """Resolve both paths and assert candidate is contained within root. + + Returns the resolved Path on success. Raises ValueError otherwise. + """ + root_p = Path(root).resolve() + cand_p = Path(candidate).resolve() + # Path.is_relative_to is available in Python 3.9+; project runs on 3.14. 
+ if not cand_p.is_relative_to(root_p): + raise ValueError(f"path {cand_p} escapes root {root_p}") + return cand_p + def load_manifest(): if not os.path.exists(MANIFEST): @@ -34,10 +54,22 @@ def match_triggers(user_input, manifest): def check_preconditions(skill): + # Preconditions are evaluated relative to the project root (one level above .agent). + precond_root = os.path.join(ROOT, "..") for pre in skill.get("preconditions", []): if pre.endswith("exists"): path = pre.replace(" exists", "").strip() - if not os.path.exists(os.path.join(ROOT, "..", path)): + joined = os.path.join(precond_root, path) + try: + resolved = _within(precond_root, joined) + except ValueError as e: + print( + f"skill_loader: rejecting precondition {pre!r} for skill " + f"{skill.get('name')!r}: {e}", + file=sys.stderr, + ) + return False + if not resolved.exists(): return False return True @@ -58,14 +90,49 @@ def skill_enabled(skill): def load_skill_full(name): + # Validate the skill name BEFORE touching the filesystem so a malicious + # name (e.g. "../../etc") cannot probe paths outside SKILLS_DIR. 
+ if not isinstance(name, str) or not _SAFE_NAME_RE.fullmatch(name): + print( + f"skill_loader: rejecting unsafe skill name {name!r}", + file=sys.stderr, + ) + return None + base = os.path.join(SKILLS_DIR, name) - skill_md = os.path.join(base, "SKILL.md") - if not os.path.exists(skill_md): + try: + base_resolved = _within(SKILLS_DIR, base) + except ValueError as e: + print( + f"skill_loader: rejecting skill dir for {name!r}: {e}", + file=sys.stderr, + ) return None - content = open(skill_md).read() - knowledge = os.path.join(base, "KNOWLEDGE.md") - if os.path.exists(knowledge): - content += "\n\n---\n## Accumulated knowledge\n" + open(knowledge).read() + + skill_md = base_resolved / "SKILL.md" + try: + skill_md_resolved = _within(SKILLS_DIR, skill_md) + except ValueError as e: + print( + f"skill_loader: rejecting SKILL.md for {name!r}: {e}", + file=sys.stderr, + ) + return None + if not skill_md_resolved.exists(): + return None + content = open(skill_md_resolved).read() + + knowledge = base_resolved / "KNOWLEDGE.md" + try: + knowledge_resolved = _within(SKILLS_DIR, knowledge) + except ValueError as e: + print( + f"skill_loader: rejecting KNOWLEDGE.md for {name!r}: {e}", + file=sys.stderr, + ) + return content + if knowledge_resolved.exists(): + content += "\n\n---\n## Accumulated knowledge\n" + open(knowledge_resolved).read() return content @@ -74,15 +141,22 @@ def progressive_load(user_input): matches = match_triggers(user_input, manifest) loaded = [] for skill in matches: + name = skill.get("name") + if not isinstance(name, str) or not _SAFE_NAME_RE.fullmatch(name): + print( + f"skill_loader: skipping manifest entry with unsafe name {name!r}", + file=sys.stderr, + ) + continue if not skill_enabled(skill): continue if not check_preconditions(skill): continue - content = load_skill_full(skill["name"]) + content = load_skill_full(name) if not content: continue loaded.append({ - "name": skill["name"], + "name": name, "constraints": skill.get("constraints", []), 
"content": content, }) diff --git a/.agent/tools/trust_model.py b/.agent/tools/trust_model.py new file mode 100644 index 0000000..dc4c927 --- /dev/null +++ b/.agent/tools/trust_model.py @@ -0,0 +1,396 @@ +"""Local Trust Console data collectors. + +This module is intentionally stdlib-only and file-backed. It normalizes the +existing `.agent/` data layer so plain CLI output, JSON output, and the TUI all +read the same facts. +""" +from __future__ import annotations + +import datetime as _dt +import json +import os +from pathlib import Path +from typing import Any + + +SUPPORTED_HARNESSES = ( + "claude-code", + "cursor", + "windsurf", + "opencode", + "openclaw", + "hermes", + "pi", + "standalone-python", + "antigravity", +) + +ADAPTER_FILES = { + "claude-code": ["CLAUDE.md", ".claude/settings.json"], + "cursor": [".cursor/rules/agentic-stack.mdc"], + "windsurf": [".windsurfrules"], + "opencode": ["AGENTS.md", "opencode.json"], + "openclaw": [".openclaw-system.md"], + "hermes": ["AGENTS.md"], + "pi": ["AGENTS.md", ".pi/skills"], + "standalone-python": ["run.py"], + "antigravity": ["ANTIGRAVITY.md"], +} + +TEAM_FILES = { + "CONVENTIONS.md": "# Team Conventions\n\n", + "REVIEW_RULES.md": "# Team Review Rules\n\n", + "DEPLOYMENT_LESSONS.md": "# Team Deployment Lessons\n\n", + "INCIDENTS.md": "# Team Incident Learnings\n\n", + "APPROVED_SKILLS.md": "# Approved Skills\n\n", +} + + +def _now() -> str: + return _dt.datetime.now().isoformat() + + +def _as_path(value: str | os.PathLike[str] | None) -> Path: + return Path(value or os.getcwd()).expanduser().resolve() + + +def _read_text(path: Path) -> str: + try: + return path.read_text() + except OSError: + return "" + + +def _read_json(path: Path) -> Any: + try: + return json.loads(path.read_text()) + except (OSError, json.JSONDecodeError): + return None + + +def _load_jsonl(path: Path) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: + rows: list[dict[str, Any]] = [] + errors: list[dict[str, Any]] = [] + try: + lines = 
path.read_text().splitlines() + except OSError: + return rows, errors + for line_no, line in enumerate(lines, 1): + raw = line.strip() + if not raw: + continue + try: + payload = json.loads(raw) + except json.JSONDecodeError as exc: + errors.append({"path": str(path), "line": line_no, "error": str(exc)}) + continue + if isinstance(payload, dict): + rows.append(payload) + else: + errors.append({"path": str(path), "line": line_no, "error": "JSONL row is not an object"}) + return rows, errors + + +def find_agent_root(start: str | os.PathLike[str] | None = None) -> Path | None: + """Return the nearest `.agent` directory at or above start.""" + cur = _as_path(start) + if cur.name == ".agent" and cur.is_dir(): + return cur + if (cur / ".agent").is_dir(): + return cur / ".agent" + for parent in cur.parents: + candidate = parent / ".agent" + if candidate.is_dir(): + return candidate + return None + + +def _project_root(project_root: str | os.PathLike[str] | None = None) -> Path: + start = _as_path(project_root) + agent = find_agent_root(start) + if agent: + return agent.parent + return start + + +def _agent_root(project_root: str | os.PathLike[str] | None = None) -> Path: + root = _project_root(project_root) + return root / ".agent" + + +def _status(status: str, label: str, detail: str = "", severity: str = "info") -> dict[str, str]: + return {"status": status, "label": label, "detail": detail, "severity": severity} + + +def _file_check(path: Path, label: str, required: bool = True) -> dict[str, str]: + if path.exists(): + return _status("pass", label, str(path), "info") + if required: + return _status("fail", label, f"missing: {path}", "error") + return _status("warn", label, f"missing: {path}", "warning") + + +def _count_json_files(path: Path) -> int: + if not path.is_dir(): + return 0 + return sum(1 for item in path.iterdir() if item.is_file() and item.suffix == ".json") + + +def _load_candidates(agent: Path) -> tuple[dict[str, int], list[dict[str, Any]], list[dict[str, 
str]]]: + candidates_dir = agent / "memory" / "candidates" + counts = { + "staged": _count_json_files(candidates_dir), + "graduated": _count_json_files(candidates_dir / "graduated"), + "rejected": _count_json_files(candidates_dir / "rejected"), + } + rejected: list[dict[str, Any]] = [] + errors: list[dict[str, str]] = [] + for path in sorted((candidates_dir / "rejected").glob("*.json")): + payload = _read_json(path) + if isinstance(payload, dict): + payload.setdefault("_path", str(path)) + rejected.append(payload) + else: + errors.append({"path": str(path), "error": "invalid JSON"}) + return counts, rejected, errors + + +def _load_lessons(agent: Path) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: + path = agent / "memory" / "semantic" / "lessons.jsonl" + return _load_jsonl(path) + + +def _load_skills(agent: Path) -> tuple[list[str], list[dict[str, Any]]]: + path = agent / "skills" / "_manifest.jsonl" + rows, errors = _load_jsonl(path) + names = [str(row.get("name", "?")) for row in rows if isinstance(row, dict)] + return names, errors + + +def _review_queue(agent: Path) -> dict[str, Any]: + path = agent / "memory" / "working" / "REVIEW_QUEUE.md" + text = _read_text(path) + pending = 0 + oldest = "" + for line in text.splitlines(): + if line.startswith("**Pending:**"): + try: + pending = int(line.split(":", 1)[1].strip()) + except ValueError: + pending = 0 + if line.startswith("**Oldest staged:**"): + oldest = line.split(":", 1)[1].strip() + return {"path": str(path), "exists": path.exists(), "pending": pending, "oldest_staged": oldest} + + +def _adapter_text(project: Path, harness: str) -> str: + parts = [] + for rel in ADAPTER_FILES.get(harness, []): + path = project / rel + if path.is_file(): + parts.append(_read_text(path)) + elif path.is_dir(): + parts.append(str(path)) + return "\n".join(parts).lower() + + +def _adapter_installed(project: Path, harness: str) -> dict[str, str]: + files = ADAPTER_FILES.get(harness, []) + if not files: + return 
_status("fail", "installed", "unknown harness", "error") + missing = [rel for rel in files if not (project / rel).exists()] + if not missing: + return _status("pass", "installed", ", ".join(files), "info") + return _status("fail", "installed", "missing: " + ", ".join(missing), "error") + + +def _contains(text: str, *needles: str) -> bool: + return all(needle.lower() in text for needle in needles) + + +def _harness_row(project: Path, harness: str) -> dict[str, Any]: + text = _adapter_text(project, harness) + installed = _adapter_installed(project, harness) + if installed["status"] == "fail": + missing = _status("fail", "", "adapter file missing", "error") + return { + "harness": harness, + "installed": installed, + "memory": missing, + "skills": missing, + "recall": missing, + "reflect": missing, + "permissions": missing, + } + memory_ok = _contains(text, ".agent", "preferences.md", "lessons.md") + skills_ok = "skill" in text + recall_ok = "recall.py" in text + reflect_ok = "memory_reflect.py" in text or "episodic" in text + permissions_ok = "permissions.md" in text + return { + "harness": harness, + "installed": installed, + "memory": _status("pass" if memory_ok else "warn", "memory", "references personal and semantic memory" if memory_ok else "missing memory references", "warning" if not memory_ok else "info"), + "skills": _status("pass" if skills_ok else "warn", "skills", "references skills" if skills_ok else "missing skill-loading reference", "warning" if not skills_ok else "info"), + "recall": _status("pass" if recall_ok else "warn", "recall", "references recall.py" if recall_ok else "missing recall.py instruction", "warning" if not recall_ok else "info"), + "reflect": _status("pass" if reflect_ok else "warn", "reflect", "references reflection logging" if reflect_ok else "missing memory_reflect.py instruction", "warning" if not reflect_ok else "info"), + "permissions": _status("pass" if permissions_ok else "warn", "permissions", "references permissions.md" if 
permissions_ok else "missing permissions.md reference", "warning" if not permissions_ok else "info"), + } + + +def verify_harnesses( + harness: str | None = None, + project_root: str | os.PathLike[str] | None = None, +) -> dict[str, Any]: + project = _project_root(project_root) + harnesses = [harness] if harness else list(SUPPORTED_HARNESSES) + rows = [_harness_row(project, item) for item in harnesses if item in SUPPORTED_HARNESSES] + return {"project_root": str(project), "harnesses": rows} + + +def team_status(project_root: str | os.PathLike[str] | None = None) -> dict[str, Any]: + agent = _agent_root(project_root) + team_dir = agent / "memory" / "team" + files: dict[str, dict[str, Any]] = {} + for name in TEAM_FILES: + path = team_dir / name + files[name] = { + "exists": path.exists(), + "path": str(path), + "size": path.stat().st_size if path.exists() else 0, + } + return {"exists": team_dir.is_dir(), "path": str(team_dir), "files": files} + + +def team_init(project_root: str | os.PathLike[str] | None = None) -> dict[str, Any]: + agent = _agent_root(project_root) + team_dir = agent / "memory" / "team" + team_dir.mkdir(parents=True, exist_ok=True) + created = 0 + existing = 0 + for name, content in TEAM_FILES.items(): + path = team_dir / name + if path.exists(): + existing += 1 + continue + path.write_text(content) + created += 1 + return {"path": str(team_dir), "created": created, "existing": existing, "files": list(TEAM_FILES)} + + +def collect_health(project_root: str | os.PathLike[str] | None = None) -> dict[str, Any]: + project = _project_root(project_root) + agent = project / ".agent" + checks = [ + _file_check(agent, ".agent directory"), + _file_check(agent / "AGENTS.md", "agent map"), + _file_check(agent / "memory" / "personal" / "PREFERENCES.md", "personal preferences"), + _file_check(agent / "memory" / "working" / "WORKSPACE.md", "working memory", required=False), + _file_check(agent / "memory" / "working" / "REVIEW_QUEUE.md", "review queue", 
required=False), + _file_check(agent / "memory" / "semantic" / "lessons.jsonl", "semantic lessons", required=False), + _file_check(agent / "memory" / "episodic" / "AGENT_LEARNINGS.jsonl", "episodic log", required=False), + _file_check(agent / "protocols" / "permissions.md", "permissions"), + _file_check(agent / "skills" / "_manifest.jsonl", "skills manifest"), + ] + + episodes, episode_errors = _load_jsonl(agent / "memory" / "episodic" / "AGENT_LEARNINGS.jsonl") + lessons, lesson_errors = _load_lessons(agent) + candidate_counts, rejected, candidate_errors = _load_candidates(agent) + skill_names, skill_errors = _load_skills(agent) + accepted = [row for row in lessons if row.get("status") == "accepted"] + provisional = [row for row in lessons if row.get("status") == "provisional"] + failures = [row for row in episodes if row.get("result") == "failure"] + json_errors = episode_errors + lesson_errors + candidate_errors + skill_errors + for err in json_errors: + checks.append(_status("warn", "data parse", f"{err.get('path')}: {err.get('error')}", "warning")) + review = _review_queue(agent) + if review["pending"] > 10: + checks.append(_status("warn", "review backlog", f"{review['pending']} pending candidates", "warning")) + team = team_status(project) + if not team["exists"]: + checks.append(_status("warn", "team brain", "not initialized; run agentic-stack team init", "warning")) + + failed = sum(1 for item in checks if item["status"] == "fail") + warned = sum(1 for item in checks if item["status"] == "warn") + score = max(0, 100 - failed * 18 - warned * 6) + + registry = _read_json(agent / "runtime" / "instances.json") + instances = registry if isinstance(registry, dict) else {"version": 1, "active_instance": None, "instances": []} + return { + "generated_at": _now(), + "project_root": str(project), + "agent_root": str(agent), + "score": score, + "checks": checks, + "memory": { + "episodic": { + "count": len(episodes), + "failures": len(failures), + "errors": 
episode_errors, + }, + "lessons": { + "accepted": len(accepted), + "provisional": len(provisional), + "total": len(lessons), + "errors": lesson_errors, + }, + "candidates": dict(candidate_counts, errors=candidate_errors), + "review_queue": review, + }, + "skills": {"count": len(skill_names), "names": skill_names, "errors": skill_errors}, + "team": team, + "instances": { + "active_instance": instances.get("active_instance"), + "count": len(instances.get("instances", [])), + "items": instances.get("instances", []), + }, + } + + +def memory_learned(project_root: str | os.PathLike[str] | None = None) -> list[dict[str, Any]]: + lessons, _errors = _load_lessons(_agent_root(project_root)) + return [row for row in lessons if row.get("status") == "accepted"] + + +def memory_rejected(project_root: str | os.PathLike[str] | None = None) -> list[dict[str, Any]]: + _counts, rejected, _errors = _load_candidates(_agent_root(project_root)) + return rejected + + +def memory_why( + identifier: str, + project_root: str | os.PathLike[str] | None = None, +) -> dict[str, Any]: + agent = _agent_root(project_root) + lessons, errors = _load_lessons(agent) + target = None + for lesson in lessons: + aliases = { + str(lesson.get("id", "")), + str(lesson.get("source_candidate", "")), + str(lesson.get("claim", "")), + } + if identifier in aliases: + target = lesson + break + if target is None: + for path in (agent / "memory" / "candidates").glob("**/*.json"): + payload = _read_json(path) + if isinstance(payload, dict) and identifier in {str(payload.get("id", "")), path.stem}: + target = payload + break + if target is None: + return {"found": False, "identifier": identifier, "lesson": None, "evidence": [], "errors": errors} + evidence_ids = {str(item) for item in target.get("evidence_ids", [])} + episodes, episode_errors = _load_jsonl(agent / "memory" / "episodic" / "AGENT_LEARNINGS.jsonl") + evidence = [ + row for row in episodes + if str(row.get("id", "")) in evidence_ids or 
str(row.get("timestamp", "")) in evidence_ids + ] + return { + "found": True, + "identifier": identifier, + "lesson": target, + "evidence": evidence, + "errors": errors + episode_errors, + } diff --git a/.agent/tools/trust_tui.py b/.agent/tools/trust_tui.py new file mode 100644 index 0000000..069b64b --- /dev/null +++ b/.agent/tools/trust_tui.py @@ -0,0 +1,241 @@ +"""Read-only stdlib TUI for the agentic-stack Trust Console.""" +from __future__ import annotations + +import os +import sys +import time +from typing import Any + +import trust_model + + +SECTIONS = ("Doctor", "Memory", "Verify", "Team Brain", "Skills", "Instances") + +_STATUS_GLYPH_UNICODE = {"pass": "✓", "warn": "!", "fail": "✗"} +_STATUS_GLYPH_ASCII = {"pass": "+", "warn": "!", "fail": "x"} + + +def _select_glyphs(stream: Any = None) -> dict[str, str]: + enc = (getattr(stream or sys.stdout, "encoding", "") or "").lower() + if not enc: + return _STATUS_GLYPH_ASCII + try: + for glyph in _STATUS_GLYPH_UNICODE.values(): + glyph.encode(enc) + except (UnicodeEncodeError, LookupError): + return _STATUS_GLYPH_ASCII + return _STATUS_GLYPH_UNICODE + + +def _glyph(status: str) -> str: + return _select_glyphs().get(status, status) + + +def _clip(text: object, width: int) -> str: + s = str(text) + if width <= 0: + return "" + if len(s) <= width: + return s + if width <= 1: + return s[:width] + return s[: width - 1] + "~" + + +def _safe_curs_set(curses_module: Any, visibility: int) -> bool: + try: + curses_module.curs_set(visibility) + except Exception: + return False + return True + + +def _plain_lines(project_root: str | None = None) -> list[str]: + health = trust_model.collect_health(project_root) + lines = [ + f"agentic-stack Trust Console project={health['project_root']} health={health['score']}%", + "", + "Doctor", + ] + for check in health["checks"]: + lines.append(f" {_glyph(check['status'])} {check['label']} - {check['detail']}") + memory = health["memory"] + lines.extend([ + "", + "Memory", + f" 
episodes={memory['episodic']['count']} failures={memory['episodic']['failures']}", + f" lessons accepted={memory['lessons']['accepted']} provisional={memory['lessons']['provisional']}", + f" candidates staged={memory['candidates']['staged']} graduated={memory['candidates']['graduated']} rejected={memory['candidates']['rejected']}", + "", + "Next steps", + " agentic-stack memory why ", + " agentic-stack verify --all", + " agentic-stack team init", + ]) + return lines + + +def render_plain(project_root: str | None = None) -> str: + return "\n".join(_plain_lines(project_root)) + "\n" + + +def _draw_doctor(stdscr: Any, y: int, x: int, width: int, health: dict[str, Any]) -> int: + stdscr.addstr(y, x, "Doctor", getattr(stdscr, "A_BOLD", 0) if hasattr(stdscr, "A_BOLD") else 0) + y += 2 + for check in health["checks"][: max(0, stdscr.getmaxyx()[0] - y - 3)]: + line = f"{_glyph(check['status'])} {check['label']} - {check['detail']}" + stdscr.addstr(y, x, _clip(line, width)) + y += 1 + return y + + +def _draw_memory(stdscr: Any, y: int, x: int, width: int, health: dict[str, Any]) -> int: + memory = health["memory"] + stdscr.addstr(y, x, "Memory") + y += 2 + rows = [ + ("episodes", memory["episodic"]["count"]), + ("failures", memory["episodic"]["failures"]), + ("accepted lessons", memory["lessons"]["accepted"]), + ("provisional lessons", memory["lessons"]["provisional"]), + ("staged candidates", memory["candidates"]["staged"]), + ("rejected candidates", memory["candidates"]["rejected"]), + ] + for label, value in rows: + stdscr.addstr(y, x, _clip(f"{label:22} {value}", width)) + y += 1 + return y + + +def _draw_verify(stdscr: Any, y: int, x: int, width: int, project_root: str | None) -> int: + matrix = trust_model.verify_harnesses(project_root=project_root) + stdscr.addstr(y, x, "Verify") + y += 2 + stdscr.addstr(y, x, _clip("harness install memory skills recall reflect perms", width)) + y += 1 + for row in matrix["harnesses"][: max(0, stdscr.getmaxyx()[0] - y - 3)]: + line = 
( + f"{row['harness']:<16}" + f"{_glyph(row['installed']['status']):<8}" + f"{_glyph(row['memory']['status']):<7}" + f"{_glyph(row['skills']['status']):<7}" + f"{_glyph(row['recall']['status']):<7}" + f"{_glyph(row['reflect']['status']):<8}" + f"{_glyph(row['permissions']['status'])}" + ) + stdscr.addstr(y, x, _clip(line, width)) + y += 1 + return y + + +def _draw_team(stdscr: Any, y: int, x: int, width: int, health: dict[str, Any]) -> int: + team = health["team"] + stdscr.addstr(y, x, "Team Brain") + y += 2 + stdscr.addstr(y, x, f"path: {_clip(team['path'], max(1, width - 6))}") + y += 1 + stdscr.addstr(y, x, f"exists: {team['exists']}") + y += 2 + for name, meta in team["files"].items(): + status = "present" if meta["exists"] else "missing" + stdscr.addstr(y, x, _clip(f"{name:<24} {status}", width)) + y += 1 + return y + + +def _draw_skills(stdscr: Any, y: int, x: int, width: int, health: dict[str, Any]) -> int: + stdscr.addstr(y, x, "Skills") + y += 2 + stdscr.addstr(y, x, f"loaded: {health['skills']['count']}") + y += 2 + for name in health["skills"]["names"][: max(0, stdscr.getmaxyx()[0] - y - 3)]: + stdscr.addstr(y, x, _clip(f"- {name}", width)) + y += 1 + return y + + +def _draw_instances(stdscr: Any, y: int, x: int, width: int, health: dict[str, Any]) -> int: + inst = health["instances"] + stdscr.addstr(y, x, "Instances") + y += 2 + stdscr.addstr(y, x, f"active: {inst.get('active_instance') or 'none'}") + y += 1 + stdscr.addstr(y, x, f"count: {inst.get('count', 0)}") + y += 2 + for row in inst.get("items", [])[: max(0, stdscr.getmaxyx()[0] - y - 3)]: + stdscr.addstr(y, x, _clip(f"{row.get('id')} state={row.get('state')} pid={row.get('worker_pid')}", width)) + y += 1 + return y + + +def _draw(stdscr: Any, section: int, project_root: str | None) -> None: + health = trust_model.collect_health(project_root) + h, w = stdscr.getmaxyx() + stdscr.erase() + header = f"agentic-stack Trust Console health={health['score']}% project={health['project_root']}" + 
stdscr.addstr(0, 0, _clip(header, w - 1)) + stdscr.addstr(1, 0, "-" * max(0, w - 1)) + rail_w = min(18, max(12, w // 5)) + for idx, name in enumerate(SECTIONS): + marker = ">" if idx == section else " " + stdscr.addstr(3 + idx, 1, _clip(f"{marker} {name}", rail_w - 2)) + content_x = rail_w + 1 + content_w = max(10, w - content_x - 1) + y = 3 + if SECTIONS[section] == "Doctor": + _draw_doctor(stdscr, y, content_x, content_w, health) + elif SECTIONS[section] == "Memory": + _draw_memory(stdscr, y, content_x, content_w, health) + elif SECTIONS[section] == "Verify": + _draw_verify(stdscr, y, content_x, content_w, project_root) + elif SECTIONS[section] == "Team Brain": + _draw_team(stdscr, y, content_x, content_w, health) + elif SECTIONS[section] == "Skills": + _draw_skills(stdscr, y, content_x, content_w, health) + else: + _draw_instances(stdscr, y, content_x, content_w, health) + footer = "up/down move r refresh q quit ? help" + stdscr.addstr(h - 1, 0, _clip(footer, w - 1)) + stdscr.refresh() + + +def run(project_root: str | None = None, plain: bool = False) -> int: + if plain or not sys.stdin.isatty() or not sys.stdout.isatty(): + sys.stdout.write(render_plain(project_root)) + return 0 + try: + import curses + except ImportError: + sys.stdout.write(render_plain(project_root)) + return 0 + + def _main(stdscr: Any) -> None: + _safe_curs_set(curses, 0) + stdscr.keypad(True) + section = 0 + while True: + _draw(stdscr, section, project_root) + key = stdscr.getch() + if key in (ord("q"), 27): + return + if key in (ord("j"), curses.KEY_DOWN): + section = min(len(SECTIONS) - 1, section + 1) + elif key in (ord("k"), curses.KEY_UP): + section = max(0, section - 1) + elif key == ord("r"): + time.sleep(0.05) + elif key == ord("?"): + stdscr.erase() + stdscr.addstr(0, 0, "agentic-stack Trust Console help") + stdscr.addstr(2, 0, "This TUI is read-only in v1.") + stdscr.addstr(3, 0, "Use explicit CLI commands for memory decisions:") + stdscr.addstr(5, 2, "python3 
.agent/tools/graduate.py --rationale ...") + stdscr.addstr(6, 2, "python3 .agent/tools/reject.py --reason ...") + _g = _select_glyphs() + stdscr.addstr(8, 0, f"Status: {_g['pass']} pass {_g['warn']} warn {_g['fail']} fail") + stdscr.addstr(10, 0, "Press any key to return.") + stdscr.refresh() + stdscr.getch() + + curses.wrapper(_main) + return 0 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..96e1971 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,76 @@ +name: CI + +on: + push: + branches: [master] + pull_request: + branches: [master] + +jobs: + verifiers: + name: Verifier scripts + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + if: hashFiles('requirements.txt') != '' + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Run test_claude_code_hook.py + if: hashFiles('test_claude_code_hook.py') != '' + run: python test_claude_code_hook.py + + - name: Run verify_codex_fixes.py + if: hashFiles('verify_codex_fixes.py') != '' + run: python verify_codex_fixes.py + + - name: Run verify_instances.py + if: hashFiles('verify_instances.py') != '' + run: python verify_instances.py + + installer-smoke: + name: Installer smoke (bash) + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install with explicit target dir + run: | + bash install.sh claude-code "$RUNNER_TEMP/agt-test" --yes + test -f "$RUNNER_TEMP/agt-test/.agent/harness/runtime.py" + + - name: Install with --yes from inside target dir (guards --yes parsing) + run: | + mkdir -p "$RUNNER_TEMP/agt-test2" + cd "$RUNNER_TEMP/agt-test2" + bash "$GITHUB_WORKSPACE/install.sh" claude-code --yes + if [ -d "$RUNNER_TEMP/agt-test2/--yes" ]; then + echo "ERROR: install.sh treated --yes as a target dir" >&2 + exit 1 + fi + + 
installer-windows-pwsh: + name: Installer smoke (Windows pwsh) + runs-on: windows-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install via install.ps1 + shell: pwsh + run: | + pwsh -File install.ps1 claude-code "$env:RUNNER_TEMP\agt-test" -Yes + if (-not (Test-Path "$env:RUNNER_TEMP\agt-test\.agent\harness\runtime.py")) { + Write-Error "install.ps1 did not produce .agent/harness/runtime.py" + exit 1 + } diff --git a/.gitignore b/.gitignore index 6ebdc45..b453b9d 100644 --- a/.gitignore +++ b/.gitignore @@ -46,6 +46,9 @@ tests/ .agent/memory/**/__pycache__/ .agent/memory/**/*.py[cod] +# Runtime backups (auto_dream rewrites). Never versioned. +.agent/memory/**/*.bak + # Test scratch dir — the CLI round-trip test creates shim scripts here. .tmp/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ed0ee8..533dc5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,26 @@ All notable changes to this project. The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and the project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added +- **Trust Console local monitoring layer.** New `agentic-stack doctor`, + `agentic-stack tui`, `agentic-stack memory ...`, `agentic-stack verify`, + and `agentic-stack team ...` commands inspect the existing file-backed + `.agent/` data layer without requiring a daemon or web UI. The same + normalized collectors power plain output, JSON output, and the read-only + stdlib TUI. +- **Status glyphs in the TUI.** Doctor and Verify panes render `✓ / ! / ✗` + instead of `PASS / WARN / FAIL` text. Encoding-aware fallback to + `+ / ! / x` when stdout cannot encode Unicode (e.g. `PYTHONIOENCODING=ascii`). + +### Fixed +- Adapter conformance (`agentic-stack verify`) now requires every listed + adapter file before marking install pass; previously any single file + was sufficient and produced false positives for opencode/pi. 
+- `agentic-stack doctor --json` preserves non-zero exit code when checks + fail (was always exiting 0 even on failure, masking CI signals). + ## [0.13.0] — 2026-05-02 Minor release. Adds an onboarding-style transfer wizard for moving a portable diff --git a/Formula/agentic-stack.rb b/Formula/agentic-stack.rb index a5f9089..15427c5 100644 --- a/Formula/agentic-stack.rb +++ b/Formula/agentic-stack.rb @@ -9,6 +9,7 @@ class AgenticStack < Formula def install # install the brain + adapters alongside install.sh so relative paths hold pkgshare.install ".agent", "adapters", "harness_manager", "scripts", "install.sh", + "agentic_stack_cli.py", "onboard.py", "onboard_ui.py", "onboard_widgets.py", "onboard_render.py", "onboard_write.py", "onboard_features.py" @@ -24,10 +25,24 @@ def install output = shell_output("#{bin}/agentic-stack 2>&1", 2) assert_match "usage", output assert_match "agentic-stack transfer", shell_output("#{bin}/agentic-stack transfer --help") - # Wizard --yes must write PREFERENCES.md AND .features.json into a temp project dir - (testpath/".agent/memory/personal").mkpath + + # Explicit-target form: wizard --yes must copy the full .agent/ tree and + # write both PREFERENCES.md and .features.json into the target dir. system "#{bin}/agentic-stack", "claude-code", testpath.to_s, "--yes" assert_predicate testpath/".agent/memory/personal/PREFERENCES.md", :exist? assert_predicate testpath/".agent/memory/.features.json", :exist? + assert_predicate testpath/".agent/harness/runtime.py", :exist? + + # Documented no-path form: `agentic-stack claude-code --yes` run from inside + # the project dir must install into cwd (not interpret "--yes" as a path) + # and copy the full .agent/ tree. + nopath = testpath/"nopath" + nopath.mkpath + Dir.chdir(nopath) do + system "#{bin}/agentic-stack", "claude-code", "--yes" + end + refute_predicate nopath/"--yes", :exist? + assert_predicate nopath/".agent/harness/runtime.py", :exist? 
+ assert_predicate nopath/".agent/memory/personal/PREFERENCES.md", :exist? end end diff --git a/README.md b/README.md index b9d1728..8050482 100644 --- a/README.md +++ b/README.md @@ -132,6 +132,24 @@ cd agentic-stack brew update && brew upgrade agentic-stack ``` +## Trust Console + +agentic-stack now ships a local monitoring layer for the `.agent/` brain: + +```bash +agentic-stack doctor # health, memory, skills, candidates +agentic-stack tui # read-only terminal console +agentic-stack memory learned # accepted lessons +agentic-stack memory rejected # rejected candidates +agentic-stack memory why # evidence trail for a lesson/candidate +agentic-stack verify --all # adapter conformance matrix +agentic-stack team status # shared team-brain files +``` + +Every diagnostic command is local-first and file-backed. Use `--json` on +`doctor`, `verify`, `memory`, and `team` commands when you need CI-friendly +or pasteable output. + ### Clone instead? ```bash @@ -339,6 +357,8 @@ The index is stored at `.agent/memory/.index/` and gitignored. 
├── learn.py # one-shot lesson teaching (stage + graduate) ├── recall.py # surface lessons relevant to an intent ├── show.py # colorful brain-state dashboard + ├── trust_model.py # normalized Trust Console data collectors + ├── trust_tui.py # read-only stdlib terminal UI ├── data_layer_export.py # local cross-harness dashboard/data export ├── data_flywheel_export.py # approved runs -> traces/cards/evals/JSONL ├── list_candidates.py diff --git a/agentic_stack_cli.py b/agentic_stack_cli.py new file mode 100755 index 0000000..8692c39 --- /dev/null +++ b/agentic_stack_cli.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python3 +"""agentic-stack command front door.""" +from __future__ import annotations + +import argparse +import json +import os +import subprocess +import sys +from typing import Any + +HERE = os.path.dirname(os.path.abspath(__file__)) +TOOLS = os.path.join(HERE, ".agent", "tools") +if TOOLS not in sys.path: + sys.path.insert(0, TOOLS) + +import trust_model # noqa: E402 +import trust_tui # noqa: E402 + + +ADAPTERS = set(trust_model.SUPPORTED_HARNESSES) + + +def _dump_json(payload: Any) -> int: + print(json.dumps(payload, indent=2, sort_keys=True)) + return 0 + + +def _project_arg(parser: argparse.ArgumentParser) -> None: + parser.add_argument("--project", default=None, help="Project root to inspect. 
Defaults to cwd.") + + +def _print_doctor(payload: dict[str, Any]) -> None: + print(f"agentic-stack doctor health={payload['score']}%") + print(f"project: {payload['project_root']}") + print("") + for check in payload["checks"]: + marker = {"pass": "PASS", "warn": "WARN", "fail": "FAIL"}.get(check["status"], check["status"].upper()) + print(f"{marker:4} {check['label']}: {check['detail']}") + memory = payload["memory"] + print("") + print("memory:") + print(f" episodes: {memory['episodic']['count']} ({memory['episodic']['failures']} failures)") + print(f" lessons: {memory['lessons']['accepted']} accepted, {memory['lessons']['provisional']} provisional") + print( + " candidates: " + f"{memory['candidates']['staged']} staged, " + f"{memory['candidates']['graduated']} graduated, " + f"{memory['candidates']['rejected']} rejected" + ) + print("") + print("next:") + print(" agentic-stack tui") + print(" agentic-stack verify --all") + print(" agentic-stack memory learned") + + +def _cmd_doctor(args: argparse.Namespace) -> int: + payload = trust_model.collect_health(args.project) + failed = any(check["status"] == "fail" for check in payload["checks"]) + if args.json: + _dump_json(payload) + else: + _print_doctor(payload) + return 1 if failed else 0 + + +def _cmd_tui(args: argparse.Namespace) -> int: + return trust_tui.run(project_root=args.project, plain=args.plain) + + +def _print_lessons(rows: list[dict[str, Any]]) -> None: + if not rows: + print("No accepted lessons.") + return + for row in rows: + print(f"{row.get('id', '-')}: {row.get('claim', '')}") + rationale = row.get("rationale") + if rationale: + print(f" rationale: {rationale}") + + +def _print_rejected(rows: list[dict[str, Any]]) -> None: + if not rows: + print("No rejected candidates.") + return + for row in rows: + print(f"{row.get('id', '-')}: {row.get('claim', '')}") + decisions = row.get("decisions") or [] + if decisions: + latest = decisions[-1] + reason = latest.get("reason") or latest.get("rationale") 
or "" + if reason: + print(f" reason: {reason}") + + +def _print_why(payload: dict[str, Any]) -> None: + if not payload.get("found"): + print(f"No lesson or candidate found for {payload.get('identifier')}") + return + lesson = payload["lesson"] + print(f"id: {lesson.get('id', '-')}") + print(f"claim: {lesson.get('claim', '')}") + print(f"status: {lesson.get('status', '-')}") + if lesson.get("reviewer"): + print(f"reviewer: {lesson.get('reviewer')}") + if lesson.get("rationale"): + print(f"rationale: {lesson.get('rationale')}") + evidence_ids = lesson.get("evidence_ids") or [] + print(f"evidence ids: {', '.join(map(str, evidence_ids)) if evidence_ids else '-'}") + print(f"matched evidence: {len(payload.get('evidence', []))}") + for entry in payload.get("evidence", [])[:5]: + print(f" - {entry.get('timestamp', '-')}: {entry.get('action', entry.get('reflection', ''))}") + + +def _cmd_memory(args: argparse.Namespace) -> int: + if args.memory_cmd == "learned": + rows = trust_model.memory_learned(args.project) + if args.json: + return _dump_json({"lessons": rows}) + _print_lessons(rows) + return 0 + if args.memory_cmd == "rejected": + rows = trust_model.memory_rejected(args.project) + if args.json: + return _dump_json({"rejected": rows}) + _print_rejected(rows) + return 0 + if args.memory_cmd == "why": + payload = trust_model.memory_why(args.identifier, args.project) + if args.json: + return _dump_json(payload) + _print_why(payload) + return 0 if payload.get("found") else 1 + if args.memory_cmd == "status": + payload = trust_model.collect_health(args.project)["memory"] + if args.json: + return _dump_json(payload) + print(json.dumps(payload, indent=2, sort_keys=True)) + return 0 + raise ValueError(f"unknown memory command: {args.memory_cmd}") + + +def _print_verify(payload: dict[str, Any]) -> None: + print(f"agentic-stack verify project={payload['project_root']}") + print("") + print(f"{'harness':<18} {'install':<8} {'memory':<7} {'skills':<7} {'recall':<7} {'reflect':<8} 
permissions") + for row in payload["harnesses"]: + print( + f"{row['harness']:<18} " + f"{row['installed']['status']:<8} " + f"{row['memory']['status']:<7} " + f"{row['skills']['status']:<7} " + f"{row['recall']['status']:<7} " + f"{row['reflect']['status']:<8} " + f"{row['permissions']['status']}" + ) + + +def _cmd_verify(args: argparse.Namespace) -> int: + harness = None if args.all else args.harness + payload = trust_model.verify_harnesses(harness=harness, project_root=args.project) + if args.json: + return _dump_json(payload) + _print_verify(payload) + return 0 + + +def _print_team(payload: dict[str, Any]) -> None: + print(f"team brain: {'present' if payload['exists'] else 'missing'}") + print(f"path: {payload['path']}") + for name, meta in payload["files"].items(): + status = "present" if meta["exists"] else "missing" + print(f" {name:<24} {status}") + + +def _cmd_team(args: argparse.Namespace) -> int: + if args.team_cmd == "status": + payload = trust_model.team_status(args.project) + if args.json: + return _dump_json(payload) + _print_team(payload) + return 0 + if args.team_cmd == "init": + payload = trust_model.team_init(args.project) + if args.json: + return _dump_json(payload) + print(f"team brain initialized: {payload['path']}") + print(f"created={payload['created']} existing={payload['existing']}") + return 0 + raise ValueError(f"unknown team command: {args.team_cmd}") + + +def _dispatch_install(argv: list[str]) -> int: + script = os.path.join(HERE, "install.sh") + if not os.path.exists(script): + print("install.sh not found next to agentic_stack_cli.py", file=sys.stderr) + return 1 + proc = subprocess.run([script, *argv], cwd=os.getcwd()) + return proc.returncode + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="agentic-stack", + description="Portable memory, skill, and trust layer for coding agents.", + ) + sub = parser.add_subparsers(dest="command") + + doctor = sub.add_parser("doctor", help="Inspect 
local .agent health.") + _project_arg(doctor) + doctor.add_argument("--json", action="store_true", help="Emit machine-readable JSON.") + doctor.set_defaults(func=_cmd_doctor) + + tui = sub.add_parser("tui", help="Open the read-only Trust Console TUI.") + _project_arg(tui) + tui.add_argument("--plain", action="store_true", help="Render plain text instead of curses.") + tui.set_defaults(func=_cmd_tui) + + memory = sub.add_parser("memory", help="Inspect accepted/rejected memory.") + memory_sub = memory.add_subparsers(dest="memory_cmd", required=True) + for name in ("status", "learned", "rejected"): + p = memory_sub.add_parser(name) + _project_arg(p) + p.add_argument("--json", action="store_true") + p.set_defaults(func=_cmd_memory) + why = memory_sub.add_parser("why") + _project_arg(why) + why.add_argument("--json", action="store_true") + why.add_argument("identifier") + why.set_defaults(func=_cmd_memory) + + verify = sub.add_parser("verify", help="Verify harness conformance.") + _project_arg(verify) + verify.add_argument("harness", nargs="?", choices=sorted(ADAPTERS), help="Harness to verify.") + verify.add_argument("--all", action="store_true", help="Verify all supported harnesses.") + verify.add_argument("--json", action="store_true") + verify.set_defaults(func=_cmd_verify) + + team = sub.add_parser("team", help="Inspect or initialize team brain files.") + team_sub = team.add_subparsers(dest="team_cmd", required=True) + for name in ("status", "init"): + p = team_sub.add_parser(name) + _project_arg(p) + p.add_argument("--json", action="store_true") + p.set_defaults(func=_cmd_team) + + install = sub.add_parser("install", help="Install an adapter into a project.") + install.add_argument("install_args", nargs=argparse.REMAINDER) + install.set_defaults(func=lambda args: _dispatch_install(args.install_args)) + + return parser + + +def main(argv: list[str] | None = None) -> int: + argv = list(sys.argv[1:] if argv is None else argv) + if argv and argv[0] in ADAPTERS: + 
return _dispatch_install(argv) + parser = _build_parser() + args = parser.parse_args(argv) + if not hasattr(args, "func"): + parser.print_help() + return 2 + if args.command == "verify" and not args.all and not args.harness: + args.all = True + return args.func(args) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/docs/superpowers/plans/2026-05-05-trust-console-tui-implementation.md b/docs/superpowers/plans/2026-05-05-trust-console-tui-implementation.md new file mode 100644 index 0000000..465bf16 --- /dev/null +++ b/docs/superpowers/plans/2026-05-05-trust-console-tui-implementation.md @@ -0,0 +1,109 @@ +# Trust Console TUI Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Build the first local Trust Console data layer and TUI/CLI front door for agentic-stack. + +**Architecture:** Add a Python stdlib model layer that reads existing `.agent/` files and returns normalized health, memory, verification, and team-brain data. Add a root CLI dispatcher that preserves legacy adapter install shorthand while exposing `doctor`, `memory`, `verify`, `team`, and `tui` commands. Keep the TUI read-only and backed by the same model used for JSON/plain output. + +**Tech Stack:** Python 3 stdlib, file-backed JSON/Markdown data, existing shell installer, Homebrew formula wrapper. + +--- + +### Task 1: Trust Model Tests + +**Files:** +- Create: `verify_trust_console.py` + +- [x] **Step 1: Write failing tests** + +Create `verify_trust_console.py` with tempfile fixtures that assert: +- `trust_model.collect_health()` reports memory, candidates, lessons, skills, adapters, and team status. +- `trust_model.memory_why()` resolves a lesson by id and includes evidence ids. +- `trust_model.verify_harnesses()` catches missing recall instructions. 
+- `agentic_stack_cli.py doctor --json --project <path>` returns JSON. + +- [x] **Step 2: Run tests and verify RED** + +Run: `python3 verify_trust_console.py` +Expected: FAIL with missing `trust_model` or missing CLI file. + +### Task 2: Model Layer + +**Files:** +- Create: `.agent/tools/trust_model.py` + +- [x] **Step 1: Implement minimal model** + +Implement: +- `find_agent_root(start)` +- `collect_health(project_root=None)` +- `memory_learned(project_root=None)` +- `memory_rejected(project_root=None)` +- `memory_why(identifier, project_root=None)` +- `verify_harnesses(harness=None, project_root=None)` +- `team_status(project_root=None)` +- `team_init(project_root=None)` + +- [x] **Step 2: Run tests and verify GREEN for model behavior** + +Run: `python3 verify_trust_console.py` +Expected: remaining failures only for CLI commands until Task 3. + +### Task 3: CLI and TUI + +**Files:** +- Create: `.agent/tools/trust_tui.py` +- Create: `agentic_stack_cli.py` + +- [x] **Step 1: Implement command dispatcher** + +Add commands: +- `doctor [--json] [--project]` +- `memory learned|rejected|why` +- `verify [--all|harness] [--json] [--project]` +- `team status|init` +- `tui` +- `install <adapter>` +- legacy `<adapter>` shorthand that dispatches to `install.sh` + +- [x] **Step 2: Implement read-only stdlib TUI** + +Use `curses` when TTY is available. Fall back to plain `doctor` output when +non-TTY, curses is unavailable, or `--plain` is passed. + +- [x] **Step 3: Run tests and verify GREEN** + +Run: `python3 verify_trust_console.py` +Expected: PASS. + +### Task 4: Packaging and Docs + +**Files:** +- Modify: `Formula/agentic-stack.rb` +- Modify: `README.md` + +- [x] **Step 1: Update Homebrew formula** + +Install `agentic_stack_cli.py` plus new `.agent/tools/trust_model.py` and +`.agent/tools/trust_tui.py`, and make `bin/agentic-stack` execute the Python +dispatcher. 
+ +- [x] **Step 2: Update README** + +Document: +- `agentic-stack doctor` +- `agentic-stack tui` +- `agentic-stack memory learned` +- `agentic-stack verify --all` +- `agentic-stack team status` + +- [x] **Step 3: Verify commands** + +Run: +- `python3 verify_trust_console.py` +- `python3 agentic_stack_cli.py doctor --json` +- `python3 agentic_stack_cli.py verify --all --json` +- `python3 agentic_stack_cli.py team status` + +Expected: all pass or return useful local status without crashing. diff --git a/docs/superpowers/specs/2026-05-05-agentic-stack-trust-console-tui-design.md b/docs/superpowers/specs/2026-05-05-agentic-stack-trust-console-tui-design.md new file mode 100644 index 0000000..e3c5273 --- /dev/null +++ b/docs/superpowers/specs/2026-05-05-agentic-stack-trust-console-tui-design.md @@ -0,0 +1,349 @@ +# agentic-stack Trust Console TUI Design + +Date: 2026-05-05 +Status: proposed +Owner: product/design + +## Summary + +The next agentic-stack product surface should be a TUI-first Trust Console: +a local terminal interface for inspecting agent memory, verifying harness +conformance, and separating personal knowledge from team knowledge. + +The product promise is: + +> Know what your agent remembers. Prove each harness respects it. Share the +> right lessons with your team. + +The first version should stay local, fast, and scriptable. It should feel +closer to OpenClaw's terminal tooling than a web dashboard: compact status +views, strong `doctor` semantics, clear command help, JSON output for CI, and +an optional interactive `tui` mode for daily inspection. + +## Inspiration + +OpenClaw's CLI provides the design reference: + +- `openclaw doctor` is the model for guided health checks and repairs. +- `openclaw status --json` is the model for pasteable and machine-readable + diagnostics. +- `openclaw memory status/search/index` is the model for a memory-specific + command family. +- `openclaw tui` is the model for a gateway-connected terminal workspace. 
+- Its command copy is concise, operational, and memorable without hiding the + task. + +agentic-stack should borrow the operating pattern, not the implementation: +local-first, terminal-native, status-oriented, and safe by default. + +## Goals + +- Give users a clear one-command view of whether `.agent/` is healthy. +- Make memory inspectable: accepted lessons, rejected candidates, evidence, + stale queues, recent changes, and recall influence. +- Turn adapter support into a conformance standard that can be tested per + harness. +- Introduce a team-brain layer without mixing team lessons into personal + preferences. +- Preserve the existing lightweight install story and avoid requiring Node, + Electron, or a daemon for the first milestone. + +## Non-Goals + +- No web UI in this milestone. +- No cloud sync. +- No multi-user permission server. +- No unattended promotion of memories. +- No dependency-heavy TUI framework unless the implementation plan shows a + clear reason. + +## Command Surface + +The install wrapper should evolve from only adapter installation into a real +CLI front door: + +```bash +agentic-stack install claude-code +agentic-stack doctor +agentic-stack tui +agentic-stack memory status +agentic-stack memory learned +agentic-stack memory rejected +agentic-stack memory why <id> +agentic-stack memory diff --since 2026-05-01 +agentic-stack verify claude-code +agentic-stack verify cursor +agentic-stack verify opencode +agentic-stack team status +``` + +For backward compatibility, existing adapter shorthand should continue to +work: + +```bash +agentic-stack claude-code --yes +``` + +Internally, this can dispatch to `install claude-code`. 
+ +## TUI Layout + +The interactive TUI should use a stable three-pane terminal layout: + +```text +agentic-stack Trust Console project: /repo health: 92% +---------------------------------------------------------------------------- + Doctor Memory Selected: utc-timestamps + Memory ┌─ Overview ────────────────────────────────────────────────┐ + Verify │ accepted rejected pending episodes stale queues │ + Team Brain │ 12 4 1 348 0 │ + Skills └───────────────────────────────────────────────────────────┘ + Settings + + ┌─ Lessons ─────────────────────┬─ Evidence ───────────────┐ + │ utc-timestamps accepted │ source: learn.py │ + │ deploy-approval accepted │ rationale: ... │ + │ flaky-test-triage rejected │ evidence ids: 3 │ + └────────────────────────────────┴─────────────────────────┘ + +---------------------------------------------------------------------------- +↑↓ move enter open / search r refresh j/k next q quit ? help +``` + +The design should be dense and operational: + +- left rail for sections +- top status line for project, active instance, and health score +- main area for tables and summaries +- right or bottom detail panel for evidence and next actions +- fixed footer for keyboard help + +## Section Design + +### Doctor + +Purpose: answer "is this brain healthy enough to trust?" + +Shows: + +- `.agent/` found or missing +- personal preferences present +- working memory present +- review queue age and count +- accepted lessons count +- candidate lifecycle counts +- malformed JSONL or candidate files +- hook configuration status +- adapter files present by harness +- ignored derived files present in `.gitignore` +- stale active instance or worker registry issues + +Actions: + +- refresh +- open detail +- run safe repair where available +- print pasteable report +- export JSON + +### Memory + +Purpose: answer "what did the agent learn, why, and what changed?" 
+ +Views: + +- Overview +- Learned +- Rejected +- Pending +- Timeline +- Diff +- Why + +`memory why <id>` must show: + +- claim +- conditions +- source +- reviewer +- rationale +- evidence ids +- decision history +- render location +- whether it is shared, team, or personal + +### Verify + +Purpose: answer "does this harness actually use the brain?" + +Initial checks should be deterministic and local: + +- expected adapter file exists +- adapter file contains startup instructions +- adapter references `.agent/AGENTS.md` +- adapter references `PREFERENCES.md` +- adapter references `LESSONS.md` +- adapter references `permissions.md` +- adapter tells the harness to run `recall.py` before high-risk work +- adapter tells the harness to write reflections after significant actions + +Later checks can add active harness probes where possible. + +The TUI should present a matrix: + +```text +harness installed memory skills recall reflect permissions +claude-code pass pass pass pass pass pass +cursor pass pass pass warn warn n/a +openclaw pass pass pass pass warn varies +``` + +### Team Brain + +Purpose: answer "what knowledge is shared with the team, and what stays local?" + +Proposed layout: + +```text +.agent/memory/team/ + CONVENTIONS.md + REVIEW_RULES.md + DEPLOYMENT_LESSONS.md + INCIDENTS.md + APPROVED_SKILLS.md +``` + +Rules: + +- `personal/` remains local-user preference memory. +- `team/` contains reviewed shared knowledge intended for Git. +- `semantic/` remains distilled learned memory. +- Team files are read before semantic lessons but after personal preferences + when building context, so user preferences can still override team defaults. +- The TUI clearly labels each memory item as personal, team, semantic, or + episodic. + +## Data Flow + +The TUI should not invent a parallel data store. 
+ +It reads from existing files: + +- `.agent/AGENTS.md` +- `.agent/memory/personal/PREFERENCES.md` +- `.agent/memory/working/REVIEW_QUEUE.md` +- `.agent/memory/semantic/lessons.jsonl` +- `.agent/memory/semantic/LESSONS.md` +- `.agent/memory/episodic/AGENT_LEARNINGS.jsonl` +- `.agent/memory/candidates/**` +- `.agent/skills/_manifest.jsonl` +- `.agent/protocols/permissions.md` +- adapter files in the project root + +It can call existing tools: + +- `show.py` +- `list_candidates.py` +- `recall.py` +- `memory_search.py` +- `graduate.py` +- `reject.py` +- `reopen.py` + +New shared logic should live in reusable modules, not inside terminal drawing +code. The same collectors should power: + +- human TUI +- plain text reports +- JSON output +- future web UI + +## Implementation Shape + +The first implementation should stay Python-first because the repo already +ships a Python onboarding wizard, Python memory tooling, and a Homebrew wrapper +that installs Python files into `pkgshare`. + +Recommended split: + +```text +agentic_stack_cli.py # main command router +.agent/tools/trust_model.py # collectors and normalized health models +.agent/tools/trust_tui.py # interactive terminal surface +.agent/tools/verify.py # conformance checks +.agent/tools/team.py # team-brain status/init helpers +``` + +The current `agentic-stack <adapter>` shorthand remains valid. New commands +route through `agentic_stack_cli.py`. + +If a richer TUI framework is introduced later, it should consume the same +`trust_model.py` data model. + +## TUI Interaction Rules + +Follow these rules from the terminal-ui guide: + +- Batch terminal output instead of flickering clear/redraw loops. +- Always provide escape routes: `q`, `esc`, and `ctrl-c`. +- Show progress for operations that can take more than one second. +- Support non-TTY fallback with plain text output. +- Support `--json` for every diagnostic command. +- Use color semantically: green pass, amber warning, red failure, blue active. 
+- Restore terminal state on exit. +- Use stable dimensions so changing table content does not shift the layout. + +## Error Handling + +- Missing `.agent/`: show install guidance and exit non-zero for `doctor`, + but keep `agentic-stack install <adapter>` available. +- Malformed JSONL: report exact file and line where possible; do not delete. +- Corrupt candidates: report as quarantined or unreadable; do not silently skip + in the TUI. +- Unknown harness: list supported harnesses and suggest `verify --all`. +- Non-TTY: render plain text, not an interactive screen. +- CI: default to non-interactive and JSON-friendly behavior. + +## Testing + +Add focused tests around the model layer before terminal rendering: + +- doctor detects missing required files +- doctor detects stale review queue +- memory model loads accepted and rejected decisions +- `why` resolves lesson metadata and evidence references +- verify matrix catches missing recall instructions +- team status distinguishes personal, team, semantic, and episodic memory +- CLI preserves backward-compatible `agentic-stack claude-code --yes` +- non-TTY commands do not attempt interactive rendering + +Terminal rendering can be verified with snapshot-style text tests for plain +mode first. Interactive key handling can be narrower: smoke-test startup, +navigation, and quit behavior. + +## First Milestone + +Ship a useful non-daemon TUI: + +- `agentic-stack doctor` +- `agentic-stack doctor --json` +- `agentic-stack tui` +- `agentic-stack memory learned` +- `agentic-stack memory rejected` +- `agentic-stack memory why <id>` +- `agentic-stack verify --all` +- `agentic-stack team status` + +This milestone should feel complete even without the later web dashboard. + +## V1 Decisions + +- `team/` is created only by `agentic-stack team init` in v1. Onboarding can + mention Team Brain, but it should not create shared team files without an + explicit user action. +- `tui` is read-only for memory lifecycle decisions in v1. 
Graduation, + rejection, and reopening remain explicit CLI commands so review actions stay + auditable and easy to reproduce. +- The default release uses a Python stdlib TUI. A richer optional TUI + dependency can be considered later only if it consumes the same model layer + and does not weaken the Homebrew/install simplicity. diff --git a/onboard.py b/onboard.py index 2ac0223..dc4f476 100644 --- a/onboard.py +++ b/onboard.py @@ -17,10 +17,24 @@ def _is_ci(): return any(os.environ.get(v) for v in _CI_VARS) -def _parse_args(): - args = sys.argv[1:] - flags = {a for a in args if a.startswith("-")} - pos = [a for a in args if not a.startswith("-")] +_KNOWN_FLAGS = {"--yes", "-y", "--force", "--reconfigure"} + + +def _parse_args(argv=None): + # Whitelist known flags + honor `--` separator so paths beginning with `-` + # (e.g. a target accidentally set to `--yes` by install.sh) are not consumed. + args = sys.argv[1:] if argv is None else list(argv) + flags, pos, sep_seen = set(), [], False + for a in args: + if not sep_seen and a == "--": + sep_seen = True + continue + if not sep_seen and a in _KNOWN_FLAGS: + flags.add(a) + else: + if not sep_seen and a.startswith("-"): + print(f"[onboard] warning: unknown flag {a!r} treated as path", file=sys.stderr) + pos.append(a) return ( pos[0] if pos else os.getcwd(), "--yes" in flags or "-y" in flags, diff --git a/verify_trust_console.py b/verify_trust_console.py new file mode 100755 index 0000000..669e9d4 --- /dev/null +++ b/verify_trust_console.py @@ -0,0 +1,236 @@ +#!/usr/bin/env python3 +"""Regression checks for the local Trust Console data layer and CLI. 
+ +Runs directly, not through pytest: + + python3 verify_trust_console.py +""" +import json +import os +import subprocess +import sys +import tempfile + +HERE = os.path.dirname(os.path.abspath(__file__)) +TOOLS = os.path.join(HERE, ".agent", "tools") +if TOOLS not in sys.path: + sys.path.insert(0, TOOLS) + +PASS = "\033[32m✓\033[0m" +FAIL = "\033[31m✗\033[0m" + + +def _write(path, text): + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "w") as f: + f.write(text) + + +def _write_json(path, payload): + _write(path, json.dumps(payload, indent=2) + "\n") + + +def _fixture_project(): + root = tempfile.mkdtemp(prefix="agentic-stack-trust-") + agent = os.path.join(root, ".agent") + _write(os.path.join(agent, "AGENTS.md"), "# Agent map\n") + _write(os.path.join(agent, "memory", "personal", "PREFERENCES.md"), "# Preferences\n") + _write(os.path.join(agent, "memory", "working", "WORKSPACE.md"), "# Workspace\n") + _write(os.path.join(agent, "memory", "working", "REVIEW_QUEUE.md"), "# Review Queue\n\n_No pending candidates._\n") + _write(os.path.join(agent, "protocols", "permissions.md"), "# Permissions\n") + _write( + os.path.join(agent, "skills", "_manifest.jsonl"), + json.dumps({"name": "debug-investigator"}) + "\n" + + json.dumps({"name": "memory-manager"}) + "\n", + ) + _write( + os.path.join(agent, "memory", "episodic", "AGENT_LEARNINGS.jsonl"), + json.dumps({ + "timestamp": "2026-05-05T10:00:00", + "result": "success", + "action": "proactive-recall: timestamps", + "reflection": "Used UTC timestamp lesson.", + }) + "\n" + + json.dumps({ + "timestamp": "2026-05-05T11:00:00", + "result": "failure", + "action": "deploy", + "reflection": "Deploy failed.", + }) + "\n", + ) + lesson = { + "id": "lesson_utc", + "claim": "Always serialize timestamps in UTC", + "conditions": ["timestamp", "utc"], + "evidence_ids": ["2026-05-05T10:00:00"], + "status": "accepted", + "accepted_at": "2026-05-05T10:01:00", + "reviewer": "learn.py", + "rationale": "avoids 
cross-region bugs", + "source_candidate": "utc-candidate", + } + _write(os.path.join(agent, "memory", "semantic", "lessons.jsonl"), json.dumps(lesson) + "\n") + _write(os.path.join(agent, "memory", "semantic", "LESSONS.md"), "- Always serialize timestamps in UTC\n") + _write_json(os.path.join(agent, "memory", "candidates", "rejected", "bad.json"), { + "id": "bad", + "claim": "Too specific to keep", + "status": "rejected", + "decisions": [{"decision": "rejected", "reason": "too narrow"}], + }) + _write_json(os.path.join(agent, "memory", "candidates", "pending.json"), { + "id": "pending", + "claim": "Pending review", + "status": "staged", + }) + _write(os.path.join(root, ".gitignore"), ".agent/memory/.index/\n") + _write( + os.path.join(root, "CLAUDE.md"), + "Read .agent/AGENTS.md, PREFERENCES.md, LESSONS.md, permissions.md. " + "Run recall.py before risky work. Use memory_reflect.py after actions. " + "Skill loading via .agent/skills/_manifest.jsonl.\n", + ) + _write_json(os.path.join(root, ".claude", "settings.json"), {"hooks": {}}) + _write( + os.path.join(root, ".openclaw-system.md"), + "Read .agent/AGENTS.md and PREFERENCES.md and LESSONS.md and permissions.md.\n", + ) + # AGENTS.md exists but opencode.json does NOT — exercises the all-files install rule. + _write(os.path.join(root, "AGENTS.md"), "Read .agent/PREFERENCES.md and LESSONS.md and permissions.md.\n") + return root + + +def main(): + failures = [] + + def check(name, condition, detail=""): + mark = PASS if condition else FAIL + print(f" {mark} {name}" + (f" - {detail}" if detail and not condition else "")) + if not condition: + failures.append(name) + + import trust_model + + project = _fixture_project() + + print("\n1. 
collect_health returns normalized local monitoring data") + health = trust_model.collect_health(project) + check("agent root detected", health["agent_root"].endswith(".agent"), health.get("agent_root", "")) + check("health score is numeric", isinstance(health["score"], int), repr(health.get("score"))) + check("episodic count is collected", health["memory"]["episodic"]["count"] == 2) + check("lesson count is collected", health["memory"]["lessons"]["accepted"] == 1) + check("candidate count is collected", health["memory"]["candidates"]["staged"] == 1) + check("rejected count is collected", health["memory"]["candidates"]["rejected"] == 1) + check("skills are loaded", health["skills"]["count"] == 2) + check("team missing is reported", health["team"]["exists"] is False) + + print("\n2. memory why resolves accepted lesson evidence") + why = trust_model.memory_why("lesson_utc", project) + check("why returns lesson", why["found"] is True) + check("why includes evidence id", "2026-05-05T10:00:00" in why["lesson"]["evidence_ids"]) + check("why includes matching evidence entry", len(why["evidence"]) == 1) + + print("\n3. verify_harnesses reports adapter conformance") + matrix = trust_model.verify_harnesses(project_root=project) + by_name = {row["harness"]: row for row in matrix["harnesses"]} + check("claude-code installed", by_name["claude-code"]["installed"]["status"] == "pass") + check("claude-code recall pass", by_name["claude-code"]["recall"]["status"] == "pass") + check("openclaw missing recall warning", by_name["openclaw"]["recall"]["status"] == "warn") + check( + "opencode partial install reports fail", + by_name["opencode"]["installed"]["status"] == "fail" + and "opencode.json" in by_name["opencode"]["installed"]["detail"], + by_name["opencode"]["installed"], + ) + check("hermes single-file install passes", by_name["hermes"]["installed"]["status"] == "pass") + + print("\n4. 
team init creates explicit team brain files") + init = trust_model.team_init(project) + check("team init creates files", init["created"] >= 5) + team = trust_model.team_status(project) + check("team status exists after init", team["exists"] is True) + check("team conventions present", "CONVENTIONS.md" in team["files"]) + + print("\n5. CLI returns JSON diagnostics") + proc = subprocess.run( + [sys.executable, os.path.join(HERE, "agentic_stack_cli.py"), "doctor", "--json", "--project", project], + capture_output=True, + text=True, + cwd=HERE, + ) + check("doctor --json exits 0", proc.returncode == 0, proc.stderr) + try: + payload = json.loads(proc.stdout) + except json.JSONDecodeError as exc: + payload = {} + check("doctor --json emits parseable JSON", False, str(exc)) + else: + check("doctor --json includes score", isinstance(payload.get("score"), int), proc.stdout[:200]) + + proc = subprocess.run( + [sys.executable, os.path.join(HERE, "agentic_stack_cli.py"), "verify", "--all", "--json", "--project", project], + capture_output=True, + text=True, + cwd=HERE, + ) + check("verify --all --json exits 0", proc.returncode == 0, proc.stderr) + try: + payload = json.loads(proc.stdout) + except json.JSONDecodeError as exc: + check("verify --json emits parseable JSON", False, str(exc)) + else: + check("verify --json includes harnesses", len(payload.get("harnesses", [])) >= 3) + + broken = tempfile.mkdtemp(prefix="agentic-stack-broken-") + proc = subprocess.run( + [sys.executable, os.path.join(HERE, "agentic_stack_cli.py"), "doctor", "--json", "--project", broken], + capture_output=True, + text=True, + cwd=HERE, + ) + check("doctor --json exits 1 on missing .agent", proc.returncode == 1, f"rc={proc.returncode}") + try: + payload = json.loads(proc.stdout) + check("doctor --json broken still emits parseable payload", isinstance(payload.get("score"), int)) + except json.JSONDecodeError as exc: + check("doctor --json broken emits parseable JSON", False, str(exc)) + + print("\n6. 
TUI tolerates terminals without cursor visibility support") + import trust_tui + + class FakeCurses: + class error(Exception): + pass + + @staticmethod + def curs_set(_visibility): + raise FakeCurses.error("unsupported terminal") + + try: + trust_tui._safe_curs_set(FakeCurses, 0) + except Exception as exc: + check("safe cursor update ignores curses errors", False, str(exc)) + else: + check("safe cursor update ignores curses errors", True) + + print("\n7. Glyph fallback adapts to stdout encoding") + + class _FakeStream: + def __init__(self, encoding): + self.encoding = encoding + + ascii_glyphs = trust_tui._select_glyphs(stream=_FakeStream("ascii")) + utf8_glyphs = trust_tui._select_glyphs(stream=_FakeStream("utf-8")) + none_glyphs = trust_tui._select_glyphs(stream=_FakeStream(None)) + check("ASCII encoding falls back to ASCII glyphs", ascii_glyphs == trust_tui._STATUS_GLYPH_ASCII) + check("UTF-8 encoding uses Unicode glyphs", utf8_glyphs == trust_tui._STATUS_GLYPH_UNICODE) + check("None encoding falls back to ASCII glyphs", none_glyphs == trust_tui._STATUS_GLYPH_ASCII) + + if failures: + print(f"\n{len(failures)} failure(s)") + return 1 + print("\nall pass") + return 0 + + +if __name__ == "__main__": + sys.exit(main())