diff --git a/.gitignore b/.gitignore
index 95debb1775b3..0d3fa4e7e397 100644
--- a/.gitignore
+++ b/.gitignore
@@ -78,6 +78,7 @@ actions-runner/
 
 # caches
 .cache/
+__pycache__/
 .turbo/
 .parcel-cache/
 .eslintcache
diff --git a/tools/code-exec-harness/README.md b/tools/code-exec-harness/README.md
new file mode 100644
index 000000000000..22a743ff6207
--- /dev/null
+++ b/tools/code-exec-harness/README.md
@@ -0,0 +1,56 @@
+# Code Exec Harness
+
+`tools/code-exec-harness/harness.py` runs isolated `code exec --json` scenarios
+and saves compact evidence under `.tmp/code-exec-harness/`.
+
+Use it to compare Every Code behavior across prompt, memory, skill, model, and
+configuration variants without writing to real GitHub or reusing the real
+`CODE_HOME`.
+
+The harness defaults `code exec` to `danger-full-access` because external tool
+shims such as fake `gh` need to write logs and state outside the fixture
+workspace. Run it only with trusted scenarios.
+
+By default, each scenario gets an empty `CODE_HOME`. Pass `--inherit-auth` only
+when you want a live model-backed run; it copies auth files into the isolated
+run home without copying the rest of your config.
+
+`HOME`, `ZDOTDIR`, `XDG_CONFIG_HOME`, and `XDG_CACHE_HOME` are also redirected
+inside the run directory so shell startup files and home-directory tooling do
+not silently use the real user profile.
+
+## Run
+
+```sh
+python3 tools/code-exec-harness/harness.py \
+  tools/code-exec-harness/scenarios/github-plan-smoke.json \
+  --skill-root /path/to/codex-skills \
+  --inherit-auth
+```
+
+Each run writes:
+
+- `artifacts/stdout.jsonl`: raw `code exec --json` events
+- `artifacts/stderr.log`: stderr from the run
+- `artifacts/summary.json`: final answer, token usage, tool commands, fake `gh`
+  calls, fake GitHub state, and expectation failures
+- `artifacts/gh-calls.jsonl`: fake `gh` invocations when the scenario enables it
+- `artifacts/gh-state.json`: fake issue state after the run
+
+## Scenario Shape
+
+Scenarios are JSON files. Common fields:
+
+- `prompt`: prompt passed to `code exec`
+- `files`: workspace files to create before the run
+- `skill_roots`: skill roots symlinked into the isolated `CODE_HOME/skills`
+- `gh`: fake GitHub fixture; when present, the harness prepends a fake `gh` to
+  `PATH`
+- `config_toml`: isolated `CODE_HOME/config.toml` contents
+- `config_overrides`: `-c key=value` arguments passed to `code exec`
+- `inherit_auth`: copy auth files from the current `CODE_HOME` for this scenario
+- `expect`: simple assertions over the final answer, commands, fake `gh` calls,
+  and exit code
+
+The harness is intentionally black-box: the unit under test is the real `code
+exec` binary and its emitted JSONL stream.
diff --git a/tools/code-exec-harness/harness.py b/tools/code-exec-harness/harness.py
new file mode 100644
index 000000000000..cb876fa6e795
--- /dev/null
+++ b/tools/code-exec-harness/harness.py
@@ -0,0 +1,563 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import re
+import shutil
+import subprocess
+import sys
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+
+# Repository root: this file lives at tools/code-exec-harness/harness.py, so the
+# root is two levels above the directory that contains it.
+ROOT = Path(__file__).resolve().parents[2]
+# Default parent directory for per-run output (overridable with --output-root).
+DEFAULT_OUTPUT_ROOT = ROOT / ".tmp" / "code-exec-harness"
+
+
+@dataclass
+class RunPaths:
+    """Directories that make up one isolated scenario run."""
+
+    # Root of the run; the other directories are created inside it.
+    run_dir: Path
+    # Fixture workspace handed to `code exec` via `-C`.
+    workspace: Path
+    # Isolated directory exported as CODE_HOME / CODEX_HOME for the run.
+    code_home: Path
+    # Prepended to PATH; holds shims such as the fake `gh`.
+    bin_dir: Path
+    # Stand-in exported as HOME and ZDOTDIR.
+    shell_home: Path
+    # Captured output, manifest, summary, and fake `gh` fixture/log/state.
+    artifacts: Path
+
+
+class HarnessError(RuntimeError):
+    """Scenario or setup problem; `main` reports it as `harness error: ...` and returns 2."""
+
+    pass
+
+
+def read_text(path: Path) -> str:
+    with path.open("r", encoding="utf-8") as handle:
+        return handle.read()
+
+
+def put_text(path: Path, text: str) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open("w", encoding="utf-8") as handle:
+        handle.write(text)
+
+
+def load_json(path: Path) -> dict[str, Any]:
+    try:
+        return json.loads(read_text(path))
+    except json.JSONDecodeError as exc:
+        raise HarnessError(f"invalid JSON in {path}: {exc}") from exc
+
+
+def put_json(path: Path, data: Any) -> None:
+    put_text(path, json.dumps(data, indent=2, sort_keys=True) + "\n")
+
+
+def scenario_name(path: Path, scenario: dict[str, Any]) -> str:
+    raw = str(scenario.get("name") or path.stem)
+    name = re.sub(r"[^A-Za-z0-9_.-]+", "-", raw).strip("-._")
+    return name or "scenario"
+
+
+def make_paths(output_root: Path, name: str) -> RunPaths:
+    stamp = time.strftime("%Y%m%d-%H%M%S")
+    run_dir = output_root / f"{stamp}-{name}"
+    paths = RunPaths(
+        run_dir=run_dir,
+        workspace=run_dir / "workspace",
+        code_home=run_dir / "code-home",
+        bin_dir=run_dir / "bin",
+        shell_home=run_dir / "shell-home",
+        artifacts=run_dir / "artifacts",
+    )
+    for path in (paths.workspace, paths.code_home, paths.bin_dir, paths.shell_home, paths.artifacts):
+        path.mkdir(parents=True, exist_ok=True)
+    return paths
+
+
+def resolve_path(value: str, base: Path) -> Path:
+    expanded = Path(os.path.expandvars(os.path.expanduser(value)))
+    if expanded.is_absolute():
+        return expanded
+    return (base / expanded).resolve()
+
+
+def copy_or_link(src: Path, dst: Path, *, symlink: bool) -> None:
+    if dst.exists() or dst.is_symlink():
+        if dst.is_dir() and not dst.is_symlink():
+            shutil.rmtree(dst)
+        else:
+            dst.unlink()
+    dst.parent.mkdir(parents=True, exist_ok=True)
+    if symlink:
+        dst.symlink_to(src, target_is_directory=src.is_dir())
+    elif src.is_dir():
+        shutil.copytree(src, dst, symlinks=True)
+    else:
+        shutil.copy2(src, dst)
+
+
+def run_quiet(command: list[str], cwd: Path) -> None:
+    subprocess.run(command, cwd=cwd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+
+
+def materialize_workspace(scenario: dict[str, Any], paths: RunPaths) -> None:
+    files = scenario.get("files", {})
+    if not isinstance(files, dict):
+        raise HarnessError("scenario `files` must be an object mapping paths to content")
+    for relative, content in files.items():
+        destination = paths.workspace / relative
+        put_text(destination, str(content))
+
+    copies = scenario.get("copy", [])
+    if not isinstance(copies, list):
+        raise HarnessError("scenario `copy` must be a list")
+    for entry in copies:
+        if not isinstance(entry, dict) or "source" not in entry or "target" not in entry:
+            raise HarnessError("each `copy` entry must contain source and target")
+        source = resolve_path(str(entry["source"]), ROOT)
+        target = paths.workspace / str(entry["target"])
+        copy_or_link(source, target, symlink=False)
+
+    if scenario.get("git_init", True):
+        run_quiet(["git", "init", "-q"], cwd=paths.workspace)
+        run_quiet(["git", "config", "user.email", "harness@example.invalid"], cwd=paths.workspace)
+        run_quiet(["git", "config", "user.name", "Code Exec Harness"], cwd=paths.workspace)
+        run_quiet(["git", "add", "."], cwd=paths.workspace)
+        run_quiet(["git", "commit", "-q", "--allow-empty", "-m", "Initial fixture"], cwd=paths.workspace)
+
+
+def materialize_skills(scenario: dict[str, Any], paths: RunPaths, scenario_dir: Path, extra_roots: list[Path]) -> None:
+    skills_dir = paths.code_home / "skills"
+    skills_dir.mkdir(parents=True, exist_ok=True)
+    roots: list[Path] = []
+    for value in scenario.get("skill_roots", []):
+        roots.append(resolve_path(str(value), scenario_dir))
+    roots.extend(extra_roots)
+
+    for root in roots:
+        if not root.exists():
+            raise HarnessError(f"skill root does not exist: {root}")
+        if (root / "SKILL.md").is_file():
+            copy_or_link(root, skills_dir / root.name, symlink=True)
+            continue
+        for child in sorted(root.iterdir()):
+            if child.is_dir() and (child / "SKILL.md").is_file():
+                copy_or_link(child, skills_dir / child.name, symlink=True)
+
+
+def write_config(scenario: dict[str, Any], paths: RunPaths) -> None:
+    config = str(scenario.get("config_toml", "")).strip()
+    if config:
+        put_text(paths.code_home / "config.toml", config + "\n")
+
+
+def inherit_auth(paths: RunPaths) -> None:
+    source_home = Path(os.environ.get("CODE_HOME") or os.environ.get("CODEX_HOME") or Path.home() / ".code")
+    for name in ("auth.json", ".credentials.json"):
+        source = source_home / name
+        if source.is_file():
+            shutil.copy2(source, paths.code_home / name)
+
+
+# Source text of the fake `gh` executable that `write_fake_gh` installs into the
+# run's bin directory. The script:
+#   * appends every invocation (argv, cwd, time) to $CODE_EXEC_HARNESS_GH_LOG;
+#   * answers from the fixture's `responses` first, matching the space-joined
+#     argv with `exact` / `prefix` / `contains` / `regex`;
+#   * otherwise emulates `repo view`, `issue list|create|edit|view|comment`, and
+#     `api .../issues/<n>/sub_issues`, persisting issue state to
+#     $CODE_EXEC_HARNESS_GH_STATE;
+#   * finally falls back to the fixture's `default_response`, else exits 1.
+# The whole literal is runtime text: any edit inside it changes the shim.
+FAKE_GH = r'''#!/usr/bin/env python3
+import json
+import os
+import re
+import sys
+import time
+from pathlib import Path
+
+fixture_path = Path(os.environ["CODE_EXEC_HARNESS_GH_FIXTURE"])
+log_path = Path(os.environ["CODE_EXEC_HARNESS_GH_LOG"])
+state_path = Path(os.environ["CODE_EXEC_HARNESS_GH_STATE"])
+
+def slurp(path):
+    with path.open("r", encoding="utf-8") as handle:
+        return handle.read()
+
+def put(path, text):
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open("w", encoding="utf-8") as handle:
+        handle.write(text)
+
+fixture = json.loads(slurp(fixture_path)) if fixture_path.exists() else {}
+args = sys.argv[1:]
+argv_text = " ".join(args)
+log_path.parent.mkdir(parents=True, exist_ok=True)
+with log_path.open("a", encoding="utf-8") as log:
+    log.write(json.dumps({"argv": args, "cwd": os.getcwd(), "time": time.time()}) + "\n")
+
+def load_state():
+    if state_path.exists():
+        return json.loads(slurp(state_path))
+    issues = {str(issue.get("number")): issue for issue in fixture.get("issues", [])}
+    next_issue = int(fixture.get("next_issue", 1000))
+    return {"issues": issues, "next_issue": next_issue, "links": []}
+
+def save_state(state):
+    put(state_path, json.dumps(state, indent=2, sort_keys=True) + "\n")
+
+def finish(stdout="", stderr="", exit_code=0):
+    if stdout:
+        print(stdout)
+    if stderr:
+        print(stderr, file=sys.stderr)
+    raise SystemExit(exit_code)
+
+for response in fixture.get("responses", []):
+    match = response.get("match", {})
+    matched = True
+    if "exact" in match:
+        matched = argv_text == match["exact"]
+    if matched and "prefix" in match:
+        matched = argv_text.startswith(match["prefix"])
+    if matched and "contains" in match:
+        contains = match["contains"]
+        if isinstance(contains, str):
+            contains = [contains]
+        matched = all(item in argv_text for item in contains)
+    if matched and "regex" in match:
+        matched = re.search(match["regex"], argv_text) is not None
+    if matched:
+        finish(response.get("stdout", ""), response.get("stderr", ""), int(response.get("exit_code", 0)))
+
+repo = fixture.get("repo", "owner/repo")
+state = load_state()
+
+if args[:2] == ["repo", "view"]:
+    finish(json.dumps({"nameWithOwner": repo, "defaultBranchRef": {"name": fixture.get("default_branch", "main")}}))
+
+if args[:2] == ["issue", "list"]:
+    issues = list(state["issues"].values())
+    if "--json" in args:
+        finish(json.dumps(issues))
+    finish("\n".join(f"{issue.get('number')}\t{issue.get('state', 'OPEN')}\t{issue.get('title', '')}" for issue in issues))
+
+if args[:2] == ["issue", "create"]:
+    number = int(state["next_issue"])
+    state["next_issue"] = number + 1
+    title = "Untitled"
+    body = ""
+    labels = []
+    for index, arg in enumerate(args):
+        if arg == "--title" and index + 1 < len(args):
+            title = args[index + 1]
+        elif arg == "--body" and index + 1 < len(args):
+            body = args[index + 1]
+        elif arg == "--body-file" and index + 1 < len(args):
+            path = args[index + 1]
+            if path == "-":
+                body = sys.stdin.read()
+            else:
+                body = slurp(Path(path))
+        elif arg == "--label" and index + 1 < len(args):
+            labels.extend(part.strip() for part in args[index + 1].split(",") if part.strip())
+    issue = {
+        "number": number,
+        "title": title,
+        "body": body,
+        "labels": [{"name": label} for label in labels],
+        "state": "OPEN",
+        "url": f"https://github.com/{repo}/issues/{number}",
+        "subIssues": [],
+    }
+    state["issues"][str(number)] = issue
+    save_state(state)
+    if "--json" in args:
+        finish(json.dumps(issue))
+    finish(issue["url"])
+
+if args[:2] == ["issue", "edit"] and len(args) >= 3:
+    number = args[2].lstrip("#")
+    issue = state["issues"].setdefault(number, {"number": int(number), "title": "", "state": "OPEN", "subIssues": []})
+    for index, arg in enumerate(args):
+        if arg == "--title" and index + 1 < len(args):
+            issue["title"] = args[index + 1]
+        elif arg == "--body" and index + 1 < len(args):
+            issue["body"] = args[index + 1]
+        elif arg == "--body-file" and index + 1 < len(args):
+            path = args[index + 1]
+            issue["body"] = sys.stdin.read() if path == "-" else slurp(Path(path))
+    if "--add-sub-issue" in args:
+        for index, arg in enumerate(args):
+            if arg == "--add-sub-issue" and index + 1 < len(args):
+                child = args[index + 1].lstrip("#")
+                issue.setdefault("subIssues", []).append({"number": int(child)})
+                state.setdefault("links", []).append({"type": "subissue", "parent": int(number), "child": int(child)})
+    save_state(state)
+    finish(issue.get("url", f"https://github.com/{repo}/issues/{number}"))
+
+if args and args[0] == "api":
+    joined = " ".join(args)
+    match = re.search(r"repos/([^/]+)/([^/]+)/issues/(\d+)/sub_issues", joined)
+    child = None
+    for index, arg in enumerate(args):
+        if arg in {"-F", "--field"} and index + 1 < len(args) and args[index + 1].startswith("sub_issue_id="):
+            child = args[index + 1].split("=", 1)[1]
+    if match and child:
+        parent = match.group(3)
+        issue = state["issues"].setdefault(parent, {"number": int(parent), "title": "", "state": "OPEN", "subIssues": []})
+        issue.setdefault("subIssues", []).append({"number": int(child)})
+        state.setdefault("links", []).append({"type": "subissue", "parent": int(parent), "child": int(child)})
+        save_state(state)
+        finish(json.dumps({"parent": int(parent), "child": int(child)}))
+
+if args[:2] == ["issue", "view"] and len(args) >= 3:
+    number = args[2].lstrip("#")
+    issue = state["issues"].get(number)
+    if not issue:
+        finish(stderr=f"issue not found: {number}", exit_code=1)
+    finish(json.dumps(issue) if "--json" in args else issue.get("body", issue.get("title", "")))
+
+if args[:2] == ["issue", "comment"] and len(args) >= 3:
+    finish(f"https://github.com/{repo}/issues/{args[2].lstrip('#')}#issuecomment-1")
+
+default = fixture.get("default_response")
+if default:
+    finish(default.get("stdout", ""), default.get("stderr", ""), int(default.get("exit_code", 0)))
+finish(stderr=f"fake gh has no response for: {argv_text}", exit_code=1)
+'''
+
+
+def write_fake_gh(scenario: dict[str, Any], paths: RunPaths) -> dict[str, Path] | None:
+    gh_fixture = scenario.get("gh")
+    if gh_fixture is None:
+        return None
+    fixture_path = paths.artifacts / "gh-fixture.json"
+    log_path = paths.artifacts / "gh-calls.jsonl"
+    state_path = paths.artifacts / "gh-state.json"
+    put_json(fixture_path, gh_fixture)
+    shim = paths.bin_dir / "gh"
+    put_text(shim, FAKE_GH)
+    shim.chmod(0o755)
+    put_text(paths.shell_home / ".zshenv", f"gh() {{ {shim} \"$@\"; }}\n")
+    return {"fixture": fixture_path, "log": log_path, "state": state_path}
+
+
+def build_command(scenario: dict[str, Any], args: argparse.Namespace, paths: RunPaths) -> list[str]:
+    code_bin_value = args.code_bin or shutil.which("code")
+    if not code_bin_value:
+        raise HarnessError("could not find `code`; pass --code-bin")
+    code_bin = Path(code_bin_value)
+    command = [str(code_bin), "exec", "--json", "--skip-git-repo-check"]
+    max_seconds = scenario.get("max_seconds", args.max_seconds)
+    if max_seconds:
+        command.extend(["--max-seconds", str(max_seconds)])
+    command.extend(["-C", str(paths.workspace)])
+    if scenario.get("include_plan_tool", False):
+        command.append("--include-plan-tool")
+    if scenario.get("auto", False):
+        command.append("--auto")
+    if scenario.get("auto_review", False):
+        command.append("--auto-review")
+    model = scenario.get("model") or args.model
+    if model:
+        command.extend(["-m", str(model)])
+    sandbox = scenario.get("sandbox") or args.sandbox
+    if sandbox:
+        command.extend(["--sandbox", str(sandbox)])
+    for override in scenario.get("config_overrides", []):
+        command.extend(["-c", str(override)])
+    command.append(str(scenario.get("prompt", "")))
+    return command
+
+
+def run_exec(command: list[str], scenario: dict[str, Any], paths: RunPaths, env: dict[str, str]) -> tuple[int, list[dict[str, Any]]]:
+    timeout = int(scenario.get("timeout_seconds", 180))
+    proc = subprocess.Popen(command, cwd=paths.workspace, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+    try:
+        stdout, stderr = proc.communicate(timeout=timeout)
+    except subprocess.TimeoutExpired:
+        proc.kill()
+        stdout, stderr = proc.communicate()
+        stderr = (stderr or "") + f"\nHARNESS TIMEOUT after {timeout}s\n"
+    put_text(paths.artifacts / "stdout.jsonl", stdout or "")
+    put_text(paths.artifacts / "stderr.log", stderr or "")
+
+    events: list[dict[str, Any]] = []
+    for line_number, line in enumerate((stdout or "").splitlines(), start=1):
+        if not line.strip():
+            continue
+        try:
+            events.append(json.loads(line))
+        except json.JSONDecodeError as exc:
+            events.append({"type": "harness.invalid_json", "line": line_number, "text": line, "error": str(exc)})
+    return proc.returncode, events
+
+
+def summarize(events: list[dict[str, Any]], paths: RunPaths, returncode: int, command: list[str]) -> dict[str, Any]:
+    final_message = None
+    commands = []
+    running_commands: dict[str, str] = {}
+    file_changes = []
+    usage = None
+    thread_id = None
+    errors = []
+    for event in events:
+        event_type = event.get("type")
+        raw_msg = event.get("msg")
+        msg: dict[str, Any] = raw_msg if isinstance(raw_msg, dict) else {}
+        msg_type = msg.get("type")
+        if event_type == "thread.started":
+            thread_id = event.get("thread_id")
+        elif event_type == "turn.completed":
+            usage = event.get("usage")
+        elif msg_type == "session_configured":
+            thread_id = msg.get("session_id") or thread_id
+        elif msg_type == "token_count":
+            usage = (msg.get("info") or {}).get("total_token_usage") or usage
+        elif msg_type == "agent_message":
+            final_message = msg.get("message")
+        elif msg_type == "exec_command_begin":
+            call_id = msg.get("call_id")
+            if isinstance(call_id, str):
+                raw_command = msg.get("command", [])
+                running_commands[call_id] = " ".join(raw_command) if isinstance(raw_command, list) else str(raw_command)
+        elif msg_type == "exec_command_end":
+            call_id = msg.get("call_id")
+            commands.append({
+                "command": running_commands.pop(call_id, None) if isinstance(call_id, str) else None,
+                "exit_code": msg.get("exit_code"),
+                "status": "completed" if msg.get("exit_code") == 0 else "failed",
+                "stdout": msg.get("stdout"),
+                "stderr": msg.get("stderr"),
+            })
+        elif event_type in {"error", "turn.failed"}:
+            errors.append(event)
+        item = event.get("item") or {}
+        item_type = item.get("type")
+        if event_type == "item.completed" and item_type == "agent_message":
+            final_message = item.get("text")
+        elif event_type == "item.completed" and item_type == "command_execution":
+            commands.append({"command": item.get("command"), "exit_code": item.get("exit_code"), "status": item.get("status")})
+        elif event_type == "item.completed" and item_type == "file_change":
+            file_changes.append(item)
+
+    gh_calls = []
+    gh_log = paths.artifacts / "gh-calls.jsonl"
+    if gh_log.exists():
+        for line in read_text(gh_log).splitlines():
+            gh_calls.append(json.loads(line))
+    gh_state = None
+    gh_state_path = paths.artifacts / "gh-state.json"
+    if gh_state_path.exists():
+        gh_state = load_json(gh_state_path)
+
+    return {
+        "returncode": returncode,
+        "thread_id": thread_id,
+        "usage": usage,
+        "event_count": len(events),
+        "final_message": final_message,
+        "commands": commands,
+        "file_changes": file_changes,
+        "errors": errors,
+        "gh_calls": gh_calls,
+        "gh_state": gh_state,
+        "command": command,
+        "run_dir": str(paths.run_dir),
+    }
+
+
+def assert_expectations(summary: dict[str, Any], scenario: dict[str, Any]) -> list[str]:
+    failures: list[str] = []
+    expect = scenario.get("expect", {})
+    final_message = summary.get("final_message") or ""
+    for needle in expect.get("assistant_contains", []):
+        if str(needle) not in final_message:
+            failures.append(f"assistant message did not contain {needle!r}")
+    for needle in expect.get("command_contains", []):
+        if not any(str(needle) in str(command.get("command")) for command in summary.get("commands", [])):
+            failures.append(f"no completed command contained {needle!r}")
+    for needle in expect.get("gh_contains", []):
+        text = "\n".join(" ".join(call.get("argv", [])) for call in summary.get("gh_calls", []))
+        if str(needle) not in text:
+            failures.append(f"no fake gh call contained {needle!r}")
+    if "returncode" in expect and int(expect["returncode"]) != int(summary.get("returncode", -1)):
+        failures.append(f"returncode expected {expect['returncode']}, got {summary.get('returncode')}")
+    return failures
+
+
+def run_scenario(path: Path, args: argparse.Namespace) -> int:
+    scenario = load_json(path)
+    name = scenario_name(path, scenario)
+    paths = make_paths(Path(args.output_root), name)
+    scenario_dir = path.parent.resolve()
+    extra_roots = [resolve_path(value, Path.cwd()) for value in args.skill_root]
+
+    materialize_workspace(scenario, paths)
+    materialize_skills(scenario, paths, scenario_dir, extra_roots)
+    write_config(scenario, paths)
+    if args.inherit_auth or scenario.get("inherit_auth", False):
+        inherit_auth(paths)
+    gh_paths = write_fake_gh(scenario, paths)
+    command = build_command(scenario, args, paths)
+
+    env = os.environ.copy()
+    env.update({
+        "CODE_HOME": str(paths.code_home),
+        "CODEX_HOME": str(paths.code_home),
+        "CODEX_SQLITE_HOME": str(paths.code_home),
+        "HOME": str(paths.shell_home),
+        "PATH": f"{paths.bin_dir}{os.pathsep}{env.get('PATH', '')}",
+        "XDG_CACHE_HOME": str(paths.shell_home / ".cache"),
+        "XDG_CONFIG_HOME": str(paths.shell_home / ".config"),
+        "ZDOTDIR": str(paths.shell_home),
+    })
+    for key, value in scenario.get("env", {}).items():
+        env[str(key)] = str(value)
+    if gh_paths:
+        env["CODE_EXEC_HARNESS_GH_FIXTURE"] = str(gh_paths["fixture"])
+        env["CODE_EXEC_HARNESS_GH_LOG"] = str(gh_paths["log"])
+        env["CODE_EXEC_HARNESS_GH_STATE"] = str(gh_paths["state"])
+
+    put_json(paths.artifacts / "manifest.json", {
+        "scenario": str(path),
+        "command": command,
+        "code_home": str(paths.code_home),
+        "workspace": str(paths.workspace),
+    })
+    if args.dry_run:
+        print(paths.run_dir)
+        return 0
+
+    returncode, events = run_exec(command, scenario, paths, env)
+    summary = summarize(events, paths, returncode, command)
+    failures = assert_expectations(summary, scenario)
+    summary["expectation_failures"] = failures
+    put_json(paths.artifacts / "summary.json", summary)
+    print(json.dumps({"scenario": name, "run_dir": str(paths.run_dir), "returncode": returncode, "failures": failures}, sort_keys=True))
+    return 1 if failures else returncode
+
+
+def parse_args(argv: list[str]) -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Run isolated Every Code `code exec --json` scenarios.")
+    parser.add_argument("scenario", nargs="+", type=Path, help="Scenario JSON file(s).")
+    parser.add_argument("--output-root", default=str(DEFAULT_OUTPUT_ROOT), help="Directory for run artifacts.")
+    parser.add_argument("--code-bin", default="", help="Path to the code binary. Defaults to PATH lookup.")
+    parser.add_argument("--model", default="", help="Default model override for scenarios without `model`.")
+    parser.add_argument("--sandbox", default="danger-full-access", help="Default code exec sandbox mode.")
+    parser.add_argument("--max-seconds", type=int, default=90, help="Default code exec --max-seconds value.")
+    parser.add_argument("--skill-root", action="append", default=[], help="Additional skill root to expose under CODE_HOME/skills.")
+    parser.add_argument("--inherit-auth", action="store_true", help="Copy auth files from the current CODE_HOME into the isolated run home.")
+    parser.add_argument("--dry-run", action="store_true", help="Materialize the run directory and command without invoking code exec.")
+    return parser.parse_args(argv)
+
+
+def main(argv: list[str]) -> int:
+    args = parse_args(argv)
+    exit_code = 0
+    try:
+        for scenario in args.scenario:
+            result = run_scenario(scenario.resolve(), args)
+            exit_code = exit_code or result
+    except HarnessError as exc:
+        print(f"harness error: {exc}", file=sys.stderr)
+        return 2
+    return exit_code
+
+
+if __name__ == "__main__":
+    raise SystemExit(main(sys.argv[1:]))
diff --git a/tools/code-exec-harness/scenarios/github-plan-smoke.json b/tools/code-exec-harness/scenarios/github-plan-smoke.json
new file mode 100644
index 000000000000..888ca67d11ec
--- /dev/null
+++ b/tools/code-exec-harness/scenarios/github-plan-smoke.json
@@ -0,0 +1,26 @@
+{
+  "name": "github-plan-smoke",
+  "prompt": "Use the available GitHub planning skill. First run `gh issue list --json number,title,state`. Then explain whether a broad Every Code efficiency workstream should become one issue or a parent issue with subissues. Do not modify files.",
+  "files": {
+    "README.md": "# Harness fixture\n"
+  },
+  "gh": {
+    "repo": "cbusillo/code",
+    "next_issue": 1001,
+    "issues": [
+      {
+        "number": 900,
+        "title": "Unrelated maintenance",
+        "state": "OPEN",
+        "url": "https://github.com/cbusillo/code/issues/900"
+      }
+    ]
+  },
+  "expect": {
+    "returncode": 0,
+    "gh_contains": ["issue list"],
+    "assistant_contains": ["parent"]
+  },
+  "max_seconds": 90,
+  "timeout_seconds": 150
+}