From 6fbf18239d542623e44b760500ec1884ab969cfa Mon Sep 17 00:00:00 2001
From: burrows99 <raunakburrows@gmail.com>
Date: Fri, 19 Jun 2026 08:27:15 +0100
Subject: [PATCH] fix(trace): make empty/aborted runs fail loudly instead of
 silently "running"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A chrome run that captured nothing and recorded nothing was reported as
success (ok:true, exit 0) and could sit on the dashboard's "running" badge
forever — the agent had no signal it was broken. Root cause: the failure
signals lived in stderr logs or dashboard-only state, never in the JSON
envelope the agent reads. This closes those gaps and the matching violations
of the logger's own two-channel contract (codes.ts: the stderr log and the
envelope diagnostic for one event should carry the SAME code).

- Terminal envelope on abort. If a run throws (attach failed, engine crashed,
  recording threw), DynamicCommand now emits a terminal envelope via onProgress
  — no `running` flag, ok:false, an ENGINE_FATAL diagnostic — so the collector
  resolves the session instead of leaving its initial "running" partial
  orphaned. Cli flushes that emit before exiting non-zero.

- Recording outcomes are now envelope diagnostics, not just stderr. RECORD_EMPTY
  (no frames → empty video) and RECORD (render/upload threw) were log-only, so
  "no video" was invisible to a --json reader. Both now push a warn diagnostic
  carrying the same code as the log.

- Upload failure no longer masquerades as a clean local save. S3ArtifactStore
  swallows failures and returns null, which #record could not tell apart from
  "no S3 configured". It now distinguishes the two and emits an UPLOAD
  diagnostic when a configured upload fails (link missing, local copy kept).

- ENGINE_FATAL is now logged as well as diagnosed, so it appears in both
  channels per the contract.

Adds test/dynamic-diagnostics.test.js (fake-tracer unit tests for the abort
terminal envelope, captured-fatal, and clean-empty cases). typecheck + build
clean; 44/45 tests pass (1 DB round-trip skipped).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/cli/Cli.ts                     | 28 +++++++++++-----
 src/cli/commands/DynamicCommand.ts | 54 +++++++++++++++++++++++++++---
 test/dynamic-diagnostics.test.js   | 51 ++++++++++++++++++++++++++++
 3 files changed, 119 insertions(+), 14 deletions(-)
 create mode 100644 test/dynamic-diagnostics.test.js

diff --git a/src/cli/Cli.ts b/src/cli/Cli.ts
index 3bc593e..c618c5c 100644
--- a/src/cli/Cli.ts
+++ b/src/cli/Cli.ts
@@ -128,15 +128,25 @@ export class Cli {
       ? (envelope: unknown) => { emitChain = emitChain.then(async () => { const result = await Collector.emit(collector, envelope); if (!result.ok) { emitFailureCount++; lastEmitFailure = result; } }); }
       : undefined;
 
-    const { trace } = await this.#dynamic.run({
-      target, port, launch,
-      breakpoints: options.breakpoint, exprs: options.expression,
-      steps, curl: options.curl,
-      root: options.root, maxHits: options.maxHits,
-      recordOut: options.output,
-      args: { target, ...(launch ? { launch: true } : { port }), breakpoints: options.breakpoint, ...(options.root ? { root: options.root } : {}), ...(options.maxHits ? { maxHits: options.maxHits } : {}), ...(steps.length ? { steps: steps.map(redactStep) } : {}), ...(options.curl ? { curl: options.curl } : {}) },
-      ...(emitToCollector ? { onProgress: (intermediateTrace: Trace) => emitToCollector(intermediateTrace.toJSON()) } : {}),
-    });
+    let trace: Trace;
+    try {
+      ({ trace } = await this.#dynamic.run({
+        target, port, launch,
+        breakpoints: options.breakpoint, exprs: options.expression,
+        steps, curl: options.curl,
+        root: options.root, maxHits: options.maxHits,
+        recordOut: options.output,
+        args: { target, ...(launch ? { launch: true } : { port }), breakpoints: options.breakpoint, ...(options.root ? { root: options.root } : {}), ...(options.maxHits ? { maxHits: options.maxHits } : {}), ...(steps.length ? { steps: steps.map(redactStep) } : {}), ...(options.curl ? { curl: options.curl } : {}) },
+        ...(emitToCollector ? { onProgress: (intermediateTrace: Trace) => emitToCollector(intermediateTrace.toJSON()) } : {}),
+      }));
+    } catch (error) {
+      // The run threw (attach failed, engine crashed, recording threw). It already emitted a TERMINAL envelope
+      // via onProgress that clears the dashboard's "running" session — flush the chain so that POST actually
+      // lands before we exit, then surface the failure (non-zero exit + the same ENGINE_FATAL code in the log).
+      if (emitToCollector) await emitChain;
+      log.error("dynamic trace aborted before completion", { code: Code.ENGINE_FATAL, err: error });
+      process.exit(1);
+    }
 
     // Flush the final (complete) envelope and all pending emits BEFORE rendering, so a rejected emit
     // (a 400 schema error, a 503 dead store) becomes a visible diagnostic in the printed/--json envelope
diff --git a/src/cli/commands/DynamicCommand.ts b/src/cli/commands/DynamicCommand.ts
index 3f16464..07ac940 100644
--- a/src/cli/commands/DynamicCommand.ts
+++ b/src/cli/commands/DynamicCommand.ts
@@ -79,11 +79,33 @@ export class DynamicCommand extends TraceCommand<DynamicRequest, DynamicResult>
       const trace = this.#toTrace(capture, { sessionId, args: request.args ?? {}, startedAtMs });
       if (isChrome) await this.#record(capture, trace, sessionId, request.recordOut);
       return { trace, capture };
+    } catch (error) {
+      // A throw here (attach failed, engine crashed, recording threw) would otherwise leave the initial
+      // `running` partial (emitted above) orphaned in the dashboard forever — the session never resolves.
+      // Emit a TERMINAL envelope (no `running` flag → meta.running absent; ok:false via the error diagnostic)
+      // so the dashboard flips it to failed, and surface the cause in the stderr trail with the same code.
+      log.error("trace run aborted before completion", { code: Code.ENGINE_FATAL, sessionId, err: error });
+      request.onProgress?.(this.#abortedTrace(error, context));
+      throw error;
     } finally {
       launched?.kill();
     }
   }
 
+  /** A terminal envelope for a run that threw: empty data, an ENGINE_FATAL error, and crucially NO `running`
+   *  flag, so the collector resolves the session instead of leaving its initial running partial hanging. */
+  #abortedTrace(error: unknown, context: RunCtx): Trace {
+    return this.envelope({
+      command: `run.${context.target}`,
+      data: new TraceData({ events: [] }),
+      diagnostics: [Diagnostic.error(Code.ENGINE_FATAL, String((error as Error)?.message ?? error).split("\n")[0])],
+      sessionId: context.sessionId,
+      args: context.args,
+      startedAtMs: context.startedAtMs,
+      target: new TargetReference({ kind: context.target, source: "cdp", trigger: context.trigger }),
+    });
+  }
+
   /**
    * A partial, mid-run envelope: the same shape as the finished trace but flagged `running` and carrying only
    * the events captured so far (lineage/recording/diagnostics are computed once at the end in {@link #toTrace}).
@@ -103,7 +125,10 @@ export class DynamicCommand extends TraceCommand<DynamicRequest, DynamicResult>
   #toTrace(capture: CaptureResult, context: { sessionId: string; args: Record<string, unknown>; startedAtMs: number }): Trace {
     const source = "cdp";
     const diagnostics: Diagnostic[] = [];
-    if (capture.fatal) diagnostics.push(Diagnostic.error(Code.ENGINE_FATAL, String(capture.fatal).split("\n")[0]));
+    if (capture.fatal) {
+      log.error("engine reported a fatal error", { code: Code.ENGINE_FATAL, sessionId: context.sessionId, fatal: String(capture.fatal).split("\n")[0] });
+      diagnostics.push(Diagnostic.error(Code.ENGINE_FATAL, String(capture.fatal).split("\n")[0]));
+    }
     // A failed journey step (selector not found / timed out) flips the envelope's `ok` — same gate the old
     // `journey` command applied to its exit code, now expressed as an error diagnostic.
     for (const step of capture.steps ?? []) if (!step.ok) diagnostics.push(Diagnostic.error(Code.STEP_FAILED, `#${step.sequence} ${step.step}${step.note ? " — " + step.note : ""}`));
@@ -150,13 +175,32 @@ export class DynamicCommand extends TraceCommand<DynamicRequest, DynamicResult>
     try {
       const videoOutputPath = outputPath ?? join(tmpdir(), `trace-${sessionId}.mp4`);
       const videoPath = await Recorder.renderJourney(capture.frames ?? [], capture.traced ?? [], videoOutputPath);
-      if (!videoPath) { log.warn("no frames captured — nothing to record", { code: Code.RECORD_EMPTY, sessionId }); return; }
-      const upload = this.artifacts && this.artifacts.isConfigured() ? await this.artifacts.upload(videoPath, `recordings/${sessionId}.mp4`, "video/mp4") : null;
+      if (!videoPath) {
+        // Both channels carry the same code: the stderr trail AND an envelope diagnostic, so an agent reading
+        // --json learns the chrome run produced no video (instead of inferring it from a missing `recording`).
+        log.warn("no frames captured — nothing to record", { code: Code.RECORD_EMPTY, sessionId });
+        trace.diagnostics.push(Diagnostic.warn(Code.RECORD_EMPTY, "no frames captured — the debug-replay video is empty (no breakpoint hits, or the journey produced no frames)."));
+        return;
+      }
+      const uploadConfigured = this.artifacts?.isConfigured() ?? false;
+      const upload = uploadConfigured ? await this.artifacts!.upload(videoPath, `recordings/${sessionId}.mp4`, "video/mp4") : null;
       trace.data.recording = upload ? new Recording({ url: upload.url, bytes: upload.bytes }) : new Recording({ path: videoPath });
-      if (upload) log.info("recording uploaded", { sessionId, url: upload.url, bytes: upload.bytes });
-      else log.info("recording saved locally — set S3_ENDPOINT to upload + get a link", { sessionId, path: videoPath });
+      if (upload) {
+        log.info("recording uploaded", { sessionId, url: upload.url, bytes: upload.bytes });
+      } else if (uploadConfigured) {
+        // S3 WAS configured but upload() returned null → it failed (the error was logged inside the store).
+        // The video is still saved locally, but the dashboard gets no link — surface that instead of
+        // reporting a clean local save, so "no video link" isn't silently indistinguishable from success.
+        log.warn("recording upload failed — keeping local copy", { code: Code.UPLOAD, sessionId, path: videoPath });
+        trace.diagnostics.push(Diagnostic.warn(Code.UPLOAD, `recording upload failed — video saved locally at ${videoPath}, no dashboard link (check S3_ENDPOINT / credentials).`));
+      } else {
+        log.info("recording saved locally — set S3_ENDPOINT to upload + get a link", { sessionId, path: videoPath });
+      }
     } catch (error: any) {
+      // Render or upload threw. Surface it in the envelope too (a warn — the trace data is still valid, only
+      // the replay is missing) so "no video" is never silent. Previously this was a stderr log the agent never saw.
       log.error("recording failed", { code: Code.RECORD, sessionId, err: error });
+      trace.diagnostics.push(Diagnostic.warn(Code.RECORD, `debug-replay recording failed — ${String(error?.message ?? error).split("\n")[0]}`));
     }
   }
 }
diff --git a/test/dynamic-diagnostics.test.js b/test/dynamic-diagnostics.test.js
new file mode 100644
index 0000000..dca0dc7
--- /dev/null
+++ b/test/dynamic-diagnostics.test.js
@@ -0,0 +1,51 @@
+// DynamicCommand diagnostics: a trace run must make its failures legible in the envelope (not just stderr),
+// and a thrown run must emit a TERMINAL envelope so the dashboard's "running" session resolves instead of
+// hanging forever. Injects a fake tracer so we exercise the envelope/diagnostic logic without a real CDP target.
+import "reflect-metadata";
+import { test } from "node:test";
+import assert from "node:assert/strict";
+
+import { DynamicCommand } from "../dist/cli/commands/DynamicCommand.js";
+import { TargetKind } from "../dist/domain/Target.js";
+
+const fakeTracer = (behavior) => ({
+  async traceNode() { return behavior(); },
+  async traceChrome() { return behavior(); },
+});
+
+const nodeCapture = (over = {}) => ({ target: TargetKind.Node, trigger: "curl localhost", breakpoints: [], events: [], ...over });
+
+test("a thrown run emits a TERMINAL envelope (running cleared, ENGINE_FATAL) so the dashboard resolves", async () => {
+  const seen = [];
+  const cmd = new DynamicCommand(fakeTracer(() => { throw new Error("attach failed: ECONNREFUSED"); }));
+
+  await assert.rejects(
+    cmd.run({ target: TargetKind.Node, port: 9229, onProgress: (t) => seen.push(t) }),
+    /attach failed/,
+  );
+
+  // The first envelope is the initial running partial; the last must be the terminal abort.
+  assert.ok(seen.length >= 2, "expected an initial running partial AND a terminal abort envelope");
+  assert.equal(seen[0].meta.running, true, "the first envelope is the running partial");
+  const terminal = seen[seen.length - 1];
+  assert.notEqual(terminal.meta.running, true, "the terminal envelope is NOT running — the session resolves");
+  assert.equal(terminal.ok, false, "a terminal abort is not ok");
+  assert.ok(
+    terminal.diagnostics.some((d) => d.code === "ENGINE_FATAL" && d.level === "error"),
+    "the terminal envelope carries an ENGINE_FATAL error",
+  );
+});
+
+test("a captured fatal (no throw) yields ok:false + an ENGINE_FATAL diagnostic in the envelope", async () => {
+  const cmd = new DynamicCommand(fakeTracer(() => nodeCapture({ fatal: "debugger disconnected" })));
+  const { trace } = await cmd.run({ target: TargetKind.Node, port: 9229 });
+  assert.equal(trace.ok, false);
+  assert.ok(trace.diagnostics.some((d) => d.code === "ENGINE_FATAL"));
+});
+
+test("a clean empty node trace stays ok:true and not running (no false alarms)", async () => {
+  const cmd = new DynamicCommand(fakeTracer(() => nodeCapture()));
+  const { trace } = await cmd.run({ target: TargetKind.Node, port: 9229 });
+  assert.equal(trace.ok, true);
+  assert.equal(trace.meta.running, undefined, "the final envelope is not flagged running");
+});