Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#
# Bringing the stack up runs the whole end-to-end demo too (Node + React + Chrome targets, all in
# one demo container), no host setup — `docker compose up --build` starts everything; `docker compose down` stops it all:
# docker compose up --build # then watch all 3 traces (incl. the React video) at :4747
# docker compose up --build # then watch all 3 traces (incl. the React video) at :14747
services:
dashboard:
build: .
Expand Down
22 changes: 18 additions & 4 deletions src/cli/Cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import { ManifestCommand } from "./commands/ManifestCommand.js";
import { SchemaCommand } from "./commands/SchemaCommand.js";
import { ServeCommand } from "./commands/ServeCommand.js";
import { Tracer } from "../engine/Tracer.js";
import { Collector } from "../collector/Collector.js";
import { Collector, type EmitResult } from "../collector/Collector.js";
import { S3ArtifactStore } from "../storage/S3ArtifactStore.js";
import { VERSION } from "../shared/version.js";
import { TargetKind } from "../domain/Target.js";
Expand Down Expand Up @@ -120,7 +120,10 @@ export class Cli {
// dashboard updates live as it runs.
const collector = await Collector.resolve(options.emit);
let emitChain: Promise<unknown> = Promise.resolve();
const emitToCollector = collector ? (envelope: unknown) => { emitChain = emitChain.then(() => Collector.emit(collector, envelope).catch(() => false)); } : undefined;
const emitFailures: EmitResult[] = [];
const emitToCollector = collector
? (envelope: unknown) => { emitChain = emitChain.then(async () => { const result = await Collector.emit(collector, envelope); if (!result.ok) emitFailures.push(result); }); }
: undefined;

const { trace } = await this.#dynamic.run({
Comment on lines +124 to 128
target, port, launch,
Expand All @@ -132,8 +135,19 @@ export class Cli {
...(emitToCollector ? { onProgress: (intermediateTrace: Trace) => emitToCollector(intermediateTrace.toJSON()) } : {}),
});

// Flush the final (complete) envelope and all pending emits BEFORE rendering, so a rejected emit
// (a 400 schema error, a 503 dead store) becomes a visible diagnostic in the printed/--json envelope
// instead of vanishing into an info log — the gap that sent a debugging loop chasing the wrong cause.
if (emitToCollector) {
emitToCollector(trace.toJSON());
await emitChain;
if (emitFailures.length) {
const last = emitFailures[emitFailures.length - 1];
const reason = last.status ? `HTTP ${last.status}${last.body ? ` — ${last.body.slice(0, 200)}` : ""}` : (last.error ?? "unknown error");
trace.diagnostics.push(Diagnostic.warn(Code.EMIT, `collector ${collector} rejected ${emitFailures.length} emit(s): ${reason}`));
}
}
emit(trace, () => this.#dynamic.render(trace), options);
if (emitToCollector) { emitToCollector(trace.toJSON()); await emitChain; } // final, complete envelope; then flush all pending emits
process.exit(trace.hasErrors() ? 1 : 0);
}

Expand Down Expand Up @@ -233,7 +247,7 @@ export class Cli {
.option("--output <mp4>", "chrome: output path for the screen + trace-panel recording (default: a temp file)")
.option("--emit <url>", "stream the trace to a collector (POST /v1/traces) — the session appears live as it runs (default: env TRACE_COLLECTOR_URL, else a locally-running collector is auto-detected)")
.option("--json [path]", "envelope as JSON: to a file if a path is given, else to stdout")
.option("--concise", "trim the --json envelope for token-tight agent reads: per hit, locals collapse to key names and the call stack keeps its top 2 frames (watched --expression values, location & timing kept). Re-run --detailed for everything.")
.option("--concise", "trim the PRINTED --json envelope (stdout/file) for token-tight agent reads: per hit, locals collapse to key names and the call stack keeps its top 2 frames (watched --expression values, location & timing kept). Does NOT affect --emit — the collector always receives the full envelope. Re-run --detailed for everything.")
.option("--detailed", "full --json envelope: every local's value and the complete call stack at each hit (the default)")
.action((options) => this.#runDynamic(options));

Expand Down
7 changes: 7 additions & 0 deletions src/cli/commands/DynamicCommand.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,13 @@ export class DynamicCommand extends TraceCommand<DynamicRequest, DynamicResult>
for (const breakpoint of capture.breakpoints.filter((breakpoint) => !breakpoint.bound)) {
diagnostics.push(Diagnostic.warn(Code.BP_UNBOUND, `${breakpoint.file}:${breakpoint.line} did not bind${breakpoint.note ? " — " + breakpoint.note : ""}`));
}
// Bound-but-unhit: the breakpoint attached but no event fired. Without this an agent reading `--json`
// sees only `bound:true, events:[]` with no diagnostic and has to guess "no trigger" vs "wrong line" —
// exactly the fork that stalls a debugging loop. Mirror the human renderer's "no breakpoints hit" line.
const boundCount = capture.breakpoints.filter((breakpoint) => breakpoint.bound).length;
if (boundCount > 0 && capture.events.length === 0) {
diagnostics.push(Diagnostic.warn(Code.BP_BOUND_UNHIT, `${boundCount} breakpoint(s) bound but never hit — the trigger may not have exercised this path (wrong route/branch, or the trigger didn't run).`));
}
const lineage = LineageAnalyzer.compute(capture.events);
const data = new TraceData({
breakpoints: capture.breakpoints,
Expand Down
6 changes: 5 additions & 1 deletion src/cli/commands/ServeCommand.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@ export class ServeCommand extends CliCommand<ServeOptions, void> {
throw new Error(`dashboard not built — run \`npm run build\` (expected ${SERVER_ENTRY}).`);
}

log.info("starting dashboard", { url: `http://localhost:${port}`, host });
// Print the exact emit URL to copy. Auto-discovery only probes the well-known ports (14747 / the 4000
// default); on any other --port a run must point at this URL explicitly, so don't make the user guess it.
const collectorUrl = `http://localhost:${port}`;
log.info("starting dashboard", { url: collectorUrl, host });
log.info("stream traces here", { hint: `trace dynamic … --emit ${collectorUrl} (or: export TRACE_COLLECTOR_URL=${collectorUrl})` });
Comment on lines +40 to +42
const child = spawn(process.execPath, [SERVER_ENTRY], {
stdio: "inherit",
env: { ...process.env, PORT: String(port), HOSTNAME: host, DATABASE_URL: databaseUrl },
Expand Down
22 changes: 18 additions & 4 deletions src/collector/Collector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ import { Code } from "../shared/codes.js";
const log = logger.child({ component: "collector" });
const CT_JSON = "application/json";

/**
* The outcome of one emit. `ok` mirrors the old boolean; `status`/`body` (on an HTTP rejection) and `error`
* (on a network/throw failure) let the caller surface *why* an emit failed — into the envelope's diagnostics
* and the logs — instead of silently dropping it (a 400 used to log at `info` and the bool was swallowed).
*/
export interface EmitResult { ok: boolean; status?: number; body?: string; error?: string }

/** Well-known local collector URLs probed for auto-discovery, in priority order: the docker/dashboard port
* (14747, the compose-published host port from README/compose/scenarios), then the native `trace serve` default (4000). */
const DEFAULT_CANDIDATES = ["http://localhost:14747", "http://localhost:4000"];
Expand All @@ -19,15 +26,22 @@ const PROBE_TIMEOUT_MS = 500;
*/
export class Collector {
/** POST an envelope to a collector's /v1/traces (used when TRACE_COLLECTOR_URL / --emit is set). */
static async emit(url: string, envelope: unknown): Promise<boolean> {
static async emit(url: string, envelope: unknown): Promise<EmitResult> {
const endpoint = url.replace(/\/+$/, "") + "/v1/traces";
try {
const response = await fetch(endpoint, { method: "POST", headers: { "content-type": CT_JSON }, body: JSON.stringify(envelope) });
log.info("emitted envelope", { endpoint, status: response.status });
return response.ok;
if (response.ok) {
log.info("emitted envelope", { endpoint, status: response.status });
return { ok: true, status: response.status };
}
// A rejection is a real failure — log it at error with the collector's reason (e.g. "invalid envelope",
// "trace store unavailable"), not at info. The caller folds this into the envelope's diagnostics.
const body = await response.text().catch(() => "");
log.error("emit rejected", { code: Code.EMIT, endpoint, status: response.status, body: body.slice(0, 500) });
return { ok: false, status: response.status, body };
Comment on lines +37 to +41
} catch (error: any) {
log.error("emit failed", { code: Code.EMIT, endpoint, err: error });
return false;
return { ok: false, error: String(error?.message ?? error) };
}
}

Expand Down
1 change: 1 addition & 0 deletions src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ export type { StoreOptions } from "./collector/createSessionStore.js";
export { summarize } from "./collector/SessionStore.js";
export type { SessionStore, SessionSummary, EnvelopePlain } from "./collector/SessionStore.js";
export { Trace } from "./domain/Trace.js";
export { Code } from "./shared/codes.js"; // shared issue-code vocabulary, so ingest responses carry the same codes as CLI diagnostics
3 changes: 3 additions & 0 deletions src/shared/codes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ export const Code = {
STEP_FAILED: "STEP_FAILED",
/** a requested breakpoint never bound at the target (bad file:line, or that line never loads) */
BP_UNBOUND: "BP_UNBOUND",
/** breakpoint(s) bound but no event fired — the trigger likely didn't exercise this path (the JSON
* counterpart of the renderer's "no breakpoints hit" line, so a `--json` reader sees the why too) */
BP_BOUND_UNHIT: "BP_BOUND_UNHIT",
/** writing the Chrome screen + trace-panel recording failed */
RECORD: "RECORD_FAILED",
/** nothing was captured to record (no frames) — usually no breakpoint hit during the journey */
Expand Down
11 changes: 11 additions & 0 deletions test/output.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import assert from "node:assert/strict";

import { condense } from "../dist/cli/Cli.js";
import { Code } from "../dist/shared/codes.js";
import { Collector } from "../dist/collector/Collector.js";

/** A trace-shaped plain envelope with two breakpoint hits (full locals + a 5-frame stack + a watched expr). */
function fullEnvelope() {
Expand Down Expand Up @@ -68,5 +69,15 @@ test("Code registry: stable, unique, greppable values shared by both channels",
// the envelope's existing diagnostic codes are part of the one vocabulary
assert.equal(Code.CODEGRAPH_FAILED, "CODEGRAPH_FAILED");
assert.equal(Code.BP_UNBOUND, "BP_UNBOUND");
assert.equal(Code.BP_BOUND_UNHIT, "BP_BOUND_UNHIT");
assert.equal(Code.SCHEMA, "E_SCHEMA");
});

test("Collector.emit: a failed POST resolves to a rich result (never throws, never a bare bool)", async () => {
// Port 1 is unbound → fetch rejects (connection refused). emit must catch it and return a structured
// result so the caller can surface *why* an emit failed instead of swallowing a `false`.
const result = await Collector.emit("http://127.0.0.1:1", { tool: "trace" });
assert.equal(result.ok, false, "a refused emit is not ok");
assert.equal(typeof result.error, "string", "the failure reason is carried back, not dropped");
assert.ok(result.error.length > 0, "error message is non-empty");
});
33 changes: 23 additions & 10 deletions ui/app/v1/traces/route.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { NextResponse } from "next/server";
import { getStore } from "@/lib/store";
import { Trace } from "trace-cli/server";
import { Trace, Code } from "trace-cli/server";

export const runtime = "nodejs";
export const dynamic = "force-dynamic";
Expand All @@ -13,29 +13,42 @@ const MAX_BYTES = 64 * 1024 * 1024;
* contract the CLI emits to (`Collector.emit` → `<url>/v1/traces`). The ingress boundary: hydrate untrusted
* JSON into a Trace and enforce the envelope contract (`validate()`) before it touches the store. Reuses the
* CLI's domain so validation + persistence are identical to a direct `trace … --emit` against the old collector.
*
* Status codes separate fault domains so a caller can tell *whose* problem it is — and never has to string-match:
* • 4xx + a `code` ⇒ the CALLER's envelope is wrong (too large / malformed JSON / schema-invalid / no sessionId)
* • 503 STORE_UNAVAILABLE ⇒ the SERVER's store (Postgres) is down or misconfigured — a transient infra fault,
* NOT a bad envelope. Previously every failure (including a dead DB) collapsed to one 400, which made an
* infra outage indistinguishable from a schema error. The internal error is logged, never returned.
*/
export async function POST(request: Request) {
const declared = Number(request.headers.get("content-length") ?? 0);
if (declared > MAX_BYTES) return NextResponse.json({ error: "envelope too large" }, { status: 413 });
if (declared > MAX_BYTES) return NextResponse.json({ error: "envelope too large", code: Code.INGEST }, { status: 413 });

let body: string;
try {
body = await request.text();
} catch (error) {
return NextResponse.json({ error: (error as Error).message }, { status: 400 });
return NextResponse.json({ error: (error as Error).message, code: Code.INGEST }, { status: 400 });
}
if (body.length > MAX_BYTES) return NextResponse.json({ error: "envelope too large" }, { status: 413 });
if (body.length > MAX_BYTES) return NextResponse.json({ error: "envelope too large", code: Code.INGEST }, { status: 413 });

// Parse + validate the untrusted envelope. Any failure here is the caller's fault → 4xx.
let envelope: Trace;
try {
const envelope = Trace.fromPlain(JSON.parse(body));
const problems = envelope.validate();
if (problems.length) return NextResponse.json({ error: "invalid envelope", problems }, { status: 400 });
envelope = Trace.fromPlain(JSON.parse(body));
} catch (error) {
return NextResponse.json({ error: (error as Error).message, code: Code.INGEST }, { status: 400 });
}
const problems = envelope.validate();
if (problems.length) return NextResponse.json({ error: "invalid envelope", code: Code.INGEST_INVALID, problems }, { status: 400 });

// Persist. A failure here is the server's fault (store/DB) → 503, with the real cause logged, not leaked.
try {
const summary = await getStore().ingest(envelope.toJSON());
if (!summary) return NextResponse.json({ error: "envelope has no meta.sessionId" }, { status: 400 });

if (!summary) return NextResponse.json({ error: "envelope has no meta.sessionId", code: Code.INGEST_NO_SESSION }, { status: 400 });
return NextResponse.json({ ok: true, sessionId: summary.sessionId });
} catch (error) {
return NextResponse.json({ error: (error as Error).message }, { status: 400 });
console.error("[collector] ingest failed — trace store unavailable", error);
return NextResponse.json({ error: "trace store unavailable", code: Code.STORE }, { status: 503 });
}
}