logic-md/packages/core/executor.ts at main · SingularityAI-Dev/logic-md · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
// =============================================================================
// LOGIC.md v1.1 - Dry-Run Executor
// =============================================================================
// Pure function executor that walks a compiled workflow without LLM calls,
// validating contracts, quality gates, and producing an execution trace.
// =============================================================================

import { compileWorkflow, estimateTokens } from "./compiler.js";
import { resolve } from "./dag.js";
import type { CompiledWorkflow, LogicSpec, RetryPolicy, Step, WorkflowContext } from "./types.js";

// =============================================================================
// Public Types
// =============================================================================

/**
 * Caller-supplied knobs for a dry-run. Every field is optional; an empty
 * object is a valid value.
 */
export interface DryRunOptions {
	/**
	 * Mock inputs keyed by step name, or a single input for the first step.
	 * NOTE(review): `dryRun` as visible in this file does not read this field — confirm intended use.
	 */
	mockInputs?: Record<string, unknown>;
	/** Stand-in results keyed by step name — the value each step is pretended to have returned */
	mockOutputs?: Record<string, unknown>;
	/** When true, mock outputs are checked against output schemas and quality gates */
	validateGates?: boolean;
}

/** What the dry-run recorded for one step of the workflow */
export interface StepTrace {
	/** Name of the step this trace belongs to */
	stepName: string;
	/** Depth of the step in the DAG (0 = root) */
	dagLevel: number;
	/** Outcome recorded for the step */
	status: "executed" | "skipped" | "failed";
	/**
	 * Length of the compiled system prompt segment, taken from its `.length`
	 * (not a token count — see `tokenEstimate` for that).
	 */
	promptSegmentLength: number;
	/** Output schema declared for the step; null when none is defined */
	outputSchema: object | null;
	/** One entry per quality gate checked for this step */
	qualityGateResults: { passed: boolean; message?: string }[];
	/** Messages describing how the output violated its contract; empty when none */
	contractViolations: string[];
	/** Retry policy configured for the step; null when none is configured */
	retryPolicy: RetryPolicy | null;
	/** Estimated token count for this step's prompt segment */
	tokenEstimate: number;
}

/** Everything a dry-run reports back: summary, per-step traces, and diagnostics */
export interface DryRunResult {
	/** True when the dry-run completed without errors or failed checks */
	ok: boolean;
	/** Name of the spec that was dry-run */
	specName: string;
	/** Number of steps in the workflow */
	totalSteps: number;
	/** Number of levels in the resolved DAG */
	totalLevels: number;
	/** Step names in flat execution order */
	executionOrder: string[];
	/** Step names grouped by DAG level */
	dagLevels: string[][];
	/** One trace per step that was walked */
	steps: StepTrace[];
	/** Results of the spec-level (global) quality gates */
	globalGateResults: { passed: boolean; message?: string }[];
	/** Non-fatal notices (missing mocks, token budget exceeded, etc.) */
	warnings: string[];
	/** Errors that made the dry-run fail */
	errors: string[];
}

// =============================================================================
// Internal Helpers
// =============================================================================

/**
 * Validate that a mock output matches the expected schema.
 *
 * Only two schema keywords are inspected:
 * - `required`: an array of property names that must be own properties of the output.
 * - `type`: a type name, or an array of type names of which at least one must match.
 *   Recognised names are the `typeof` results plus `"null"`, `"array"` and
 *   `"integer"` (a number with no fractional part).
 *
 * @param output - Mock output value to check.
 * @param schema - Schema object, or null when no schema is declared.
 * @returns Violation messages (required-field problems first, then type); empty if valid.
 */
function validateAgainstSchema(output: unknown, schema: object | null): string[] {
	const violations: string[] = [];

	if (!schema) {
		return violations; // no schema = no violations
	}

	const schemaObj = schema as Record<string, unknown>;

	// Check required fields
	if (schemaObj.required && Array.isArray(schemaObj.required)) {
		const requiredFields = schemaObj.required as string[];
		if (typeof output !== "object" || output === null) {
			violations.push(
				`Output is not an object, but schema requires fields: ${requiredFields.join(", ")}`,
			);
		} else {
			for (const field of requiredFields) {
				// Own-property check: `in` would also accept inherited names such as
				// "toString" or "constructor", which every object has via its prototype.
				if (!Object.prototype.hasOwnProperty.call(output, field)) {
					violations.push(`Missing required field: "${field}"`);
				}
			}
		}
	}

	// Check type constraint
	if (schemaObj.type) {
		let actualType: string = typeof output;
		if (output === null) actualType = "null";
		else if (Array.isArray(output)) actualType = "array";

		// `type` may be a single name or a list of alternatives.
		const expectedTypes: unknown[] = Array.isArray(schemaObj.type)
			? schemaObj.type
			: [schemaObj.type];

		// "integer" has no `typeof` counterpart, so it is matched by value.
		const matches = expectedTypes.some((expected) =>
			expected === "integer" ? Number.isInteger(output) : expected === actualType,
		);

		if (!matches) {
			violations.push(`Expected type "${expectedTypes.join(" | ")}", got "${actualType}"`);
		}
	}

	return violations;
}

/**
 * Produce one result per quality gate for a mock output.
 *
 * This is a placeholder: no gate expression is evaluated and `output` is not
 * inspected, so every gate is reported as passed. Each result carries the
 * gate's `message` through unchanged (possibly undefined).
 *
 * @param gates - Gate definitions to report on.
 * @param output - Mock output the gates would apply to (currently unused).
 * @returns Results in the same order as `gates`.
 */
function executeQualityGates(
	gates: Array<{ check: string; message?: string }>,
	output: unknown,
): Array<{ passed: boolean; message?: string }> {
	// Optimistic stand-in for a real expression evaluator run with `{ output }`
	// as context: every gate is assumed to pass.
	return gates.map(({ message }) => ({ passed: true, message }));
}

// =============================================================================
// Public API
// =============================================================================

/**
 * Execute a dry-run of a workflow, walking through steps without LLM calls.
 *
 * The spec's steps are resolved into a DAG, the workflow is compiled, and each
 * step is visited in DAG order. A step with an entry in `options.mockOutputs`
 * is traced as "executed", otherwise as "skipped". When `options.validateGates`
 * is set, mock outputs are additionally checked against the step's output
 * schema and its quality gates (the step's own verification plus the
 * spec-level `pre_output` gates); any violation marks the step "failed".
 *
 * @param spec - Workflow specification to walk.
 * @param options - Mock outputs and validation switches; defaults to `{}`.
 * @returns A trace of what would happen. Failures (DAG resolution errors,
 *   compilation errors, failed checks) are reported through `ok` / `errors`
 *   rather than thrown by this function itself.
 */
export function dryRun(spec: LogicSpec, options: DryRunOptions = {}): DryRunResult {
	type GateDefinition = { check: string; message?: string };

	// Prompt segments estimated above this many tokens produce a warning.
	const PROMPT_TOKEN_BUDGET = 2000;

	const result: DryRunResult = {
		ok: true,
		specName: spec.name,
		totalSteps: 0,
		totalLevels: 0,
		executionOrder: [],
		dagLevels: [],
		steps: [],
		globalGateResults: [],
		warnings: [],
		errors: [],
	};

	// A spec without steps is trivially fine: the zeroed result is already correct.
	if (!spec.steps || Object.keys(spec.steps).length === 0) {
		return result;
	}

	// Resolve DAG to get execution order and levels
	const dagResult = resolve(spec.steps);
	if (!dagResult.ok) {
		result.ok = false;
		for (const err of dagResult.errors) {
			result.errors.push(`${err.type}: ${err.message}`);
		}
		return result;
	}

	result.totalSteps = Object.keys(spec.steps).length;
	result.totalLevels = dagResult.levels.length;
	result.executionOrder = dagResult.order;
	result.dagLevels = dagResult.levels;

	// Create WorkflowContext for compilation
	const workflowContext: WorkflowContext = {
		currentStep: "", // will be set per step
		previousOutputs: options.mockOutputs ?? {},
		input: null,
		attemptNumber: 1,
		branchReason: null,
		previousFailureReason: null,
		totalSteps: result.totalSteps,
		completedSteps: [],
		dagLevels: dagResult.levels,
	};

	// Compile the workflow; a thrown error becomes a reported failure.
	let compiled: CompiledWorkflow;
	try {
		compiled = compileWorkflow(spec, workflowContext);
	} catch (err) {
		result.ok = false;
		result.errors.push(`Compilation failed: ${err instanceof Error ? err.message : String(err)}`);
		return result;
	}

	// Build a map of step name to CompiledStep
	const stepMap = new Map<string, (typeof compiled.steps)[0]>();
	for (const compiledStep of compiled.steps) {
		stepMap.set(compiledStep.metadata.stepName, compiledStep);
	}

	// Original step definitions, keyed by step name
	const originalStepMap = spec.steps;

	// Mock outputs stand in for what each step "returned"
	const previousOutputs: Record<string, unknown> = options.mockOutputs ?? {};

	// Spec-level gates are applied to every step and once more globally,
	// so the list is built a single time up front.
	const globalGateDefinitions: GateDefinition[] = [];
	if (spec.quality_gates?.pre_output) {
		for (const gate of spec.quality_gates.pre_output) {
			globalGateDefinitions.push({ check: gate.check, message: gate.message });
		}
	}

	// Walk through steps in DAG order
	for (const stepName of dagResult.order) {
		const compiledStep = stepMap.get(stepName);
		const originalStep = originalStepMap[stepName];

		if (!compiledStep || !originalStep) {
			result.errors.push(`Step "${stepName}" not found in compilation or spec`);
			result.ok = false;
			continue;
		}

		const dagLevel = compiledStep.metadata.dagLevel;
		const tokenEstimate = estimateTokens(compiledStep.systemPromptSegment);

		// Own-property check: `in` would also match names inherited from
		// Object.prototype (e.g. a step called "constructor" or "toString"),
		// wrongly treating such a step as having a mock output.
		const hasMockOutput = Object.prototype.hasOwnProperty.call(previousOutputs, stepName);
		const mockOutput = hasMockOutput ? previousOutputs[stepName] : undefined;

		const trace: StepTrace = {
			stepName,
			dagLevel,
			status: hasMockOutput ? "executed" : "skipped",
			promptSegmentLength: compiledStep.systemPromptSegment.length,
			outputSchema: compiledStep.outputSchema,
			qualityGateResults: [],
			contractViolations: [],
			retryPolicy: compiledStep.retryPolicy,
			tokenEstimate,
		};

		if (options.validateGates && hasMockOutput) {
			// Validate against output schema
			const violations = validateAgainstSchema(mockOutput, compiledStep.outputSchema);
			trace.contractViolations = violations;

			// Step-specific verification first, then the spec-level gates
			const gateDefinitions: GateDefinition[] = [];
			if (originalStep.verification) {
				gateDefinitions.push({
					check: originalStep.verification.check,
					message: originalStep.verification.on_fail_message,
				});
			}
			gateDefinitions.push(...globalGateDefinitions);

			trace.qualityGateResults = executeQualityGates(gateDefinitions, mockOutput);

			// Any failed gate or contract violation fails the step and the run
			const anyGateFailed = trace.qualityGateResults.some((r) => !r.passed);
			if (anyGateFailed || violations.length > 0) {
				trace.status = "failed";
				result.ok = false;
			}
		} else if (!hasMockOutput && options.validateGates) {
			// Warn about missing mock output
			result.warnings.push(`Step "${stepName}" has no mock output for validation`);
		}

		// Warn if token estimate exceeds budget
		if (tokenEstimate > PROMPT_TOKEN_BUDGET) {
			result.warnings.push(
				`Step "${stepName}" prompt segment is ~${tokenEstimate} tokens (exceeds ${PROMPT_TOKEN_BUDGET} token budget)`,
			);
		}

		result.steps.push(trace);
	}

	// Execute global quality gates against all mock outputs taken together
	if (globalGateDefinitions.length > 0 && options.validateGates) {
		result.globalGateResults = executeQualityGates(globalGateDefinitions, previousOutputs);

		const anyGlobalGateFailed = result.globalGateResults.some((r) => !r.passed);
		if (anyGlobalGateFailed) {
			result.ok = false;
		}
	}

	return result;
}