From f53ae3dd04b6d8790a3c7e5a602fd0462cfc014b Mon Sep 17 00:00:00 2001 From: Hunter B Date: Fri, 5 Jun 2026 20:54:09 -0700 Subject: [PATCH] feat(whaleflow): add typed workflow spec IR Add the explicit WorkflowSpec/WorkflowNode metadata surface requested for the v0.9 WhaleFlow IR, including budget, permission, model, and promotion policy records plus serde roundtrip coverage. Runtime execution, replay, and worktree application remain out of scope. Refs #2668, #2482, #2486. Co-authored-by: AdityaVG13 <44177453+AdityaVG13@users.noreply.github.com> --- CHANGELOG.md | 7 +- crates/tui/CHANGELOG.md | 7 +- crates/whaleflow/src/lib.rs | 306 ++++++++++++++++++++++++++++++++++++ 3 files changed, 314 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 778ec00ef..b13a9e1cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,9 +37,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 config/IR validation and deterministic phase ordering tests. This preserves the WhaleFlow direction from #2482/#2486 without exposing a runtime `workflow_run` tool until cancellation, replay, and worktree semantics are - release-safe. The foundation now includes serializable branch, leaf, and - control-node result records toward the #2668 TraceStore contract. Thanks - @AdityaVG13 for the WhaleFlow draft and cost-tracking direction. + release-safe. The foundation now includes explicit `WorkflowSpec`, + `WorkflowNode`, branch/leaf/policy metadata structs, plus serializable branch, + leaf, and control-node result records toward the #2668 TraceStore contract. + Thanks @AdityaVG13 for the WhaleFlow draft and cost-tracking direction. - Added a state-store v2 schema migration for WhaleFlow trace tables covering workflow, branch, leaf, control-node, and teacher-candidate runs. The migration creates persistence shape only; workflow execution and replay diff --git a/crates/tui/CHANGELOG.md b/crates/tui/CHANGELOG.md index 778ec00ef..b13a9e1cf 100644 --- a/crates/tui/CHANGELOG.md +++ b/crates/tui/CHANGELOG.md @@ -37,9 +37,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 config/IR validation and deterministic phase ordering tests. This preserves the WhaleFlow direction from #2482/#2486 without exposing a runtime `workflow_run` tool until cancellation, replay, and worktree semantics are - release-safe. The foundation now includes serializable branch, leaf, and - control-node result records toward the #2668 TraceStore contract. Thanks - @AdityaVG13 for the WhaleFlow draft and cost-tracking direction. + release-safe. The foundation now includes explicit `WorkflowSpec`, + `WorkflowNode`, branch/leaf/policy metadata structs, plus serializable branch, + leaf, and control-node result records toward the #2668 TraceStore contract. + Thanks @AdityaVG13 for the WhaleFlow draft and cost-tracking direction. - Added a state-store v2 schema migration for WhaleFlow trace tables covering workflow, branch, leaf, control-node, and teacher-candidate runs. The migration creates persistence shape only; workflow execution and replay diff --git a/crates/whaleflow/src/lib.rs b/crates/whaleflow/src/lib.rs index 8502db149..9417eb169 100644 --- a/crates/whaleflow/src/lib.rs +++ b/crates/whaleflow/src/lib.rs @@ -31,6 +31,183 @@ impl WorkflowConfig { } } +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct WorkflowSpec { + #[serde(default)] + pub id: Option, + pub goal: String, + #[serde(default)] + pub description: Option, + #[serde(default)] + pub budget: BudgetSpec, + #[serde(default)] + pub permissions: PermissionSpec, + #[serde(default)] + pub model_policy: ModelPolicy, + #[serde(default)] + pub promotion_policy: PromotionPolicy, + #[serde(default)] + pub nodes: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "kind", content = "spec", rename_all = "snake_case")] +pub enum WorkflowNode { + BranchSet(BranchSpec), + Leaf(LeafSpec), + Sequence(SequenceSpec), + Reduce(ReduceSpec), + TeacherReview(TeacherReviewSpec), + LoopUntil(LoopUntilSpec), + Cond(CondSpec), + Expand(ExpandSpec), +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct BranchSpec { + pub id: String, + #[serde(default)] + pub description: Option, + #[serde(default)] + pub parallel: bool, + #[serde(default)] + pub budget: BudgetSpec, + #[serde(default)] + pub permissions: PermissionSpec, + #[serde(default)] + pub model_policy: ModelPolicy, + #[serde(default)] + pub children: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct LeafSpec { + pub id: String, + pub prompt: String, + #[serde(default)] + pub agent_type: AgentType, + #[serde(default)] + pub mode: TaskMode, + #[serde(default)] + pub isolation: IsolationMode, + #[serde(default)] + pub file_scope: Vec, + #[serde(default)] + pub depends_on_results: Vec, + #[serde(default)] + pub budget: BudgetSpec, + #[serde(default)] + pub permissions: PermissionSpec, + #[serde(default)] + pub model_policy: ModelPolicy, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct SequenceSpec { + pub id: String, + #[serde(default)] + pub children: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ReduceSpec { + pub id: String, + #[serde(default)] + pub inputs: Vec, + pub prompt: String, + #[serde(default)] + pub model_policy: ModelPolicy, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct TeacherReviewSpec { + pub id: String, + #[serde(default)] + pub candidates: Vec, + #[serde(default)] + pub promotion_policy: PromotionPolicy, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct LoopUntilSpec { + pub id: String, + pub condition: String, + #[serde(default)] + pub max_iterations: Option, + #[serde(default)] + pub children: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct CondSpec { + pub id: String, + pub condition: String, + #[serde(default)] + pub then_nodes: Vec, + #[serde(default)] + pub else_nodes: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ExpandSpec { + pub id: String, + pub source: String, + #[serde(default)] + pub template: Option>, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)] +pub struct BudgetSpec { + #[serde(default)] + pub max_steps: Option, + #[serde(default)] + pub timeout_secs: Option, + #[serde(default)] + pub max_parallel: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)] +pub struct PermissionSpec { + #[serde(default)] + pub allow_write: bool, + #[serde(default)] + pub allow_network: bool, + #[serde(default)] + pub allowed_tools: Vec, + #[serde(default)] + pub file_scope: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)] +pub struct ModelPolicy { + #[serde(default)] + pub provider: Option, + #[serde(default)] + pub model: Option, + #[serde(default)] + pub fallback_models: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)] +pub struct PromotionPolicy { + #[serde(default)] + pub strategy: PromotionStrategy, + #[serde(default)] + pub require_teacher_review: bool, + #[serde(default)] + pub min_successful_branches: Option, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] +#[serde(rename_all = "snake_case")] +pub enum PromotionStrategy { + #[default] + All, + FirstSuccess, + BestScore, + TeacherSelected, +} + #[derive(Debug, Clone, PartialEq, Eq)] pub struct WorkflowPlan { goal: String, @@ -769,6 +946,135 @@ mod tests { assert_eq!(parsed, workflow); } + #[test] + fn workflow_ir_roundtrip() { + let discover_leaf = LeafSpec { + id: "scan-readme".to_string(), + prompt: "Inspect README setup gaps".to_string(), + agent_type: AgentType::Explore, + mode: TaskMode::ReadOnly, + isolation: IsolationMode::Shared, + file_scope: vec!["README.md".to_string()], + depends_on_results: Vec::new(), + budget: BudgetSpec { + max_steps: Some(8), + timeout_secs: Some(300), + max_parallel: None, + }, + permissions: PermissionSpec::default(), + model_policy: ModelPolicy { + provider: Some("openai".to_string()), + model: Some("gpt-5.4".to_string()), + fallback_models: Vec::new(), + }, + }; + let workflow = WorkflowSpec { + id: Some("v090-readme-check".to_string()), + goal: "tighten setup docs".to_string(), + description: Some("metadata-only typed WhaleFlow IR".to_string()), + budget: BudgetSpec { + max_steps: Some(30), + timeout_secs: Some(1_800), + max_parallel: Some(2), + }, + permissions: PermissionSpec { + allow_write: false, + allow_network: false, + allowed_tools: vec!["rg".to_string()], + file_scope: vec!["README.md".to_string()], + }, + model_policy: ModelPolicy { + provider: Some("openai".to_string()), + model: Some("gpt-5.4".to_string()), + fallback_models: vec!["gpt-5.4-mini".to_string()], + }, + promotion_policy: PromotionPolicy { + strategy: PromotionStrategy::TeacherSelected, + require_teacher_review: true, + min_successful_branches: Some(1), + }, + nodes: vec![ + WorkflowNode::BranchSet(BranchSpec { + id: "discover".to_string(), + description: Some("parallel doc inspection".to_string()), + parallel: true, + budget: BudgetSpec { + max_steps: Some(12), + timeout_secs: Some(600), + max_parallel: Some(2), + }, + permissions: PermissionSpec::default(), + model_policy: ModelPolicy::default(), + children: vec![WorkflowNode::Leaf(discover_leaf)], + }), + WorkflowNode::Sequence(SequenceSpec { + id: "review-and-reduce".to_string(), + children: vec![ + WorkflowNode::TeacherReview(TeacherReviewSpec { + id: "select-best".to_string(), + candidates: vec!["scan-readme".to_string()], + promotion_policy: PromotionPolicy { + strategy: PromotionStrategy::BestScore, + require_teacher_review: true, + min_successful_branches: Some(1), + }, + }), + WorkflowNode::Reduce(ReduceSpec { + id: "summarize".to_string(), + inputs: vec!["scan-readme".to_string()], + prompt: "Summarize the smallest safe patch".to_string(), + model_policy: ModelPolicy::default(), + }), + ], + }), + WorkflowNode::Cond(CondSpec { + id: "maybe-expand".to_string(), + condition: "summary identifies multiple independent gaps".to_string(), + then_nodes: vec![WorkflowNode::Expand(ExpandSpec { + id: "split-followups".to_string(), + source: "summarize".to_string(), + template: Some(Box::new(WorkflowNode::Leaf(LeafSpec { + id: "followup-template".to_string(), + prompt: "Patch one independent gap".to_string(), + agent_type: AgentType::Implementer, + mode: TaskMode::ReadWrite, + isolation: IsolationMode::Worktree, + file_scope: vec!["README.md".to_string()], + depends_on_results: Vec::new(), + budget: BudgetSpec::default(), + permissions: PermissionSpec { + allow_write: true, + allow_network: false, + allowed_tools: Vec::new(), + file_scope: vec!["README.md".to_string()], + }, + model_policy: ModelPolicy::default(), + }))), + })], + else_nodes: vec![WorkflowNode::LoopUntil(LoopUntilSpec { + id: "verify-once".to_string(), + condition: "local verification passes".to_string(), + max_iterations: Some(1), + children: Vec::new(), + })], + }), + ], + }; + + let json = serde_json::to_string_pretty(&workflow).expect("serialize workflow ir"); + + assert!(json.contains("\"kind\": \"branch_set\"")); + assert!(json.contains("\"strategy\": \"teacher_selected\"")); + let parsed: WorkflowSpec = serde_json::from_str(&json).expect("parse workflow ir"); + assert_eq!(parsed, workflow); + + let minimal: WorkflowSpec = serde_json::from_str(r#"{"goal":"ship v0.9","nodes":[]}"#) + .expect("parse minimal workflow ir"); + assert_eq!(minimal.budget, BudgetSpec::default()); + assert_eq!(minimal.permissions, PermissionSpec::default()); + assert_eq!(minimal.model_policy, ModelPolicy::default()); + } + #[test] fn branch_result_serialization() { let result = BranchResult {