diff --git a/.agents/evolve/preferences.yaml.template b/.agents/evolve/preferences.yaml.template new file mode 100644 index 000000000..756f7a1a4 --- /dev/null +++ b/.agents/evolve/preferences.yaml.template @@ -0,0 +1,43 @@ +# /evolve per-repo preferences (template) +# +# Copy this file to `.agents/evolve/preferences.yaml` and edit. The actual +# preferences file is gitignored — each repo and operator picks their own +# physics for the autonomous /evolve loop. +# +# Resolution order: +# 1. defaults (built-in Go constants) +# 2. this file overrides defaults +# 3. CLI flag overrides this file (caller-applied) +# +# Invalid keys, types, or out-of-range values produce a startup error with +# file:line:column context. No silent fallback. +# +# Inspect the resolved state with: `ao evolve config --show` +# Or as JSON: `ao evolve config --show --json` + +# Schema version. Must equal 1 for this template. +schema_version: 1 + +# Default mode when /evolve is invoked without a mode flag. +# burst — single supervised cycle, then exit (default). +# loop — keep cycling until the queue stabilizes or a halt signal fires. +mode_default: burst + +# When does /evolve narrow from explore (scout) to exploit (productive)? +scope_filter: + # Number of productive cycles required before scope narrows. Range [1..100]. + productive_threshold: 5 + # Halt the loop on a streak of pure-scout (no productive change) cycles. + scout_streak_halt: true + +# If true, treat a missing or stale `recommended` pointer as an error rather +# than a soft warning. +recommended_pointer_strict: true + +# Filesystem flags that halt the loop. Relative paths from the repo root. +halt_signals: + - .agents/evolve/STOP + - .agents/evolve/KILL + +# When true, the generator skill applies layered templates during cycles. 
+generator_layers_enabled: true diff --git a/.gitignore b/.gitignore index a04b64ff8..60ea812c7 100644 --- a/.gitignore +++ b/.gitignore @@ -158,5 +158,16 @@ packs/ # AgentOps session artifacts .agents/ +# Re-include the per-repo evolve preferences template (soc-6svt). The template +# ships in the repo as a starter; the actual preferences.yaml stays per-operator +# and remains gitignored. We have to re-allowlist each ancestor directory after +# the unanchored `.agents/` re-exclude above — git won't recurse into an +# excluded directory to evaluate file-level negations. Each re-included +# directory is followed by a /* exclude so its other contents stay ignored. +!/.agents/ +/.agents/* +!/.agents/evolve/ +/.agents/evolve/* +!/.agents/evolve/preferences.yaml.template evals/workbench/scorecard-latest.json .doctor/ diff --git a/cli/cmd/ao/evolve_config.go b/cli/cmd/ao/evolve_config.go new file mode 100644 index 000000000..35e5c4a25 --- /dev/null +++ b/cli/cmd/ao/evolve_config.go @@ -0,0 +1,74 @@ +package main + +import ( + "encoding/json" + "fmt" + "io" + + "github.com/boshu2/agentops/cli/internal/evolve" + "github.com/spf13/cobra" + "gopkg.in/yaml.v3" +) + +var ( + evolveConfigShow bool + evolveConfigJSON bool +) + +var evolveConfigCmd = &cobra.Command{ + Use: "config", + Short: "Show per-repo /evolve preferences", + Long: `Display the resolved per-repo /evolve preferences. + +Reads .agents/evolve/preferences.yaml (gitignored per-repo) on top of the +built-in defaults. A missing file is not an error — defaults are shown. A +malformed file exits 1 with file:line:column context for operator triage. + +Resolution order (caller applies step 3): + 1. defaults (built-in Go constants) + 2. .agents/evolve/preferences.yaml + 3. 
CLI flag overrides + +Examples: + ao evolve config --show # YAML output (default when --show is set) + ao evolve config --show --json # JSON output`, + RunE: runEvolveConfig, +} + +func init() { + evolveConfigCmd.Flags().BoolVar(&evolveConfigShow, "show", false, "Print the resolved preferences (defaults + preferences.yaml)") + evolveConfigCmd.Flags().BoolVar(&evolveConfigJSON, "json", false, "Emit JSON instead of YAML") + evolveCmd.AddCommand(evolveConfigCmd) +} + +// runEvolveConfig loads preferences and prints them in YAML or JSON. +func runEvolveConfig(cmd *cobra.Command, _ []string) error { + if !evolveConfigShow { + return fmt.Errorf("ao evolve config: pass --show to print preferences") + } + prefs, err := evolve.Load(cmd.Context()) + if err != nil { + return err + } + return writeEvolvePrefs(cmd.OutOrStdout(), prefs, evolveConfigJSON) +} + +// writeEvolvePrefs serializes prefs to w as YAML or JSON depending on asJSON. +func writeEvolvePrefs(w io.Writer, prefs *evolve.Prefs, asJSON bool) error { + if asJSON { + enc := json.NewEncoder(w) + enc.SetIndent("", " ") + if err := enc.Encode(prefs); err != nil { + return fmt.Errorf("encode json: %w", err) + } + return nil + } + data, err := yaml.Marshal(prefs) + if err != nil { + return fmt.Errorf("encode yaml: %w", err) + } + if _, err := w.Write(data); err != nil { + return fmt.Errorf("write yaml: %w", err) + } + return nil +} diff --git a/cli/cmd/ao/evolve_config_test.go b/cli/cmd/ao/evolve_config_test.go new file mode 100644 index 000000000..6772f9e0f --- /dev/null +++ b/cli/cmd/ao/evolve_config_test.go @@ -0,0 +1,205 @@ +package main + +import ( + "bytes" + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" + + "gopkg.in/yaml.v3" +) + +// resetEvolveConfigFlags resets the package-level flags between subtests so +// state from a prior run doesn't leak. 
+func resetEvolveConfigFlags(t *testing.T) { + t.Helper() + t.Cleanup(func() { + evolveConfigShow = false + evolveConfigJSON = false + }) +} + +// writeEvolvePrefsFile writes contents to .agents/evolve/preferences.yaml under dir. +func writeEvolvePrefsFile(t *testing.T, dir, contents string) { + t.Helper() + full := filepath.Join(dir, ".agents", "evolve") + if err := os.MkdirAll(full, 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + path := filepath.Join(full, "preferences.yaml") + if err := os.WriteFile(path, []byte(contents), 0o644); err != nil { + t.Fatalf("write: %v", err) + } +} + +// runEvolveConfigCmd executes `ao evolve config [args...]` with a fresh +// stdout/stderr buffer and returns (stdout, stderr, err). +func runEvolveConfigCmd(t *testing.T, args ...string) (string, string, error) { + t.Helper() + var stdout, stderr bytes.Buffer + rootCmd.SetOut(&stdout) + rootCmd.SetErr(&stderr) + full := append([]string{"evolve", "config"}, args...) + rootCmd.SetArgs(full) + err := rootCmd.Execute() + rootCmd.SetOut(nil) + rootCmd.SetErr(nil) + return stdout.String(), stderr.String(), err +} + +func TestEvolveConfig_MissingFile_PrintsDefaults_YAML(t *testing.T) { + dir := chdirTemp(t) + _ = dir + resetEvolveConfigFlags(t) + + stdout, _, err := runEvolveConfigCmd(t, "--show") + if err != nil { + t.Fatalf("evolve config: %v", err) + } + + // Round-trip the YAML back into a map and assert the canonical default values. 
+ var got map[string]any + if uerr := yaml.Unmarshal([]byte(stdout), &got); uerr != nil { + t.Fatalf("unmarshal yaml: %v\n--- output ---\n%s", uerr, stdout) + } + if v, _ := got["schema_version"].(int); v != 1 { + t.Fatalf("schema_version: want 1, got %v", got["schema_version"]) + } + if v, _ := got["mode_default"].(string); v != "burst" { + t.Fatalf("mode_default: want burst, got %v", got["mode_default"]) + } + sf, ok := got["scope_filter"].(map[string]any) + if !ok { + t.Fatalf("scope_filter missing or wrong type: %v", got["scope_filter"]) + } + if v, _ := sf["productive_threshold"].(int); v != 5 { + t.Fatalf("scope_filter.productive_threshold: want 5, got %v", sf["productive_threshold"]) + } + if v, _ := sf["scout_streak_halt"].(bool); v != true { + t.Fatalf("scope_filter.scout_streak_halt: want true, got %v", sf["scout_streak_halt"]) + } + if v, _ := got["recommended_pointer_strict"].(bool); v != true { + t.Fatalf("recommended_pointer_strict: want true, got %v", got["recommended_pointer_strict"]) + } + if v, _ := got["generator_layers_enabled"].(bool); v != true { + t.Fatalf("generator_layers_enabled: want true, got %v", got["generator_layers_enabled"]) + } + signals, ok := got["halt_signals"].([]any) + if !ok || len(signals) != 2 { + t.Fatalf("halt_signals: want list of len 2, got %v", got["halt_signals"]) + } + if s, _ := signals[0].(string); s != ".agents/evolve/STOP" { + t.Fatalf("halt_signals[0]: want .agents/evolve/STOP, got %v", signals[0]) + } +} + +func TestEvolveConfig_ValidFile_OverridesDefaults_YAML(t *testing.T) { + dir := chdirTemp(t) + resetEvolveConfigFlags(t) + writeEvolvePrefsFile(t, dir, `schema_version: 1 +mode_default: loop +scope_filter: + productive_threshold: 17 + scout_streak_halt: false +recommended_pointer_strict: false +halt_signals: + - .agents/evolve/STOP +generator_layers_enabled: false +`) + + stdout, _, err := runEvolveConfigCmd(t, "--show") + if err != nil { + t.Fatalf("evolve config: %v", err) + } + + var got map[string]any + 
if uerr := yaml.Unmarshal([]byte(stdout), &got); uerr != nil { + t.Fatalf("unmarshal yaml: %v\n--- output ---\n%s", uerr, stdout) + } + if v, _ := got["mode_default"].(string); v != "loop" { + t.Fatalf("mode_default: want loop, got %v", got["mode_default"]) + } + sf := got["scope_filter"].(map[string]any) + if v, _ := sf["productive_threshold"].(int); v != 17 { + t.Fatalf("productive_threshold: want 17, got %v", sf["productive_threshold"]) + } + if v, _ := sf["scout_streak_halt"].(bool); v != false { + t.Fatalf("scout_streak_halt: want false, got %v", sf["scout_streak_halt"]) + } + if v, _ := got["recommended_pointer_strict"].(bool); v != false { + t.Fatalf("recommended_pointer_strict: want false, got %v", got["recommended_pointer_strict"]) + } + if v, _ := got["generator_layers_enabled"].(bool); v != false { + t.Fatalf("generator_layers_enabled: want false, got %v", got["generator_layers_enabled"]) + } +} + +func TestEvolveConfig_JSONFlag_ProducesValidJSON(t *testing.T) { + dir := chdirTemp(t) + _ = dir + resetEvolveConfigFlags(t) + + stdout, _, err := runEvolveConfigCmd(t, "--show", "--json") + if err != nil { + t.Fatalf("evolve config --json: %v", err) + } + + var got map[string]any + if jerr := json.Unmarshal([]byte(stdout), &got); jerr != nil { + t.Fatalf("json.Unmarshal: %v\n--- output ---\n%s", jerr, stdout) + } + // JSON numbers come back as float64. 
+ if v, _ := got["schema_version"].(float64); v != 1 { + t.Fatalf("schema_version: want 1, got %v", got["schema_version"]) + } + if v, _ := got["mode_default"].(string); v != "burst" { + t.Fatalf("mode_default: want burst, got %v", got["mode_default"]) + } + sf, ok := got["scope_filter"].(map[string]any) + if !ok { + t.Fatalf("scope_filter missing or wrong type: %v", got["scope_filter"]) + } + if v, _ := sf["productive_threshold"].(float64); v != 5 { + t.Fatalf("productive_threshold: want 5, got %v", sf["productive_threshold"]) + } +} + +func TestEvolveConfig_MalformedFile_ErrorsWithFileLineContext(t *testing.T) { + dir := chdirTemp(t) + resetEvolveConfigFlags(t) + writeEvolvePrefsFile(t, dir, `schema_version: 1 +scope_filter: + productive_threshold: "abc" +`) + + _, _, err := runEvolveConfigCmd(t, "--show") + if err == nil { + t.Fatal("expected error from malformed preferences.yaml, got nil") + } + msg := err.Error() + if !strings.Contains(msg, "preferences.yaml:") { + t.Errorf("error %q missing preferences.yaml: prefix (file:line context)", msg) + } + if !strings.Contains(msg, "scope_filter.productive_threshold") { + t.Errorf("error %q missing field name", msg) + } + if !strings.Contains(msg, "expected int") { + t.Errorf("error %q missing type-mismatch description", msg) + } +} + +func TestEvolveConfig_WithoutShowFlag_Errors(t *testing.T) { + dir := chdirTemp(t) + _ = dir + resetEvolveConfigFlags(t) + + _, _, err := runEvolveConfigCmd(t) + if err == nil { + t.Fatal("expected error when --show is not set, got nil") + } + if !strings.Contains(err.Error(), "--show") { + t.Errorf("error %q missing --show hint", err.Error()) + } +} diff --git a/cli/docs/COMMANDS.md b/cli/docs/COMMANDS.md index 3c2fb38cc..30a572725 100644 --- a/cli/docs/COMMANDS.md +++ b/cli/docs/COMMANDS.md @@ -1781,7 +1781,7 @@ ao eval task show [flags] Run the v2 autonomous improvement loop. 
``` -ao evolve [goal] [flags] +ao evolve [command] ``` **Flags:** @@ -1825,6 +1825,24 @@ ao evolve [goal] [flags] --supervisor Enable autonomous supervisor mode (lease lock, self-heal, retries, gates, cleanup) (default true) ``` +**Subcommands:** + +#### `ao evolve config` + +Display the resolved per-repo /evolve preferences. + +``` +ao evolve config [flags] +``` + +**Flags:** + +``` + -h, --help help for config + --json Emit JSON instead of YAML + --show Print the resolved preferences (defaults + preferences.yaml) +``` + --- ### `ao factory` diff --git a/cli/internal/evolve/preferences.go b/cli/internal/evolve/preferences.go new file mode 100644 index 000000000..860bd2422 --- /dev/null +++ b/cli/internal/evolve/preferences.go @@ -0,0 +1,358 @@ +// Package evolve provides per-repo operator preferences for the autonomous +// /evolve loop. Preferences live in .agents/evolve/preferences.yaml. The +// resolution order is: +// +// 1. defaults (Go constants in Defaults()) +// 2. preferences.yaml overrides defaults +// 3. CLI flag overrides preferences.yaml (the caller applies this step) +// +// Invalid keys, types, or out-of-range values produce an error containing the +// preferences.yaml line:column for operator triage. There is no silent +// fallback. +package evolve + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + + "gopkg.in/yaml.v3" +) + +// PreferencesRelPath is the location of the preferences file relative to a +// repo working directory. +const PreferencesRelPath = ".agents/evolve/preferences.yaml" + +// Prefs is the in-memory representation of evolve preferences after Load has +// merged defaults with the on-disk file. 
+type Prefs struct { + SchemaVersion int `yaml:"schema_version" json:"schema_version"` + ModeDefault string `yaml:"mode_default" json:"mode_default"` + ScopeFilter ScopeFilterPrefs `yaml:"scope_filter" json:"scope_filter"` + RecommendedPointerStrict bool `yaml:"recommended_pointer_strict" json:"recommended_pointer_strict"` + HaltSignals []string `yaml:"halt_signals" json:"halt_signals"` + GeneratorLayersEnabled bool `yaml:"generator_layers_enabled" json:"generator_layers_enabled"` +} + +// ScopeFilterPrefs controls when /evolve narrows scope from explore to exploit. +type ScopeFilterPrefs struct { + ProductiveThreshold int `yaml:"productive_threshold" json:"productive_threshold"` + ScoutStreakHalt bool `yaml:"scout_streak_halt" json:"scout_streak_halt"` +} + +// Defaults returns a fresh Prefs populated with the canonical defaults. The +// caller may mutate the returned value safely; HaltSignals is a fresh slice. +func Defaults() *Prefs { + return &Prefs{ + SchemaVersion: 1, + ModeDefault: "burst", + ScopeFilter: ScopeFilterPrefs{ + ProductiveThreshold: 5, + ScoutStreakHalt: true, + }, + RecommendedPointerStrict: true, + HaltSignals: []string{ + ".agents/evolve/STOP", + ".agents/evolve/KILL", + }, + GeneratorLayersEnabled: true, + } +} + +// Load reads .agents/evolve/preferences.yaml relative to the current working +// directory. A missing file is not an error; Defaults() is returned with a nil +// error. Malformed YAML or schema violations produce an error whose message +// includes "preferences.yaml:line:column" context. +func Load(ctx context.Context) (*Prefs, error) { + cwd, err := os.Getwd() + if err != nil { + return nil, fmt.Errorf("get working directory: %w", err) + } + return LoadFromDir(ctx, cwd) +} + +// LoadFromDir is the testable variant of Load. The path .agents/evolve/preferences.yaml +// is resolved relative to dir. 
+func LoadFromDir(ctx context.Context, dir string) (*Prefs, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + path := filepath.Join(dir, PreferencesRelPath) + data, err := os.ReadFile(path) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return Defaults(), nil + } + return nil, fmt.Errorf("read preferences file %s: %w", path, err) + } + return parsePreferences(path, data) +} + +// parsePreferences walks the YAML document via yaml.Node so we can attach +// line:column context to every validation error. +func parsePreferences(path string, data []byte) (*Prefs, error) { + var root yaml.Node + if err := yaml.Unmarshal(data, &root); err != nil { + return nil, fmt.Errorf("%s: parse yaml: %w", path, err) + } + prefs := Defaults() + if root.Kind == 0 { + // Empty file → defaults stand. + return prefs, nil + } + doc := &root + if doc.Kind == yaml.DocumentNode { + if len(doc.Content) == 0 { + return prefs, nil + } + doc = doc.Content[0] + } + if doc.Kind != yaml.MappingNode { + return nil, fmt.Errorf("%s:%d:%d: expected mapping at root, got %s", + path, doc.Line, doc.Column, yamlKind(doc)) + } + if err := applyMapping(path, doc, prefs); err != nil { + return nil, err + } + if err := validatePrefs(path, doc, prefs); err != nil { + return nil, err + } + return prefs, nil +} + +// applyMapping populates prefs from the top-level YAML mapping. 
+func applyMapping(path string, node *yaml.Node, prefs *Prefs) error { + for i := 0; i < len(node.Content); i += 2 { + k := node.Content[i] + v := node.Content[i+1] + switch k.Value { + case "schema_version": + n, err := requireInt(path, k.Value, v) + if err != nil { + return err + } + prefs.SchemaVersion = n + case "mode_default": + s, err := requireString(path, k.Value, v) + if err != nil { + return err + } + prefs.ModeDefault = s + case "scope_filter": + if err := applyScopeFilter(path, v, &prefs.ScopeFilter); err != nil { + return err + } + case "recommended_pointer_strict": + b, err := requireBool(path, k.Value, v) + if err != nil { + return err + } + prefs.RecommendedPointerStrict = b + case "halt_signals": + ss, err := requireStringList(path, k.Value, v) + if err != nil { + return err + } + prefs.HaltSignals = ss + case "generator_layers_enabled": + b, err := requireBool(path, k.Value, v) + if err != nil { + return err + } + prefs.GeneratorLayersEnabled = b + default: + return fmt.Errorf("%s:%d:%d: unknown key %q", + path, k.Line, k.Column, k.Value) + } + } + return nil +} + +// applyScopeFilter populates the nested ScopeFilter struct. +func applyScopeFilter(path string, node *yaml.Node, sf *ScopeFilterPrefs) error { + if node.Kind != yaml.MappingNode { + return fmt.Errorf("%s:%d:%d: scope_filter: expected mapping, got %s", + path, node.Line, node.Column, yamlKind(node)) + } + for i := 0; i < len(node.Content); i += 2 { + k := node.Content[i] + v := node.Content[i+1] + key := "scope_filter." 
+ k.Value + switch k.Value { + case "productive_threshold": + n, err := requireInt(path, key, v) + if err != nil { + return err + } + sf.ProductiveThreshold = n + case "scout_streak_halt": + b, err := requireBool(path, key, v) + if err != nil { + return err + } + sf.ScoutStreakHalt = b + default: + return fmt.Errorf("%s:%d:%d: unknown key %q under scope_filter", + path, k.Line, k.Column, k.Value) + } + } + return nil +} + +// validatePrefs enforces the schema constraints that depend on the populated +// struct (range checks, enum membership). +func validatePrefs(path string, doc *yaml.Node, prefs *Prefs) error { + if prefs.SchemaVersion != 1 { + line, col := locateKey(doc, "schema_version") + return fmt.Errorf("%s:%d:%d: schema_version: expected 1, got %d", + path, line, col, prefs.SchemaVersion) + } + switch prefs.ModeDefault { + case "burst", "loop": + default: + line, col := locateKey(doc, "mode_default") + return fmt.Errorf("%s:%d:%d: mode_default: expected one of [burst, loop], got %q", + path, line, col, prefs.ModeDefault) + } + if prefs.ScopeFilter.ProductiveThreshold < 1 || prefs.ScopeFilter.ProductiveThreshold > 100 { + line, col := locateNestedKey(doc, "scope_filter", "productive_threshold") + return fmt.Errorf("%s:%d:%d: scope_filter.productive_threshold: expected int in [1..100], got %d", + path, line, col, prefs.ScopeFilter.ProductiveThreshold) + } + return nil +} + +// requireInt extracts an integer scalar; returns a typed error otherwise. +func requireInt(path, key string, v *yaml.Node) (int, error) { + if v.Kind != yaml.ScalarNode || (v.Tag != "" && v.Tag != "!!int") { + return 0, fmt.Errorf("%s:%d:%d: %s: expected int, got %s %q", + path, v.Line, v.Column, key, yamlScalarType(v), v.Value) + } + var n int + if err := v.Decode(&n); err != nil { + return 0, fmt.Errorf("%s:%d:%d: %s: expected int, got %q", + path, v.Line, v.Column, key, v.Value) + } + return n, nil +} + +// requireString extracts a string scalar. 
+func requireString(path, key string, v *yaml.Node) (string, error) { + if v.Kind != yaml.ScalarNode { + return "", fmt.Errorf("%s:%d:%d: %s: expected string, got %s", + path, v.Line, v.Column, key, yamlKind(v)) + } + if v.Tag != "" && v.Tag != "!!str" { + return "", fmt.Errorf("%s:%d:%d: %s: expected string, got %s %q", + path, v.Line, v.Column, key, yamlScalarType(v), v.Value) + } + return v.Value, nil +} + +// requireBool extracts a bool scalar. +func requireBool(path, key string, v *yaml.Node) (bool, error) { + if v.Kind != yaml.ScalarNode || (v.Tag != "" && v.Tag != "!!bool") { + return false, fmt.Errorf("%s:%d:%d: %s: expected bool, got %s %q", + path, v.Line, v.Column, key, yamlScalarType(v), v.Value) + } + var b bool + if err := v.Decode(&b); err != nil { + return false, fmt.Errorf("%s:%d:%d: %s: expected bool, got %q", + path, v.Line, v.Column, key, v.Value) + } + return b, nil +} + +// requireStringList extracts a sequence of strings. +func requireStringList(path, key string, v *yaml.Node) ([]string, error) { + if v.Kind != yaml.SequenceNode { + return nil, fmt.Errorf("%s:%d:%d: %s: expected list, got %s", + path, v.Line, v.Column, key, yamlKind(v)) + } + out := make([]string, 0, len(v.Content)) + for _, item := range v.Content { + s, err := requireString(path, key+"[]", item) + if err != nil { + return nil, err + } + out = append(out, s) + } + return out, nil +} + +// locateKey finds the (line, col) of a top-level key, or the document's +// position if the key isn't present. +func locateKey(doc *yaml.Node, key string) (int, int) { + if doc.Kind != yaml.MappingNode { + return doc.Line, doc.Column + } + for i := 0; i < len(doc.Content); i += 2 { + if doc.Content[i].Value == key { + return doc.Content[i].Line, doc.Content[i].Column + } + } + return doc.Line, doc.Column +} + +// locateNestedKey finds (line, col) for a child key under a top-level key. 
+func locateNestedKey(doc *yaml.Node, parent, child string) (int, int) { + if doc.Kind != yaml.MappingNode { + return doc.Line, doc.Column + } + for i := 0; i < len(doc.Content); i += 2 { + if doc.Content[i].Value != parent { + continue + } + v := doc.Content[i+1] + if v.Kind != yaml.MappingNode { + return v.Line, v.Column + } + for j := 0; j < len(v.Content); j += 2 { + if v.Content[j].Value == child { + return v.Content[j].Line, v.Content[j].Column + } + } + return v.Line, v.Column + } + return doc.Line, doc.Column +} + +// yamlKind returns a human-friendly name for a node kind. +func yamlKind(n *yaml.Node) string { + switch n.Kind { + case yaml.DocumentNode: + return "document" + case yaml.SequenceNode: + return "list" + case yaml.MappingNode: + return "mapping" + case yaml.ScalarNode: + return yamlScalarType(n) + case yaml.AliasNode: + return "alias" + default: + return "unknown" + } +} + +// yamlScalarType returns the friendly name of a scalar node's tag. +func yamlScalarType(n *yaml.Node) string { + switch n.Tag { + case "!!int": + return "int" + case "!!bool": + return "bool" + case "!!str": + return "string" + case "!!float": + return "float" + case "!!null": + return "null" + case "": + return "scalar" + default: + return n.Tag + } +} diff --git a/cli/internal/evolve/preferences_test.go b/cli/internal/evolve/preferences_test.go new file mode 100644 index 000000000..0e2978d53 --- /dev/null +++ b/cli/internal/evolve/preferences_test.go @@ -0,0 +1,266 @@ +package evolve + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" +) + +// writePrefs writes contents to .agents/evolve/preferences.yaml under dir. 
+func writePrefs(t *testing.T, dir, contents string) { + t.Helper() + full := filepath.Join(dir, ".agents", "evolve") + if err := os.MkdirAll(full, 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + path := filepath.Join(full, "preferences.yaml") + if err := os.WriteFile(path, []byte(contents), 0o644); err != nil { + t.Fatalf("write: %v", err) + } +} + +func TestDefaults_KnownValues(t *testing.T) { + d := Defaults() + if d.SchemaVersion != 1 { + t.Fatalf("SchemaVersion: want 1, got %d", d.SchemaVersion) + } + if d.ModeDefault != "burst" { + t.Fatalf("ModeDefault: want burst, got %q", d.ModeDefault) + } + if d.ScopeFilter.ProductiveThreshold != 5 { + t.Fatalf("ProductiveThreshold: want 5, got %d", d.ScopeFilter.ProductiveThreshold) + } + if d.ScopeFilter.ScoutStreakHalt != true { + t.Fatalf("ScoutStreakHalt: want true, got %v", d.ScopeFilter.ScoutStreakHalt) + } + if d.RecommendedPointerStrict != true { + t.Fatalf("RecommendedPointerStrict: want true, got %v", d.RecommendedPointerStrict) + } + if len(d.HaltSignals) != 2 { + t.Fatalf("HaltSignals len: want 2, got %d", len(d.HaltSignals)) + } + if d.HaltSignals[0] != ".agents/evolve/STOP" { + t.Fatalf("HaltSignals[0]: want .agents/evolve/STOP, got %q", d.HaltSignals[0]) + } + if d.HaltSignals[1] != ".agents/evolve/KILL" { + t.Fatalf("HaltSignals[1]: want .agents/evolve/KILL, got %q", d.HaltSignals[1]) + } + if d.GeneratorLayersEnabled != true { + t.Fatalf("GeneratorLayersEnabled: want true, got %v", d.GeneratorLayersEnabled) + } +} + +func TestLoadFromDir_MissingFile_ReturnsDefaults(t *testing.T) { + dir := t.TempDir() + got, err := LoadFromDir(context.Background(), dir) + if err != nil { + t.Fatalf("LoadFromDir: %v", err) + } + want := Defaults() + if got.SchemaVersion != want.SchemaVersion { + t.Fatalf("SchemaVersion: want %d, got %d", want.SchemaVersion, got.SchemaVersion) + } + if got.ModeDefault != want.ModeDefault { + t.Fatalf("ModeDefault: want %q, got %q", want.ModeDefault, got.ModeDefault) + } + if 
got.ScopeFilter.ProductiveThreshold != want.ScopeFilter.ProductiveThreshold { + t.Fatalf("ProductiveThreshold: want %d, got %d", + want.ScopeFilter.ProductiveThreshold, got.ScopeFilter.ProductiveThreshold) + } +} + +func TestLoadFromDir_ValidFile_OverridesDefaults(t *testing.T) { + dir := t.TempDir() + writePrefs(t, dir, `schema_version: 1 +mode_default: loop +scope_filter: + productive_threshold: 12 + scout_streak_halt: false +recommended_pointer_strict: false +halt_signals: + - .agents/evolve/STOP + - .agents/evolve/CUSTOM +generator_layers_enabled: false +`) + got, err := LoadFromDir(context.Background(), dir) + if err != nil { + t.Fatalf("LoadFromDir: %v", err) + } + if got.ModeDefault != "loop" { + t.Fatalf("ModeDefault: want loop, got %q", got.ModeDefault) + } + if got.ScopeFilter.ProductiveThreshold != 12 { + t.Fatalf("ProductiveThreshold: want 12, got %d", got.ScopeFilter.ProductiveThreshold) + } + if got.ScopeFilter.ScoutStreakHalt != false { + t.Fatalf("ScoutStreakHalt: want false, got %v", got.ScopeFilter.ScoutStreakHalt) + } + if got.RecommendedPointerStrict != false { + t.Fatalf("RecommendedPointerStrict: want false, got %v", got.RecommendedPointerStrict) + } + if len(got.HaltSignals) != 2 { + t.Fatalf("HaltSignals len: want 2, got %d", len(got.HaltSignals)) + } + if got.HaltSignals[1] != ".agents/evolve/CUSTOM" { + t.Fatalf("HaltSignals[1]: want .agents/evolve/CUSTOM, got %q", got.HaltSignals[1]) + } + if got.GeneratorLayersEnabled != false { + t.Fatalf("GeneratorLayersEnabled: want false, got %v", got.GeneratorLayersEnabled) + } +} + +func TestLoadFromDir_PartialOverride_KeepsOtherDefaults(t *testing.T) { + dir := t.TempDir() + writePrefs(t, dir, `schema_version: 1 +mode_default: loop +`) + got, err := LoadFromDir(context.Background(), dir) + if err != nil { + t.Fatalf("LoadFromDir: %v", err) + } + if got.ModeDefault != "loop" { + t.Fatalf("ModeDefault: want loop, got %q", got.ModeDefault) + } + if got.ScopeFilter.ProductiveThreshold != 5 { + 
t.Fatalf("ProductiveThreshold default preserved: want 5, got %d", + got.ScopeFilter.ProductiveThreshold) + } + if got.RecommendedPointerStrict != true { + t.Fatalf("RecommendedPointerStrict default preserved: want true, got %v", + got.RecommendedPointerStrict) + } +} + +func TestLoadFromDir_Errors(t *testing.T) { + cases := []struct { + name string + contents string + wantSub []string // substrings expected in the error message + }{ + { + name: "malformed_yaml", + contents: "this is: not: valid: yaml: :", + wantSub: []string{"preferences.yaml", "parse yaml"}, + }, + { + name: "wrong_schema_version", + contents: `schema_version: 2 +mode_default: burst +`, + wantSub: []string{"preferences.yaml:1:", "schema_version: expected 1, got 2"}, + }, + { + name: "productive_threshold_not_int", + contents: `schema_version: 1 +scope_filter: + productive_threshold: "abc" +`, + wantSub: []string{"preferences.yaml:3:", "scope_filter.productive_threshold: expected int"}, + }, + { + name: "productive_threshold_out_of_range_low", + contents: `schema_version: 1 +scope_filter: + productive_threshold: 0 +`, + wantSub: []string{"preferences.yaml:3:", "scope_filter.productive_threshold: expected int in [1..100], got 0"}, + }, + { + name: "productive_threshold_out_of_range_high", + contents: `schema_version: 1 +scope_filter: + productive_threshold: 101 +`, + wantSub: []string{"preferences.yaml:3:", "scope_filter.productive_threshold: expected int in [1..100], got 101"}, + }, + { + name: "unknown_top_key", + contents: `schema_version: 1 +bogus_extra_key: hello +`, + wantSub: []string{"preferences.yaml:2:", `unknown key "bogus_extra_key"`}, + }, + { + name: "unknown_nested_key", + contents: `schema_version: 1 +scope_filter: + bogus_inner: 1 +`, + wantSub: []string{"preferences.yaml:3:", `unknown key "bogus_inner" under scope_filter`}, + }, + { + name: "wrong_mode", + contents: `schema_version: 1 +mode_default: turbo +`, + wantSub: []string{"preferences.yaml:2:", "mode_default: expected one of 
[burst, loop], got \"turbo\""}, + }, + { + name: "halt_signals_not_list", + contents: `schema_version: 1 +halt_signals: STOP +`, + wantSub: []string{"preferences.yaml:2:", "halt_signals: expected list"}, + }, + { + name: "scope_filter_not_mapping", + contents: `schema_version: 1 +scope_filter: nope +`, + wantSub: []string{"preferences.yaml:2:", "scope_filter: expected mapping"}, + }, + { + name: "generator_layers_not_bool", + contents: `schema_version: 1 +generator_layers_enabled: "yes please" +`, + wantSub: []string{"preferences.yaml:2:", "generator_layers_enabled: expected bool"}, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + dir := t.TempDir() + writePrefs(t, dir, tc.contents) + _, err := LoadFromDir(context.Background(), dir) + if err == nil { + t.Fatalf("expected error, got nil") + } + msg := err.Error() + for _, sub := range tc.wantSub { + if !strings.Contains(msg, sub) { + t.Fatalf("error %q missing expected substring %q", msg, sub) + } + } + }) + } +} + +func TestLoadFromDir_EmptyFile_ReturnsDefaults(t *testing.T) { + dir := t.TempDir() + writePrefs(t, dir, "") + got, err := LoadFromDir(context.Background(), dir) + if err != nil { + t.Fatalf("LoadFromDir: %v", err) + } + if got.ModeDefault != "burst" { + t.Fatalf("ModeDefault: want burst, got %q", got.ModeDefault) + } + if got.SchemaVersion != 1 { + t.Fatalf("SchemaVersion: want 1, got %d", got.SchemaVersion) + } +} + +func TestLoadFromDir_RootNotMapping_Errors(t *testing.T) { + dir := t.TempDir() + writePrefs(t, dir, "- a\n- b\n") + _, err := LoadFromDir(context.Background(), dir) + if err == nil { + t.Fatal("expected error for non-mapping root") + } + if !strings.Contains(err.Error(), "expected mapping at root") { + t.Fatalf("error %q missing 'expected mapping at root'", err.Error()) + } +} diff --git a/evals/agentops-core/cli-command-surface-matrix.json b/evals/agentops-core/cli-command-surface-matrix.json index 61300ef9c..73e3800c8 100644 --- 
a/evals/agentops-core/cli-command-surface-matrix.json +++ b/evals/agentops-core/cli-command-surface-matrix.json @@ -41,7 +41,7 @@ }, "expectations": [ {"type": "exit_code", "value": 0}, - {"type": "stdout_contains", "value": "cli-command-headings: top=73 sub=197 all=270"}, + {"type": "stdout_contains", "value": "cli-command-headings: top=73 sub=198 all=271"}, {"type": "stdout_contains", "value": "cli-help-matrix-ok"} ], "dimensions": ["correctness", "runtime_compatibility", "artifact_quality"], diff --git a/evals/agentops-core/fixtures/cli-command-surface-smoke.sh b/evals/agentops-core/fixtures/cli-command-surface-smoke.sh index f728628db..934ce844c 100755 --- a/evals/agentops-core/fixtures/cli-command-surface-smoke.sh +++ b/evals/agentops-core/fixtures/cli-command-surface-smoke.sh @@ -17,7 +17,7 @@ top_count="$(rg -c '^### `ao ' "$DOCS_PATH")" sub_count="$(rg -c '^#### `ao ' "$DOCS_PATH")" all_count="$(rg -c '^#{3,4} `ao ' "$DOCS_PATH")" -if [[ "$top_count" != "73" || "$sub_count" != "197" || "$all_count" != "270" ]]; then +if [[ "$top_count" != "73" || "$sub_count" != "198" || "$all_count" != "271" ]]; then printf 'unexpected command heading counts: top=%s sub=%s all=%s\n' "$top_count" "$sub_count" "$all_count" >&2 exit 1 fi @@ -25,7 +25,7 @@ fi # shellcheck disable=SC2016 # literal backticks delimit generated Markdown command headings. 
mapfile -t commands < <(rg '^#{3,4} `ao ' "$DOCS_PATH" | sed -E 's/^.*`([^`]+)`.*/\1/') -if [[ "${#commands[@]}" -ne 270 ]]; then +if [[ "${#commands[@]}" -ne 271 ]]; then printf 'unexpected command matrix size: %s\n' "${#commands[@]}" >&2 exit 1 fi diff --git a/registry.json b/registry.json index ae8991566..d98459a82 100644 --- a/registry.json +++ b/registry.json @@ -1,13 +1,13 @@ { "schema_version": 1, - "generated_at": "2026-05-21T02:27:12Z", + "generated_at": "2026-05-21T15:19:15Z", "summary": { "skills": 80, "hooks": 44, - "knowledge_stores": 4, + "knowledge_stores": 5, "job_types": 14, "eval_files": 62, - "cli_commands": 174 + "cli_commands": 175 }, "surfaces": { "skills": [ @@ -1007,6 +1007,12 @@ } ], "knowledge_stores": [ + { + "name": "evolve", + "path": ".agents/evolve/", + "purpose": "Evolve skill session outputs", + "file_count": 1 + }, { "name": "findings", "path": ".agents/findings/", @@ -1380,6 +1386,10 @@ "name": "evolve", "path": "cli/cmd/ao/evolve.go" }, + { + "name": "evolve_config", + "path": "cli/cmd/ao/evolve_config.go" + }, { "name": "extract", "path": "cli/cmd/ao/extract.go" diff --git a/schemas/evolve-preferences.v1.schema.json b/schemas/evolve-preferences.v1.schema.json new file mode 100644 index 000000000..d1293ee86 --- /dev/null +++ b/schemas/evolve-preferences.v1.schema.json @@ -0,0 +1,52 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://agentops.dev/schemas/evolve-preferences.v1.schema.json", + "title": "Evolve Per-Repo Preferences", + "description": "Per-repo operator preferences for the /evolve autonomous loop. Lives at .agents/evolve/preferences.yaml. Resolution order: defaults < preferences.yaml < CLI flag.", + "practices": ["design-by-contract", "lean-startup"], + "type": "object", + "required": ["schema_version"], + "properties": { + "schema_version": { + "type": "integer", + "const": 1, + "description": "Schema version. Must equal 1 for this schema." 
+ }, + "mode_default": { + "type": "string", + "enum": ["burst", "loop"], + "description": "Default evolve mode. 'burst' = single supervised cycle. 'loop' = run until queue stable." + }, + "scope_filter": { + "type": "object", + "description": "Controls when /evolve narrows its scope from explore to exploit.", + "properties": { + "productive_threshold": { + "type": "integer", + "minimum": 1, + "maximum": 100, + "description": "Number of productive cycles before scope narrows." + }, + "scout_streak_halt": { + "type": "boolean", + "description": "If true, halt loop on a streak of pure-scout (no productive change) cycles." + } + }, + "additionalProperties": false + }, + "recommended_pointer_strict": { + "type": "boolean", + "description": "If true, treat missing/stale recommended pointer as an error rather than a warning." + }, + "halt_signals": { + "type": "array", + "items": { "type": "string" }, + "description": "List of relative paths whose presence halts the /evolve loop." + }, + "generator_layers_enabled": { + "type": "boolean", + "description": "If true, the generator skill applies layered templates during cycles." + } + }, + "additionalProperties": false +} diff --git a/tests/scripts/export-session-summary.bats b/tests/scripts/export-session-summary.bats index 7e6135937..7cb51d406 100644 --- a/tests/scripts/export-session-summary.bats +++ b/tests/scripts/export-session-summary.bats @@ -70,7 +70,10 @@ run_export() { @test "compressed cycle ledger truncates notes to 140 chars" { long_note="$(printf 'x%.0s' {1..300})" - write_cycle 1 "2026-05-20T10:00:00Z" "m" "productive" "$long_note" + # Use a dynamic timestamp so the cycle falls inside the default 24h export + # window regardless of when the test runs (was hardcoded 2026-05-20, broke + # after 2026-05-21 — see soc-4ake). + write_cycle 1 "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "m" "productive" "$long_note" run_export --stdout --no-bd --no-prs [ "$status" -eq 0 ] # Find the line containing the cycle and check length.