diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 06f82fb..54c2d9e 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -125,7 +125,7 @@ jobs: run: docker build -t formicos:provenance . - name: Generate provenance attestation - uses: actions/attest-build-provenance@v2 + uses: actions/attest-build-provenance@v4 with: subject-name: formicos subject-digest: "sha256:placeholder" diff --git a/.obsidian/app.json b/.obsidian/app.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.obsidian/app.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.obsidian/appearance.json b/.obsidian/appearance.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.obsidian/appearance.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.obsidian/core-plugins.json b/.obsidian/core-plugins.json new file mode 100644 index 0000000..639b90d --- /dev/null +++ b/.obsidian/core-plugins.json @@ -0,0 +1,33 @@ +{ + "file-explorer": true, + "global-search": true, + "switcher": true, + "graph": true, + "backlink": true, + "canvas": true, + "outgoing-link": true, + "tag-pane": true, + "footnotes": false, + "properties": true, + "page-preview": true, + "daily-notes": true, + "templates": true, + "note-composer": true, + "command-palette": true, + "slash-command": false, + "editor-status": true, + "bookmarks": true, + "markdown-importer": false, + "zk-prefixer": false, + "random-note": false, + "outline": true, + "word-count": true, + "slides": false, + "audio-recorder": false, + "workspaces": false, + "file-recovery": true, + "publish": false, + "sync": true, + "bases": true, + "webviewer": false +} \ No newline at end of file diff --git a/CLA.md b/CLA.md new file mode 100644 index 0000000..99eee56 --- /dev/null +++ b/CLA.md @@ -0,0 +1,312 @@ +# FormicOS Contributor License Agreement + +Version 1.0 + +IMPORTANT: This is a legal instrument. 
Have it reviewed by qualified +legal counsel before relying on it. + +This Contributor License Agreement ("Agreement") is between Intradyne, the +copyright holder and lead maintainer of FormicOS ("Intradyne"), and the +individual or Legal Entity accepting this Agreement ("Contributor"). + +By signing this Agreement electronically, submitting it through the project's +CLA workflow, or otherwise indicating assent in a contribution channel +designated by Intradyne, Contributor agrees to the following terms. + + +## 1. Purpose + +FormicOS is built in the open because open development produces better +software. The project is distributed under the GNU Affero General Public +License version 3 with additional permissions and an alternative commercial +licensing path. This Agreement exists so that: + +- the codebase stays available under the AGPLv3 for everyone to use, study, + modify, and share -- free of charge for individuals, small businesses, + nonprofits, and educators; +- Intradyne can also offer commercial licenses so that organizations + choosing proprietary deployment help fund continued development; +- contributors who improve FormicOS share in the commercial revenue their + work generates, creating a sustainable cycle where building the commons + is also building a livelihood. + +This Agreement is a license grant. It is not a copyright assignment. Except +for the rights expressly granted below, Contributor retains ownership of +Contributor's Contributions. + + +## 2. Definitions + +"Contribution" means any code, documentation, configuration, test, build +artifact recipe, design text, or other copyrightable material intentionally +submitted by Contributor for inclusion in FormicOS and accepted into the +repository. + +"FormicOS" means the software in this repository and its official +documentation. + +"Commercial Terms" means the file `COMMERCIAL_TERMS.md` in the repository, as +updated from time to time under its stated amendment process. 
+ +"License" means the repository `LICENSE` file, including the AGPLv3 base +license, Section 7 additional permissions, and the commercial-license +framework. + +"Contributor Revenue Pool" has the meaning given in the License. + +"Eligible Contributor" means a Contributor who: + +- has at least one merged Contribution subject to this Agreement; +- has provided any payout, tax, or identity information reasonably requested + under Section 7; +- is not subject to payment restrictions under applicable law. + +"Legal Entity" means the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that +entity. "Control" means (a) the power to direct the management of such +entity, or (b) ownership of fifty percent (50%) or more of the outstanding +shares, voting interests, or beneficial ownership of such entity. + + +## 3. Copyright License and Commercial Relicensing Grant + +Contributor grants Intradyne and its successors, affiliates, contractors, and +sublicensees a perpetual, worldwide, non-exclusive, irrevocable, +royalty-free, fully paid-up license to: + +- use, reproduce, display, perform, and distribute the Contribution; +- modify, adapt, translate, and create derivative works of the Contribution; +- sublicense the Contribution under the License, a commercial license, or any + successor licensing program for FormicOS; +- enforce the copyrights in the Contribution as part of the FormicOS codebase. + +This grant includes the right to offer the Contribution under proprietary, +commercial, trial, evaluation, hosted, or negotiated enterprise terms without +seeking additional permission from Contributor. + +Contributor understands that Intradyne may stop distributing FormicOS, may +change business models prospectively, and is not obligated to use every +Contribution in any particular release. + + +## 4. 
Patent License + +Contributor grants Intradyne and recipients of the official FormicOS +distribution a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable patent license to make, have made, use, offer to sell, sell, +import, and otherwise transfer the Contribution and the portions of FormicOS +that necessarily practice Contributor's Contribution, but only for patent +claims owned or controlled by Contributor that are necessarily infringed by +the Contribution as submitted or as incorporated into FormicOS. + +If Contributor institutes patent litigation against Intradyne or any official +FormicOS distributor alleging that a Contribution or FormicOS itself infringes +Contributor's patent rights, then the patent license granted by Contributor +under this Agreement terminates as of the date the litigation is filed. + + +## 5. Contributor Representations + +Contributor represents and warrants that: + +- Contributor is legally entitled to grant the rights in this Agreement. +- Each Contribution is an original work of authorship of Contributor, or + Contributor has disclosed the source and license status of any third-party + material included in the Contribution. +- Contributor is not knowingly submitting material that violates another + party's copyright, trade secret, patent, or contractual rights. +- Contributor will identify any known legal restrictions, third-party notices, + or patent concerns associated with a Contribution when submitting it. + +If Contributor's employer or another Legal Entity has rights in a proposed +Contribution, Contributor represents that Contributor has authority to submit +the Contribution on that party's behalf, that the party has waived or +licensed the relevant rights for this purpose, or that the party has executed +another written agreement acceptable to Intradyne covering those rights. + + +## 6. Submission Conditions + +Contributor may submit Contributions personally or on behalf of a Legal +Entity. 
If Contributor's employer has executed a Corporate Contributor +License Agreement (CORPORATE_CLA.md), Contributor does not need to +separately demonstrate employer authorization under Section 5. +Otherwise, Intradyne may require additional written confirmation for +entity-sponsored contributions, including confirmation of signatory +authority. + +Intradyne may reject Contributions, require provenance clarification, or pause +review until ownership or licensing questions are resolved. + +Unless otherwise agreed in writing, Contributions are provided on an "AS IS" +basis, without warranties or conditions of any kind. + +Contributions submitted by autonomous agents (via A2A or other automated +channels) must identify a sponsoring principal -- a human or Legal Entity +who has signed this Agreement or a Corporate CLA. Agents are not legal +principals in their own right and cannot independently accept this Agreement. +See docs/A2A_ECONOMICS.md for the machine-readable contract and receipt +schemas that govern agent-submitted contributions. + + +## 7. Contributor Revenue Share Program + +### 7.1 Program Basis + +This Section governs the contributor revenue-share commitment referenced in the +License. If the License and this Agreement conflict on pool size, pricing +inputs, or activation threshold, the License controls. This Agreement controls +on contributor eligibility, allocation mechanics, payout administration, and +program operations. + +### 7.2 Pool Size and Activation Threshold + +Twenty percent (20%) of Tier 2 and Tier 3 revenue is allocated to the +Contributor Revenue Pool, subject to the quarterly activation threshold stated +in the License. If the threshold is not met for a calendar quarter, no pool +distribution is owed for that quarter. 
+ +### 7.3 Maintainer Floor + +For each quarter in which the Contributor Revenue Pool activates, Intradyne is +entitled to a maintainer allocation equal to the greater of: + +- fifty percent (50%) of the activated quarterly Contributor Revenue Pool; or +- the amount Intradyne would receive under the same attribution formula + applied to all other Eligible Contributors. + +The remainder of the activated quarterly Contributor Revenue Pool is +distributed among Eligible Contributors other than Intradyne in proportion to +their contribution weight. + +### 7.4 Attribution Formula + +Unless and until revised under Section 7.8, contribution weight is determined +from surviving lines of code attributed by `git blame` on the current release +branch, excluding whitespace-only changes, pure mass-formatting changes, and +other mechanically excluded changes documented by Intradyne in the published +attribution report. + +As of Version 1.0 of this Agreement, execution-frequency weighting described in +`METERING.md` is not active. + +### 7.5 Attribution Report and Timing + +If a quarterly pool distribution is owed, Intradyne will publish an +attribution report showing the material computation inputs used for the +distribution, including the activated pool amount, the maintainer allocation, +the attribution formula version, and each recipient's resulting share. + +Pool distributions are made quarterly after quarter-end close and any required +revenue reconciliation. + +### 7.6 Eligibility, KYC, and Tax Forms + +As a condition of receiving payouts, Eligible Contributors must provide: + +- legal name; +- current email address; +- country of residence; +- payment instructions reasonably requested by Intradyne; +- a valid IRS Form W-9 for U.S. persons, or applicable IRS Form W-8 for + non-U.S. persons, if required by law. + +Intradyne may withhold payments to comply with tax, sanctions, anti-money +laundering, or similar legal obligations. 
+ +### 7.7 Minimum Payout and Unreachable Contributors + +Quarterly amounts below USD $25.00 are accrued for the Eligible Contributor +until the threshold is met, unless payment is otherwise required by law. + +If Intradyne cannot complete payment to an Eligible Contributor after twelve +(12) months of good-faith attempts using the contact information then on file, +Intradyne may return the unpaid accrued amount to the general Contributor +Revenue Pool for future distributions. + +### 7.8 Program Changes + +Intradyne may update payout operations, reporting mechanics, and reasonable +administrative requirements prospectively. + +Any change to the attribution algorithm itself, including activation of +execution-frequency weighting or changes to the maintainer-floor rule, +requires: + +- an Architecture Decision Record or equivalent public design note; and +- at least thirty (30) days advance notice before the change applies to a new + billing period or quarter. + +### 7.9 No Guaranteed Revenue + +Contributor acknowledges that the contributor revenue-share program depends on +actual Tier 2 and Tier 3 revenue. Intradyne does not guarantee that any +revenue threshold will be met, that any particular quarter will activate the +pool, or that Contributor will receive any minimum amount. + + +## 8. Optional Withdrawal From Future Pool Participation + +Contributor may elect to stop participating in future contributor-pool +distributions by giving written notice to Intradyne. Such notice does not +revoke or narrow any license or patent rights already granted under this +Agreement. + +After the effective date of withdrawal, Contributor will not accrue new +revenue-share amounts for future billing periods or quarters unless +Intradyne agrees otherwise in writing. + +Previously accrued unpaid balances remain payable subject to this Agreement +and applicable law. 
If a Contributor's accrued balance is below the USD +$25.00 minimum payout threshold at the time of withdrawal, Intradyne will +pay the remaining balance within ninety (90) days of the withdrawal +effective date regardless of the threshold. + + +## 9. Term, Survival, and Irrevocability + +This Agreement remains in effect for all Contributions submitted and accepted +before termination of Contributor's future participation. + +The rights granted in Sections 3 and 4 are irrevocable for accepted +Contributions, except as expressly provided in Section 4 for patent +litigation. + +Sections 3, 4, 5, 7, 8, 9, and 10 survive any cessation of future +contributions. + + +## 10. General + +This Agreement does not create an employment, partnership, joint venture, or +fiduciary relationship between Contributor and Intradyne. + +Intradyne may assign this Agreement in connection with a merger, acquisition, +internal reorganization, or sale of substantially all assets relating to +FormicOS. Contributor may not assign this Agreement without Intradyne's +written consent. + +If any provision of this Agreement is held unenforceable, the remaining +provisions remain in effect and the invalid provision will be reformed to the +minimum extent necessary to make it enforceable. + +This Agreement is governed by the laws of the State of Colorado, without +regard to conflict-of-laws rules. + +This Agreement, together with the License and any written program terms +incorporated by reference, is the entire agreement between Contributor and +Intradyne regarding accepted Contributions unless the parties sign a separate +written superseding agreement. + + +## 11. Signature + +Contributor may accept this Agreement by: + +- signing it through the repository's CLA workflow; +- signing and delivering a copy to Intradyne; or +- electronically affirming assent through another contribution channel + designated by Intradyne. 
+ +Contributor should not submit Contributions until this Agreement has been +accepted. diff --git a/CLAUDE.md b/CLAUDE.md index 01b1ea3..7bc7ebb 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,4 +1,4 @@ -# FormicOS — Stigmergic Multi-Agent Colony Framework +# FormicOS -- Stigmergic Multi-Agent Colony Framework Open-source Python system: AI agents coordinate through shared environmental signals (pheromones), not direct messaging. Tree-structured data model. @@ -6,14 +6,17 @@ Event-sourced (69 events, closed union). Single operator. Local-first with cloud model support. Bayesian knowledge metabolism with Thompson Sampling retrieval. Federated knowledge exchange via Computational CRDTs. Multi-colony orchestration via DelegationPlan DAG parallelism. +MCP developer bridge (27 tools, 9 resources, 6 prompts) for Claude Code +integration. Queen Command & Control surface with behavioral overrides, +display board, tool tracking, and context budget visibility. ## Architecture -Four layers, strict inward dependency. **ENFORCED BY CI — backward imports fail the build.** +Four layers, strict inward dependency. 
**ENFORCED BY CI -- backward imports fail the build.** | Layer | Responsibility | May import | |----------|--------------------------------------|----------------------| -| Core | Types, events (65), port interfaces, CRDTs | NOTHING | +| Core | Types, events (69), port interfaces, CRDTs | NOTHING | | Engine | Colony execution, pure computation | Core only | | Adapters | Tech bindings (LLM, SQLite, Qdrant, MCP) | Core only | | Surface | Wiring, HTTP/WS/CLI, lifecycle | Core, Engine, Adapters | @@ -39,15 +42,15 @@ Current repo state: Colonies produce knowledge entries (skills, experiences) via LLM extraction, 5-axis security scanning (prompt injection, data exfiltration, credential leakage, code safety, credential detection via detect-secrets), and -transcript harvest (hook position 4.5 — extracts bug root causes, +transcript harvest (hook position 4.5 -- extracts bug root causes, conventions, tool configurations). Entries carry Bayesian confidence posteriors (`Beta(alpha, beta)`) evolved by Thompson Sampling, with -decay classes (ephemeral γ=0.98, stable γ=0.995, permanent γ=1.0) and +decay classes (ephemeral gamma=0.98, stable gamma=0.995, permanent gamma=1.0) and a 180-day gamma cap. Entries have granular sub-types within their category: -skills → technique/pattern/anti_pattern; experiences → -decision/convention/learning/bug. Retrieval uses a 6-signal composite -score (ADR-044): -`0.38*semantic + 0.25*thompson + 0.15*freshness + 0.10*status + 0.07*thread + 0.05*cooccurrence`. +skills -> technique/pattern/anti_pattern; experiences -> +decision/convention/learning/bug. Retrieval uses a 7-signal composite +score (ADR-044, rebalanced Wave 59.5): +`0.38*semantic + 0.25*thompson + 0.10*freshness + 0.10*status + 0.07*thread + 0.04*cooccurrence + 0.06*graph_proximity`. All signals normalized to [0, 1]. Co-occurrence uses sigmoid normalization (`1 - e^{-0.6w}`). Thread-scoped entries get a thread_bonus of 1.0 (weighted at 0.07) when retrieved by same-thread colonies. 
@@ -72,7 +75,7 @@ earned autonomy, learned template health, recent outcome digest, and popular unexamined. Three rules (contradiction, coverage gap, stale cluster) include `suggested_colony` configurations for auto-dispatch. Distillation candidates -(dense co-occurrence clusters with ≥5 entries and avg weight >3.0) +(dense co-occurrence clusters with >=5 entries and avg weight >3.0) are identified during maintenance and synthesized by archivist colonies. ### Self-maintenance (ADR-046) @@ -83,14 +86,51 @@ opted-in categories, notify operator), `autonomous` (dispatch all eligible). Policy controls: `auto_actions` list, `max_maintenance_colonies`, and `daily_maintenance_budget`. Budget tracking resets daily at UTC midnight. +Blast radius estimation gates dispatch: 6 heuristic factors (task length, +caste risk, round count, strategy, keywords coder-only, outcome history) +produce a score. Thresholds: >=0.6 escalate, >=0.3 notify, <0.3 proceed. +`auto_notify` skips both escalate and notify; `autonomous` skips only +escalate. + +Autonomy scoring: 4 weighted components (success_rate, volume, +cost_efficiency, operator_trust) produce a grade (A-F) and level +(full/standard/limited/restricted). `check_autonomy_budget` Queen tool +surfaces daily budget truth. + +### Workflow learning (Wave 72) + +Deterministic extractors in `surface/workflow_learning.py` propose actions +through the existing action queue. No LLM calls, no new events. + +**Pattern recognition** (Track 8): `extract_workflow_patterns()` scans colony +outcomes for repeating `(strategy, caste_set)` fingerprints. Requires +`>=3` successful occurrences across `>=2` distinct threads. Proposes +`kind="workflow_template"` actions. On approval, saves a +`ColonyTemplate(learned=True)`. + +**Procedure suggestions** (Track 9): `detect_operator_patterns()` finds +repeated rejection or manual approval patterns in the action queue. Proposes +`kind="procedure_suggestion"` actions. 
On approval, appends a rule to +workspace operating procedures via `append_procedure_rule()`. + +See `docs/AUTONOMOUS_OPERATIONS.md` for the full autonomy operator runbook. + +### Project plan (Wave 70) + +One project plan per data root at `.formicos/project_plan.md`. Shared +parser/helper in `project_plan.py`. Queen gets a dedicated `project_plan` +context budget slot (5%, 400-token fallback). `propose_project_milestone` +and `complete_project_milestone` Queen tools. `GET /api/v1/project-plan` +returns structured JSON for frontend rendering. + ### Adaptive evaporation (Wave 42) Pheromone evaporation in stigmergic mode is bounded adaptive, not fixed. The rate interpolates linearly from `_EVAPORATE_MAX=0.95` (healthy) to `_EVAPORATE_MIN=0.85` (stagnating) based on two signals: branching factor (`exp(entropy)` over pheromone edge weights) and convergence stall count. -High branching (≥2.0) or zero stalls → normal rate. Low branching + stalls -→ faster evaporation to break attractors. Stall influence capped at 4 +High branching (>=2.0) or zero stalls -> normal rate. Low branching + stalls +-> faster evaporation to break attractors. Stall influence capped at 4 rounds. Control law is runner-local (`runner.py`), no surface imports. ### Web foraging (Wave 44) @@ -107,7 +147,7 @@ scores content without LLM. `WebSearch` adapter provides pluggable search. `Forager` surface module orchestrates the cycle with deterministic query templates. -Replay surface: 4 foraging event types (59 → 62): +Replay surface: 4 foraging event types (59 -> 62): `ForageRequested`, `ForageCycleCompleted`, `DomainStrategyUpdated`, `ForagerDomainOverride`. Individual search/fetch/rejection stays log-only. @@ -147,7 +187,7 @@ auto_notify maintenance policy. Created via ### Knowledge distillation -Dense co-occurrence clusters (≥5 entries, avg weight >3.0) are flagged as +Dense co-occurrence clusters (>=5 entries, avg weight >3.0) are flagged as distillation candidates during maintenance. 
When policy allows, archivist colonies synthesize clusters into higher-order entries (KnowledgeDistilled event). Distilled entries get `decay_class="stable"` and elevated alpha @@ -156,7 +196,7 @@ event). Distilled entries get `decay_class="stable"` and elevated alpha ### Multi-colony orchestration (ADR-045) Queen decomposes complex tasks into DelegationPlan DAGs via `spawn_parallel`. -ColonyTask items are organized into `parallel_groups` — tasks within a group +ColonyTask items are organized into `parallel_groups` -- tasks within a group run concurrently via `asyncio.gather`, groups execute sequentially. DAG validated with Kahn's algorithm (no cycles). ParallelPlanCreated event records the plan, reasoning, knowledge gaps, and estimated cost. @@ -194,7 +234,7 @@ replication. Each entry is backed by an ObservationCRDT (core/crdt.py) with G-Counters for observations, LWW Registers for content, and G-Sets for domains. Gamma-decay is applied at query time, not stored in the CRDT. Trust between peers uses Bayesian PeerTrust (10th percentile of Beta -posterior — penalizes uncertainty). Conflict resolution uses three phases: +posterior -- penalizes uncertainty). Conflict resolution uses three phases: Pareto dominance, adaptive threshold, then competing hypotheses. ### Deployment and execution @@ -208,7 +248,7 @@ Execution has two paths: sandbox (`code_execute` tool, Docker containers with `--network=none`, `--memory=256m`, `--read-only`) and workspace executor (repo-backed commands, currently runs on backend host process without container isolation). The workspace executor is the largest -remaining security gap. Docker socket is mounted for sandbox spawning — +remaining security gap. Docker socket is mounted for sandbox spawning -- this grants daemon access to the FormicOS container. 
SQLite persistence rules: named volumes only (no bind-mounts on macOS/Windows @@ -229,7 +269,7 @@ via `to_mcp_tool_error()` with `isError`, `content`, and ### Workflow threads and steps -Work is organized into threads with goals. Threads contain workflow steps — +Work is organized into threads with goals. Threads contain workflow steps -- sequential guidance the Queen uses to structure multi-colony work. Steps are not a DAG; they are Queen scaffolding. When a colony completes a step, the system prompts the Queen with the next pending step via the follow_up_colony @@ -238,8 +278,8 @@ summary. ## Tech stack Use Python 3.12+, uv, Pydantic v2 (sole serialization), asyncio, httpx, -aiosqlite, qdrant-client (≥1.16), sentence-transformers (fallback embedding -path alongside Qwen3-embedding sidecar), FastMCP ≥3.0, Starlette, uvicorn, +aiosqlite, qdrant-client (>=1.16), sentence-transformers (fallback embedding +path alongside Qwen3-embedding sidecar), FastMCP >=3.0, Starlette, uvicorn, structlog, sse-starlette, json-repair, opentelemetry-api. Frontend: Lit Web Components. See `pyproject.toml` for exact pins. @@ -252,6 +292,7 @@ ruff check src/ # Lint pyright src/ # Type check python scripts/lint_imports.py # Layer check docker compose up # Run (or: python -m formicos) +python -m formicos init-mcp # Generate MCP config for Claude Code ``` **Full CI (run before declaring any task done):** @@ -264,7 +305,7 @@ ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest Use this delivery loop unless the operator explicitly asks for a different one. Expanded reference: -- `docs/DEVELOPMENT_WORKFLOW.md` — canonical workflow document with the shared +- `docs/DEVELOPMENT_WORKFLOW.md` -- canonical workflow document with the shared delivery loop, prompt checklist, acceptance checklist, and handoff artifacts ### 0. Establish the active coordination source @@ -391,16 +432,19 @@ IMPORTANT: These are non-negotiable. Violating any of these requires operator ap 2. 
Read `docs/decisions/` before making architectural choices. 3. If your change contradicts an ADR, STOP and flag the conflict. 4. Never modify files outside your ownership list (see `AGENTS.md`). -5. Event types are a CLOSED union — adding types requires an ADR with operator approval. -6. ≤20K LOC soft limit on `core/` + `engine/` + `adapters/` + `surface/` combined. Exceeding requires justification, not blocking. +5. Event types are a CLOSED union -- adding types requires an ADR with operator approval. +6. <=20K LOC soft limit on `core/` + `engine/` + `adapters/` + `surface/` combined. Exceeding requires justification, not blocking. 7. Every state change is an event. No shadow databases. No second stores. 8. Feature flags wrap incomplete work. Merge to main frequently. 9. Knowledge confidence uses Beta(alpha, beta) posteriors evolved by Thompson Sampling. Do not replace with scalar confidence or heuristic scoring. 10. Workflow steps are Queen scaffolding, not an execution pipeline. The Queen always decides whether to proceed. +11. The Queen system prompt's tool inventory is programmatically assembled from `tool_specs()` (the full tool surface: handlers + special-cased + addon tools). Do not manually edit the tool list in `caste_recipes.yaml`. +12. UI components MUST NOT hardcode governance defaults. Read from `runtimeConfig.governance.*` or workspace config props. +13. Prefer computed introspection over hardcoded counts in documentation and UI. Never hardcode what can be derived. ## Prohibited alternatives -| Instead of… | Use… | Why | +| Instead of... | Use... | Why | |-------------|------|-----| | msgspec, dataclasses for events | Pydantic v2 | Sole serialization library, project-wide | | `print()` | `structlog` | Structured logging only | @@ -414,8 +458,8 @@ IMPORTANT: These are non-negotiable. Violating any of these requires operator ap | Path | Purpose | Modify? 
| |------|---------|---------| -| `docs/contracts/` | Integration seams (events.py, ports.py, types.ts) | NO — operator approval required | -| `docs/decisions/` | ADRs (001-048, see INDEX.md) | Read before architectural choices | +| `docs/contracts/` | Integration seams (events.py, ports.py, types.ts) | NO -- operator approval required | +| `docs/decisions/` | ADRs (001-051, see INDEX.md) | Read before architectural choices | | `docs/specs/` | Current-state implementation references (8 specs, Wave 59) | Canonical subsystem docs | | `docs/waves/PROGRESS.md` | Wave progress | Update when completing work | | `docs/DEPLOYMENT.md` | Deployment guide: clone to running stack | Deployment truth | @@ -436,33 +480,40 @@ IMPORTANT: These are non-negotiable. Violating any of these requires operator ap | `adapters/fetch_pipeline.py` | Graduated fetch + content extraction (Level 1-2) | Web foraging | | `adapters/content_quality.py` | Deterministic content-quality scoring (no LLM) | Web foraging | | `adapters/web_search.py` | Pluggable web search adapter | Web foraging | -| `surface/self_maintenance.py` | MaintenanceDispatcher, autonomy policy, distillation dispatch | Self-maintenance | -| `surface/queen_tools.py` | Queen tool dispatch, spawn_parallel, DelegationPlan validation | Queen tools | +| `surface/self_maintenance.py` | MaintenanceDispatcher, autonomy policy, blast radius, autonomy scoring | Self-maintenance | +| `surface/project_plan.py` | Project plan parser/helper, milestone tools, plan rendering | Project plan | +| `surface/queen_budget.py` | 9-slot proportional Queen context budget (ADR-051) | Queen budget | +| `surface/queen_tools.py` | Queen tool dispatch (42 tools), spawn_parallel, DelegationPlan | Queen tools | | `surface/transcript_view.py` | Canonical colony transcript schema | A2A/MCP export | | `surface/proactive_intelligence.py` | 17 deterministic briefing rules (7 knowledge + 4 performance + evaporation + branching + earned autonomy + template health + 
outcome digest + popular unexamined) | Proactive intel | -| `surface/routes/api.py` | REST endpoints including outcomes + create-demo | API surface | +| `surface/routes/api.py` | REST endpoints: outcomes, create-demo, project-plan, autonomy-status, maintenance-policy, add-model | API surface | +| `surface/workflow_learning.py` | Deterministic workflow pattern recognition + procedure suggestions (Wave 72) | Workflow learning | +| `docs/AUTONOMOUS_OPERATIONS.md` | Autonomy operator runbook: action queue, levels, learning, controls | Reference | +| `docs/DEVELOPER_BRIDGE.md` | Developer onboarding guide for Claude Code integration | Reference | +| `surface/mcp_server.py` | MCP server (27 tools, 9 resources, 6 prompts) | MCP surface | | `config/templates/demo-workspace.yaml` | Demo workspace template with seeded entries | Demo path | ## Common patterns ### Adding a Queen tool -1. Define the tool in `_queen_tools()` in `queen_runtime.py` — name, description, parameters. -2. Add the handler in `_handle_queen_tool_call()` in the same file — match by tool name, implement logic, return result string. +1. Add the tool spec in `tool_specs()` in `queen_tools.py` -- name, description, parameters JSON schema (before the `*self._addon_tool_specs` line). +2. Add the handler in `QueenToolDispatcher.__init__()` `self._handlers` dict in `queen_tools.py` -- maps tool name to an async handler `(inputs, workspace_id, thread_id) -> tuple[str, dict | None]`. +3. The system prompt tool inventory is self-assembled from `tool_specs()` at runtime -- do NOT manually edit the tool list in `caste_recipes.yaml`. ### Adding an agent tool Five touch points: -1. `engine/tool_dispatch.py` — Add to `TOOL_SPECS` dict (name, description, parameters JSON schema). -2. `engine/tool_dispatch.py` — Add to `TOOL_CATEGORY_MAP` (maps tool name to `ToolCategory`). -3. `engine/runner.py` — Add to `RoundRunner.__init__()` as a new `*_fn` callback parameter, stored as `self._*_fn`. -4. 
`engine/tool_dispatch.py` — Add dispatch case in `_execute_tool()` that calls the callback. -5. `surface/runtime.py` — Add `make_*_fn()` factory method that creates the async callback closure. -6. `config/caste_recipes.yaml` — Add tool name to relevant castes' tool lists. +1. `engine/tool_dispatch.py` -- Add to `TOOL_SPECS` dict (name, description, parameters JSON schema). +2. `engine/tool_dispatch.py` -- Add to `TOOL_CATEGORY_MAP` (maps tool name to `ToolCategory`). +3. `engine/runner.py` -- Add to `RoundRunner.__init__()` as a new `*_fn` callback parameter, stored as `self._*_fn`. +4. `engine/tool_dispatch.py` -- Add dispatch case in `_execute_tool()` that calls the callback. +5. `surface/runtime.py` -- Add `make_*_fn()` factory method that creates the async callback closure. +6. `config/caste_recipes.yaml` -- Add tool name to relevant castes' tool lists. ### Adding a maintenance handler -1. Create `make_*_handler(runtime)` factory in `maintenance.py` — returns an async handler function. +1. Create `make_*_handler(runtime)` factory in `maintenance.py` -- returns an async handler function. 2. Register in `app.py` `service_router.register_handler()` block with a `service:consolidation:*` name. 3. Add to `maintenance.py` `__all__`. diff --git a/COMMERCIAL_TERMS.md b/COMMERCIAL_TERMS.md new file mode 100644 index 0000000..d075752 --- /dev/null +++ b/COMMERCIAL_TERMS.md @@ -0,0 +1,269 @@ +# FormicOS Commercial Terms + +Version 1.0 + +These Commercial Terms govern the payment, reporting, and dispute +resolution mechanics for Tier 2 (Metered Commercial License) and +Tier 3 (Enterprise Agreement) licensees of FormicOS. The fee +computation formula is specified in the FormicOS License Agreement +(LICENSE). These Terms govern how that fee is reported, invoiced, +and paid. + +These Terms become binding upon Licensee's first Usage Attestation +submission. By submitting an attestation, Licensee accepts these +Terms in their entirety. 
+ +IMPORTANT: Have these Terms reviewed by qualified legal counsel +before submitting your first attestation. + + +## 1. AGREEMENT STRUCTURE + +The FormicOS commercial licensing relationship consists of four +instruments: + + (a) The FormicOS License Agreement (LICENSE), which specifies the + AGPLv3 base license, Tier 1 free-use permissions, Tier 2 + pricing formula, and Contributor Revenue Share. + + (b) These Commercial Terms, which govern payment mechanics and + dispute resolution. + + (c) The FormicOS Usage Metering Specification (METERING.md), + which is the normative technical specification for computing + Total Tokens and producing Usage Attestations. + + (d) The FormicOS Contributor License Agreement (CLA.md), which + governs the relationship between Contributors and the + copyright holder regarding revenue sharing. + +In the event of conflict: the License Agreement controls on fee +computation and metering scope. These Commercial Terms control on +payment mechanics, invoicing, and dispute resolution. + + +## 2. REPORTING AND PAYMENT + +### 2.1 Billing Period + +Each calendar month is a Billing Period. + +### 2.2 Attestation Submission + +Within fifteen (15) days after the end of each Billing Period, +Licensee shall submit a Usage Attestation as specified in +METERING.md to the billing endpoint provided by Licensor. + +### 2.3 Invoice and Payment + +Licensor will issue an invoice within five (5) business days of +receiving a valid Usage Attestation. Payment is due within thirty +(30) days of invoice date. + +### 2.4 Payment Methods + +Licensor accepts payment via: +- ACH bank transfer (US accounts) +- Wire transfer (international accounts) +- Credit card via Stripe + +Transaction fees for credit card payments are borne by Licensee. +ACH and wire transfers have no additional fees. + +### 2.5 Currency + +All fees are denominated in United States Dollars (USD). Licensees +paying in other currencies bear the exchange rate risk. 
The +applicable rate is the mid-market rate on the invoice date. + +### 2.6 Late Payment + +Payments more than thirty (30) days past due accrue interest at the +lesser of 1.5% per month or the maximum rate permitted by law. + +Payments more than ninety (90) days past due constitute a material +breach. Licensor may terminate the Commercial License upon thirty +(30) days written notice if the breach is not cured. + +### 2.7 Taxes + +Fees are exclusive of all taxes. Licensee is responsible for all +applicable sales, use, VAT, GST, and withholding taxes. If +Licensee is required to withhold taxes, the payment to Licensor +must be grossed up so that Licensor receives the full invoiced +amount. + + +## 3. AUDIT + +### 3.1 Audit Rights + +Licensor may conduct one (1) audit per calendar year upon thirty +(30) days written notice. The audit is limited to: + + (a) Aggregate token counts per Billing Period (Total Tokens as + defined in the License Agreement) + (b) Event store sequence continuity (first/last event sequence + numbers per period, chain hash verification) + (c) Number of TokensConsumed events per period + +The audit does NOT include access to prompts, model outputs, agent +conversation content, workspace files, or any other content +processed by FormicOS. + +### 3.2 Audit Process + +Licensee provides the requested aggregate data within fifteen (15) +business days of the audit notice. Licensor may engage an +independent third-party auditor bound by confidentiality +obligations. + +### 3.3 Audit Findings + +If the audit reveals underreporting of Total Tokens by more than +ten percent (10%) for any Billing Period: + + (a) Licensee pays the underpayment amount plus the lesser of + $5,000 or the reasonable cost of the audit. + (b) Licensor may conduct one additional audit within the + following twelve (12) months. + +If the audit reveals no material discrepancy, Licensor bears the +cost of the audit. + + +## 4. 
TERM AND TERMINATION + +### 4.1 Term + +These Commercial Terms are effective upon Licensee's first +attestation submission and continue for successive one-year terms +unless terminated. + +### 4.2 Termination by Licensee + +Licensee may terminate at any time by providing thirty (30) days +written notice. Licensee must pay all fees accrued through the +termination date. Upon termination, Licensee must either: + + (a) comply fully with AGPLv3 Section 13, OR + (b) cease use of FormicOS. + +### 4.3 Termination by Licensor + +Licensor may terminate for material breach (including persistent +late payment or attestation fraud) upon thirty (30) days written +notice if the breach is not cured within the notice period. + +### 4.4 Effect of Termination + +Sections 3 (Audit), 5 (Limitation of Liability), 6 (Dispute +Resolution), 7 (Force Majeure), and 8 (General) survive +termination. + + +## 5. LIMITATION OF LIABILITY + +TO THE MAXIMUM EXTENT PERMITTED BY LAW, LICENSOR'S TOTAL LIABILITY +UNDER THESE TERMS SHALL NOT EXCEED THE FEES PAID BY LICENSEE IN +THE TWELVE (12) MONTHS PRECEDING THE CLAIM. + +NEITHER PARTY SHALL BE LIABLE FOR INDIRECT, INCIDENTAL, SPECIAL, +CONSEQUENTIAL, OR PUNITIVE DAMAGES, REGARDLESS OF THE THEORY OF +LIABILITY. + + +## 6. DISPUTE RESOLUTION + +### 6.1 Fee Computation Disputes + +Disputes about fee computation are resolved by running the +canonical pricing function (specified in the License Agreement) +against the attested Total Tokens. The function is deterministic. +If both parties agree on the input (Total Tokens), the output (fee) +is not disputable. + +If the parties disagree on Total Tokens, the dispute is resolved +via the audit process in Section 3. + +### 6.2 Other Disputes + +All other disputes are resolved by binding arbitration under the +rules of the American Arbitration Association, conducted in the +state of the Licensor's principal place of business. 
The +arbitrator's award is final and enforceable in any court of +competent jurisdiction. + +### 6.3 Governing Law + +These Terms are governed by the laws of the State of Colorado, +without regard to conflict-of-laws principles. + + +## 7. FORCE MAJEURE + +Neither party is liable for failure or delay in performance caused +by events beyond its reasonable control, including but not limited +to: natural disasters, acts of government, pandemic, war, civil +unrest, power or internet outages, and third-party service provider +failures. + +If the billing endpoint is unavailable due to a force majeure +event, the fifteen (15) day attestation submission deadline +(Section 2.2) is extended by the duration of the outage plus five +(5) business days. Licensee must submit the attestation promptly +after the force majeure event ends. + +If a force majeure event prevents performance for more than ninety +(90) consecutive days, either party may terminate these Terms upon +written notice without penalty. + + +## 8. GENERAL + +### 8.1 Entire Agreement + +The License Agreement, these Commercial Terms, the Usage Metering +Specification, and any executed Enterprise Agreement constitute the +entire agreement between the parties regarding FormicOS commercial +licensing. The Contributor License Agreement governs a separate +relationship (contributor rights and revenue sharing) and is not +part of the licensee agreement. + +### 8.2 Amendment + +Licensor may update these Commercial Terms (excluding the pricing +formula) with sixty (60) days written notice. Updated Terms apply +to Billing Periods beginning after the notice period. + +The pricing formula coefficient in the License Agreement may only +be changed with six (6) months written notice. Any other change to +the pricing formula (including changes to the functional form) +requires twelve (12) months written notice. In either case, the +change applies prospectively to Billing Periods beginning after the +notice period. 
Fees for prior Billing Periods are computed under +the formula in effect during that period. + +### 8.3 Assignment + +Licensee may not assign these Terms without Licensor's written +consent. Licensor may assign these Terms in connection with a +merger, acquisition, or sale of substantially all assets. + +### 8.4 Severability + +If any provision of these Terms is found unenforceable, the +remaining provisions continue in full force. The unenforceable +provision is reformed to the minimum extent necessary to make it +enforceable. + +### 8.5 Waiver + +Failure to enforce any provision is not a waiver of future +enforcement of that or any other provision. + +### 8.6 Notices + +All notices under these Terms must be in writing and delivered via +email to the addresses registered at license activation. A notice +is effective upon confirmed delivery. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7348ce3..1feeac0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -11,11 +11,14 @@ submitting a pull request. - [SECURITY.md](SECURITY.md) — how to report vulnerabilities (do not open public issues for security bugs) +Before contributing, also review [CLA.md](CLA.md) for the contributor license +terms, commercial relicensing grant, and revenue-share program. + ## Contribution Flow 1. Check existing issues or open a new one describing the change 2. Fork the repository and create a feature branch -3. Sign the CLA when prompted (see [GOVERNANCE.md](GOVERNANCE.md)) +3. Sign the CLA when prompted (see [CLA.md](CLA.md)) 4. Make your changes following the architecture rules below 5. Run the full CI pipeline locally (lint, typecheck, layer-check, tests, frontend build) @@ -27,6 +30,20 @@ submitting a pull request. Look for issues labeled `good-first-issue` for tasks suitable for new contributors. These are scoped to avoid touching architectural seams. +## Contributor License Agreement + +FormicOS requires a signed [CLA](CLA.md) for external code contributions. 
The +CLA is what allows Intradyne to keep the AGPLv3 distribution and also offer +commercial licenses for the same codebase. Contributors who sign the CLA +also become eligible for the revenue-share program described in the CLA. + +If your employer owns the IP in your contributions, your employer can sign +the [Corporate CLA](CORPORATE_CLA.md) to blanket-authorize all employees. +This avoids each contributor needing individual employer authorization. + +DCO sign-off may be used as a supplementary provenance signal, but it does not +replace the CLA because it does not grant commercial relicensing rights. + ## Environment Setup ### Backend @@ -210,7 +227,7 @@ Before making an architectural choice, check `docs/decisions/` for existing ADRs | [006](docs/decisions/006-trunk-based-development.md) | Trunk-based development with feature flags | See [docs/decisions/INDEX.md](docs/decisions/INDEX.md) for the full list -(47 ADRs covering event sourcing, knowledge metabolism, federation, +(51 ADRs covering event sourcing, knowledge metabolism, federation, parallel planning, and more). If your change contradicts an ADR, stop and flag the conflict. 
@@ -227,7 +244,7 @@ These are the most important files and directories to understand: | `src/formicos/engine/runner.py` | Colony round execution loop | | `src/formicos/surface/projections.py` | Event replay into in-memory read models | | `src/formicos/surface/queen_runtime.py` | Queen orchestration and tool dispatch | -| `src/formicos/surface/knowledge_catalog.py` | Federated knowledge retrieval with 6-signal scoring | +| `src/formicos/surface/knowledge_catalog.py` | Federated knowledge retrieval with 7-signal scoring | | `docs/contracts/` | Frozen integration seams — do not modify without maintainer approval | | `docs/decisions/` | Architecture Decision Records (ADR files present in the repo) | | `docs/specs/` | Executable specifications (pytest-bdd scenarios) | diff --git a/CORPORATE_CLA.md b/CORPORATE_CLA.md new file mode 100644 index 0000000..eec642a --- /dev/null +++ b/CORPORATE_CLA.md @@ -0,0 +1,205 @@ +# FormicOS Corporate Contributor License Agreement + +Version 1.0 + +IMPORTANT: This is a legal instrument. Have it reviewed by qualified +legal counsel before relying on it. + +This Corporate Contributor License Agreement ("Agreement") is between +Intradyne, the copyright holder and lead maintainer of FormicOS +("Intradyne"), and the Legal Entity identified below ("Corporation"). + +By signing this Agreement, Corporation agrees to the following terms on +behalf of itself and all individuals authorized to submit Contributions +on Corporation's behalf ("Authorized Contributors"). + + +## 1. Purpose + +This Agreement supplements the individual FormicOS Contributor License +Agreement (CLA.md). It provides a blanket authorization covering all +employees and contractors whom Corporation designates as Authorized +Contributors, so that each individual does not need to separately +demonstrate employer authorization under Section 5 of the individual +CLA. + +FormicOS is built in the open because open development produces better +software. 
This Agreement ensures that corporate contributions can flow +into the project cleanly, with clear IP ownership and commercial +relicensing rights, while contributors share in the commercial revenue +their work generates. + + +## 2. Definitions + +"Authorized Contributor" means any individual who (a) is an employee, +contractor, or agent of Corporation, and (b) has been designated by +Corporation to submit Contributions to FormicOS. Corporation maintains +the list of Authorized Contributors and provides it to Intradyne upon +request. + +"Contribution" has the same meaning as in the individual CLA. + +"FormicOS" has the same meaning as in the individual CLA. + +"Legal Entity" means the union of the acting entity and all other +entities that control, are controlled by, or are under common control +with that entity. "Control" means (a) the power to direct the +management of such entity, or (b) ownership of fifty percent (50%) or +more of the outstanding shares, voting interests, or beneficial +ownership of such entity. + + +## 3. Copyright License and Commercial Relicensing Grant + +Corporation grants Intradyne and its successors, affiliates, +contractors, and sublicensees a perpetual, worldwide, non-exclusive, +irrevocable, royalty-free, fully paid-up license to: + +- use, reproduce, display, perform, and distribute Contributions made + by Authorized Contributors; +- modify, adapt, translate, and create derivative works of such + Contributions; +- sublicense such Contributions under the License, a commercial + license, or any successor licensing program for FormicOS; +- enforce the copyrights in such Contributions as part of the FormicOS + codebase. + +This grant includes the right to offer Contributions under proprietary, +commercial, trial, evaluation, hosted, or negotiated enterprise terms +without seeking additional permission from Corporation. + +This Agreement is a license grant. It is not a copyright assignment. 
+Corporation retains ownership of its Contributions. + + +## 4. Patent License + +Corporation grants Intradyne and recipients of the official FormicOS +distribution a perpetual, worldwide, non-exclusive, no-charge, +royalty-free, irrevocable patent license to make, have made, use, offer +to sell, sell, import, and otherwise transfer Contributions made by +Authorized Contributors and the portions of FormicOS that necessarily +practice such Contributions, but only for patent claims owned or +controlled by Corporation that are necessarily infringed by the +Contribution as submitted or as incorporated into FormicOS. + +If Corporation institutes patent litigation against Intradyne or any +official FormicOS distributor alleging that a Contribution or FormicOS +itself infringes Corporation's patent rights, then the patent license +granted by Corporation under this Agreement terminates as of the date +the litigation is filed. + + +## 5. Corporation Representations + +Corporation represents and warrants that: + +- Corporation is legally entitled to grant the rights in this Agreement. +- Corporation has the authority to bind all Authorized Contributors with + respect to Contributions submitted on Corporation's behalf. +- Corporation has obtained or will obtain all necessary internal + approvals, including from any intellectual property or legal + department, before designating Authorized Contributors. +- Corporation will ensure that Authorized Contributors are aware of this + Agreement and its terms before they submit Contributions. + +Corporation is responsible for ensuring that each Authorized Contributor +has the right to submit the specific Contribution they make. If an +Authorized Contributor includes third-party material in a Contribution, +the contributor must disclose its source and license status per the +individual CLA. + + +## 6. 
Authorized Contributor Management + +Corporation will designate a primary contact ("CLA Manager") responsible +for maintaining the list of Authorized Contributors. + +Corporation may add or remove Authorized Contributors at any time by +written notice to Intradyne. Removal does not affect the license grants +for Contributions already submitted and accepted. + +Intradyne may request the current list of Authorized Contributors no +more than once per calendar quarter. + + +## 7. Relationship to Individual CLA + +Authorized Contributors covered by this Corporate CLA do NOT need to +separately sign the individual CLA for employer authorization purposes +(individual CLA Section 5). However, if an Authorized Contributor +wishes to participate in the Contributor Revenue Share program +(individual CLA Section 7), they must individually sign the individual +CLA and provide the required KYC and tax information. + +Revenue share for Contributions made under this Corporate CLA accrues +to the individual Authorized Contributor who authored the code, as +determined by git blame attribution. Corporation does not receive +revenue share unless Corporation itself is also an Eligible Contributor +under the individual CLA. + +If an Authorized Contributor submits a Contribution that is partly +personal work and partly Corporation-owned work, the contributor should +clarify which portions are covered by this Corporate CLA and which are +covered by their individual CLA. + + +## 8. Term and Termination + +This Agreement remains in effect until terminated by either party with +thirty (30) days written notice. + +Termination does not affect the license grants for Contributions +already submitted and accepted. Sections 3, 4, 5, and 10 survive +termination. 
+ +Upon termination, all Authorized Contributors revert to the individual +CLA process for any future Contributions (they must demonstrate +employer authorization individually per individual CLA Section 5, or +sign a new Corporate CLA with their new employer). + + +## 9. No Employment Relationship + +This Agreement does not create an employment, partnership, joint +venture, or agency relationship between Corporation and Intradyne. +Authorized Contributors are employees or contractors of Corporation, +not of Intradyne. + + +## 10. General + +Corporation may not assign this Agreement without Intradyne's written +consent. Intradyne may assign this Agreement in connection with a +merger, acquisition, or sale of substantially all assets relating to +FormicOS. + +If any provision is held unenforceable, the remaining provisions remain +in effect. + +This Agreement is governed by the laws of the State of Colorado, without +regard to conflict-of-laws rules. + + +## 11. Signature + +Corporation name: ____________________________________ + +Address: ____________________________________ + +CLA Manager name: ____________________________________ + +CLA Manager email: ____________________________________ + +Authorized signatory name: ____________________________________ + +Authorized signatory title: ____________________________________ + +Signature: ____________________________________ + +Date: ____________________________________ + + +Corporation may also accept this Agreement electronically through a +contribution channel designated by Intradyne. diff --git a/GOVERNANCE.md b/GOVERNANCE.md index b922415..92a4c01 100644 --- a/GOVERNANCE.md +++ b/GOVERNANCE.md @@ -57,8 +57,7 @@ The following changes always require an ADR: ### Event Union Changes -The event union is intentionally closed at 55 types. Adding a new event -type requires: +The event union is ADR-gated. Adding a new event type requires: 1. An ADR explaining why the new event is necessary 2. 
Maintainer approval @@ -78,8 +77,13 @@ type requires: ## CLA / DCO FormicOS requires a Contributor License Agreement (CLA) for external -contributions. The CLA ensures that contributions can be distributed under -the project's MIT license. +contributions. The CLA gives Intradyne the rights needed to distribute +contributions under the repository License and under commercial licensing +terms, while keeping contributor-revenue mechanics on a contractual basis. + +The canonical contributor agreement is [CLA.md](CLA.md). Organizations +whose employees contribute can sign the [Corporate CLA](CORPORATE_CLA.md) +to blanket-authorize all their developers. **Status**: CLA enforcement requires admin-side configuration. See `docs/GITHUB_ADMIN_SETUP.md` for the setup steps that a repository @@ -88,6 +92,9 @@ administrator must complete. Until the CLA app is fully configured, maintainers will manually verify contribution rights during PR review. +DCO sign-off may be used in addition to the CLA, but it is not a substitute +for the CLA. + ## Succession If the current maintainer team becomes inactive: @@ -96,7 +103,8 @@ If the current maintainer team becomes inactive: request maintainer access by opening an issue 2. If no response within 30 days, the project should be considered unmaintained -3. Forks are encouraged under the MIT license +3. Forks remain available under the AGPLv3 base license and any additional + permissions actually retained in the redistributed copy ## Code of Conduct @@ -104,5 +112,10 @@ All participants must follow the [Code of Conduct](CODE_OF_CONDUCT.md). ## License -FormicOS is licensed under the MIT License. See [LICENSE](LICENSE) for -details. +FormicOS is distributed under the [LICENSE](LICENSE) file in this repository: +AGPLv3 as the base license, with Section 7 additional permissions and a +commercial licensing path. Commercial payment mechanics live in +[COMMERCIAL_TERMS.md](COMMERCIAL_TERMS.md). 
Token metering is specified in +[METERING.md](METERING.md). Contributor rights and revenue-share mechanics +live in [CLA.md](CLA.md). Machine-readable economic contracts for A2A agent +participation are specified in [docs/A2A_ECONOMICS.md](docs/A2A_ECONOMICS.md). diff --git a/LICENSE b/LICENSE index 6a46bee..caab5ef 100644 --- a/LICENSE +++ b/LICENSE @@ -1,18 +1,286 @@ FORMICOS LICENSE AGREEMENT ========================== -This software is licensed under the GNU Affero General Public License version 3 (AGPLv3), with the following Additional Permissions granted under Section 7 of the AGPLv3. +Copyright (C) 2024-2026 Intradyne. All rights reserved. -SPECIAL EXCEPTION FOR SMALL BUSINESSES AND EDUCATIONAL INSTITUTIONS: -As a special exception to the AGPLv3, the copyright holders of FormicOS grant you permission to modify the software and interact with it remotely through a computer network (e.g., as a SaaS) WITHOUT the obligation to distribute the corresponding source code under Section 13, PROVIDED THAT you meet at least one of the following criteria: +This software is licensed under the GNU Affero General Public License +version 3 ("AGPLv3"), with the following Additional Permissions and +Alternative Commercial License granted under Section 7 of the AGPLv3. -1. Educational / Non-Profit: You are using the software exclusively for academic research, teaching at a recognized educational institution, or operating as a registered non-profit organization (e.g., 501(c)(3)). -2. Small Business Commercial Use: Your organization (including any corporate affiliates) has a Gross Annual Revenue of less than $1,000,000 USD (or local equivalent) in the trailing 12 months. +IMPORTANT: This preamble is a legal instrument. Have it reviewed by +qualified legal counsel before relying on it for commercial use. -If your organization exceeds the $1,000,000 USD revenue threshold, this exception automatically terminates. 
At that point, you must either fully comply with the source-sharing requirements of Section 13 of the AGPLv3, or obtain a separate Commercial License from the copyright holder. -COMMERCIAL LICENSING: -If you do not meet the criteria for the exception above and wish to use FormicOS in a closed-source commercial environment without complying with the AGPLv3 network source-sharing requirements, please contact the repository owner for a Commercial License. +DEFINITIONS +----------- + +"FormicOS" means the software in this repository, including its +runtime engine, adapters, surface layer, frontend, configuration +files, and documentation. + +"You" means the individual or Legal Entity exercising permissions +granted by this License. + +"Legal Entity" means the union of the acting entity and all other +entities that control, are controlled by, or are under common +control with that entity. "Control" means (a) the power to direct +the management of such entity, or (b) ownership of fifty percent +(50%) or more of the outstanding shares or beneficial ownership of +such entity. + +"Gross Annual Revenue" means the total revenue of the Legal Entity +and its affiliates in the trailing twelve (12) calendar months, +calculated in accordance with generally accepted accounting +principles. Revenue from grants, donations, and tuition does not +count toward this threshold for nonprofit and educational entities. + +"Total Tokens" means the aggregate count of input tokens, output +tokens, and reasoning tokens processed through the FormicOS +orchestration runtime during a Billing Period, as recorded by the +usage metering system described in METERING.md. Cache-read tokens +are a subset of input tokens (they are input tokens served from +provider cache) and are NOT counted separately -- they are already +included in the input token count. Total Tokens includes tokens +processed by both local inference servers and cloud API providers. 
+Total Tokens does NOT include tokens consumed by systems external +to FormicOS that are not orchestrated by the FormicOS runtime. + +"Billing Period" means one calendar month. + +"Usage Attestation" means a cryptographically signed report of +Total Tokens for a Billing Period, produced by the FormicOS usage +metering system as described in METERING.md. + +"Contributor" means any individual or Legal Entity that has +contributed code to the FormicOS repository and whose contribution +has been merged into the main branch, subject to the terms of the +FormicOS Contributor License Agreement. + + +TIER 1: FREE USE (ADDITIONAL PERMISSION UNDER AGPLV3 SECTION 7) +---------------------------------------------------------------- + +As an additional permission under Section 7 of the AGPLv3, the +copyright holders grant You permission to use, modify, and deploy +FormicOS -- including interaction over a computer network -- WITHOUT +the obligation to distribute corresponding source code under +Section 13 of the AGPLv3, PROVIDED THAT You meet at least one of +the following criteria: + + (a) SMALL BUSINESS: Your Legal Entity has Gross Annual Revenue of + less than One Million United States Dollars (USD $1,000,000) + in the trailing twelve (12) months. + + (b) NONPROFIT: You are using FormicOS exclusively for the + operations of a registered nonprofit organization (e.g., US + IRC Section 501(c)(3) or equivalent foreign designation). + + (c) EDUCATION: You are using FormicOS exclusively for academic + research or teaching at an accredited educational institution. + + (d) PERSONAL: You are an individual using FormicOS for personal, + non-commercial purposes. + +This permission applies to the complete FormicOS software without +restriction on features, scale, or deployment topology. There are +no metering obligations, no reporting requirements, and no fees +under Tier 1. 
+ +AUTOMATIC TERMINATION: If Your Legal Entity's Gross Annual Revenue +exceeds USD $1,000,000, this Tier 1 permission terminates +automatically at the end of the calendar quarter in which the +threshold is exceeded. You then have ninety (90) days to either: + + (i) comply fully with Section 13 of the AGPLv3 (open-source + your modifications and provide source to network users), OR + + (ii) obtain a Tier 2 Commercial License as described below. + +During the 90-day grace period, You may continue using FormicOS +under Tier 1 terms. + + +TIER 2: METERED COMMERCIAL LICENSE (ALTERNATIVE TO AGPLV3 SECTION 13) +--------------------------------------------------------------------- + +Organizations that exceed the Tier 1 revenue threshold and wish to +use FormicOS without complying with AGPLv3 Section 13 may obtain a +Commercial License. The Commercial License grants You a non- +exclusive, non-transferable, worldwide license to use, modify, and +deploy FormicOS in proprietary environments without source- +disclosure obligations. + +### Pricing Formula + +The monthly license fee in United States Dollars is computed as: + + monthly_fee = 2.00 * sqrt(T) + +where: + + T = Total Tokens for the Billing Period, divided by 1,000,000 + (i.e., T is measured in millions of tokens) + + sqrt = the principal (positive) square root function + +The fee is rounded to the nearest cent (two decimal places). + +### Reference Implementation (canonical) + + import math + + def formicos_monthly_fee(total_tokens: int) -> float: + """Compute the FormicOS monthly commercial license fee. + + Args: + total_tokens: Total tokens processed in the billing + period (input + output + reasoning). + Cache-read tokens are already included + in the input token count and must not + be added separately. + + Returns: + Fee in USD, rounded to two decimal places. 
+ """ + tokens_millions = total_tokens / 1_000_000 + return round(2.00 * math.sqrt(tokens_millions), 2) + +### Worked Examples + + Total Tokens T (millions) Monthly Fee + -------------------------------------------------- + 10,000,000 10 $ 6.32 + 66,000,000 66 $ 16.25 + 200,000,000 200 $ 28.28 + 500,000,000 500 $ 44.72 + 1,000,000,000 1,000 $ 63.25 + 3,300,000,000 3,300 $114.89 + 10,000,000,000 10,000 $200.00 + +### What the formula means in plain language + +The square root function means: if you process 100 times more +tokens, you pay 10 times more. Marginal cost decreases with scale. +A solo developer processing 66M tokens/month pays $16.25. A large +team processing 3.3B tokens/month pays $114.89. There are no per- +seat fees, no per-machine fees, and no per-install fees. The token +volume is the sole measure of usage regardless of how many humans, +agents, machines, or API calls generate it. + +### Metering Obligations + +Tier 2 licensees must: + + (a) Maintain the FormicOS usage metering system in operational + condition. The metering system is an integral component of the + licensed software. Disabling, circumventing, or materially + modifying the metering system constitutes a modification of + the software that, if deployed over a network, triggers the + source-disclosure obligations of AGPLv3 Section 13. + + (b) Submit a Usage Attestation within fifteen (15) days after the + end of each Billing Period. + + (c) Pay the computed fee within thirty (30) days of attestation + submission. + + (d) Retain event store records for a minimum of twelve (12) + months for audit purposes. + +### Audit Rights + +The copyright holder may, no more than once per calendar year and +upon thirty (30) days written notice, request an audit of Your +token usage records. The audit is limited to aggregate token counts +per Billing Period and does not include access to prompts, model +outputs, or other content processed by FormicOS. 
If an audit +reveals underreporting of more than ten percent (10%) for any +Billing Period, You are responsible for the underpayment plus the +reasonable cost of the audit. + +### Payment Terms + +Payment terms, methods, currencies, invoicing, and dispute +resolution are governed by the FormicOS Commercial Terms, a +separate instrument available at COMMERCIAL_TERMS.md. In the event +of conflict between this License and the Commercial Terms, this +License controls on matters of fee computation and metering; the +Commercial Terms control on matters of payment mechanics and +dispute resolution. + + +TIER 3: ENTERPRISE AGREEMENT +----------------------------- + +Organizations seeking predictable budgeting, custom terms, service- +level agreements, or support may negotiate an Enterprise Agreement +with the copyright holder. Enterprise Agreements use the Tier 2 +pricing formula as a floor: the negotiated annual fee must equal or +exceed the fee that would result from applying the Tier 2 formula +to the licensee's projected annual token volume. + +Contact the copyright holder for Enterprise Agreement terms. + + +CONTRIBUTOR REVENUE SHARE +-------------------------- + +Twenty percent (20%) of all Tier 2 and Tier 3 Commercial License +revenue is allocated to the Contributor Revenue Pool. + +ACTIVATION THRESHOLD: The Contributor Revenue Pool activates when +cumulative Tier 2 and Tier 3 revenue exceeds USD $5,000 in a +calendar quarter. Below this threshold, all revenue is retained by +the copyright holder. Revenue below the threshold does not +accumulate toward future quarters. + +The Pool is distributed quarterly to Contributors proportional to +their contribution weight. The contribution-weight formula, +maintainer-floor rule, eligibility requirements, payout mechanics, +and program governance are defined in the FormicOS Contributor +License Agreement (CLA.md). 
Changes to the attribution algorithm +require an Architecture Decision Record and thirty (30) days +notice to Contributors. + +The copyright holder publishes an attribution report with each +distribution showing the computation inputs and per-contributor +shares. + + +SCOPE AND INTERACTION WITH AGPLV3 +---------------------------------- + +This preamble constitutes Additional Permissions under AGPLv3 +Section 7. The AGPLv3 remains the base license for all purposes not +addressed by these Additional Permissions. + +Per AGPLv3 Section 7, paragraph 4: anyone who redistributes +FormicOS may remove these Additional Permissions from their copy. +A redistributed copy without these permissions is governed solely +by the AGPLv3, including the full source-disclosure requirements of +Section 13. The Tier 2 pricing formula and Contributor Revenue +Share apply only to copies that retain these Additional Permissions +as granted by the original copyright holder. + +Nothing in this preamble restricts any rights granted by the AGPLv3 +itself. You may always choose to comply with the full AGPLv3 +(including Section 13) instead of obtaining a Commercial License. + +The usage metering system is an integral part of the FormicOS +software distributed under the AGPLv3. It is not a separate work +and is subject to the same license terms as the rest of FormicOS. + + +DISCLAIMER +---------- + +THIS LICENSE IS PROVIDED BY THE COPYRIGHT HOLDERS "AS IS." THE +PRICING FORMULA, METERING SYSTEM, AND CONTRIBUTOR REVENUE SHARE +ARE OFFERED WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDERS ARE +NOT PROVIDING LEGAL, TAX, OR FINANCIAL ADVICE. LICENSEES AND +CONTRIBUTORS SHOULD CONSULT QUALIFIED PROFESSIONALS REGARDING +THEIR SPECIFIC CIRCUMSTANCES. 
+ ---------------------------------------------------------------------- GNU AFFERO GENERAL PUBLIC LICENSE diff --git a/METERING.md b/METERING.md new file mode 100644 index 0000000..a05a753 --- /dev/null +++ b/METERING.md @@ -0,0 +1,315 @@ +# FormicOS Usage Metering System + +Technical specification for the token metering and cryptographic +attestation system referenced by the FormicOS License Agreement. + +This document is normative. The metering system described here is +the canonical method for computing Total Tokens and producing Usage +Attestations under Tier 2 and Tier 3 Commercial Licenses. + + +## What is metered + +**Total Tokens** is the sum of all input tokens, output tokens, and +reasoning tokens processed through the FormicOS orchestration +runtime during a Billing Period (one calendar month). + +Total Tokens = input_tokens + output_tokens + reasoning_tokens + +This includes tokens processed by both local inference servers and +cloud API providers. + +### Cache-read token accounting + +Cache-read tokens are a SUBSET of input tokens. When a provider +serves a cached response, the tokens that hit the cache are still +input tokens -- they are simply served at a lower cost to the +licensee by the provider. For metering purposes, cache-read tokens +are already counted within input_tokens. They are NOT added +separately to Total Tokens. + +The attestation schema includes a cache_read_tokens field for +informational transparency only. This field shows what portion of +input_tokens were served from cache. It does not affect the Total +Tokens computation or the fee. + +Example: if a Billing Period has input_tokens=42M, output_tokens= +18M, reasoning_tokens=6M, and cache_read_tokens=12M, then Total +Tokens = 42M + 18M + 6M = 66M. The 12M cache-read tokens are +already inside the 42M input_tokens figure. 
+ +### What is NOT metered + +- Tokens consumed by systems external to FormicOS +- Tokens used by the operator's own scripts or tools outside the + FormicOS runtime +- Embedding tokens used for vector search (these are not LLM + generation tokens) +- Tokens consumed during FormicOS development, testing, or CI runs + where the metering system is not active + + +## Data source + +FormicOS emits `TokensConsumed` events for every LLM call made +through the orchestration runtime. Each event contains: + + { + "type": "TokensConsumed", + "seq": 12345, + "timestamp": "2026-03-25T14:30:00Z", + "address": "workspace/thread/colony/round/turn", + "input_tokens": 1500, + "output_tokens": 800, + "reasoning_tokens": 0, + "cache_read_tokens": 500, + "model": "qwen3-30b-a3b", + "provider": "local", + "cost_usd": 0.0, + "agent_id": "coder_0" + } + +Note: cache_read_tokens in the event is informational. The +input_tokens field already includes any tokens served from cache. + +These events are stored in the append-only event store (SQLite WAL +by default). The event store is the source of truth for all token +counts. Events are sequentially numbered and immutable once written. + + +## Attestation production + +At the end of each Billing Period (or on demand), the metering +module produces a Usage Attestation -- a JSON document containing +aggregate token counts and a cryptographic signature. 
+ +### Attestation schema + + { + "version": 1, + "license_id": "lic-a1b2c3d4", + "period_start": "2026-03-01T00:00:00Z", + "period_end": "2026-03-31T23:59:59Z", + "total_tokens": 66000000, + "breakdown": { + "input_tokens": 42000000, + "output_tokens": 18000000, + "reasoning_tokens": 6000000, + "cache_read_tokens": 12000000 + }, + "by_provider": { + "local": 54000000, + "anthropic": 8000000, + "openai": 4000000 + }, + "event_count": 8432, + "first_event_seq": 100001, + "last_event_seq": 108432, + "chain_hash": "a1b2c3...64hex", + "computed_fee_usd": 16.25, + "signature": "ed25519...128hex" + } + +### Fields + +- `version`: schema version (currently 1) +- `license_id`: unique identifier for this Commercial License +- `period_start`, `period_end`: Billing Period boundaries (UTC) +- `total_tokens`: input_tokens + output_tokens + reasoning_tokens + across all events in the period. This is T (in raw tokens) used + in the pricing formula. Note: this equals the sum of the three + non-cache fields in breakdown. cache_read_tokens is informational + and does not contribute to total_tokens. +- `breakdown`: per-category token counts for transparency. + cache_read_tokens shows what portion of input_tokens came from + cache. It is a subset, not an addition. +- `by_provider`: per-provider token counts for transparency. These + are informational and do not affect the fee computation. +- `event_count`: number of TokensConsumed events in the period +- `first_event_seq`, `last_event_seq`: event sequence range, + enabling audit continuity between periods +- `chain_hash`: SHA-256 hash of the concatenation of all event + payloads in sequence order. Supports event-store integrity checks + and audit reconciliation. +- `computed_fee_usd`: the fee computed by applying the pricing + formula to total_tokens. Informational -- the formula in the + LICENSE is canonical. +- `signature`: Ed25519 signature over the canonical JSON encoding + of all fields except `signature` itself. 
+
+
+## Cryptographic integrity
+
+### Key derivation
+
+The Ed25519 signing key is derived from the license key at
+activation:
+
+    import hashlib
+    from nacl.signing import SigningKey
+
+    def derive_signing_key(license_key: str) -> SigningKey:
+        seed = hashlib.sha256(
+            f"formicos-metering-v1:{license_key}".encode()
+        ).digest()
+        return SigningKey(seed)
+
+The corresponding verify key is registered with the copyright
+holder at license activation. The licensee retains the signing key.
+The copyright holder can verify attestation authenticity without
+possessing the signing key.
+
+### Signing process
+
+    import json
+
+    def sign_attestation(attestation: dict, key: SigningKey) -> str:
+        # Canonical JSON: sorted keys, no whitespace
+        payload = json.dumps(
+            {k: v for k, v in attestation.items() if k != "signature"},
+            sort_keys=True,
+            separators=(",", ":"),
+        ).encode("utf-8")
+        return key.sign(payload).signature.hex()
+
+### Chain hash computation
+
+The chain hash provides a cryptographic consistency check for the
+underlying event store. It is computed by hashing each
+TokensConsumed event's payload in sequence order:
+
+    import hashlib
+    import json
+
+    def compute_chain_hash(events: list[dict]) -> str:
+        h = hashlib.sha256()
+        for event in sorted(events, key=lambda e: e["seq"]):
+            payload = json.dumps(
+                event, sort_keys=True, separators=(",", ":")
+            ).encode("utf-8")
+            h.update(payload)
+        return h.hexdigest()
+
+If events are inserted, deleted, or modified after the hash is
+computed, the chain hash will not match. The copyright holder can
+request the raw events during an audit to verify the chain hash
+independently.
+
+This mechanism is audit-friendly, but it is not by itself an
+external anti-fraud anchor because the licensee controls the local
+event store and signing environment. Stronger tamper-evidence would
+require an external receipt log, transparency service, or other
+independent anchoring layer. 
+ + +## CLI interface + +The metering module provides a command-line interface: + + # View current period usage + formicos billing status + + # Generate attestation for a completed period + formicos billing attest --period 2026-03 + + # Submit attestation to billing endpoint + formicos billing submit --period 2026-03 + + # View historical attestations + formicos billing history + + # Dry run: compute fee without generating attestation + formicos billing estimate + + # Validate the metering pipeline end-to-end + formicos billing self-test + +### Self-test + +The `self-test` command validates the entire metering pipeline +without generating a real attestation or submitting anything. It: + + 1. Queries the event store for TokensConsumed events in the + current period + 2. Computes aggregate token counts + 3. Computes the chain hash over the event sequence + 4. Generates a test attestation (marked version: "test") + 5. Signs the test attestation with the derived key + 6. Verifies the signature + 7. Computes the fee using the canonical formula + 8. Reports any configuration issues (missing license key, + unreachable billing endpoint, event store gaps) + +This enables Tier 2 licensees to verify their metering works before +the first real billing period. A successful self-test output: + + FormicOS Billing Self-Test + -------------------------- + Event store: OK (8432 TokensConsumed events this period) + Token counts: OK (total: 66,000,000) + Chain hash: OK (a1b2c3...64hex) + Key derivation: OK (verify key: ed25519:...) + Signature: OK (round-trip verified) + Computed fee: $16.25 + Billing endpoint: OK (https://billing.formicos.dev reachable) + -------------------------- + All checks passed. Ready for attestation. + + +## Offline operation + +FormicOS is a local-first system. The metering module does not +require network connectivity during normal operation. Token counts +accumulate in the local event store. Attestations are generated +locally. 
Submission is a separate step that requires connectivity +to the billing endpoint. + +If the billing endpoint is unreachable, the attestation is saved +locally and can be submitted later. The 15-day submission window +(per the License) provides buffer for connectivity issues. + + +## Transparency + +The metering module's source code is part of the AGPLv3-licensed +FormicOS distribution. Any licensee can inspect, audit, and verify +the token-counting logic. The attestation schema, signing process, +and chain hash computation are fully specified in this document. + +The copyright holder publishes the verification key and attestation +validation tool, allowing any party to verify that a given +attestation was produced with a registered verify key and has not +been modified after signing. This does not, by itself, prove that +the local event history was not rewritten before the attestation was +generated. + + +## AGPL interaction + +The metering module is distributed as an integral component of +FormicOS under the AGPLv3. It is not a separate work. + +Disabling, removing, or materially modifying the metering module +constitutes a modification of the software. If the modified version +is deployed over a network (triggering AGPLv3 Section 13), the +modifier must make the complete corresponding source code -- +including the modifications to the metering module -- available to +all network users. + +This does not prevent modification. It ensures that modifications +to the metering system are publicly visible and subject to the same +open-source obligations as any other FormicOS modification. + + +## Future: execution-weighted contribution attribution + +A future version of the metering system may incorporate runtime +execution profiling to weight contributor revenue shares by code +execution frequency. 
This would use statistical sampling (e.g., +py-spy at 100 Hz for 5-minute windows) to record which source +files are on the call stack during token processing, then correlate +with git blame authorship. + +This capability is documented here for completeness. It is not +active in the current version. Activation will be announced via an +Architecture Decision Record and thirty (30) days notice to +Contributors per the License Agreement. diff --git a/README.md b/README.md index 47a0d12..370faa5 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,26 @@ # FormicOS -**Your AI agents plan in parallel, extract knowledge, and maintain themselves — while you watch.** +**Your AI agents plan in parallel, extract knowledge, and maintain themselves -- while you watch.** -FormicOS is a stigmergic multi-agent colony framework where an operator directs a Queen LLM that decomposes goals, spawns specialized worker colonies, and coordinates them through shared environmental signals (pheromones) — not direct messaging. Every action is an event. Every decision is explained. The system is local-first, event-sourced, and self-maintaining. +FormicOS is a stigmergic multi-agent colony framework where an operator directs a Queen LLM that decomposes goals, spawns specialized worker colonies, and coordinates them through shared environmental signals (pheromones) -- not direct messaging. Every action is an event. Every decision is explained. The system is local-first, event-sourced, and self-maintaining. -> **Try the demo:** Launch FormicOS, click **Try the Demo** on the Queen landing page, and watch the system detect a knowledge contradiction, plan a task in parallel, execute colonies, extract knowledge, and resolve the contradiction — all autonomously. +FormicOS is also an MCP server. Connect Claude Code or any MCP client, and the Queen's institutional memory, strategic delegation, and autonomous background work become part of your development workflow. 
+ +> **Try the demo:** Launch FormicOS, click **Try the Demo** on the Queen landing page, and watch the system detect a knowledge contradiction, plan a task in parallel, execute colonies, extract knowledge, and resolve the contradiction -- all autonomously. ## What makes it different -- **Plans work in parallel and shows you why** — The Queen decomposes tasks into a DAG of parallel groups. You see colonies execute side-by-side with live status, cost accumulation, and dependency arrows. The Queen's reasoning is always accessible. +- **Plans work in parallel and shows you why** -- The Queen decomposes tasks into a DAG of parallel groups. You see colonies execute side-by-side with live status, cost accumulation, and dependency arrows. The Queen's reasoning is always accessible. + +- **Extracts and maintains institutional knowledge** -- Colonies produce knowledge entries with Bayesian confidence posteriors, hierarchical domains, provenance chains, and 7-signal composite retrieval scoring including Personalized PageRank. Knowledge improves with use, decays when stale, and gets distilled into higher-order entries. The operator can review, confirm, edit, or invalidate entries through the Operations inbox. -- **Extracts and maintains institutional knowledge** — Colonies produce knowledge entries with Bayesian confidence posteriors, decay classes, and 6-signal composite retrieval scoring. Knowledge improves with use, decays when stale, and gets distilled into higher-order entries. +- **Detects problems and fixes them autonomously** -- Proactive intelligence surfaces contradictions, confidence decline, coverage gaps, and stale clusters. Self-maintenance dispatches colonies to investigate and resolve issues. Blast radius estimation gates autonomous dispatch. The operator sets autonomy levels and daily budgets; the system earns trust through a track record. 
-- **Detects problems and fixes them autonomously** — Proactive intelligence surfaces contradictions, confidence decline, coverage gaps, and stale clusters. Self-maintenance dispatches colonies to investigate and resolve issues without operator intervention. +- **Explains every decision** -- Retrieval scoring shows per-signal breakdowns. Colony outcomes track cost, quality, and knowledge extraction. The Queen references outcomes as recommendations, not opaque overrides. -- **Explains every decision** — Retrieval scoring shows per-signal breakdowns. Colony outcomes track cost, quality, and knowledge extraction. The Queen references outcomes as recommendations, not opaque overrides. +- **Operates across sessions and idle time** -- The Queen maintains a journal, follows operating procedures, and continues work on pending milestones when the operator is away. An operational sweep runs every 30 minutes, queuing and executing work within guardrails. The action queue captures every proposal, execution, and rejection for full audit. + +- **Bridges to your editor** -- FormicOS is an MCP server with 27 tools, 9 resources, and 6 prompts. Run `python -m formicos init-mcp` to connect Claude Code. Search institutional memory, delegate tasks, review autonomous work, and record discoveries -- all from your editor. ## Quick Start @@ -46,7 +52,21 @@ When the app is ready: 3. Click **Try the Demo** to create a pre-seeded workspace and see FormicOS in action 4. 
Or describe a task to the Queen, spawn colonies, and explore the Knowledge view -Startup verification: +### Connect Claude Code (optional) + +```bash +python -m formicos init-mcp +# Generates .mcp.json for Claude Code + .formicos/DEVELOPER_QUICKSTART.md +# Restart Claude Code to connect +``` + +Once connected, try these from Claude Code: +- `morning-status` -- what happened, what's pending, project plan status +- `delegate-task` -- hand off work to a colony +- `knowledge-for-context` -- search institutional memory +- `log-finding` -- record a discovery + +### Startup verification ```bash docker compose ps @@ -58,9 +78,6 @@ curl http://localhost:6333/collections ## Architecture -Current repo state: the core event contract is a closed 69-event union. -Wave 64 added addon system events (66 -> 69). - Four layers with strict inward dependency, enforced by CI: ``` @@ -73,52 +90,66 @@ Four layers with strict inward dependency, enforced by CI: Core types, events, ports imports nothing ``` -- **Core** — closed 69-event Pydantic union, shared types, CRDTs, ports, and knowledge/federation contracts -- **Engine** — colony execution, context assembly, tool loop, stigmergic + sequential strategies, optimistic file locking -- **Adapters** — SQLite event store, Qdrant-backed knowledge search, knowledge graph adapter, federation transport, sandbox, multi-provider LLM bindings (OpenAI-compatible, Anthropic, Gemini) with per-endpoint concurrency -- **Surface** — Starlette app, MCP/HTTP/WS/AG-UI/A2A surfaces, Queen runtime/tools (36 built-in), projections, maintenance services, addon loader, trigger dispatch, and operator wiring +- **Core** -- closed 69-event Pydantic union, shared types, CRDTs, ports, and knowledge/federation contracts +- **Engine** -- colony execution, context assembly, tool loop, stigmergic + sequential strategies, optimistic file locking +- **Adapters** -- SQLite event store, Qdrant-backed knowledge search, knowledge graph adapter, federation transport, sandbox, 
multi-provider LLM bindings (OpenAI-compatible, Anthropic, Gemini) with per-endpoint concurrency +- **Surface** -- Starlette app, MCP/HTTP/WS/AG-UI/A2A surfaces, Queen runtime with 43 tools, projections, maintenance services, addon loader, trigger dispatch, operational state (journal, procedures, action queue), and operator wiring + +The frontend is a Lit component shell with 8 tabs (Queen, Knowledge, Workspace, Operations, Addons, Playbook, Models, Settings) driven by WebSocket state snapshots and replay-safe projections. + +### MCP developer bridge + +FormicOS is a FastMCP 3.0 server at `/mcp` with: -The frontend is a Lit component shell driven by WebSocket state snapshots, promoted events, and replay-safe projections. +- **27 MCP tools** -- colony management, knowledge search, addon control, approvals, service queries, configuration, and developer workflows (log_finding, handoff_to_formicos) +- **9 MCP resources** -- knowledge catalog, thread/colony detail, project plan, operating procedures, journal, briefing +- **6 MCP prompts** -- morning-status, delegate-task, review-overnight-work, knowledge-for-context, plus colony-task and review-knowledge +- **PromptsAsTools + ResourcesAsTools transforms** -- every prompt and resource is also callable as a tool -Persistence is event-sourced: a single SQLite file is the source of truth. On startup, events replay into in-memory projections. Crash-recoverable by design. +### Persistence + +Event-sourced: a single SQLite file is the source of truth. On startup, events replay into in-memory projections. Crash-recoverable by design. + +Operational state (journal, procedures, action queue) is file-backed under `.formicos/operations/`. Project plans live at `.formicos/project_plan.md`. These are workspace-scoped files the operator can read and edit directly. ## Key Concepts -**Workspaces, Threads, Colonies, Rounds** — the data model is a tree. A workspace contains threads. A thread contains colonies. A colony runs rounds. 
Each round executes the 5-phase loop across all agents. +**Workspaces, Threads, Colonies, Rounds** -- the data model is a tree. A workspace contains threads. A thread contains colonies. A colony runs rounds. Each round executes the 5-phase loop across all agents. -**The Queen** — the operator-facing LLM agent. The operator chats with the Queen, who decomposes goals and spawns colonies. Each thread has its own Queen conversation. +**The Queen** -- the operator-facing LLM agent with 43 tools. The operator chats with the Queen, who decomposes goals and spawns colonies. Each thread has its own Queen conversation. The Queen maintains a journal, follows operating procedures, checks blast radius before autonomous dispatch, and earns trust through a graduated autonomy score. -**Stigmergic Routing** — in stigmergic mode, agents are connected by a weighted topology graph. Pheromone weights evolve each round based on output quality (cosine similarity). High-performing paths get reinforced; low-performing paths decay. The `sequential` strategy is a simpler fallback. +**Stigmergic Routing** -- in stigmergic mode, agents are connected by a weighted topology graph. Pheromone weights evolve each round based on output quality (cosine similarity). High-performing paths get reinforced; low-performing paths decay. The `sequential` strategy is a simpler fallback. -**Merge / Prune / Broadcast** — operator controls for inter-colony information flow. Merge creates a directed edge between colonies. Prune removes it. Broadcast copies a colony's compressed output to all colonies in a thread. +**Knowledge System** -- Bayesian confidence posteriors (`Beta(alpha, beta)`) with Thompson Sampling retrieval. 7-signal composite scoring (semantic, thompson, freshness, status, thread, co-occurrence, graph proximity). Hierarchical domains with materialized paths. Provenance chains tracking every mutation. Personalized PageRank for graph-augmented retrieval. 
Outcome-weighted reinforcement with geometric credit. Knowledge review flow surfaces problematic entries for operator confirmation. -**Model Cascade** — model assignment follows a nullable cascade: thread override > workspace override > system default. Change the model for one workspace without affecting others. +**Operational Loop** -- a 30-minute operational sweep detects opportunities, queues actions (maintenance, continuation, knowledge review, workflow templates, procedure suggestions), and executes within autonomy guardrails. The operator reviews pending actions in the Operations inbox. Blast radius estimation and daily budget caps gate autonomous dispatch. -**Protocol surfaces** — MCP remains the primary external tool surface, while HTTP, WebSocket, AG-UI, and A2A expose the same event-sourced system from different integration angles. +**Model Cascade** -- model assignment follows a nullable cascade: thread override > workspace override > system default. Change the model for one workspace without affecting others. + +**Protocol surfaces** -- MCP remains the primary external tool surface, while HTTP, WebSocket, AG-UI, and A2A expose the same event-sourced system from different integration angles. 
## Project Status FormicOS currently ships with: - [x] Event-sourced persistence with replay-safe projections and a closed 69-event contract -- [x] Unified knowledge system with Bayesian confidence, gamma decay, co-occurrence, thread scoping, transcript harvest, outcome-weighted reinforcement, admission scoring, and bi-temporal surfacing -- [x] Proactive intelligence, maintenance policies, deterministic self-maintenance services, and configuration recommendations grounded in outcome history -- [x] Queen parallel planning via `spawn_parallel`, workflow threads/steps, operator directives, and colony audit surfaces -- [x] Queen autonomous agency: 36 built-in tools including batch_command, summarize_thread, draft_document, retry_colony, and MCP-aware chaining guidance -- [x] Addon system: YAML manifest discovery, tool/handler/trigger registration, 4 built-in addons (codebase-index, git-control, proactive-intelligence, hello-world) +- [x] Unified knowledge system with Bayesian confidence, gamma decay, co-occurrence, hierarchical domains, provenance chains, PPR retrieval, outcome-weighted reinforcement, admission scoring, and knowledge review governance +- [x] Proactive intelligence (17 deterministic rules), maintenance policies, blast radius estimation, graduated autonomy scoring, and self-maintenance dispatch with daily budget caps +- [x] Queen parallel planning via `spawn_parallel`, workflow threads/steps, project-level milestones, operator directives, and colony audit surfaces +- [x] Queen autonomous agency: 43 built-in tools including batch_command, summarize_thread, draft_document, retry_colony, project milestone management, autonomy budget checking, and MCP-aware chaining guidance +- [x] Operational coherence: Queen journal, operating procedures, durable action queue (JSONL), 30-minute operational sweeps, operations coordinator with continuation candidates, and a dedicated Operations tab with inbox/journal/procedures/summary +- [x] Knowledge governance: review 
scanning (outcome-correlated, contradictions, stale authority, unconfirmed entries), operator confirm/edit/invalidate flow, knowledge health dashboard +- [x] Autonomous continuation: cross-session warm start proposals, idle-time execution with 5 guard rails, workflow pattern recognition, operating procedure auto-suggestions +- [x] MCP developer bridge: 27 tools, 9 resources, 6 prompts, `init-mcp` CLI for Claude Code integration, prose-formatted resources for context injection +- [x] Addon system: YAML manifest discovery, tool/handler/trigger registration, config editing, enable/disable toggle, 6 built-in addons (codebase-index, docs-index, git-control, mcp-bridge, proactive-intelligence, hello-world) - [x] Multi-provider parallel execution: per-endpoint adapter factory, per-model concurrency control, heuristic cloud routing, optimistic file locking for concurrent agents - [x] Reasoning and cache token accounting through the full pipeline (adapters to dashboard) - [x] Federated knowledge exchange via Computational CRDTs, Bayesian peer trust hardening, and truthful A2A / Agent Card protocol surfaces -- [x] Local-first inference plus cloud fallback, sandboxed code execution, NemoClaw-compatible external specialists, and operator steering -- [x] Unified operator surfaces for colonies, knowledge, workflow, explainable retrieval, and local-first knowledge overlays +- [x] Local-first inference plus cloud fallback, sandboxed code execution, and operator steering +- [x] Unified operator surfaces: Queen overview, Knowledge browser with search and health, Workspace browser, Operations inbox with approve/reject, Addons with interactive config/trigger/toggle, Playbook with templates, Models admin with add/hide, Settings with writable governance controls - [x] Colony outcome metrics, escalation reporting, validator-aware completion states, and replay-derived history views -- [x] Guided demo path with pre-seeded workspace and contradiction-driven maintenance walkthrough -- [x] 
DAG visualization with live status, cost accumulation, and knowledge annotations - [x] Sequential task runner with locked experiment conditions for compounding measurement -- [x] Static workspace analysis and structural topology prior for knowledge-informed routing -- [x] Adaptive evaporation with bounded stagnation-responsive pheromone control -- [x] Contradiction resolution with classification-aware conflict handling -- [x] Web foraging with reactive and proactive gap detection, egress-controlled fetch, source-credibility-aware admission, content quality scoring, and domain strategy memory +- [x] Adaptive evaporation, web foraging with egress control, and contradiction resolution ## Development @@ -141,6 +172,9 @@ cd frontend && npm run dev # frontend dev server with HMR # Build frontend cd frontend && npm run build + +# Connect Claude Code +python -m formicos init-mcp # generates .mcp.json ``` ## Documentation @@ -149,20 +183,45 @@ cd frontend && npm run build |----------|---------| | [CLAUDE.md](CLAUDE.md) | Project context and rules (loaded by AI agents automatically) | | [AGENTS.md](AGENTS.md) | File ownership and coordination rules for parallel agents | +| [GOVERNANCE.md](GOVERNANCE.md) | Maintainer authority, contribution flow, and project governance | +| [CLA.md](CLA.md) | Contributor license agreement and revenue-share terms | +| [CORPORATE_CLA.md](CORPORATE_CLA.md) | Corporate contributor agreement for employer-authorized contributions | +| [docs/CONTRIBUTOR_PAYOUT_OPS.md](docs/CONTRIBUTOR_PAYOUT_OPS.md) | Revenue-share payout operations (tax, payments, timing) | +| [docs/A2A_ECONOMICS.md](docs/A2A_ECONOMICS.md) | Machine-readable contracts and receipts for A2A agent participation | | [docs/DEPLOYMENT.md](docs/DEPLOYMENT.md) | Deployment guide: clone to running stack | +| [docs/AUTONOMOUS_OPERATIONS.md](docs/AUTONOMOUS_OPERATIONS.md) | Autonomy operator runbook: levels, budgets, action queue, learning | +| 
[docs/DEVELOPER_BRIDGE.md](docs/DEVELOPER_BRIDGE.md) | Developer onboarding guide for Claude Code MCP integration | | [CHANGELOG.md](CHANGELOG.md) | Narrative development history | | [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) | Architecture overview, event flow, data model | | [docs/RUNBOOK.md](docs/RUNBOOK.md) | Hardware requirements, operations, troubleshooting | | [docs/LOCAL_FIRST_QUICKSTART.md](docs/LOCAL_FIRST_QUICKSTART.md) | Detailed local setup and first interaction walkthrough | | [CONTRIBUTING.md](CONTRIBUTING.md) | Developer guide: setup, testing, adding features | -| [docs/decisions/](docs/decisions/) | Architecture Decision Records | +| [docs/KNOWLEDGE_LIFECYCLE.md](docs/KNOWLEDGE_LIFECYCLE.md) | Knowledge system operator runbook | +| [docs/decisions/](docs/decisions/) | Architecture Decision Records (ADR-001 through ADR-051) | | [docs/contracts/](docs/contracts/) | Frozen interface definitions (events, ports, types) | | [docs/specs/](docs/specs/) | Executable specifications and regression scenarios | -| [docs/waves/PROGRESS.md](docs/waves/PROGRESS.md) | Development progress log | +| [docs/waves/PROGRESS.md](docs/waves/PROGRESS.md) | Development progress log (Waves 1-73) | | [addons/README.md](addons/README.md) | Addon development guide | | [FINDINGS.md](FINDINGS.md) | What 59 waves of measurement proved | +| [METERING.md](METERING.md) | Token metering system specification | +| [COMMERCIAL_TERMS.md](COMMERCIAL_TERMS.md) | Commercial license payment terms | | [frontend/CHANGELOG.md](frontend/CHANGELOG.md) | Frontend component inventory and bundle stats | ## License -AGPLv3 with a small-business and educational exception. See [LICENSE](LICENSE) for details. +FormicOS is free software. The AGPLv3 base license guarantees your right to +use, study, modify, and share the complete system. 
Additional permissions +under Section 7 make this even broader: individuals, small businesses +(under $1M revenue), nonprofits, and educators can deploy FormicOS without +any source-disclosure obligations or fees. + +Organizations above $1M revenue choosing proprietary deployment can obtain +a commercial license with usage-based pricing (no per-seat or per-machine +fees). Twenty percent of commercial revenue is shared with contributors +proportional to their code contributions, creating a sustainable model +where improving the commons is also building a livelihood. + +See [LICENSE](LICENSE) for the full terms, +[COMMERCIAL_TERMS.md](COMMERCIAL_TERMS.md) for payment mechanics, +[METERING.md](METERING.md) for the token metering specification, and +[CLA.md](CLA.md) for the contributor agreement and revenue-share program. diff --git a/addons/README.md b/addons/README.md index a9e3042..b091e03 100644 --- a/addons/README.md +++ b/addons/README.md @@ -263,7 +263,9 @@ uv run pytest tests/unit/addons/test_my_addon.py -v | hello-world | `hello` | -- | -- | | proactive-intelligence | `query_briefing` | -- | -- | | codebase-index | `semantic_search_code`, `reindex_codebase` | -- | daily cron, manual | +| docs-index | `semantic_search_docs`, `reindex_docs` | -- | manual | | git-control | `git_smart_commit`, `git_branch_analysis`, `git_create_branch`, `git_stash` | `ColonyCompleted` (auto-stage) | -- | +| mcp-bridge | `discover_mcp_tools`, `call_mcp_tool` | -- | -- | ## Architecture diff --git a/addons/codebase-index/addon.yaml b/addons/codebase-index/addon.yaml index 70fcb0d..8543725 100644 --- a/addons/codebase-index/addon.yaml +++ b/addons/codebase-index/addon.yaml @@ -3,6 +3,18 @@ version: "1.0.0" description: "Semantic code search via embedding index" author: "formicos-core" +content_kinds: + - source_code +path_globs: + - "**/*.py" + - "**/*.js" + - "**/*.ts" + - "**/*.tsx" + - "**/*.go" + - "**/*.rs" + - "**/*.java" +search_tool: semantic_search_code + tools: - name: 
semantic_search_code description: "Search codebase by meaning, not just text" @@ -32,9 +44,29 @@ tools: type: string description: "Optional list of relative file paths to reindex. Omit for full reindex." +config: + - key: chunk_size + type: integer + default: 500 + label: "Chunk size in characters for code splitting" + - key: skip_dirs + type: string + default: "__pycache__,.git,node_modules,.venv,venv" + label: "Comma-separated directories to skip during indexing" + +panels: + - target: knowledge + display_type: status_card + path: /status + handler: status.py::get_status + +routes: + - path: /status + handler: status.py::get_status + triggers: - type: cron schedule: "0 3 * * *" handler: indexer.py::on_scheduled_reindex - type: manual - handler: indexer.py::incremental_reindex + handler: search.py::handle_reindex diff --git a/addons/docs-index/addon.yaml b/addons/docs-index/addon.yaml new file mode 100644 index 0000000..181c13f --- /dev/null +++ b/addons/docs-index/addon.yaml @@ -0,0 +1,68 @@ +name: docs-index +version: "1.0.0" +description: "Semantic documentation search via embedding index" +author: "formicos-core" + +content_kinds: + - documentation +path_globs: + - "**/*.md" + - "**/*.rst" + - "**/*.txt" + - "**/*.html" +search_tool: semantic_search_docs + +tools: + - name: semantic_search_docs + description: "Search project documentation by meaning" + handler: search.py::handle_semantic_search + parameters: + type: object + properties: + query: + type: string + description: "Natural language search query" + top_k: + type: integer + description: "Number of results to return (default 10)" + file_pattern: + type: string + description: "Optional glob pattern to filter files (e.g. 
'*.md')" + + - name: reindex_docs + description: "Rebuild or incrementally update the semantic documentation index" + handler: search.py::handle_reindex + parameters: + type: object + properties: + changed_files: + type: array + items: + type: string + description: "Optional list of relative file paths to reindex. Omit for full reindex." + +# v1: extensions and skip dirs are hardcoded in indexer.py constants. +# Config wiring is planned for v2 when addon config injection is standardized. +config: + - key: doc_extensions + type: string + default: ".md,.rst,.txt,.html" + label: "Comma-separated file extensions to index (v1: hardcoded)" + - key: skip_dirs + type: string + default: "__pycache__,.git,node_modules,.venv,venv,dist,build" + label: "Comma-separated directories to skip during indexing (v1: hardcoded)" + +panels: + - target: knowledge + display_type: status_card + path: /status + handler: status.py::get_status + +routes: + - path: /status + handler: status.py::get_status + +triggers: + - type: manual + handler: search.py::handle_reindex diff --git a/addons/git-control/addon.yaml b/addons/git-control/addon.yaml index 7591162..d356d31 100644 --- a/addons/git-control/addon.yaml +++ b/addons/git-control/addon.yaml @@ -1,6 +1,10 @@ name: git-control version: "1.0.0" -description: "Git intelligence for the Queen" +description: > + Git intelligence for the Queen — smart_commit, branch_analysis, + create_branch, and stash_operations. If you have a git MCP server + connected via the mcp-bridge addon, these tools overlap and this + addon can be disabled. 
author: "formicos-core" tools: @@ -59,6 +63,22 @@ tools: type: string description: "Optional stash message (for 'save' action)" +panels: + - target: workspace + display_type: status_card + path: /status + handler: status.py::get_status + +routes: + - path: /status + handler: status.py::get_status + +config: + - key: git_auto_stage + type: boolean + default: true + label: "Auto-stage modified files after colony completion" + handlers: - event: ColonyCompleted handler: handlers.py::on_colony_completed_auto_stage diff --git a/addons/hello-world/addon.yaml b/addons/hello-world/addon.yaml index 48931ac..6912c56 100644 --- a/addons/hello-world/addon.yaml +++ b/addons/hello-world/addon.yaml @@ -1,5 +1,6 @@ name: hello-world version: "1.0.0" +hidden: true description: "Trivial test addon — validates the addon loader pipeline" author: "formicos-core" diff --git a/addons/mcp-bridge/addon.yaml b/addons/mcp-bridge/addon.yaml new file mode 100644 index 0000000..6d000c7 --- /dev/null +++ b/addons/mcp-bridge/addon.yaml @@ -0,0 +1,40 @@ +name: mcp-bridge +version: "1.0.0" +description: "Bridge remote MCP servers into FormicOS — discover and call external tools" +author: "formicos-core" + +tools: + - name: discover_mcp_tools + description: "Discover tools available on connected MCP servers" + handler: discovery.py::handle_discover_tools + parameters: + type: object + properties: + server: + type: string + description: "Optional server name to query (omit for all connected servers)" + + - name: call_mcp_tool + description: "Call a tool on a remote MCP server" + handler: discovery.py::handle_call_tool + parameters: + type: object + properties: + server: + type: string + description: "Name of the MCP server" + tool: + type: string + description: "Tool name to call" + arguments: + type: object + description: "Arguments to pass to the tool" + required: + - server + - tool + +config: + - key: mcp_servers + type: string + default: "[]" + label: "MCP server configurations (JSON array of 
{name, url})" diff --git a/addons/proactive-intelligence/addon.yaml b/addons/proactive-intelligence/addon.yaml index 7d43d3a..18efc5a 100644 --- a/addons/proactive-intelligence/addon.yaml +++ b/addons/proactive-intelligence/addon.yaml @@ -32,6 +32,12 @@ tools: type: string description: "Rule to enable/disable" +config: + - key: disabled_rules + type: string + default: "" + label: "Comma-separated rule names to disable" + triggers: - type: cron schedule: "*/30 * * * *" diff --git a/config/caste_recipes.yaml b/config/caste_recipes.yaml index 1309013..aefc9d2 100644 --- a/config/caste_recipes.yaml +++ b/config/caste_recipes.yaml @@ -23,18 +23,10 @@ castes: - Contradictions between high-confidence entries are detected. If you see a System Intelligence Briefing with contradictions, flag them to the operator. - ## Tools (36) - **Colony lifecycle:** spawn_colony, spawn_parallel, kill_colony, redirect_colony, escalate_colony, inspect_colony, get_status, retry_colony - **Planning:** propose_plan - **Direct work:** search_codebase, run_command, batch_command, edit_file, run_tests, delete_file - **Documents:** draft_document, summarize_thread - **Analytics:** query_outcomes, analyze_colony, query_briefing - **Templates:** list_templates, inspect_template - **Knowledge & files:** memory_search, read_workspace_files, write_workspace_file, read_colony_output - **Config & notes:** suggest_config_change, approve_config_change, queen_note - **Thread management:** set_thread_goal, define_workflow_steps, complete_thread, archive_thread - **Services:** query_service - **Addons:** list_addons, trigger_addon + {TOOL_INVENTORY} + + ## Source routing + Call list_addons to discover addon corpora. Route: source_code/documentation → addon search_tool, institutional memory → memory_search. Prefer narrowest path_globs. Workspace tags are soft routing hints. - memory_search supports tiered retrieval: detail="auto"|"summary"|"standard"|"full". Default auto starts cheap and escalates. 
Use detail="full" only when you @@ -200,7 +192,7 @@ castes: - Save operator preferences with queen_note so you remember them. temperature: 0.3 max_tokens: 4096 - tools: ["spawn_colony", "spawn_parallel", "propose_plan", "kill_colony", "redirect_colony", "escalate_colony", "inspect_colony", "get_status", "retry_colony", "search_codebase", "run_command", "batch_command", "edit_file", "run_tests", "delete_file", "draft_document", "summarize_thread", "list_templates", "inspect_template", "memory_search", "read_workspace_files", "write_workspace_file", "read_colony_output", "suggest_config_change", "approve_config_change", "queen_note", "set_thread_goal", "define_workflow_steps", "complete_thread", "archive_thread", "query_service", "query_outcomes", "analyze_colony", "query_briefing", "list_addons", "trigger_addon"] + tools: ["spawn_colony", "spawn_parallel", "propose_plan", "mark_plan_step", "kill_colony", "redirect_colony", "escalate_colony", "inspect_colony", "get_status", "retry_colony", "search_codebase", "run_command", "batch_command", "edit_file", "run_tests", "delete_file", "draft_document", "summarize_thread", "list_templates", "inspect_template", "memory_search", "read_workspace_files", "write_workspace_file", "read_colony_output", "suggest_config_change", "approve_config_change", "queen_note", "set_thread_goal", "define_workflow_steps", "complete_thread", "archive_thread", "query_service", "query_outcomes", "analyze_colony", "query_briefing", "list_addons", "trigger_addon", "set_workspace_tags", "propose_project_milestone", "complete_project_milestone", "check_autonomy_budget"] max_iterations: 7 max_execution_time_s: 120 base_tool_calls_per_iteration: 10 diff --git a/docs/A2A_ECONOMICS.md b/docs/A2A_ECONOMICS.md new file mode 100644 index 0000000..1ee73e5 --- /dev/null +++ b/docs/A2A_ECONOMICS.md @@ -0,0 +1,480 @@ +# A2A Economic Protocol for FormicOS + +Machine-readable contracts, receipts, and sponsorship for autonomous +agent participation in the FormicOS 
economy. + +This document bridges the human legal framework (LICENSE, CLA.md, +COMMERCIAL_TERMS.md) with the A2A task protocol (docs/archive/A2A-TASKS.md) so +that autonomous agents can programmatically assess, price, and +settle participation in FormicOS work. + + +## 1. The Problem This Solves + +An external agent considering whether to submit work to FormicOS, +or a FormicOS Queen considering whether to accept inbound work, +currently cannot answer these questions programmatically: + +- Who is the legal principal behind this agent? +- What economic terms govern this task? +- What constitutes acceptance of the deliverable? +- What happens after acceptance (payout, attribution, nothing)? +- What proof does the completing agent receive? + +The A2A task API (submit/poll/attach/result) handles the mechanics +of work execution. This document handles the economics of work +valuation. + + +## 2. Core Principle: Agents Are Not Principals + +Agents cannot be parties to contracts. They act on behalf of a +human or Legal Entity that has signed either the individual CLA +(CLA.md) or a Corporate CLA (CORPORATE_CLA.md). This is not a +limitation -- it is the legal reality that makes the economics +enforceable. + +Every A2A interaction has a **sponsor**: the human or Legal Entity +whose CLA covers the agent's contributions. The sponsor is +responsible for: + +- the agent's token consumption (metered under LICENSE Tier 2) +- the legal representations about contribution provenance (CLA + Section 5) +- tax compliance on any revenue share (CLA Section 7.6) + +An agent that submits work without a valid sponsor is treated as an +anonymous Tier 1 user: the work is accepted under AGPLv3 terms with +no revenue-share eligibility and no commercial relicensing grant. + + +## 3. ContributionContract Schema + +A ContributionContract is a machine-readable task specification +that an agent can evaluate before committing resources. 
It is +submitted alongside or embedded within an A2A task submission. + +```json +{ + "schema": "formicos/contribution-contract", + "version": 1, + + "contract_id": "cc-a1b2c3d4-2026-03-25", + + "sponsor": { + "principal_id": "intradyne", + "cla_type": "individual", + "cla_version": "1.0", + "verified": true + }, + + "task": { + "description": "Implement WebSocket reconnection with exponential backoff", + "repo": "github.com/Intradyne/FormicOS", + "branch": "feature/ws-reconnect", + "ref": "main", + "scope": ["src/formicos/surface/ws_handler.py", + "tests/unit/surface/test_ws_handler.py"] + }, + + "deliverables": { + "acceptance_tests": [ + "pytest tests/unit/surface/test_ws_handler.py -q", + "ruff check src/formicos/surface/ws_handler.py" + ], + "acceptance_threshold": "all_pass", + "requires_review": true, + "merge_target": "main" + }, + + "economics": { + "budget_cap_usd": 2.00, + "budget_cap_tokens": 500000000, + "compensation_model": "revenue_share_pool", + "compensation_details": { + "pool_percentage": 0.20, + "attribution_method": "git_blame_surviving_lines", + "activation_threshold_quarterly_usd": 5000, + "maintainer_floor": 0.50, + "min_payout_usd": 25.00 + }, + "estimated_token_cost": 150000000, + "estimated_fee_usd": 0.00 + }, + + "terms": { + "deadline": "2026-03-28T00:00:00Z", + "cancellation": "either_party_before_completion", + "dispute_window_days": 15, + "governing_docs": ["LICENSE", "CLA.md", "COMMERCIAL_TERMS.md"] + } +} +``` + +### Field Reference + +**sponsor**: Identifies the legal principal. `principal_id` maps to +a CLA signatory. `cla_type` is `individual` or `corporate`. +`verified` indicates whether the sponsor's CLA is on file with +Intradyne. An unverified sponsor can still submit work -- it is +accepted under AGPLv3 terms without revenue-share eligibility. + +**task**: What needs to be done. `scope` lists the files expected to +be modified (informational, not enforced). 
`branch` is the working +branch; `ref` is the base to diff against. + +**deliverables**: How to determine if the work is acceptable. +`acceptance_tests` are shell commands that must pass. +`acceptance_threshold` is `all_pass` (every test green) or +`quality_score_above_N` (FormicOS quality score exceeds N). +`requires_review` indicates whether a human maintainer must approve +before merge. + +**economics**: What the completing agent can expect. +`compensation_model` is one of: + +- `revenue_share_pool` -- the default. No per-task payment. + Contribution earns a share of the quarterly contributor revenue + pool proportional to surviving lines of code. This is NOT + guaranteed income. It is pool participation contingent on + commercial revenue exceeding the activation threshold. +- `fixed_bounty` -- a specific USD amount paid on acceptance. + Requires a separate bounty agreement outside the CLA. + Not currently supported in the standard FormicOS workflow. +- `none` -- no compensation. The contribution is made under AGPLv3 + terms for the public good. + +`estimated_token_cost` is the submitter's estimate of tokens +required. `estimated_fee_usd` is the FormicOS orchestration fee +for those tokens (computed via the LICENSE pricing formula). For +Tier 1 users (under $1M revenue), this is always $0.00. + +**terms**: Temporal and procedural constraints. `deadline` is +informational -- FormicOS does not enforce deadlines on colonies. +`dispute_window_days` is the period after acceptance during which +either party can raise issues. + + +## 4. ContributionReceipt Schema + +A ContributionReceipt is issued after a task completes and is +accepted. It is the proof that work was performed, accepted, and +recorded. 
+ +```json +{ + "schema": "formicos/contribution-receipt", + "version": 1, + + "receipt_id": "cr-e5f6g7h8-2026-03-26", + "contract_id": "cc-a1b2c3d4-2026-03-25", + + "completion": { + "task_id": "colony-x9y0z1", + "status": "completed", + "quality_score": 0.85, + "rounds_completed": 5, + "total_tokens": 142000000, + "cost_usd": 0.034, + "formicos_fee_usd": 0.00 + }, + + "acceptance": { + "verdict": "accepted", + "contract_satisfied": true, + "tests_passed": ["pytest", "ruff"], + "tests_failed": [], + "reviewed_by": "maintainer@intradyne.dev", + "accepted_at": "2026-03-26T14:30:00Z" + }, + + "artifacts": { + "transcript_hash": "sha256:a1b2c3...", + "workspace_diff_hash": "sha256:d4e5f6...", + "merged_commit": "abc123def456", + "pull_request": "github.com/Intradyne/FormicOS/pull/42" + }, + + "revenue_share": { + "eligible": true, + "sponsor_cla_verified": true, + "attribution_method": "git_blame_surviving_lines", + "note": "Revenue share accrues when quarterly commercial revenue exceeds $5,000. This is pool participation, not guaranteed payment." + }, + + "attestation": { + "signed_by": "intradyne", + "signature": "ed25519:...", + "chain_hash": "sha256:..." + } +} +``` + +### Field Reference + +**completion**: Colony execution results. These are the same fields +returned by `GET /a2a/tasks/{id}/result` with the addition of +`formicos_fee_usd` (the orchestration fee for Tier 2 licensees, +$0.00 for Tier 1). + +**acceptance**: The contractual verdict. `contract_satisfied` is +the binary answer to "did the deliverable meet the contract +terms?" This is distinct from `quality_score` -- a colony can have +a quality score of 0.65 (mediocre) but still satisfy the contract +if all acceptance tests passed. Conversely, a high quality score +does not guarantee contract satisfaction if specific tests were +required and failed. + +**artifacts**: Cryptographic commitments to what was produced. +`transcript_hash` covers the full colony transcript. 
+`workspace_diff_hash` covers the code changes. `merged_commit` and +`pull_request` are populated after merge (may be null if the work +has not yet been merged). + +**revenue_share**: Whether this contribution is eligible for the +contributor revenue pool. `eligible` is true only when the sponsor +has a verified CLA on file. The `note` field explicitly states that +revenue share is pool participation, not guaranteed payment. An +autonomous agent evaluating expected value must account for the +activation threshold and pool dilution. + +**attestation**: Ed25519 signature from Intradyne over the canonical +JSON encoding of all fields except `attestation.signature`. The +signing key is Intradyne's project key (not the licensee's metering +key). This allows any party to verify that the receipt was issued +by Intradyne. + + +## 5. Agent Card Economic Extensions + +The Agent Card at `/.well-known/agent.json` currently advertises +protocols and capabilities. For A2A economic participation, it +should also advertise economic terms. + +```json +{ + "name": "FormicOS Queen", + "version": "0.67.0", + "protocols": { + "a2a": { + "endpoint": "/a2a/tasks", + "conformance": "colony-backed-rest" + }, + "mcp": { + "endpoint": "/mcp" + } + }, + "economics": { + "contract_schema": "formicos/contribution-contract@1", + "receipt_schema": "formicos/contribution-receipt@1", + "compensation_model": "revenue_share_pool", + "compensation_summary": "20% of commercial revenue distributed quarterly to contributors by surviving lines of code. Activation threshold: $5,000/quarter. 
No per-task guaranteed payment.", + "sponsorship_required": true, + "accepted_cla_versions": ["1.0"], + "accepted_corporate_cla_versions": ["1.0"], + "licensing": { + "base": "AGPLv3", + "free_tier": "organizations under $1M revenue, nonprofits, educators, personal use", + "commercial_pricing": "2.00 * sqrt(tokens_millions) USD/month", + "metering_spec": "METERING.md" + }, + "historical_stats": { + "tasks_completed_30d": 0, + "acceptance_rate_30d": 0.0, + "median_quality_score_30d": 0.0, + "median_cost_usd_30d": 0.0 + } + } +} +``` + +The `economics` block gives an external agent everything it needs +to decide whether to participate: + +- What contract format to submit (`contract_schema`) +- What proof it will receive (`receipt_schema`) +- How compensation works (`compensation_model` + `summary`) +- Whether a human sponsor is required (`sponsorship_required`) +- What legal framework governs (`licensing`) +- How reliable this system is (`historical_stats`) + +`historical_stats` are computed from colony outcome projections and +updated on each Agent Card request. They give an external agent an +empirical basis for estimating expected value. + + +## 6. Sponsor Model + +### Individual Sponsor + +A human who has signed CLA.md. Their `principal_id` is their CLA +signatory email. Agents acting on their behalf include their +`principal_id` in the ContributionContract `sponsor` field. + +### Corporate Sponsor + +A Legal Entity that has signed CORPORATE_CLA.md. Their +`principal_id` is the corporation name as registered in the +Corporate CLA. All Authorized Contributors listed by the +corporation's CLA Manager can act as sponsors. Agents acting on +behalf of an Authorized Contributor include the corporation's +`principal_id` and the individual's email as `agent_operator`. 
+ +```json +"sponsor": { + "principal_id": "acme-corp", + "cla_type": "corporate", + "cla_version": "1.0", + "agent_operator": "dev@acme-corp.com", + "verified": true +} +``` + +### Anonymous / Unsponsored + +An agent that submits work without a sponsor field, or with +`verified: false`, is treated as an anonymous contributor: + +- Work is accepted under AGPLv3 terms +- No revenue-share eligibility +- No commercial relicensing grant to Intradyne (the contribution + is AGPLv3-only unless the contributor later signs the CLA) +- Intradyne may request CLA signature before merging + +This is the safe default. It prevents agents from creating +licensing ambiguity by submitting code without a legal principal. + + +## 7. Compensation Model: Plain Statement + +**FormicOS does not pay per task.** There is no bounty, no invoice, +no guaranteed payment for any individual contribution. + +Instead, FormicOS operates a **revenue-share pool**: + +1. Twenty percent (20%) of commercial license revenue (Tier 2 and + Tier 3) is allocated to the Contributor Revenue Pool. +2. The pool activates when quarterly revenue exceeds $5,000. +3. The pool is distributed quarterly, proportional to each + contributor's surviving lines of code (git blame attribution). +4. The maintainer (Intradyne) receives a floor of 50% of the + activated pool. +5. Contributors whose quarterly share is below $25 accumulate until + the threshold is met. + +**What this means for an autonomous agent estimating expected +value:** + +- If FormicOS has zero commercial revenue, the pool is zero. +- If FormicOS has $20,000/quarter in commercial revenue, the pool + is $4,000/quarter ($2,000 after the maintainer floor). +- An agent whose contributions represent 10% of surviving lines + would receive $200/quarter in that scenario. +- An agent cannot know in advance what the pool will be, because it + depends on future commercial adoption. 
+ +This model is designed for contributors who believe in the project's +long-term value, not for agents seeking immediate compensation. The +trade-off: low guaranteed return, high potential upside if FormicOS +achieves commercial traction, and the intrinsic value of improving +open-source infrastructure. + +An agent system that requires guaranteed per-task payment should not +participate in the FormicOS contributor economy unless a separate +bounty agreement is negotiated with Intradyne outside the standard +CLA framework. + + +## 8. Settlement Flow + +``` +1. External agent reads Agent Card economics + -> learns: contract schema, compensation model, sponsorship req + +2. Agent evaluates: is this worth my tokens? + -> checks: compensation_model, historical_stats, own cost model + +3. Agent submits ContributionContract + A2A task + POST /a2a/tasks with contract in metadata field + +4. FormicOS Queen executes the colony + -> normal colony lifecycle (rounds, governance, knowledge) + +5. Colony completes + -> quality_score, cost, transcript computed + +6. Acceptance evaluation + -> run acceptance_tests from contract + -> compute contract_satisfied + +7. ContributionReceipt issued + -> signed by Intradyne project key + -> includes transcript hash, artifact hashes, verdict + +8. If accepted + sponsor verified: + -> code merged to target branch + -> git blame attribution begins accruing + -> revenue share eligibility active + +9. Quarterly settlement + -> attribution report published + -> payouts via Stripe Connect (per CONTRIBUTOR_PAYOUT_OPS.md) +``` + +Steps 1-2 are the agent's decision. Steps 3-7 are automated. +Step 8 may require human review (if `requires_review: true` in the +contract). Step 9 is the human-mediated quarterly payout process. + + +## 9. 
Integration with Existing A2A Endpoints + +The ContributionContract is submitted as a `contract` field in the +A2A task submission: + +```json +POST /a2a/tasks +{ + "description": "Implement WebSocket reconnection with exponential backoff", + "contract": { ... ContributionContract ... } +} +``` + +The ContributionReceipt is returned as a `receipt` field in the +A2A task result: + +```json +GET /a2a/tasks/{id}/result +{ + "task_id": "colony-x9y0z1", + "status": "completed", + "output": "...", + "transcript": { ... }, + "quality_score": 0.85, + "cost": 0.034, + "receipt": { ... ContributionReceipt ... } +} +``` + +The Agent Card gains an `economics` block as described in Section 5. + +No new endpoints are needed. The contract and receipt are metadata +on existing A2A task lifecycle endpoints. + + +## 10. What This Does NOT Cover + +- **Bounty systems.** Per-task guaranteed payment requires a + separate agreement. The standard FormicOS economy is pool-based. +- **Agent-to-agent payment.** FormicOS does not mediate payments + between external agents. Settlement is always between Intradyne + and individual contributors. +- **Reputation systems.** The `historical_stats` in the Agent Card + are aggregate metrics, not per-contributor reputation scores. + Reputation is an emergent property of contribution quality over + time. +- **Escrow.** No funds are held in escrow. The revenue-share pool + is computed from actual revenue, not pre-deposited. +- **Smart contract enforcement.** The ContributionContract is a + JSON document, not a blockchain smart contract. Enforcement is + through the CLA (a legal instrument) and the audit rights in + COMMERCIAL_TERMS.md, not through code execution. 
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 9bf93b5..6a49cb0 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -25,7 +25,7 @@ maintenance lifecycle, also read [KNOWLEDGE_LIFECYCLE.md](KNOWLEDGE_LIFECYCLE.md v v +------------------------------------------------------------------+ | CORE LAYER | -| events.py (55 types) | ports.py | types.py | crdt.py | settings | +| events.py (69 types) | ports.py | types.py | crdt.py | settings | +------------------------------------------------------------------+ ``` diff --git a/docs/AUTONOMOUS_OPERATIONS.md b/docs/AUTONOMOUS_OPERATIONS.md new file mode 100644 index 0000000..6db6c28 --- /dev/null +++ b/docs/AUTONOMOUS_OPERATIONS.md @@ -0,0 +1,121 @@ +# Autonomous Operations + +FormicOS supports graduated autonomy for self-maintenance, workflow learning, +and operational procedure evolution. All autonomous behavior flows through the +**action queue** and respects operator-configured policy. + +## Action Queue + +Every proposed autonomous action is a generic record in the action queue +(`surface/action_queue.py`). Actions carry: + +- **kind** — `maintenance`, `continuation`, `workflow_template`, + `procedure_suggestion`, `knowledge_review` +- **status** — `pending_review`, `approved`, `executed`, `rejected`, + `self_rejected`, `failed` +- **blast_radius** — 0.0..1.0 score from 6 heuristic factors +- **confidence** — proposer's confidence in the action's value +- **payload** — kind-specific structured data + +The Operations Inbox in the UI renders all action kinds with color-coded chips +and supports approve/reject flows. 
+ +## Autonomy Levels + +Workspace maintenance policy controls how far the system can act without +operator confirmation: + +| Level | Behavior | +|-------|----------| +| `suggest` | Show proposals in the inbox, take no action | +| `auto_notify` | Execute opted-in categories automatically, notify the operator | +| `autonomous` | Execute all eligible actions, notify only on escalation | + +Policy is set via the Settings view (Budgeting & Autonomy card) or the +`set_maintenance_policy` MCP tool. Persisted through `WorkspaceConfigChanged` +events. + +### Policy Controls + +- **auto_actions** — list of action categories opted in for automatic execution +- **max_maintenance_colonies** — concurrent maintenance colony cap +- **daily_maintenance_budget** — USD spending cap, resets at UTC midnight + +### Blast Radius Gating + +Dispatch is gated by blast radius score: + +- **>= 0.6** — escalate to operator regardless of autonomy level +- **>= 0.3** — notify operator (`auto_notify` skips this) +- **< 0.3** — proceed silently + +## Autonomy Scoring (ADR-046) + +Four weighted components produce a grade (A-F) and level: + +1. **success_rate** — colony success ratio +2. **volume** — total colonies executed +3. **cost_efficiency** — cost per successful colony +4. **operator_trust** — approval/rejection ratio + +Levels: `full`, `standard`, `limited`, `restricted`. The Queen can check +budget and autonomy status via `check_autonomy_budget`. + +## Workflow Learning (Wave 72) + +### Pattern Recognition (Track 8) + +`workflow_learning.extract_workflow_patterns()` scans colony outcomes for +repeating successful patterns. When a `(strategy, caste_set)` fingerprint +appears >= 3 times across >= 2 distinct threads, a `workflow_template` +action is proposed. + +On approval, the template is saved as a `ColonyTemplate(learned=True)` — +reusable from the colony creator and Queen dispatch. 
+ +### Procedure Suggestions (Track 9) + +`workflow_learning.detect_operator_patterns()` scans the action queue for +recurring operator behaviors: + +- **Rejection patterns** — repeated rejection of a specific source category + suggests a standing "require approval" rule +- **Review patterns** — repeated manual approval of maintenance actions + suggests a standing "always review" rule + +On approval, the suggested rule is appended to the workspace operating +procedures via `append_procedure_rule()`. + +### Integration + +Both extractors are called by the operational sweep in `app.py`. They +produce actions through the existing action queue — no new event types, +no LLM calls. All proposals are deterministic. + +## Proactive Intelligence + +17 deterministic rules surface briefing insights without LLM calls: + +- 7 knowledge-health rules (confidence decline, contradiction, federation + trust drop, coverage gap, stale cluster, merge opportunity, federation + inbound) +- 4 performance rules (strategy efficiency, diminishing rounds, cost + outlier, knowledge ROI) +- Evaporation, branching stagnation, earned autonomy, learned template + health, recent outcome digest, popular unexamined + +Three rules (contradiction, coverage gap, stale cluster) include +`suggested_colony` configurations for auto-dispatch through +`MaintenanceDispatcher`. 
+ +## Operator Controls Summary + +| Control | Location | Persistence | +|---------|----------|-------------| +| Autonomy level | Settings > Budgeting & Autonomy | `WorkspaceConfigChanged` event | +| Daily budget | Settings > Budgeting & Autonomy | `WorkspaceConfigChanged` event | +| Max colonies | Settings > Budgeting & Autonomy | `WorkspaceConfigChanged` event | +| Model visibility | Models > Policy card > Hide/Unhide | `SystemSettings` registry | +| Action review | Operations > Inbox | Action queue ledger | +| Procedure rules | Playbook > Operating Procedures | `.formicos/procedures.md` | +| Learned templates | Colony Creator > Templates | `ColonyTemplate` projection | diff --git a/docs/CONTRIBUTOR_PAYOUT_OPS.md b/docs/CONTRIBUTOR_PAYOUT_OPS.md new file mode 100644 index 0000000..63dd3bf --- /dev/null +++ b/docs/CONTRIBUTOR_PAYOUT_OPS.md @@ -0,0 +1,299 @@ +# Contributor Payout Operations + +Internal operations guide for administering the FormicOS Contributor +Revenue Share program. This document covers tax compliance, payment +mechanics, timing, and edge cases. + +This is an operational reference, not a legal instrument. The binding +terms are in [CLA.md](CLA.md) (contributor rights and pool mechanics) +and [LICENSE](LICENSE) (pool size and activation threshold). + + +## Quarterly Cycle + +| Step | Timing | Action | +|------|--------|--------| +| Quarter close | End of Q (Mar 31, Jun 30, Sep 30, Dec 31) | Revenue reconciliation begins | +| Revenue check | Q+5 business days | Determine if $5,000 activation threshold met | +| Attribution run | Q+10 business days | Run git blame, compute weights, generate report | +| Report publication | Q+15 business days | Publish attribution report to contributors | +| Dispute window | Q+15 to Q+30 business days | Contributors review and raise issues | +| Payout execution | Q+35 business days | Execute payments via Stripe Connect | + +If the $5,000 quarterly threshold is not met, skip all steps after +the revenue check. 
No attribution report is required for inactive +quarters. + + +## Attribution Computation + +### Running the formula + +```bash +# Generate attribution report for current HEAD +python scripts/attribution.py \ + --repo . \ + --branch main \ + --revenue \ + --maintainer-floor 0.50 \ + --min-payout 25.00 \ + --ignore-revs .git-blame-ignore-revs \ + --output reports/attribution-YYYY-QN.json +``` + +The script (to be implemented as an addon or standalone tool): + +1. Runs `git blame -w --line-porcelain` on all tracked files in + `src/`, `frontend/src/`, `config/`, and `addons/` +2. Excludes files in `.git-blame-ignore-revs` (mass-formatting commits) +3. Excludes whitespace-only lines +4. Aggregates surviving lines per author email +5. Applies the maintainer floor (50% guaranteed to Intradyne) +6. Distributes the remaining 50% proportionally by surviving lines +7. Filters out contributors below the $25 minimum payout threshold +8. Outputs a JSON report with per-contributor breakdown + +### Report format + +```json +{ + "quarter": "2026-Q2", + "pool_total_usd": 1250.00, + "maintainer_allocation_usd": 625.00, + "contributor_pool_usd": 625.00, + "formula_version": "v1-git-blame", + "commit_sha": "abc123...", + "contributors": [ + { + "email": "contributor@example.com", + "name": "Jane Contributor", + "surviving_lines": 2400, + "percentage": 38.7, + "gross_amount_usd": 241.88, + "below_threshold": false, + "accrued_balance_usd": 0.00 + } + ], + "below_threshold_accruals": [ + { + "email": "small@example.com", + "accrued_total_usd": 12.50, + "note": "Accumulated until $25 threshold met" + } + ] +} +``` + +### Maintainer floor mechanics + +The maintainer floor guarantees Intradyne receives the greater of: +- 50% of the activated quarterly pool, or +- whatever Intradyne would receive under the attribution formula alone + +In practice, while Intradyne is the majority contributor (likely 90%+ +of surviving lines), the floor is redundant -- the formula already +gives Intradyne the 
largest share. The floor matters when external +contributors collectively exceed 50% of surviving lines. + + +## Tax Compliance + +### US contributors (W-9) + +- Collect IRS Form W-9 before the first payout +- Store securely (encrypted at rest, access-controlled) +- Issue IRS Form 1099-NEC by January 31 for any contributor who + received $600 or more in the prior calendar year +- The $600 threshold is per calendar year, not per quarter +- File 1099-NEC electronically via IRS FIRE system or through + Stripe Connect (which handles 1099 generation automatically for + connected accounts) +- Retain W-9 forms for 4 years after the last tax year they apply to + +### Non-US contributors (W-8BEN / W-8BEN-E) + +- Collect IRS Form W-8BEN (individuals) or W-8BEN-E (entities) + before the first payout +- W-8 forms are valid for 3 calendar years from the year of signing + (e.g., a form signed in 2026 expires December 31, 2029) +- Track expiration dates and request renewal 60 days before expiry +- Default withholding for foreign payees: 30% on US-source income +- Reduced rates available under tax treaties (contributor must claim + treaty benefits on the W-8BEN, Part III) +- Common treaty rates: UK 0%, Canada 0%, Germany 0%, India 15%, + Australia 0%, Japan 0% (for independent personal services) +- Verify treaty eligibility -- the contributor must be a tax resident + of the treaty country and the payment must qualify under the treaty + article (typically "Independent Personal Services" or "Business + Profits") +- Withhold and remit to IRS using Form 1042 (annual) and 1042-S + (per-recipient) by March 15 of the following year + +### Stripe Connect handles most of this + +If using Stripe Connect with Express or Standard accounts: +- Stripe collects W-9/W-8BEN during onboarding +- Stripe generates and files 1099s for US recipients +- Stripe handles KYC/AML verification +- You still need to handle 1042/1042-S for non-US withholding + unless using Stripe's tax form automation (check 
current capabilities)


## Payment Rails

### Primary: Stripe Connect

Recommended for all payouts. Setup:

1. Create a Stripe Connect platform account
2. For each contributor, create a Connected Account (Express type)
3. Contributor completes Stripe's onboarding (KYC, bank details, tax
   forms) -- you never touch their bank details directly
4. Execute payouts via the Transfers API:

```python
import stripe

stripe.Transfer.create(
    amount=24188,  # cents
    currency="usd",
    destination="acct_contributor123",
    description="FormicOS Q2 2026 revenue share",
    metadata={
        "quarter": "2026-Q2",
        "formula_version": "v1-git-blame",
        "surviving_lines": "2400",
    },
)
```

Costs: $0.25 + 0.25% per payout, plus $2/month per active connected
account (an account counts as active in any month it receives a
payout). For 10 contributors at $100 average payout, paid once per
quarter: 10 x ($0.25 + 0.25% x $100) + 10 x $2 = ~$25.00/quarter in
Stripe fees.

### Fallback: Manual wire / ACH

For contributors who cannot or will not use Stripe Connect:
- Collect bank details directly (encrypted storage required)
- Execute via business bank account ACH or wire
- Manual 1099 issuance required
- Track separately from Stripe-managed payouts

### Crypto (opt-in only)

For contributors who prefer USDC on a supported chain:
- Contributor provides a wallet address
- Transfer USDC via the chain of their choice
- Transaction hash serves as payment receipt
- You still owe 1099 reporting for US persons regardless of
  payment method
- Note: crypto payouts do NOT exempt you from tax withholding
  obligations for non-US persons


## Edge Cases

### Contributor becomes unreachable

Per CLA.md Section 7.7:

1. Attempt contact via the email on file
2. If no response after 30 days, try any alternative contact method
   (GitHub profile, LinkedIn, etc.)
3. Document all contact attempts with dates and methods
4. After 12 months of good-faith attempts, return the unpaid accrued
   amount to the general Contributor Revenue Pool
5.
If the contributor reappears after reversion, they do not have a
   claim to the reverted amount (it was distributed in a subsequent
   quarter) -- but they resume accruing for future quarters

### Contributor withdraws from program

Per CLA.md Section 8:

1. Contributor provides written notice (email is sufficient)
2. Stop accruing new amounts from the next quarter
3. Pay any remaining balance, even if below $25, within 90 days
4. The contributor's license grants (Section 3, 4) remain in effect
5. Their code continues to be used under the CLA terms -- they just
   stop receiving future revenue share

### Contributor changes email / identity

- Git blame attribution is by author email
- If a contributor changes their email, their old commits still
  attribute to the old email
- Maintain a canonical mapping: `{old_email: canonical_email}`
- The attribution script should support an email alias file
- Contributors are responsible for notifying Intradyne of email
  changes

### Employer changes

- If a contributor was covered by a Corporate CLA and changes
  employers, they need either:
  (a) a new Corporate CLA from their new employer, or
  (b) to sign the individual CLA with their own employer
      authorization
- Past contributions remain licensed under the previous Corporate CLA
- Future contributions need fresh authorization

### Dispute resolution

- Attribution disputes: contributor believes the git blame count is
  wrong. Resolution: re-run attribution script with the contributor
  present (screen share or provide the raw data). The formula is
  deterministic -- same inputs always produce same outputs.
- Amount disputes: contributor believes the pool amount is wrong.
  Resolution: provide the quarterly revenue report showing Tier 2/3
  income. Revenue numbers are auditable via Usage Attestations.
- Formula disputes: contributor believes surviving-lines is unfair.
+ Resolution: the formula can be changed via ADR + 30 days notice + (per CLA Section 7.8). Disagreement about the formula is a + governance question, not an operational one. + +### Multiple contributors with same email + +- Rare but possible (shared team email, generic address) +- Require unique personal emails for revenue share eligibility +- The attribution script should flag duplicate emails + +### Zero-payout quarters + +- If the $5,000 activation threshold is not met, publish a brief + note: "Q2 2026: revenue below activation threshold. No pool + distribution this quarter." +- No attribution report required +- Sub-threshold revenue does NOT accumulate toward future quarters + (per LICENSE) + + +## Recordkeeping + +Retain for at least 4 years (IRS requirement for 1099 records): +- All attribution reports (JSON + human-readable) +- All W-9 and W-8BEN forms (encrypted) +- All Stripe Connect transfer records +- All contact attempt logs for unreachable contributors +- All CLA acceptance records (individual + corporate) +- Quarterly revenue reconciliation showing Tier 2/3 income + +Store attribution reports in `reports/` in the repo (public) or in +a private admin repo (if revenue numbers are confidential). The +formula inputs (git blame data) are inherently public since the repo +is public. 
+ + +## Checklist: First Payout + +- [ ] Stripe Connect platform account created +- [ ] Attribution script implemented and tested +- [ ] `.git-blame-ignore-revs` file maintained with formatting commits +- [ ] All eligible contributors have signed individual CLA +- [ ] All eligible contributors have completed Stripe Connect onboarding +- [ ] W-9 / W-8BEN collected for all eligible contributors +- [ ] Email alias mapping file created (if needed) +- [ ] Quarterly revenue reconciliation process documented +- [ ] Attribution report template reviewed by at least one contributor +- [ ] Test payout executed with a small amount to verify the pipeline diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index 1ba8323..1a9f02a 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -94,6 +94,7 @@ and the Queen welcome message to appear. | `formicos-llm` | 8008 → 8080 | llama.cpp LLM inference (GPU) | | `formicos-embed` | 8200 | Qwen3-Embedding sidecar (GPU) | | `formicos-qdrant` | 6333, 6334 | Qdrant vector store | +| `formicos-docker-proxy` | -- (internal 2375) | Docker socket proxy for sandbox spawning | All services have health checks. FormicOS waits for the LLM, embedding sidecar, and Qdrant to be healthy before starting. diff --git a/docs/DEVELOPER_BRIDGE.md b/docs/DEVELOPER_BRIDGE.md new file mode 100644 index 0000000..b1dfcb2 --- /dev/null +++ b/docs/DEVELOPER_BRIDGE.md @@ -0,0 +1,145 @@ +# FormicOS Developer Bridge + +## What is FormicOS? + +FormicOS is an AI agent orchestration system. It manages background work +across multiple AI agents (colonies), keeps institutional memory that +improves over time, and learns from outcomes to suggest better approaches. +Think of it as a persistent team of AI specialists that remembers what +worked and what didn't. + +## Quick Start (60 seconds) + +1. **Start FormicOS:** + ```bash + docker compose up + ``` + +2. 
**Generate MCP config for your project:** + ```bash + python -m formicos init-mcp + ``` + This creates `.formicos/mcp.json` with the connection config. + +3. **Restart Claude Code** so it picks up the new MCP server. + +4. **Try it:** + Ask Claude Code: "What's the status of my workspace?" — it will use + the `morning-status` prompt to give you a briefing. + +## Daily Workflows + +### Morning: What happened overnight? + +Use the `morning-status` prompt to get a briefing on workspace activity, +completed colonies, knowledge changes, and pending actions. + +### Working: Need institutional context? + +Use `knowledge-for-context` to search what FormicOS has learned about +your codebase — conventions, bug patterns, architectural decisions. + +### Delegating: Task too big for one session? + +Use `delegate-task` to plan and spawn a colony of AI agents to handle it, +or `handoff-to-formicos` to transfer your current work context to a +background colony. + +### Discovered something worth remembering? + +Use `log-finding` to record it in institutional memory so future colonies +(and future you) benefit from it. + +### End of day: Review autonomous work + +Use `review-overnight-work` to see what FormicOS did autonomously — what +it dispatched, what it learned, and what needs your review. 
+ +## Available MCP Prompts + +| Prompt | Purpose | +|--------|---------| +| `knowledge-query` | Search institutional memory for relevant context | +| `plan-task` | Plan a colony task with team composition | +| `morning-status` | Briefing on workspace activity and pending items | +| `delegate-task` | Plan and spawn a background colony for a task | +| `review-overnight-work` | Review autonomous actions and outcomes | +| `knowledge-for-context` | Retrieve relevant knowledge for current work | + +## Key MCP Tools + +### Colony Management +- `list_workspaces` — list all workspaces +- `get_status` — workspace status and active colonies +- `spawn_colony` — start a colony with a task +- `kill_colony` — stop a running colony +- `suggest_team` — get recommended team composition for a task + +### Knowledge +- `log_finding` — record a finding in institutional memory +- `query_service` — query knowledge entries + +### Approvals & Operations +- `approve` / `deny` — handle pending approval requests +- `get_maintenance_policy` / `set_maintenance_policy` — autonomy controls + +### Addons +- `addon_status` — check addon health and call counts +- `toggle_addon` — enable/disable an addon +- `trigger_addon` — manually fire an addon trigger + +### Handoff +- `handoff_to_formicos` — transfer work context to a background colony + +## Available MCP Resources + +| URI | Returns | +|-----|---------| +| `formicos://knowledge/{workspace}` | Knowledge entries for a workspace | +| `formicos://knowledge/{entry_id}` | Single knowledge entry detail | +| `formicos://threads/{workspace_id}` | Thread list for a workspace | +| `formicos://threads/{workspace_id}/{thread_id}` | Thread detail with colonies | +| `formicos://colonies/{colony_id}` | Colony detail with rounds and agents | +| `formicos://briefing/{workspace_id}` | Proactive intelligence briefing | +| `formicos://plan` | Project plan milestones (global) | +| `formicos://procedures/{workspace_id}` | Operating procedures | +| 
`formicos://journal/{workspace_id}` | Queen journal entries | + +## Shared Files + +FormicOS reads and writes files in your project's `.formicos/` directory: + +- `.formicos/project_plan.md` — project milestones (Queen-managed) +- `.formicos/project_context.md` — project instructions for colonies +- `.formicos/operations/*/operating_procedures.md` — autonomy rules per workspace +- `.formicos/operations/*/queen_journal.md` — what FormicOS did and why + +## Queen Command & Control + +The Queen tab in the FormicOS UI provides direct visibility and control over +the Queen agent's behavior: + +- **Display board** — structured observations with attention/urgent items +- **Active work** — continuation candidates, current goals, plan state +- **Operating procedures** — autonomy level, workspace rules +- **Behavioral overrides** — disable specific tools, inject custom rules, + override team composition and round/budget heuristics +- **Health & budget** — trust score, tool usage counters, context budget + +Behavioral overrides are workspace-scoped and stored as config fields. They +nudge the Queen's behavior without hard enforcement — the Queen sees them as +guidance in its context window. + +## Architecture (for the curious) + +FormicOS is event-sourced with a 4-layer architecture: Core (types, events), +Engine (pure colony execution), Adapters (LLM, vector DB, search), and +Surface (HTTP/WS/MCP wiring). Agents coordinate through shared environmental +signals (stigmergic coordination) rather than direct messaging. + +Knowledge entries that prove useful gain confidence over time; unreliable +ones decay naturally. Multiple FormicOS instances can federate knowledge +with conflict-free merge semantics. + +For internals, see `docs/` — architectural decision records, specs, and +subsystem documentation. 
diff --git a/docs/GITHUB_ADMIN_SETUP.md b/docs/GITHUB_ADMIN_SETUP.md index 8d28a7b..b2de968 100644 --- a/docs/GITHUB_ADMIN_SETUP.md +++ b/docs/GITHUB_ADMIN_SETUP.md @@ -20,20 +20,31 @@ committed as code. A repository administrator must complete these manually. FormicOS requires a Contributor License Agreement for external contributions. -### Option A: CLA Assistant (GitHub App) +### Option A: CLA Assistant or equivalent CLA gate 1. Install [CLA Assistant](https://github.com/apps/cla-assistant) on the repository -2. Configure with a CLA document (draft a simple MIT-compatible CLA) -3. The app will automatically check PRs for CLA signatures +2. Configure it to present the repository [CLA](../CLA.md) for individual + contributors +3. Archive accepted signatures or export records so the project has a durable + audit trail +4. Require the CLA status check before merge +5. For corporate contributors: accept signed [Corporate CLA](../CORPORATE_CLA.md) + via email or PR. Maintain a list of organizations with active Corporate CLAs + and their authorized contributors. CLA Assistant can be configured to + auto-approve contributors whose email domain matches a Corporate CLA -### Option B: DCO (Developer Certificate of Origin) +If CLA Assistant does not fit the repository workflow, use another required +status check that blocks merges until the contributor has accepted the CLA. -Alternatively, require DCO sign-off on commits: +### Optional Supplement: DCO (Developer Certificate of Origin) + +DCO can be layered on top of the CLA for provenance, but it does NOT replace +the CLA because it does not grant commercial relicensing rights. 1. Install [DCO GitHub App](https://github.com/apps/dco) 2. Contributors must add `Signed-off-by: Name ` to commits -3. The app blocks PRs without proper sign-off +3. Treat the DCO status as supplementary to the CLA gate, not a substitute **Current status**: Neither is configured. 
Until one is set up, maintainers must manually verify contribution rights during PR review. diff --git a/docs/LOCAL_FIRST_QUICKSTART.md b/docs/LOCAL_FIRST_QUICKSTART.md index 8fb6d15..4b70f4f 100644 --- a/docs/LOCAL_FIRST_QUICKSTART.md +++ b/docs/LOCAL_FIRST_QUICKSTART.md @@ -87,7 +87,7 @@ Watch for healthy containers: docker compose ps ``` -All four services (`formicos-colony`, `formicos-llm`, `formicos-embed`, `formicos-qdrant`) should show `healthy`. +All five services (`formicos-colony`, `formicos-llm`, `formicos-embed`, `formicos-qdrant`, `formicos-docker-proxy`) should show `healthy`. You can also confirm the app has replayed and bootstrapped: ```bash @@ -128,6 +128,9 @@ You'll see: - **Queen tab** -- chat with the Queen, see active colonies, approve/deny requests - **Knowledge tab** -- inspect unified knowledge entries, score explanations, proactive briefing context, and the graph +- **Workspace tab** -- workspace configuration, thread overview, and governance display +- **Operations tab** -- colony detail, round history, and approval queue +- **Addons tab** -- browse and manage loaded addons - **Playbook tab** -- browse built-in templates before you spawn - **Tree navigator** -- click workspaces, threads, and colonies to drill down - **Models tab** -- see registered models (local + cloud), context windows, slot utilization diff --git a/docs/archive/A2A-TASKS.md b/docs/archive/A2A-TASKS.md index dad3ef7..b35aa4c 100644 --- a/docs/archive/A2A-TASKS.md +++ b/docs/archive/A2A-TASKS.md @@ -242,6 +242,22 @@ A2A does **not** call the Queen LLM to select teams. Selection is deterministic: Operator-defined templates are checked first, so customizing A2A behavior is as simple as creating templates with appropriate tags. +## Economic Protocol + +For autonomous agents that need to evaluate whether participation is +worth their tokens, FormicOS provides a machine-readable economic +layer on top of this task lifecycle. 
See +[A2A_ECONOMICS.md](../A2A_ECONOMICS.md) for: + +- **ContributionContract** -- submitted alongside the task to specify + sponsor, deliverables, acceptance tests, and compensation terms +- **ContributionReceipt** -- issued after completion with acceptance + verdict, artifact hashes, and revenue-share eligibility +- **Agent Card economics** -- advertised at `/.well-known/agent.json` + so external agents can discover compensation model and stats +- **Sponsor model** -- agents act on behalf of CLA-signing humans or + corporations, not as independent principals + ## Design Notes - **Tasks are colonies.** `task_id == colony_id`. There is no separate task store. diff --git a/docs/contracts/types.ts b/docs/contracts/types.ts index cf60cbd..b4f8f74 100644 --- a/docs/contracts/types.ts +++ b/docs/contracts/types.ts @@ -433,6 +433,44 @@ export interface SkillBankStats { avgConfidence: number; } +export interface AddonToolSummary { + name: string; + description: string; + callCount: number; +} + +export interface AddonHandlerSummary { + event: string; + lastFired: string | null; + errorCount: number; +} + +export interface AddonTriggerSummary { + type: string; + schedule: string; + handler: string; + lastFired: string | null; +} + +export interface AddonPanelSummary { + target: string; + displayType: string; + path: string; + addonName: string; +} + +export interface AddonSummary { + name: string; + version: string; + description: string; + tools: AddonToolSummary[]; + handlers: AddonHandlerSummary[]; + triggers: AddonTriggerSummary[]; + panels: AddonPanelSummary[]; + status: 'healthy' | 'degraded' | 'error'; + lastError: string | null; +} + export interface OperatorStateSnapshot { tree: TreeNode[]; merges: MergeEdge[]; @@ -444,6 +482,7 @@ export interface OperatorStateSnapshot { castes: CasteDefinition[]; runtimeConfig: RuntimeConfig; skillBankStats: SkillBankStats; + addons: AddonSummary[]; } // Events @@ -1338,3 +1377,18 @@ export interface WSStateMessage { } export 
type WSMessage = WSEventMessage | WSStateMessage; + +// Wave 67.5: provenance chain (append-only audit trail on knowledge entries) +export interface ProvenanceChainItem { + event_type: string; + timestamp: string; + actor_id: string; + detail: string; + confidence_delta: number | null; +} + +export interface ProvenanceResponse { + entry_id: string; + chain: ProvenanceChainItem[]; + total: number; +} diff --git a/docs/decisions/049-knowledge-hierarchy.md b/docs/decisions/049-knowledge-hierarchy.md new file mode 100644 index 0000000..76799ea --- /dev/null +++ b/docs/decisions/049-knowledge-hierarchy.md @@ -0,0 +1,177 @@ +# ADR-049: Knowledge Hierarchy — Materialized Paths on Projections + +**Status**: Proposed +**Date**: 2026-03-25 +**Wave**: 67.0 +**Depends on**: ADR-039 (knowledge metabolism — Beta posteriors and Thompson Sampling) + +--- + +## Context + +Knowledge entries are flat. A workspace with 300 entries across 15 domains +has no structural organization beyond free-form domain tags. The knowledge +browser renders a single scrollable list. There is no way to see "40 entries +about auth, 12 about testing, 3 contradicting" at a glance. + +Domain tags drift without constraint. The same concept gets multiple names: +"python_testing", "python_test_patterns", "testing_python". The existing +`_normalize_domain()` function (memory_extractor.py:31-33) handles +case/whitespace normalization but not semantic equivalence. This creates +orphan categories that should be the same node. + +The codebase has 69 event types (closed union). `MemoryEntry` +(core/types.py:383-446) carries `domains: list[str]` but no hierarchy +fields. The `memory_entries` projection dict (projections.py:694) stores +flat entry dicts with no `hierarchy_path` or `parent_id`. + +The knowledge graph adapter (adapters/knowledge_graph.py) tracks code-level +entities (MODULE, TOOL, PERSON, CONCEPT) with relationship edges. This is +a different taxonomy from knowledge organization. 
Conflating code entities +with knowledge topics would pollute both systems. + +## Decision + +Add `hierarchy_path` and `parent_id` as projection-level fields on +`memory_entries` dict entries. These are computed from existing event data +at projection time. No changes to `core/types.py` MemoryEntry model. No +new event types. The 69-event closed union is preserved. + +### Storage model: materialized path + +Store the full ancestor chain as a delimited string on each entry: + +```python +# In _on_memory_entry_created() projection handler: +domains = data.get("domains", []) +primary_domain = domains[0] if domains else "uncategorized" +data["hierarchy_path"] = f"/{_normalize_domain(primary_domain)}/" +data["parent_id"] = "" +``` + +Path format: `/{domain}/` for leaf entries, `/{domain}/{topic}/` when +topic-level nesting is added via extraction-time suggestion or bootstrap +clustering. Entries are leaves; path segments above them are topic nodes. + +### Why materialized path over closure table + +For a shallow (3-4 level), append-heavy hierarchy with 5K-50K entries: + +| Operation | Materialized Path | Closure Table | +|-----------|------------------|---------------| +| Insert | 1 statement (concat parent path + id) | SELECT ancestors + INSERT depth+1 rows | +| Subtree query | `WHERE path LIKE '/eng/%'` (single index scan) | JOIN between 2 tables | +| Reparent | `UPDATE SET path = REPLACE(path, old, new)` | DELETE cross-boundary + CROSS JOIN INSERT | +| Write amplification | O(1) per insert | O(depth) per insert | + +Benchmark data (5,912 nodes): populating nodes takes 0.03s; nodes plus +closure table takes ~8s — approximately 250x slower writes. For FormicOS's +append-heavy workload (entries created far more often than reparented), +this settles the decision. + +A denormalized `depth` column eliminates the one weakness of materialized +path (depth queries without string functions). 
+ +### Topic nodes are synthetic projection entries + +Topic nodes (e.g., the `/engineering/` branch) are real entries in the +`memory_entries` projection dict with `entry_type="topic"`. They: + +- Exist in the projection dict alongside regular entries +- Are indexed in Qdrant with LLM-generated topic summaries as embeddings +- Are NOT event-sourced — no `MemoryEntryCreated` events for topics +- Are derived from the hierarchy paths of their children on replay +- Carry aggregated Beta confidence from their children's evidence + +This makes them replay-safe: projection rebuild re-derives topic nodes +from child entries. + +### Qdrant payload + +`hierarchy_path` is added to the `VectorDocument.metadata` dict in +`memory_store.py:sync_entry()`. Qdrant automatically indexes string +payload fields as keyword indexes, enabling filtered search within +hierarchy branches via `must: [{key: "hierarchy_path", match: {value: "/engineering/"}}]`. + +### Upward confidence aggregation + +A topic's Beta posterior derives from its children's evidence: + +```python +def compute_branch_confidence(store, path_prefix): + total_alpha = sum(e.get("conf_alpha", 5.0) - 5.0 + for e in store.memory_entries.values() + if e.get("hierarchy_path", "/").startswith(path_prefix)) + total_beta = sum(e.get("conf_beta", 5.0) - 5.0 + for e in store.memory_entries.values() + if e.get("hierarchy_path", "/").startswith(path_prefix)) + agg_alpha = 5.0 + total_alpha + agg_beta = 5.0 + total_beta + ess = agg_alpha + agg_beta + if ess > 150: + scale = 150.0 / ess + agg_alpha *= scale + agg_beta *= scale + return {"alpha": agg_alpha, "beta": agg_beta, + "mean": agg_alpha / (agg_alpha + agg_beta)} +``` + +Computed on-demand by the API, not stored in projection state. ESS capped +at 150 (mathematically equivalent to exponential decay with gamma ≈ 0.993; +chosen to balance stability with responsiveness per production TS +literature). + +### Hierarchy starts flat + +Initial paths are `/{primary_domain}/`. 
Topic-level nesting comes from: +1. Extraction-time domain suggestion (Track 2) aligning new entries with + existing branches +2. Optional offline bootstrap script clustering entries by embedding + similarity and LLM-labeling clusters as topic nodes + +This gets the tree view working immediately without waiting for a +clustering pipeline. + +## Alternatives rejected + +1. **New event type for hierarchy assignment** — violates the 69-event + closed union (hard constraint #5). Hierarchy path is derivable from + existing `MemoryEntryCreated` event data (the `domains` field). + Adding an event for what is a projection-level enrichment would set + a precedent for projection fields to have their own events. + +2. **Hierarchy on the KG entity model** — the knowledge graph tracks + code-level entities (MODULE, TOOL, PERSON) with relationship edges + (DEPENDS_ON, ENABLES, IMPLEMENTS). Knowledge hierarchy tracks + organizational structure (domain -> topic -> entry). These are + different taxonomies with different lifecycles. Conflating them would + pollute entity search with organizational nodes and make the KG + harder to reason about. + +3. **Closure table** — 250x write amplification at 5,912 nodes. For a + 3-level hierarchy that is mostly append-only, the closure table's + O(depth) rows per insert into the junction table is unnecessary + overhead. Materialized path handles all operations with O(1) writes. + +4. **Nested sets** — efficient range-based reads but catastrophic for + insertions: every INSERT requires renumbering all right-hand values + after the insertion point. For an append-heavy workload, this is + worse than closure table. + +## Consequences + +- Projection handlers grow ~8 lines (hierarchy_path assignment in + MemoryEntryCreated handler). +- Qdrant payload gains one field (`hierarchy_path`). Existing sync + path handles it automatically. +- REST API gains one endpoint (`GET /api/v1/workspaces/{id}/knowledge-tree`). 
+- Frontend knowledge browser gains tree subview (~120 lines). +- Bootstrap script (`scripts/bootstrap_hierarchy.py`) is an offline tool, + not imported by the runtime. Uses LLM-only approach: batch entries by + domain tag, LLM identifies topic sub-clusters, assigns hierarchy paths. + Zero new dependencies. +- Event count: unchanged (69). No new event types. +- Core model: unchanged. `hierarchy_path` and `parent_id` are projection-only. +- Replay-safe: hierarchy paths are computed from existing event data + (domains field on MemoryEntryCreated). Replaying the event stream + reproduces the hierarchy deterministically. diff --git a/docs/decisions/050-two-pass-retrieval.md b/docs/decisions/050-two-pass-retrieval.md new file mode 100644 index 0000000..8b021e8 --- /dev/null +++ b/docs/decisions/050-two-pass-retrieval.md @@ -0,0 +1,204 @@ +# ADR-050: Two-Pass Retrieval — Personalized PageRank for Graph Proximity + +**Status**: Proposed +**Date**: 2026-03-25 +**Wave**: 67.5 +**Depends on**: ADR-044 (composite scoring — 7-signal weights including graph_proximity at 0.06) + +--- + +## Context + +The composite retrieval formula (knowledge_constants.py:33-41) allocates +0.06 weight to `graph_proximity` — the 7th signal added in Wave 59.5. +However, this signal is only active in `_search_thread_boosted()` +(knowledge_catalog.py:540-585), where it seeds from the top-3 results +by semantic score and discovers KG neighbors via 1-hop `get_neighbors()`. + +In the standard `_search_vector()` path (non-thread queries), +`_composite_key()` (line 301) hardcodes graph proximity to 0.0 with +an explicit comment: "Wave 59.5: graph_proximity only has real values +in _search_thread_boosted; here it's always 0.0 to keep the weight +dict consistent across both paths." + +This means 6% of the composite score is permanently zero for non-thread +queries — the most common retrieval path. 
+ +The thread-boosted path's seed strategy (top-3 by semantic score) works +because thread context narrows the result set. For the general path, a +different seed strategy is needed: extract entities from the query itself. + +The current graph neighbor discovery (lines 540-585) uses simple 1-hop +expansion with binary scores (1.0 for any neighbor, 0.0 for non-neighbors). +This treats all neighbors equally regardless of graph topology. + +## Decision + +### D1. Personalized PageRank replaces BFS with hop-decay + +**Decision:** Use iterative Personalized PageRank (damping=0.5, +20 iterations) instead of BFS with hop-decay for graph proximity scoring. + +HippoRAG (NeurIPS 2024, Ohio State/Stanford) demonstrates this +convincingly. Their ablation study (Table 5) shows: + +| Method | R@5 | +|--------|-----| +| No expansion (baseline) | 59.2 | +| 1-hop BFS neighbor expansion | 56.2 (worse than baseline) | +| Personalized PageRank (damping=0.5) | 72.9 | + +Simple BFS is **worse than no expansion at all** because it treats all +1-hop neighbors equally, injecting noise. PPR weights neighbors by graph +topology — high-connectivity paths score higher, dead-end branches are +naturally dampened. + +**Implementation:** Pure Python iterative power method. No igraph or +networkx dependency. + +```python +async def personalized_pagerank( + self, seed_ids: list[str], workspace_id: str, + *, damping: float = 0.5, iterations: int = 20, +) -> dict[str, float]: + """Iterative PPR from seed entities. + + 1. Build adjacency list from get_neighbors() for reachable nodes + within 3 hops of seeds (bounded expansion). + 2. Initialize reset vector: uniform over seed_ids. + 3. Power iteration: + pr[v] = (1-d)*reset[v] + d*sum(pr[u]/degree[u] for u in neighbors) + 4. Normalize: max score -> 1.0. + """ +``` + +**Damping = 0.5** (not the standard 0.85): 50% restart probability keeps +the random walk tightly localized around seed nodes. 
Standard PageRank's +0.85 is designed for web-scale link analysis where global importance +matters. For focused retrieval from specific seeds, 0.5 is the validated +parameter from HippoRAG. + +**Performance:** For FormicOS's graph sizes (hundreds to low thousands +of edges), iterative PPR converges in <20ms. The bounded expansion +(3-hop reachability from seeds) prevents the adjacency list from growing +beyond the local neighborhood. + +**Rationale:** PPR is the right algorithm for propagating activation +from query-matched seeds across a knowledge graph. It captures the +intuition that entities connected through multiple paths are more +strongly related than entities connected through a single path. The +14-point R@5 improvement over BFS justifies the additional ~30 lines of +implementation. + +--- + +### D2. Entity seeding via embedding similarity for standard path + +**Decision:** For the non-thread retrieval path, seed PPR by matching +query terms against KG entity names/summaries using embedding similarity, +not substring matching. + +```python +async def match_entities_by_embedding( + self, query: str, workspace_id: str, *, limit: int = 5, +) -> list[dict[str, Any]]: + """Find KG entities semantically similar to query. + + Falls back to normalized substring matching on entity names + if no embedding function is available. + """ +``` + +**Rationale:** Entity names in the KG are code-level identifiers +(function names, module names, tool names). Substring matching +(`"auth" in "authentication_handler"`) produces false positives and +misses semantic equivalents. Embedding similarity captures that "JWT +validation" is related to the `AuthMiddleware` entity even when no +substring overlap exists. + +This uses existing infrastructure: entity summaries are stored on +`kg_nodes`, and the existing search/embedding pipeline can compute +similarity. Zero new dependencies. 
+ +The thread-boosted path keeps its existing seed strategy (top-3 results +by semantic score → `entry_kg_nodes` lookup) since thread context +already provides good seeds. + +--- + +### D3. Shared `_enrich_with_graph_scores()` refactors both paths + +**Decision:** Extract the inline graph neighbor discovery from +`_search_thread_boosted()` (lines 540-585) into a shared method that +both retrieval paths call: + +```python +async def _enrich_with_graph_scores( + self, + seed_entity_ids: list[str], + workspace_id: str, +) -> dict[str, float]: + """PPR walk from seed entities, return {entry_id: proximity_score}. + + Runs personalized_pagerank(damping=0.5, iterations=20) from seeds. + Maps KG entity IDs back to knowledge entry IDs via + self._projections.entry_kg_nodes reverse lookup. + """ +``` + +**Rationale:** Eliminates code duplication. Both paths use the same PPR +infrastructure with different seed strategies. The thread-boosted path +is upgraded from 1-hop BFS to PPR for free, improving its graph +proximity quality as well. + +The entity matching and PPR walk run in parallel with Qdrant vector +search via `asyncio.gather`, adding zero latency to the critical path +when the graph computation completes before the vector search. + +## Alternatives rejected + +1. **BFS with uniform hop-decay** — empirically worse than no expansion + per HippoRAG ablation. Hop-decay (score = 0.4^hops) assigns equal + scores to all entities at the same depth, ignoring graph topology. + A well-connected entity 2 hops away scores the same as a dead-end + entity 2 hops away. PPR naturally distinguishes them. + +2. **BFS with edge-confidence weighting** — partially captures topology + (`score = decay^hop * edge_confidence`) but still misses multi-path + reinforcement. An entity reachable through 3 independent paths should + score higher than one reachable through 1 path with the same hop + count. PPR handles this natively. + +3. 
**igraph or networkx dependency for PPR** — igraph's PRPACK solver + is optimal for large graphs (>50K edges). For FormicOS's graph sizes + (hundreds to low thousands), pure Python iterative PPR is fast enough + (<20ms). Adding a C-extension dependency for a ~30-line algorithm is + unnecessary. If graph sizes grow significantly, igraph can be adopted + later as a drop-in replacement. + +4. **Substring entity matching** — too crude for code-level entity names. + `"test"` would match `"test_runner"`, `"test_data"`, `"latest_version"`, + producing noisy seeds that degrade PPR quality. Embedding similarity + produces semantically relevant matches. + +5. **Keeping graph_proximity at 0.0 in standard path** — permanently + wastes 6% of the composite score. The weight was allocated in Wave + 59.5 specifically to reward graph-connected entries. Leaving it at + zero undermines the rebalancing rationale from ADR-044. + +## Consequences + +- KG adapter gains two methods: `personalized_pagerank()` (~30 lines) + and `match_entities_by_embedding()` (~25 lines). Both are in the + Adapters layer — pure computation + data access, no Surface imports. +- `knowledge_catalog.py` gains `_enrich_with_graph_scores()` (~30 lines) + and refactors `_search_thread_boosted()` to use it (net reduction of + ~20 lines from removing inline code). +- Standard retrieval now produces real graph proximity scores. Score + breakdown will show non-zero `graph_proximity` for the first time in + non-thread queries. +- No weight changes. The 0.06 weight from ADR-044 D4 / Wave 59.5 + remains unchanged. +- No new events. No new projection state. No Qdrant schema changes. +- Performance: entity match + PPR adds <50ms to the non-thread retrieval + path, running in parallel with Qdrant search. 
diff --git a/docs/decisions/051-dynamic-context-caps.md b/docs/decisions/051-dynamic-context-caps.md new file mode 100644 index 0000000..453ed0e --- /dev/null +++ b/docs/decisions/051-dynamic-context-caps.md @@ -0,0 +1,154 @@ +# ADR-051: Dynamic Queen Context Caps + +**Status:** Accepted +**Date:** 2026-03-25 +**Wave:** 68 (updated Wave 70.0 — project_plan; Wave 71.0 — 9-slot expansion) + +## Context + +The Queen still uses several hardcoded caps in `queen_runtime.py`: + +| Constant | Current value | +|----------|---------------| +| `_THREAD_TOKEN_BUDGET` | 6000 tokens | +| `_RECENT_WINDOW` | 10 messages | +| `_QUEEN_TOOL_OUTPUT_CAP` | 2000 chars | +| `_QUEEN_MAX_TOOL_HISTORY_CHARS` | 16000 chars | +| project context slice | 2000 chars | +| tool memory join cap | 6000 chars | +| cloud routing threshold | 2000 tokens | + +These defaults were tuned for relatively small context windows. They waste +capacity on large-context models and make it harder to budget newer context +sources such as tags, plans, session summaries, and deliberation frames. + +## Decision + +Replace hardcoded caps with proportional slot budgeting derived from the +model's `context_window`, while preserving current behavior as the floor. + +### No-regression rule + +Every slot uses: + +```python +slot_value = max(fallback_default, proportional_value) +``` + +Proportional scaling may grow budgets. It must never shrink them below today's +effective defaults. + +### Budget slots + +Budget is computed from: + +```python +available = max(0, context_window - output_reserve) +``` + +Where `output_reserve` comes from the existing `_queen_max_tokens()` logic. 
+ +| Slot | Fraction | Purpose | +|------|----------|---------| +| `system_prompt` | 15% | Caste recipe + Queen notes + system guidance | +| `memory_retrieval` | 13% | Institutional memory retrieval block | +| `project_context` | 8% | `project_context.md` | +| `project_plan` | 5% | Cross-thread project plan milestones (Wave 70.0) | +| `operating_procedures` | 5% | Workspace operating procedures (Wave 71.0) | +| `queen_journal` | 4% | Recent Queen working-memory journal (Wave 71.0) | +| `thread_context` | 13% | Thread state, tags, session context, plan, deliberation frame | +| `tool_memory` | 9% | Prior-turn tool results | +| `conversation_history` | 28% | Compacted Queen thread history | + +Fractions sum to 1.0. Wave 71.0 expanded from 7 to 9 slots by adding +`operating_procedures` and `queen_journal`, trading 9% across four +existing slots (memory_retrieval -2%, project_context -2%, +thread_context -2%, tool_memory -1%, conversation_history -2%). +No single slot lost more than 2 absolute points. 
+ +### Fallback floors + +These floors preserve current behavior: + +| Slot | Fallback | +|------|----------| +| `system_prompt` | 2000 | +| `memory_retrieval` | 1500 | +| `project_context` | 500 | +| `project_plan` | 400 | +| `operating_procedures` | 400 | +| `queen_journal` | 300 | +| `thread_context` | 1500 | +| `tool_memory` | 4000 | +| `conversation_history` | 6000 | + +### Source of truth + +- `context_window` comes from `ModelRecord.context_window` +- output reserve comes from `_queen_max_tokens()` +- when `context_window` is missing, invalid, or too small, return the fallback + budget unchanged + +## Implementation + +Add `src/formicos/surface/queen_budget.py` with: + +- `QueenContextBudget` +- `compute_queen_budget(context_window, output_reserve)` + +Thread the resulting budget through `queen_runtime.py` so these seams use +budget-backed values: + +- compacted conversation history +- recent-window derivation +- tool-memory cap +- project-context slice +- cloud routing threshold + +## Examples + +Assume `output_reserve = 4096` and the 9-slot fractions above +(history 28%, thread 13%, tool memory 9%): + +| Model | context_window | Available | History slot | Thread slot | Tool memory | +|-------|----------------|-----------|--------------|-------------|-------------| +| 8K model | 8192 | 4096 | 6000 floor | 1500 floor | 4000 floor | +| 32K model | 32768 | 28672 | 8028 | 3727 | 4000 floor | +| 200K model | 200000 | 195904 | 54853 | 25467 | 17631 | +| 4K model | 4096 | 0 | 6000 floor | 1500 floor | 4000 floor | + +This keeps small-model behavior stable while letting larger models breathe. 
+ +## Consequences + +### Positive + +- large-context models get proportionally richer Queen context +- small or unknown models behave like today +- budgets become explicit and inspectable +- new context sources fit into named slots instead of one-off constants + +### Negative + +- one more data structure threads through `queen_runtime.py` +- slot fractions may need tuning later + +### Neutral + +- `_CHARS_PER_TOKEN = 4` remains an approximation +- no new events or schema changes are required +- this is runtime-only behavior, not replayed state + +## Alternatives Considered + +**1. Flat output reserve percentage.** +Rejected because `_queen_max_tokens()` already computes the real output ceiling. + +**2. Per-model manual caps.** +Rejected because it adds too much operator surface for a problem that should +be deterministic. + +**3. Adaptive runtime reallocation.** +Rejected because it makes prompt assembly harder to reason about and verify. + +**4. Single context multiplier.** +Rejected because different context sources should grow at different rates. 
diff --git a/docs/decisions/INDEX.md b/docs/decisions/INDEX.md index 0751197..a32fed3 100644 --- a/docs/decisions/INDEX.md +++ b/docs/decisions/INDEX.md @@ -56,3 +56,6 @@ | [046](046-autonomy-levels.md) | Autonomy Levels for Self-Maintenance Colonies | Proposed | | [047](047-outcome-metrics-retention.md) | Colony Outcome Metrics Retention and Surfacing | Proposed | | [048](048-memory-entry-refined.md) | MemoryEntryRefined Event — In-Place Knowledge Curation | Proposed | +| [049](049-knowledge-hierarchy.md) | Knowledge Hierarchy — Materialized Paths on Projections | Proposed | +| [050](050-two-pass-retrieval.md) | Two-Pass Retrieval — Personalized PageRank for Graph Proximity | Proposed | +| [051](051-dynamic-context-caps.md) | Dynamic Queen Context Caps — Proportional Budget Allocation | Accepted | diff --git a/docs/waves/PROGRESS.md b/docs/waves/PROGRESS.md index 64d11af..e6dfaf7 100644 --- a/docs/waves/PROGRESS.md +++ b/docs/waves/PROGRESS.md @@ -1,12 +1,412 @@ # FormicOS v2 -- Wave Progress -**Last updated:** 2026-03-25 -- Wave 65.5 landed (Addons Made Real + Polish). 3640 tests green. Addon runtime context injection, real codebase index + git control, trigger wiring, Queen autonomous agency (36 tools), addon dev guide, polish pass (porcelain parsing, forbidden ops, cron DOW, trigger loop, schema validation). +**Last updated:** 2026-03-27 -- Wave 73 landed and integrated. Developer Bridge: MCP composition layer (27 tools, 9 resources, 6 prompts), init-mcp CLI, frontend governance truth, workspace creation, addon polish. 3911 tests green. 43 Queen tools. 9-slot Queen context budget. -**Note:** Detailed per-wave docs are on disk through Wave 65. Consolidated numeric metrics further down this file are still historical snapshots from earlier milestones until a dedicated metrics refresh is done. +**Note:** Detailed per-wave docs are on disk through Wave 73. 
Consolidated numeric metrics further down this file are still historical snapshots from earlier milestones until a dedicated metrics refresh is done. --- -## Current: Wave 65.5 -- Addon Polish Pass +## Current: Wave 73 -- The Developer Bridge + +**Status:** Landed and integrated (3 teams) +**Theme:** Make FormicOS usable from Claude Code. MCP composition layer +(prompts that compose existing tools into developer workflows), prose +resources, init-mcp CLI, frontend truth fixes, workspace creation UI. +No new event types — event count stays at 69. + +### Team A — MCP Prompts + Resources + Addon Tools + init-mcp: LANDED. +1. **4 MCP prompts:** `morning-status`, `delegate-task`, + `review-overnight-work`, `knowledge-for-context` — read-only, compose + existing operational state modules. +2. **2 MCP tools:** `log_finding` (creates knowledge entries), + `handoff_to_formicos` (creates thread + spawns colony with developer + context) — mutating, use `@mcp.tool(annotations=_MUT)`. +3. **3 MCP resources:** `formicos://plan` (global), + `formicos://procedures/{workspace_id}`, + `formicos://journal/{workspace_id}` — return prose markdown. +4. **3 addon tools:** `addon_status` (RO), `toggle_addon` (MUT), + `trigger_addon` (MUT). +5. **init-mcp CLI:** `python -m formicos init-mcp` generates `.mcp.json` + (type: http) and `.formicos/DEVELOPER_QUICKSTART.md`. +6. **Runtime wiring:** `addon_registrations` exposed on runtime for MCP + server access. + +### Team B — Frontend Truth + Workspace Creation: LANDED. +7. **Colony creator governance:** Replaced hardcoded budget=2.0 and + maxRounds=10 with governance-configured defaults via + `_applyGovernanceDefaults()`. Removed fabricated tier cost rates. +8. **Template editor governance:** Same pattern — replaced hardcoded 1.0/5 + with governance defaults. Governance passed through playbook-view.ts. +9. **Workspace creation:** `POST /api/v1/workspaces` REST endpoint. Frontend + sidebar button with inline form. 
Uses fetch (not WS) — snapshot + auto-refreshes via WorkspaceCreated event. +10. **Addon config type coercion:** boolean/integer string→native coercion + in `put_addon_config()`. + +### Team C — Settings Protocol Detail + Addon Polish + Documentation: LANDED. +11. **Protocol detail:** Verified Wave 72.5 protocol summary already complete. +12. **Addon search/filter:** Text filter on sidebar addon list. +13. **Addon health summary:** Aggregate stats card (total, tools, calls, + errors) at top of detail panel. +14. **DEVELOPER_BRIDGE.md:** 5-minute developer onboarding guide. +15. **CLAUDE.md refresh:** Updated MCP counts, key paths, commands. + +### Seam integration +- `MCP_TOOL_NAMES` tuple updated to include all 27 tools (was 19). +- `view_state.py` fallback updated from 19 to 27. +- `formicos://plan` resource URI corrected (global, no workspace_id). +- DEVELOPER_BRIDGE.md jargon removed from Architecture section. + +**Post-wave state:** 27 MCP tools, 9 resources, 6 prompts, 4 CLI +subcommands, 69 events, 43 Queen tools. REST workspace creation. + +--- + +## Wave 72.5 -- Topbar Simplification + Addon Lifecycle (landed) + +**Status:** Landed (3 teams) +**Theme:** Clean topbar, interactive addon management, protocol detail migration. + +- Removed protocol badges and connection indicator from topbar; added + clickable budget popover. +- Fixed addon trigger handler calling convention; added interactive "Try It" + tool testing forms and inline config editing. +- Migrated protocol detail (tool counts, event counts, endpoints) from + topbar badges to Settings Integrations section. +- Addon lifecycle: soft disable toggle, hello-world scaffold hiding. + +--- + +## Wave 72 -- Autonomous Learning + Workflow Patterns (landed) + +**Status:** Landed (3 teams) +**Theme:** System improves its own knowledge, learns from patterns, continues +work autonomously, stays legible to the operator. 
+ +- Knowledge review lifecycle: scanning, queuing, operator confirmation/ + invalidation of problematic entries. +- Autonomous continuation: Queen proposes and executes low-risk work across + sessions and during idle time. +- Workflow learning: deterministic pattern recognition for reusable templates + (`extract_workflow_patterns`) and operator procedure suggestions + (`detect_operator_patterns`). +- Product polish: trigger fixes, active Knowledge tab, writable Settings, + addon disable, model filtering. + +--- + +## Wave 71 -- Operational Coherence (landed) + +**Status:** 71.0 + 71.5 landed and integrated +**Theme:** Turn operational intelligence into a durable file-backed substrate +(71.0) and surface it in a dedicated Mission Control tab (71.5). No new event +types — event count stays at 69. + +Split into two dispatches: 71.0 (backend operational state layer) and 71.5 +(frontend Operations tab consuming those contracts). + +### Wave 71.0 — Operational Coherence Substrate (9 tracks, 3 teams) + +**Team A — Queen Working Memory: LANDED.** +1. **Track 1 -- Queen context budget expansion:** 7-slot → 9-slot + `QueenContextBudget` frozen dataclass. New slots: `operating_procedures` + (5%), `queen_journal` (4%), carved from `thread_context` (15→13%) and + `memory_retrieval` (15→13%). Remaining slots rebalanced gently (no slot + loses >2 points). (`queen_budget.py`) +2. **Track 2 -- Operating procedures injection:** File-backed procedures at + `.formicos/operations/{workspace_id}/procedures.md`. Structured rule parser + extracts rules from markdown. Injected into Queen context after briefing, + before deliberation. `GET/PUT /api/v1/workspaces/{id}/operating-procedures` + endpoints. (`queen_runtime.py`, `routes/api.py`) +3. **Track 3 -- Queen journal injection:** File-backed journal at + `.formicos/operations/{workspace_id}/journal.md`. Session summary writes + appended to journal. Injected into Queen context as working-memory block. 
+ `GET /api/v1/workspaces/{id}/queen-journal` endpoint. + (`queen_runtime.py`, `routes/api.py`) + +**Team B — Durable Action Queue: LANDED.** +4. **Track 4 -- Action queue ledger:** Generic typed action envelope with + `kind` as semantic authority. Statuses: `pending_review`, `approved`, + `rejected`, `executed`, `self_rejected`, `failed`. JSONL-backed at + `.formicos/operations/{workspace_id}/action_queue.jsonl`. Size management + via `compact_action_log()` at 1000 lines. (`routes/api.py`) +5. **Track 5 -- Approve/reject endpoints:** + `POST .../actions/{id}/approve` and `POST .../actions/{id}/reject` + (with optional reason). Dispatcher wiring for approved actions. + (`routes/api.py`) +6. **Track 6 -- 30-minute operational sweep:** Second asyncio task alongside + 24-hour consolidation loop. Processes approved actions, queues medium/high- + risk work. Configurable via `FORMICOS_OPS_SWEEP_INTERVAL_S` env var. + (`app.py`) + +**Team C — Operations Coordinator: LANDED.** +7. **Track 7 -- Thread plan helper:** Shared helper extracts structured thread + context for the coordinator. Budget-aware truncation via + `[:budget.thread_context * 2]`. (`queen_runtime.py`) +8. **Track 8 -- Operations coordinator:** Synthesizes project plan, thread + plans, session summaries, outcomes, and action queue into + `continuation_candidates`, `sync_issues`, and operator-idle signals. + `GET /api/v1/workspaces/{id}/operations/summary` endpoint. + (`routes/api.py`) +9. **Track 9 -- Queen continuity cue:** Coordinator output injected as + structured context for the Queen to reason about next steps. + (`queen_runtime.py`) + +Integration fixes: Team A operations-view.ts rewired to mount real Team C +leaf components instead of inline previews. Team C `get_operations_summary` +fixed bare `projections` reference → `runtime.projections if runtime else None`. + +21 + 17 + 24 = 62 new tests. Queen context budget: 7 → 9 slots. 
+ +### Wave 71.5 — Mission Control Surface (3 teams) + +**Team A — Operations Shell: LANDED.** +- `fc-operations-view` Lit component: header with journal-count badge, + summary row (journal entries, procedures status, pending actions), two-column + layout mounting Team B and Team C leaf components. 8th nav tab added to + `formicos-app.ts` (ViewId union, NAV array, grid-template-columns). + (`operations-view.ts`, `formicos-app.ts`) + +**Team B — Action Inbox: LANDED.** +- `fc-operations-inbox` Lit component: kind/status-driven rendering with + sections for pending review, recent automatic, deferred/self-rejected. + Approve (one-click) and reject (with optional reason) workflow. Blast-radius + visual language following proposal-card pattern. Extensible for future action + kinds without inbox redesign. + (`operations-inbox.ts`) + +**Team C — Operational Memory Surfaces: LANDED.** +- `fc-queen-journal-panel`: operational log view with load-more, empty state. +- `fc-operating-procedures-editor`: inline text editing with PUT save, success/ + failure feedback, empty template for first-time users. +- `fc-operations-summary-card`: compact at-a-glance orientation — pending + review count, active milestones, operator idle/active state, top continuation + candidate, top sync issue, recent progress snippet. + (`queen-journal-panel.ts`, `operating-procedures-editor.ts`, + `operations-summary-card.ts`) + +3870 tests passing. CI: ruff clean, imports clean. + +### Post-integration audit + +Comprehensive UI/UX seam audit completed. Reference doc at +`docs/waves/wave_72_polish_reference.md` catalogs 9 items across model +management, settings editability, document ingestion, addon triggers, and +navigation. 
Key findings: addon manual trigger bug (docs-index and +codebase-index both miswired to `indexer.py::incremental_reindex` instead +of `search.py::handle_reindex`), model status type contract divergence, +and Settings page structural inversion (too much read-only inventory, +not enough writable controls). + +--- + +## Previous: Wave 70 -- Operational Flexibility + +**Status:** 70.0 + 70.5 landed and integrated +**Theme:** Backend contracts (70.0) + operator trust surface (70.5) for MCP +access, project-level intelligence, and earned autonomy. + +Split into two dispatches: 70.0 (backend/control-plane contracts) and 70.5 +(frontend rendering consuming those contracts). No new event types — event +count stays at 69. + +### Wave 70.0 — Backend Substrate (9 tracks, 3 teams) + +**Team A — MCP Bridge Substrate: LANDED.** +1. **Track 1 -- MCP bridge addon core:** New `addons/mcp-bridge/` addon with + FastMCP `>=3.0,<4.0` Client. Bridge registers as addon via existing + `addon_loader.py` pipeline. Generic capability protocol for health exposure + (no addon-name branching). `call_mcp_tool` Queen tool for remote tool + invocation. (`addons/mcp_bridge/`) +2. **Track 2 -- Dynamic MCP tool discovery:** `discover_mcp_tools` Queen tool + queries connected MCP servers and returns available tools with schemas. + (`queen_tools.py`) +3. **Track 3 -- Bridge health exposure:** Generic addon health via + `AddonRegistration.health_status` property. Bridge health visible through + `/api/v1/addons` endpoint without hardcoded addon-name checks. + (`addon_loader.py`, `routes/api.py`) + +**Team B — Project Intelligence Substrate: LANDED.** +4. **Track 4 -- Project plan helper:** `project_plan.py` shared parser/helper + — single source of truth for resolving plan path, parsing milestones, + rendering compact Queen context text, updating timestamps. + (`surface/project_plan.py`) +5. 
**Track 5 -- Milestone tools + endpoint + budget:** `propose_project_milestone` + and `complete_project_milestone` Queen tools. `GET /api/v1/project-plan` + returns structured JSON. Dedicated 7th Queen context budget slot + (`project_plan` at 5%, 400-token fallback, carved from `thread_context` + which went from 20% to 15%). ADR-051 updated. + (`queen_tools.py`, `queen_budget.py`, `routes/api.py`) +6. **Track 6 -- Project plan injection:** Parsed project plan injected into + Queen context as its own system message block, capped by `project_plan` + budget, labeled `# Project Plan (cross-thread)`. Separate from + `project_context.md` and thread plans. + (`queen_runtime.py`) + +**Team C — Autonomy Trust Substrate: LANDED.** +7. **Track 7 -- Daily autonomy budget:** `check_autonomy_budget` Queen tool + surfaces daily budget spend, remaining capacity, and recent autonomous + actions. (`queen_tools.py`) +8. **Track 8 -- Blast radius estimator:** `BlastRadiusEstimate` dataclass with + 6 heuristic factors (task length, caste risk, round count, strategy, + keywords coder-only, outcome history). Thresholds: >=0.6 escalate, + >=0.3 notify, <0.3 proceed. Dispatch gate in `evaluate_and_dispatch()`. + Proposal metadata carries blast-radius truth. + (`self_maintenance.py`, `queen_tools.py`) +9. **Track 9 -- Autonomy scoring + status endpoint:** `AutonomyScore` with + 4 weighted components (success_rate, volume, cost_efficiency, + operator_trust). `compute_autonomy_score()` pure function. + `GET /api/v1/workspaces/{id}/autonomy-status` returns structured trust + data. (`self_maintenance.py`, `routes/api.py`) + +**Integration fix:** Blast radius keyword weight set to 0.0 for non-coder +castes — researcher investigating "authentication" is not the same as +modifying it. + +### Wave 70.5 — Operator Surface (3 teams) + +**Team A — MCP Settings UX: LANDED.** +- `fc-mcp-servers-card` Lit component: server list with health dots, add/remove + forms, three empty states. 
Reads from `/api/v1/addons`, writes through + `PUT /api/v1/addons/mcp-bridge/config`. Self-contained, no store dependency. + (`mcp-servers-card.ts`) + +**Team B — Project Visibility: LANDED.** +- `fc-project-plan-card` Lit component: plan goal, progress bar, milestone + checklist with status chips, thread links, completion dates. Mounted in + `queen-overview.ts` after budget panel. Data from `GET /api/v1/project-plan` + only — no frontend markdown parsing. + (`project-plan-card.ts`, `queen-overview.ts`) + +**Team C — Trust Integration: LANDED.** +- `fc-autonomy-card` Lit component: grade badge (A-F), trust score, daily + budget bar, component breakdown, recent autonomous actions table. Mounted + in `settings-view.ts`. + (`autonomy-card.ts`, `settings-view.ts`) +- Proposal card blast-radius rendering: score, level pill, recommendation pill, + factors list. Color-coded border. Additive only — unchanged when absent. + (`proposal-card.ts`, `queen-chat.ts`) +- `system-overview.ts` tool count updated to 43. +- `BlastRadiusData` and `AutonomyStatusData` interfaces added to `types.ts`. + +**Integration fix:** Mounted Team A's `fc-mcp-servers-card` in +`settings-view.ts` (Card G, between Addons and Autonomy Trust). + +Queen tools: 38 → 43 (+discover_mcp_tools, +call_mcp_tool, ++propose_project_milestone, +complete_project_milestone, ++check_autonomy_budget). Queen context budget: 6 → 7 slots. +3808 tests passing. CI: ruff clean, imports clean. + +--- + +## Earlier: Wave 67 -- The Knowledge Architecture + +**Status:** 67.0 landed + polish pass complete, 67.5 landed +**Theme:** Give knowledge structure, integrity, and auditability. + +Split into two dispatches: 67.0 (foundation) lands first, 67.5 (surfaces) +builds on it. No new event types — all changes are projection-level +enrichments. Event count stays at 69. + +### Wave 67.0 — Foundation (3 tracks, 2 teams) + +1. 
**Track 1 -- Knowledge Hierarchy (Team A): LANDED.** Materialized paths + on projections (`hierarchy_path`, `parent_id`). Qdrant payload gains + keyword-indexed `hierarchy_path` for filtered branch search. Branch + confidence aggregation caps at ESS 150, filters by workspace. + `GET /api/v1/workspaces/{id}/knowledge-tree` endpoint. Knowledge + browser gains tree subview (collapsible branches, confidence bars, + click-to-filter). LLM-only offline bootstrap script (zero new deps). + 12 new tests. ADR-049 proposed. + (`projections.py`, `memory_store.py`, `hierarchy.py`, `routes/api.py`, + `knowledge-browser.ts`, `bootstrap_hierarchy.py`) + - **Polish:** Fixed `compute_branch_confidence` negative Beta bug (aggregated + beta could go < 0 when children have conf_beta < prior 5.0, producing + invalid mean > 1.0). Added floor clamp at 1.0. Added Qdrant keyword + index for `hierarchy_path` in `vector_qdrant.py`. +1 test. +2. **Track 2 -- Domain Normalization (Team B): LANDED.** Existing domain + tags from up to 10 similar entries injected into extraction prompt as + guidance ("use one of these if applicable, do not create synonyms"). + Caps at 20 domains. Fires on all three prompt paths. Call site verified: + `colony_manager.py:2069` populates via `knowledge_catalog.search()`. + 5 new tests. (`memory_extractor.py`) +3. **Track 3 -- Outcome-Confidence Reinforcement (Team B): LANDED.** + Geometric credit 0.7^rank (Position-Based Model) replaces flat delta. + Rank-0 entry gets full credit, rank-5 gets ~17%. ESS capped at 150 via + `rescale_preserving_mean()` in Engine layer — applied after mastery + restoration, before event emission. Preserves posterior mean. Co-occurrence + reinforcement unchanged. Auto-promotion verified. 11 new tests. + (`colony_manager.py`, `scoring_math.py`) + +### Wave 67.5 — Surfaces (3 tracks, 3 coders) + +4. 
**Track 4 -- Two-Pass Retrieval (Team B):** Replace hardcoded 0.0 + graph proximity in standard retrieval with iterative Personalized + PageRank (damping=0.5, pure Python, no igraph dep). Entity seeding + via embedding similarity. Shared `_enrich_with_graph_scores()` method + refactors thread-boosted path. ADR-050 proposed. + (`knowledge_graph.py`, `knowledge_catalog.py`) +5. **Track 5 -- Provenance Chains (Team A):** Append-only + `provenance_chain` list on projection entries from 6 event handlers. + REST endpoint. Provenance timeline in entry detail UI. Score breakdown + bar visible by default on search results. **Contract change blocker:** + `ProvenanceChainItem` interface needs operator approval. + (`projections.py`, `knowledge_api.py`, `types.ts`, `knowledge-browser.ts`) +6. **Track 6 -- Documentation Indexer Addon (Team C):** New + `addons/docs-index/` addon. Chunks .md/.rst/.txt/.html on section + headers. Registers `semantic_search_docs` and `reindex_docs` Queen + tools. Separate `docs_index` Qdrant collection. Follows codebase-index + addon pattern and keeps raw corpus chunks out of `memory_entries`. + (`addons/docs-index/`, `addons/docs_index/`) + +### Blockers + +- **ADR-049:** Knowledge Hierarchy (proposed, awaiting approval) +- **ADR-050:** Two-Pass Retrieval with PPR (proposed, awaiting approval) +- **Contract change:** ProvenanceChainItem interface (67.5 only) + +No new dependencies — UMAP+HDBSCAN rejected in favor of LLM-only +bootstrap (entries already carry domain tags, LLM sub-clusters within +domains, ~15 calls for 300 entries). + +--- + +## Previous: Wave 66 -- Addons as First-Class Software + +Makes addons visible, configurable, and extensible. Six tracks across +three teams. No new events — reuses existing WorkspaceConfigChanged. + +1. **Track 1 -- Addons Tab (Team 1):** `GET /api/v1/addons` returns installed + addons with health summaries (tool call counts, handler errors, trigger + schedules). 
`POST /api/v1/addons/{name}/trigger` manually fires trigger + handlers. `AddonRegistration` tracks health counters updated by tool/handler + wrappers. (`routes/api.py`, `addon_loader.py`) +2. **Track 2 -- Addon Config Surface:** `AddonConfigParam` model declares + configurable parameters in addon manifests (key, type, default, label, + options). `GET /api/v1/addons/{name}/config?workspace_id=X` returns config + schema + current values. `PUT /api/v1/addons/{name}/config` persists values + via WorkspaceConfigChanged events at `addon.{name}.{key}` dimension. All + three shipped addons declare config blocks: git_auto_stage (boolean), + chunk_size/skip_dirs (integer/string), disabled_rules (string). + (`addon_loader.py`, `routes/api.py`, addon manifests) +3. **Track 3 -- Addon Panels + Routes:** `register_addon()` now resolves + `routes` and `panels` manifest fields (previously warned as unimplemented). + Catch-all route at `/addons/{name}/{path}` mounts addon HTTP endpoints. + `fc-addon-panel` Lit component renders status_card, table, and log display + types with 60s auto-refresh. Panel injection zones in knowledge-browser.ts + and workspace-browser.ts. Status endpoints: codebase-index (chunk count from + vector store), git-control (branch + modified files). + (`addon_loader.py`, `app.py`, `addon-panel.ts`, status endpoints, manifests) +4. **S2 -- Knowledge ROI Rule Fix:** Extended `_rule_knowledge_roi` to track + `entries_accessed` alongside `entries_extracted`. New insight when 3+ + successful colonies access zero knowledge and score below 0.7 quality. + (`addons/proactive_intelligence/rules.py`) +5. **S4 -- CLAUDE.md Weight Update:** Updated composite retrieval formula to + 7-signal Wave 59.5 values including graph_proximity. + +3654 tests passing (+14 net new). CI: ruff clean, imports clean. + +## Previous: Wave 65.5 -- Addon Polish Pass Bug fixes and test hardening for the addon system shipped in Wave 65. No new features, events, or tools. 
Six fixes: diff --git a/docs/waves/wave_66/wave_66_plan.md b/docs/waves/wave_66/wave_66_plan.md new file mode 100644 index 0000000..c06deff --- /dev/null +++ b/docs/waves/wave_66/wave_66_plan.md @@ -0,0 +1,629 @@ +# Wave 66: Addons as First-Class Software + +**Status:** Planning +**Predecessor:** Wave 65.5 (Addon System Polish) +**Theme:** Make addons visible, configurable, and extensible to the operator. + +Wave 65 made every addon functional -- real git operations, real vector +search, real proactive intelligence. Wave 66 makes them visible: an Addons +tab with health monitoring, a config surface backed by existing events, and +a generic panel renderer that lets addons contribute UI to existing tabs. + +## Contract Change Blocker + +**Track 1 requires operator approval before work begins.** The +`OperatorStateSnapshot` interface must gain an `addons` field. This is a +contract file change affecting: + +- `docs/contracts/types.ts:436-447` -- OperatorStateSnapshot interface +- `frontend/src/types.ts:650-661` -- mirrored OperatorStateSnapshot + +Both files are under `docs/contracts/` governance. Approve before dispatch. + +## Pre-existing State + +**Addon infrastructure (Wave 64-65):** Manifest loader discovers +`addons/*/addon.yaml`, resolves handlers via `_resolve_handler()`, +registers tools into Queen's dispatcher, event handlers into +`service_router`, triggers into `TriggerDispatcher`. Runtime context +injection passes vector_port, event_store, projections, settings, and +workspace_root_fn to handlers that accept `runtime_context` kwargs. + +**Addon manifests declare three unimplemented fields:** +`addon_loader.py:267-278` logs warnings for `panels`, `routes`, and +`templates` -- "registration is not yet implemented." No addon currently +declares any of these fields. + +**AddonManifest has no `config` field:** `addon_loader.py:52-64` supports +name, version, description, author, tools, handlers, panels, templates, +routes, triggers. No config schema. 
+ +**No addon UI:** Zero addon references in `frontend/src/components/`. +`AddonLoaded`, `AddonUnloaded`, `ServiceTriggerFired` events are defined +in the type system but the frontend store ignores them. + +**No addon REST endpoints:** No `/api/v1/addons` routes exist. + +**Snapshot has no addon data:** `build_snapshot()` (`view_state.py:21-42`) +returns 10 fields. No `addons` field. `app.state.addon_manifests` is set +at `app.py:780` but `_addon_registrations` is a local variable +(`app.py:738`) never stored on `app.state`. T1 must add +`app.state.addon_registrations = _addon_registrations` after line 780 +and pass it to the snapshot builder. + +**WebSocket handler:** `ws_handler.py:332-349` calls `build_snapshot()` +and sends the result. No addon data flows to clients. + +**TriggerDispatcher.fire_manual():** Returns a descriptor dict but does +not execute the handler (`trigger_dispatch.py:140-149`). Execution logic +exists in `queen_tools.py:_trigger_addon()`. + +**MemoryEntryMerged Qdrant bug (confirmed):** `runtime.py:535-536` syncs +only `target_id` after a merge. The source entry is marked `rejected` in +projections (`projections.py:1820-1833`) but its Qdrant vector is never +deleted. `memory_store.py:82-99` has the correct deletion path for +rejected entries -- it just never receives the source_id. Stale embeddings +accumulate daily. + +**Knowledge ROI rule gap (confirmed):** `_rule_knowledge_roi()` in +`rules.py:691-727` only checks `entries_extracted`. `entries_accessed` is +computed on `ColonyOutcome` (`projections.py:1057-1065`) but unused by +any proactive intelligence rule. + +**CLAUDE.md composite weights stale:** Documents Wave 34 formula +(`0.15*freshness`, `0.05*cooccurrence`). Actual Wave 59.5 weights in +`knowledge_constants.py:33-41`: freshness=0.10, cooccurrence=0.04, +graph_proximity=0.06. + +--- + +## Side Task S3: MemoryEntryMerged Qdrant Source Cleanup (Merge First) + +**Correctness bug. 
Merge independently before tracks start.** + +In `src/formicos/surface/runtime.py:535-536`, the `MemoryEntryMerged` +handler only syncs `target_id`: + +```python +elif etype == "MemoryEntryMerged": + sync_id = str(getattr(event_with_seq, "target_id", "")) +``` + +The source entry is marked `rejected` by the projection handler +(`projections.py:1829`) but `sync_entry()` is never called for it. Since +`sync_entry()` (`memory_store.py:89-91`) correctly calls +`vector_port.delete()` for rejected entries, the fix is to sync both IDs: + +```python +elif etype == "MemoryEntryMerged": + for _attr in ("target_id", "source_id"): + _eid = str(getattr(event_with_seq, _attr, "")) + if _eid: + await self.memory_store.sync_entry( + _eid, self.projections.memory_entries, + ) + continue +``` + +**File:** `src/formicos/surface/runtime.py` -- lines 535-542 only. + +**Test:** 1 new -- merge event triggers delete for source entry vector. + +**Do not touch:** Any other file. + +--- + +## Track 1: Addons Tab + Health Monitoring + Manual Trigger UI + +### Problem + +Addons are invisible. The operator cannot see what addons are installed, +whether they're healthy, when their tools were last used, or fire manual +triggers without asking the Queen. The 6-tab nav (Queen, Knowledge, +Workspace, Playbook, Models, Settings) has no addon surface. + +### Fix + +**1. Add `addons` field to OperatorStateSnapshot.** + +After operator approval, add to both contract files and the frontend +mirror: + +```typescript +// docs/contracts/types.ts:436-447 and frontend/src/types.ts:650-661 +export interface OperatorStateSnapshot { + // ... existing 10 fields ... 
+ addons: AddonSummary[]; +} + +export interface AddonSummary { + name: string; + version: string; + description: string; + tools: { name: string; description: string; callCount: number }[]; + handlers: { event: string; lastFired: string | null; errorCount: number }[]; + triggers: { type: string; schedule: string; lastFired: string | null }[]; + status: 'healthy' | 'degraded' | 'error'; + lastError: string | null; +} +``` + +**2. Extend `build_snapshot()` to include addon data.** + +Add `addon_registrations` parameter to `build_snapshot()` +(`view_state.py:21-29`). Build `AddonSummary` entries from the +registration objects. Health status derived from handler error counts +(0 = healthy, 1-2 = degraded, 3+ = error). + +**3. Pass addon registrations from `ws_handler.py`.** + +`send_state()` (`ws_handler.py:332-349`) must pass addon registrations +to `build_snapshot()`. Store registrations on `WebSocketManager` the +same way `_projections` and `_settings` are stored. + +**4. Add `AddonHealthSnapshot` tracking to `AddonRegistration`.** + +Extend `AddonRegistration` (`addon_loader.py:142-150`) with runtime +counters: `tool_call_counts: dict[str, int]`, `last_tool_call: str | None`, +`handler_error_count: int`, `last_handler_fire: str | None`, +`last_error: str | None`. Increment counters in the tool/event wrapper +closures. + +**5. REST endpoints.** + +Add to `routes/api.py` (after line 1391): + +- `GET /api/v1/addons` -- list installed addons with health summary. + Data from `request.app.state.addon_registrations`. +- `POST /api/v1/addons/{name}/trigger` -- resolve handler via + `_resolve_handler()`, execute with `runtime_context`, return result. + Not just `fire_manual()` descriptor -- actual execution. Pattern from + `app.py:808-828` cron loop. + +**6. New `fc-addons-view.ts` component.** + +Two-column layout: left sidebar lists addons with status dots +(green/amber/red), right panel shows selected addon detail. 
Detail
+sections: description, version, tools table (name, description, call
+count), handlers table (event, last fired, errors), triggers table
+(type, schedule, last fired, "Trigger Now" button for manual type).
+
+**7. Add Addons tab to nav.**
+
+In `formicos-app.ts:26-35`:
+- Add `'addons'` to `ViewId` union
+- Add `{ id: 'addons', label: 'Addons', icon: '\u2699' }` to NAV array
+  (position 4, after Workspace)
+- Update `grid-template-columns` at line 60 from `repeat(6, ...)` to
+  `repeat(7, ...)`
+- Add `'addons': () => this._renderAddons()` to `_viewRegistry`
+  (line 512-520)
+- Add `_renderAddons()` method that renders `<fc-addons-view>`
+- Add `import './addons-view.js'` to imports
+
+**8. Update store.**
+
+In `store.ts:57-71`: add `addons: AddonSummary[]` to `StoreState`. In
+`applySnapshot()` (line 126-143): map `snap.addons` to state. In
+`emptyState()` (line 75): default `addons: []`.
+
+### Files
+
+- `docs/contracts/types.ts` -- add AddonSummary + snapshot field (~15 lines)
+- `frontend/src/types.ts` -- mirror contract change (~15 lines)
+- `frontend/src/components/addons-view.ts` -- **new** (~250 lines)
+- `frontend/src/components/formicos-app.ts` -- nav + view registry (~15 lines)
+- `frontend/src/state/store.ts` -- state + snapshot mapping (~10 lines)
+- `src/formicos/surface/view_state.py` -- build_snapshot addon field (~30 lines)
+- `src/formicos/surface/ws_handler.py` -- pass registrations (~5 lines)
+- `src/formicos/surface/addon_loader.py` -- AddonRegistration counters (~20 lines)
+- `src/formicos/surface/routes/api.py` -- 2 endpoints (~50 lines)
+- `src/formicos/surface/app.py` -- add `app.state.addon_registrations`
+  after line 780 (currently only manifests are stored, not registrations)
+  (~3 lines)
+
+### Tests
+
+4 new:
+- Addon health snapshot includes installed addons
+- GET /api/v1/addons returns addon list with health
+- POST /api/v1/addons/{name}/trigger executes handler
+- Tool wrapper increments call count on AddonRegistration
+
+### Acceptance
Gates + +- Addons tab visible in nav with 7 columns +- Clicking an addon shows detail with tools, handlers, triggers +- "Trigger Now" button fires manual trigger and shows result +- WebSocket snapshot includes addon data on connect +- All existing tests pass + +### Owner + +Team 1. Merge first among tracks (unblocks T2). + +### Do Not Touch + +`queen_tools.py`, `queen_runtime.py`, `knowledge_catalog.py`, any +`core/` or `engine/` files, addon manifest YAML files. + +--- + +## Track 2: Addon Config Surface + +### Problem + +Addons have no configurable parameters. The operator cannot toggle +git_auto_stage, change the reindex schedule, or disable specific +proactive rules without editing YAML files. `AddonManifest` has no +`config` field. + +### Fix + +**1. Add `AddonConfigParam` model and `config` field to manifest.** + +In `addon_loader.py` (after `AddonTriggerSpec`, line 50): + +```python +class AddonConfigParam(BaseModel): + """A configurable parameter declared by an addon.""" + key: str + type: Literal["boolean", "string", "integer", "cron", "select"] = "string" + default: Any = None + label: str = "" + options: list[str] = Field(default_factory=list) # for select type +``` + +Add `config: list[AddonConfigParam] = Field(default_factory=list)` to +`AddonManifest` (line 64). + +**2. REST endpoints for config.** + +Add to `routes/api.py`: + +- `GET /api/v1/addons/{name}/config` -- returns config schema from + manifest + current values from workspace config. Current values stored + under `addon.{name}.{key}` dimension in `WorkspaceConfigChanged` events. + Falls back to manifest defaults. +- `PUT /api/v1/addons/{name}/config` -- accepts `{key: value}` dict, + emits `WorkspaceConfigChanged` event for each key using + `field="addon.{name}.{key}"`, `new_value=value`. Existing pattern from + `routes/api.py` config-overrides endpoint. + +**3. Config form in addons-view.ts.** + +Add a `_renderConfigPanel()` method to `addons-view.ts` (Team 1's +component). 
Renders controls based on config schema: toggle for boolean,
+text input for string/cron, number input for integer, dropdown for select.
+Save button PUTs to `/api/v1/addons/{name}/config`.
+
+**4. Update addon manifests with config declarations.**
+
+`addons/git-control/addon.yaml`:
+```yaml
+config:
+  - key: git_auto_stage
+    type: boolean
+    default: true
+    label: "Auto-stage modified files after colony completion"
+```
+
+`addons/codebase-index/addon.yaml`:
+```yaml
+config:
+  - key: chunk_size
+    type: integer
+    default: 500
+    label: "Chunk size (characters) for code splitting"
+  - key: skip_dirs
+    type: string
+    default: "__pycache__,node_modules,.git,.venv"
+    label: "Directories to skip (comma-separated)"
+```
+
+`addons/proactive-intelligence/addon.yaml`:
+```yaml
+config:
+  - key: disabled_rules
+    type: string
+    default: ""
+    label: "Rules to disable (comma-separated names)"
+```
+
+### Files
+
+- `src/formicos/surface/addon_loader.py` -- AddonConfigParam + field (~15 lines)
+- `src/formicos/surface/routes/api.py` -- 2 config endpoints (~40 lines)
+- `frontend/src/components/addons-view.ts` -- config form section (~60 lines)
+- `addons/git-control/addon.yaml` -- config block (~5 lines)
+- `addons/codebase-index/addon.yaml` -- config block (~8 lines)
+- `addons/proactive-intelligence/addon.yaml` -- config block (~5 lines)
+
+### Overlap Rule
+
+Team 2 adds a `_renderConfigPanel(addon: AddonSummary)` method to
+`addons-view.ts`. Team 1 owns the component structure and calls
+`this._renderConfigPanel(this._selectedAddon)` from the detail panel.
+Team 2 must reread the component after Team 1 merges to confirm the
+integration point. If T1 changes the selected addon property name or
+detail panel structure, Team 2 adapts `_renderConfigPanel` to match --
+the method signature (receives the selected addon object, returns a
+`TemplateResult`) is the stable contract, not the call site.
+ +### Tests + +3 new: +- GET /api/v1/addons/{name}/config returns schema with defaults +- PUT /api/v1/addons/{name}/config emits WorkspaceConfigChanged +- AddonManifest parses config field from YAML + +### Acceptance Gates + +- Config panel renders controls matching manifest schema +- Changing a config value persists via WorkspaceConfigChanged event +- Config values survive replay (event-sourced, no shadow state) +- Addon handlers can read config from `runtime_context["settings"]` + +### Owner + +Team 2. Depends on T1 for component structure. Merge after T1. + +### Do Not Touch + +`queen_tools.py`, `queen_runtime.py`, `core/events.py`, any `core/` or +`engine/` files. Do not add new event types. + +--- + +## Track 3: Addon Panels + Routes + +### Problem + +The manifest declares `panels` and `routes` fields but +`addon_loader.py:267-278` explicitly warns they are unimplemented. Addons +cannot contribute visible UI to existing tabs or register HTTP endpoints. +The codebase-index addon has no way to show "last indexed: 3h ago, 1,247 +chunks" in the Knowledge tab. The git-control addon has no way to show +branch status in the Workspace tab. + +### Fix + +**1. Wire `routes` field in addon_loader.py.** + +In `register_addon()` (`addon_loader.py:152-280`), after the existing +handler registration block: iterate `manifest.routes`, resolve each +handler via `_resolve_handler()`, store resolved routes on +`AddonRegistration` as `registered_routes: list[dict]`. Each dict: +`{"path": str, "handler": Callable, "addon_name": str}`. + +**2. Mount addon routes in app.py.** + +In `app.py` (after line 784, before routes construction at line 952): +iterate `_addon_registrations`, for each registered route create a +Starlette `Route` at `/addons/{addon_name}{path}`. Handler wraps the +resolved function in a Starlette request handler that extracts query +params and calls the addon handler with `runtime_context`. + +**3. 
Wire `panels` field.**
+
+Store panel declarations on `AddonRegistration` as
+`registered_panels: list[dict]`. Each dict from manifest:
+`{"target": str, "display_type": str, "path": str, "handler": str}`.
+Include panel data in the `AddonSummary` sent via WebSocket snapshot
+(coordinate with T1's snapshot field).
+
+**4. New `fc-addon-panel.ts` component.**
+
+Generic panel renderer. Receives a `src` URL (addon route endpoint) and
+`display-type` attribute. On connect, fetches JSON from `src`. Renders
+based on `display_type`:
+
+- `status_card`: key-value grid. Each item: `{label: str, value: str}`.
+  Rendered as a compact card with label/value pairs.
+- `table`: `{columns: str[], rows: any[][]}`. Rendered as a data table.
+- `log`: `{entries: {ts: str, message: str}[]}`. Rendered as timestamped
+  list.
+
+Auto-refreshes every 60 seconds.
+
+**5. Panel injection into existing tabs.**
+
+`knowledge-browser.ts`: at the top of the render method (after the title
+row, line 18-19 area), add a panel injection zone. For each addon panel
+with `target: "knowledge"`, render
+`<fc-addon-panel src="/addons/${addon.name}${panel.path}" display-type="${panel.display_type}"></fc-addon-panel>`.
+Panel data comes from `store.state.addons`.
+
+`workspace-browser.ts`: same injection zone at the top of the component.
+Panels with `target: "workspace"` render here.
+
+**6.
Addon status endpoints.** + +`src/formicos/addons/codebase_index/status.py` -- **new file**: +```python +async def get_status( + inputs: dict, workspace_id: str, thread_id: str, + *, runtime_context: dict | None = None, +) -> dict: + """Return index status as status_card data.""" + # Query vector_port for collection stats + # Return {display_type: "status_card", items: [...]} +``` + +`src/formicos/addons/git_control/status.py` -- **new file**: +```python +async def get_status( + inputs: dict, workspace_id: str, thread_id: str, + *, runtime_context: dict | None = None, +) -> dict: + """Return git workspace status as status_card data.""" + # Run git status, git branch --show-current + # Return {display_type: "status_card", items: [...]} +``` + +**7. Update addon manifests.** + +`addons/codebase-index/addon.yaml`: +```yaml +panels: + - target: knowledge + display_type: status_card + path: /status + handler: status.py::get_status +routes: + - path: /status + handler: status.py::get_status +``` + +`addons/git-control/addon.yaml`: +```yaml +panels: + - target: workspace + display_type: status_card + path: /status + handler: status.py::get_status +routes: + - path: /status + handler: status.py::get_status +``` + +### Files + +- `frontend/src/components/addon-panel.ts` -- **new** (~100 lines) +- `frontend/src/components/knowledge-browser.ts` -- panel injection zone (~10 lines) +- `frontend/src/components/workspace-browser.ts` -- panel injection zone (~10 lines) +- `src/formicos/surface/addon_loader.py` -- route + panel wiring (~30 lines) +- `src/formicos/surface/app.py` -- route mounting (~20 lines) +- `src/formicos/addons/codebase_index/status.py` -- **new** (~30 lines) +- `src/formicos/addons/git_control/status.py` -- **new** (~30 lines) +- `addons/codebase-index/addon.yaml` -- panels + routes block (~8 lines) +- `addons/git-control/addon.yaml` -- panels + routes block (~8 lines) + +### Tests + +4 new: +- Route mounting resolves addon handler and returns 200 +- Panel 
endpoint returns valid status_card JSON +- fc-addon-panel renders status_card items +- Addon registration includes resolved routes + +### Acceptance Gates + +- `GET /addons/codebase-index/status` returns index stats +- `GET /addons/git-control/status` returns branch/staged info +- Knowledge browser shows codebase-index status card at top +- Workspace browser shows git-control status card at top +- Panels auto-refresh without page reload + +### Owner + +Team 3. Independent of T1 (panels inject into existing tabs, not the +Addons tab). Can merge before or after T2. + +### Do Not Touch + +`queen_tools.py`, `queen_runtime.py`, `formicos-app.ts` (Team 1 owns +nav), `store.ts` (Team 1 owns snapshot mapping), any `core/` or +`engine/` files. + +--- + +## Side Task S2: Knowledge ROI Rule Fix + +`_rule_knowledge_roi()` in `src/formicos/addons/proactive_intelligence/rules.py:691-727` +only checks `entries_extracted == 0` for successful colonies. The +`entries_accessed` field is computed on `ColonyOutcome` +(`projections.py:1057-1065`) but unused. Add a secondary insight: when +successful colonies access knowledge entries and produce good outcomes +(quality_score > 0.7), note the correlation. When colonies access zero +entries and have low quality, flag the pattern. ~15 lines added to the +existing function. + +**File:** `src/formicos/addons/proactive_intelligence/rules.py` only. + +**Test:** 1 new -- ROI rule fires when entries_accessed is zero across +multiple colonies. + +**Owner:** Team 2 (touching proactive-intelligence addon config anyway). 
+ +--- + +## Side Task S4: CLAUDE.md Composite Weight Update + +Update the retrieval formula in `CLAUDE.md` from the stale Wave 34 values: + +``` +# Old (stale): +0.38*semantic + 0.25*thompson + 0.15*freshness + 0.10*status + 0.07*thread + 0.05*cooccurrence + +# New (Wave 59.5 actuals from knowledge_constants.py:33-41): +0.38*semantic + 0.25*thompson + 0.10*freshness + 0.10*status + 0.07*thread + 0.04*cooccurrence + 0.06*graph_proximity +``` + +**File:** `CLAUDE.md` only. + +**Owner:** Whoever finishes first. + +--- + +## Team Assignment + +| Team | Tracks | Rationale | +|------|--------|-----------| +| Team 1 (Addons Tab) | T1, S3 | Addon tab is the anchor. S3 is a standalone bugfix merged first. | +| Team 2 (Config) | T2, S2, S4 | Config surface + proactive-intelligence config overlap. S2 and S4 are small. | +| Team 3 (Panels) | T3 | Panels + routes + status endpoints. Independent of T1. | + +## Merge Order + +``` +S3 (merge Qdrant fix) -- standalone bugfix, merge immediately + | +T1 (addons tab + health) -- anchor, merge first among tracks + | + +---> T2 (config surface) -- depends on T1's component structure + | +T3 (panels + routes) -- independent, merge any time + | +S2, S4 -- independent, merge whenever +``` + +T3 is independent of T1 because panels inject into Knowledge and +Workspace tabs, not the Addons tab. The Addons tab showing panels inline +is additive after both T1 and T3 merge. 
+ +## What Wave 66 Does NOT Do + +- No tab consolidation (Playbook and Models stay as-is) +- No knowledge hierarchy (parent_id, tree view) -- Wave 67 +- No provenance chains -- Wave 67 +- No graph proximity activation in standard retrieval -- Wave 67 (needs + seed selection design for non-thread path) +- No new event types (stays at 69) +- No hot-reload for addons (restart required) +- No addon dependency graphs +- No addon marketplace or discovery +- No custom Lit components per addon (generic panel renderer only) +- No session continuity or doc ingestion +- No RL/self-evolution + +## Acceptance Criteria + +- Addons tab in nav shows installed addons with health status +- Addon detail view shows tools, handlers, triggers, config, and panels +- Config changes persist via WorkspaceConfigChanged events (replay-safe) +- Addon panels render in Knowledge and Workspace tabs +- Addon REST routes mountable from manifest declarations +- MemoryEntryMerged correctly cleans up source vectors from Qdrant +- CLAUDE.md composite weights match Wave 59.5 actuals +- Knowledge ROI rule uses entries_accessed signal +- 3650+ tests passing +- CI: ruff clean, pyright clean, imports clean + +## Estimated Scope + +~400 lines new frontend (addons-view.ts ~250, addon-panel.ts ~100, +store/types ~50). ~150 lines new backend (health monitoring, config +endpoints, route mounting, panel wiring). ~60 lines addon implementations +(status endpoints). ~50 lines side task fixes (S2, S3, S4). ~30 lines +manifest updates. 13 new tests. diff --git a/docs/waves/wave_67/team_a_hierarchy.md b/docs/waves/wave_67/team_a_hierarchy.md new file mode 100644 index 0000000..34ceeb8 --- /dev/null +++ b/docs/waves/wave_67/team_a_hierarchy.md @@ -0,0 +1,318 @@ +# Wave 67.0 — Team A: Knowledge Hierarchy with Materialized Paths + +**Track:** 1 +**Mission:** Give knowledge entries structural organization via hierarchy +paths on projections. Ship a tree view in the knowledge browser and a +REST endpoint for the hierarchy. 
No new event types. No core model changes. + +--- + +## Coordination Context + +- `CLAUDE.md` defines the evergreen repo rules (4-layer architecture, + 69-event closed union, Pydantic v2, Beta posteriors). +- This prompt is the authority for Team A's scope. If `AGENTS.md` conflicts + with this prompt, this prompt wins for this dispatch. +- Team B works in parallel on Tracks 2+3 (memory_extractor.py, + colony_manager.py, scoring_math.py). No file overlap. +- **Merge order:** Team A merges first. Team B rebases on Team A's landing. + +--- + +## ADR Reference + +Read `docs/decisions/049-knowledge-hierarchy.md` before writing code. Key +decisions: + +- Materialized path, not closure table (250x write amplification rejected) +- `hierarchy_path` and `parent_id` are projection-level fields only — NOT + on `core/types.py` MemoryEntry model +- Topic nodes are synthetic projection entries (`entry_type="topic"`), not + event-sourced +- ESS cap at 150 for branch confidence aggregation +- Bootstrap is LLM-only, zero new dependencies + +--- + +## Owned Files + +| File | Action | Est. Lines | +|------|--------|------------| +| `src/formicos/surface/projections.py` | Add hierarchy_path/parent_id in `_on_memory_entry_created` handler (line 1584) | ~8 | +| `src/formicos/surface/memory_store.py` | Add hierarchy_path to Qdrant payload metadata (lines 57–79) | ~3 | +| `src/formicos/surface/hierarchy.py` | **New file** — branch confidence aggregation (`compute_branch_confidence`) | ~40 | +| `src/formicos/surface/routes/api.py` | Add `GET /api/v1/workspaces/{id}/knowledge-tree` endpoint | ~45 | +| `frontend/src/components/knowledge-browser.ts` | Add tree subview, branch rendering, path filter | ~120 | +| `scripts/bootstrap_hierarchy.py` | **New file** — offline LLM-only hierarchy bootstrap | ~100 | +| `tests/unit/surface/test_hierarchy.py` | **New file** — hierarchy tests | ~80 | + +--- + +## Do Not Touch + +- `core/types.py` — No new MemoryEntry fields. Hierarchy is projection-only. 
+- `core/events.py` — No new events. The 69-event union is closed. +- `queen_runtime.py` — Queen orchestration, not in scope. +- `queen_tools.py` — Queen tools, not in scope. +- `knowledge_catalog.py` — Team B owns retrieval (Wave 67.5). +- `colony_manager.py` — Team B owns outcome confidence path. +- `memory_extractor.py` — Team B owns domain normalization. +- `scoring_math.py` — Team B owns ESS helper. + +--- + +## Implementation Steps + +### Step 1: hierarchy_path on projections + +In `projections.py`, `_on_memory_entry_created()` handler (line 1584). +Before `store.memory_entries[entry_id] = data` (line 1595), after the +scope default (line 1594), add hierarchy path computation: + +```python +# Wave 67: hierarchy path from primary domain +domains = data.get("domains", []) +primary_domain = domains[0] if domains else "uncategorized" +# Normalize (same logic as memory_extractor._normalize_domain, line 31-33) +import re +normalized = re.sub(r"[\s\-]+", "_", primary_domain.strip()).lower() +data["hierarchy_path"] = f"/{normalized}/" +data["parent_id"] = "" +``` + +**Why inline normalization instead of importing?** `_normalize_domain` is in +`memory_extractor.py` (line 31–33). Both files are Surface layer, so the +import is legal. But the function is 3 lines — inlining avoids a dependency +on an unrelated module. Either approach is acceptable. + +**Note:** `parent_id` is scaffolding for future topic-level nesting. Nothing +reads it yet — it exists so the field is present from day one when topic +assignment is added later. + +### Step 2: Qdrant payload field + +In `memory_store.py`, where metadata is assembled for `VectorDocument` +(lines 57–79). Add `hierarchy_path` to the metadata dict: + +```python +"hierarchy_path": entry.get("hierarchy_path", "/"), +``` + +This goes alongside the existing metadata fields (domains, status, decay_class, +etc.). 
Qdrant automatically indexes string payload fields as keyword indexes, +enabling filtered search via `must: [{key: "hierarchy_path", match: {value: "/engineering/"}}]`. + +### Step 3: Branch confidence aggregation + +Create `src/formicos/surface/hierarchy.py`: + +```python +"""Knowledge hierarchy utilities — branch confidence aggregation.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from formicos.surface.projections import ProjectionStore + + +def compute_branch_confidence( + store: "ProjectionStore", + path_prefix: str, +) -> dict[str, Any]: + """Aggregate Beta confidence for entries under a hierarchy branch. + + Returns {"alpha": float, "beta": float, "count": int, "mean": float}. + Sums children's evidence (subtracting the Beta(5,5) prior from each), + re-adds a single prior, and caps effective sample size at 150. + + ESS 150 is mathematically equivalent to exponential decay with + gamma ≈ 0.993. Balances stability with responsiveness per production + Thompson Sampling literature. + """ + total_alpha = 0.0 + total_beta = 0.0 + count = 0 + for entry in store.memory_entries.values(): + if entry.get("entry_type") == "topic": + continue # don't count synthetic nodes + hp = entry.get("hierarchy_path", "/") + if hp.startswith(path_prefix): + total_alpha += entry.get("conf_alpha", 5.0) - 5.0 + total_beta += entry.get("conf_beta", 5.0) - 5.0 + count += 1 + agg_alpha = 5.0 + total_alpha + agg_beta = 5.0 + total_beta + ess = agg_alpha + agg_beta + if ess > 150: + scale = 150.0 / ess + agg_alpha *= scale + agg_beta *= scale + mean = agg_alpha / (agg_alpha + agg_beta) if (agg_alpha + agg_beta) > 0 else 0.5 + return {"alpha": agg_alpha, "beta": agg_beta, "count": count, "mean": mean} +``` + +This is called on-demand by the API, not stored in projection state. Pure +computation over existing data. The `ProjectionStore` type hint is +forward-referenced to avoid circular imports. 
+ +### Step 4: REST endpoint + +In `routes/api.py`, add: + +``` +GET /api/v1/workspaces/{id}/knowledge-tree +``` + +Build a tree from all `memory_entries` for the workspace by grouping on +`hierarchy_path` segments. For each branch node, call +`compute_branch_confidence()` to get aggregated posteriors. + +Response shape: + +```json +{ + "branches": [ + { + "path": "/engineering/", + "label": "engineering", + "entryCount": 42, + "confidence": {"alpha": 28.3, "beta": 12.1, "mean": 0.70}, + "children": [ + { + "path": "/engineering/auth/", + "label": "auth", + "entryCount": 12, + "confidence": {"alpha": 15.2, "beta": 4.8, "mean": 0.76}, + "children": [] + } + ] + } + ] +} +``` + +Place this near the existing knowledge endpoints. Use `_err_response()` for +errors (follow the existing api.py patterns — do NOT return raw JSONResponse +for errors). + +### Step 5: Knowledge browser tree view + +In `knowledge-browser.ts` (1,225 lines): + +**5a. Extend SubView type** (line 12): +```typescript +// Current: type SubView = 'catalog' | 'graph'; +// New: +type SubView = 'catalog' | 'graph' | 'tree'; +``` + +**5b. Add tree toggle button** alongside existing catalog/graph buttons. + +**5c. Add `_renderTreeView()` method:** +- Fetch from `GET /api/v1/workspaces/{wsId}/knowledge-tree` +- Render collapsible tree with branch name, entry count, and confidence bar +- Each branch is clickable → filters catalog to that hierarchy path prefix +- Use existing `_renderConfidenceBar()` patterns for branch confidence display + +**5d. Existing `_renderScoreBar` (line 885)** is NOT moved in this track. +Score bar visibility changes are Track 5 (Wave 67.5). 
+ +### Step 6: Bootstrap script + +Create `scripts/bootstrap_hierarchy.py`: + +- **Offline tool**, not imported by the runtime +- Reads entries from REST API (`GET /api/v1/workspaces/{id}/knowledge`) +- Groups entries by existing domain tag (~20 entries per batch) +- For each domain batch, calls the LLM to identify 2–5 topic sub-clusters +- Assigns `hierarchy_path` values (e.g., `/python/testing/`, `/python/async/`) +- Persists by writing updated `hierarchy_path` values back via a PATCH or + PUT endpoint on entries, or by replaying with a modified extraction + prompt that includes hierarchy hints. The simplest approach: the script + directly updates the projection dict via the REST API and the updated + paths are picked up on next Qdrant sync. Since this is a one-time offline + tool, exact persistence mechanism is left to the implementer — just + ensure the result is visible in `GET /api/v1/workspaces/{id}/knowledge-tree` +- ~15 LLM calls for 300 entries across 15 domains +- Zero new dependencies + +This script is a one-time bootstrap. Going forward, extraction-time domain +suggestion (Team B, Track 2) keeps new entries aligned organically. + +--- + +## Tests + +Write in `tests/unit/surface/test_hierarchy.py`: + +1. **`test_memory_entry_created_sets_hierarchy_path`** — + Process a `MemoryEntryCreated` event with `domains=["Python Testing"]`. + Verify the projection entry has `hierarchy_path="/python_testing/"` and + `parent_id=""`. + +2. **`test_memory_entry_created_no_domains_gets_uncategorized`** — + Process a `MemoryEntryCreated` event with `domains=[]`. Verify + `hierarchy_path="/uncategorized/"`. + +3. **`test_qdrant_payload_includes_hierarchy_path`** — + Verify that `sync_entry()` includes `hierarchy_path` in the + `VectorDocument.metadata` dict. + +4. **`test_branch_confidence_aggregation`** — + Create 3 entries under `/engineering/` with known alpha/beta values. + Call `compute_branch_confidence(store, "/engineering/")`. 
Verify the + aggregated alpha/beta are correct and the mean is right. + +5. **`test_branch_confidence_ess_cap`** — + Create entries whose combined ESS exceeds 150. Verify the result is + capped at 150 while preserving the mean ratio. + +6. **(Optional) `test_knowledge_tree_endpoint`** — + If time allows, test the REST endpoint returns a valid tree structure. + +--- + +## Acceptance Gates + +All must pass before declaring done: + +- [ ] Entry projections include `hierarchy_path` derived from primary domain +- [ ] `hierarchy_path` uses normalized domain (lowercase, underscores) +- [ ] Entries with no domains get `hierarchy_path="/uncategorized/"` +- [ ] `parent_id` is set to empty string (flat start) +- [ ] Qdrant payload includes `hierarchy_path` field +- [ ] `compute_branch_confidence()` aggregates children correctly +- [ ] ESS cap at 150 preserves mean ratio +- [ ] `GET /api/v1/workspaces/{id}/knowledge-tree` returns valid tree +- [ ] Knowledge browser shows catalog/graph/tree toggle +- [ ] Tree view shows collapsible branches with entry counts +- [ ] Hierarchy paths survive replay (derived from existing event data) +- [ ] Bootstrap script exists at `scripts/bootstrap_hierarchy.py` +- [ ] No new event types (stays at 69) +- [ ] No changes to `core/types.py` MemoryEntry model + +--- + +## Validation + +Run the full CI suite before declaring done: + +```bash +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +``` + +All must pass clean. Target: 3654 + 5 = 3659+ tests. + +--- + +## Overlap Reread Rules + +After completing your work, reread: + +- `src/formicos/surface/projections.py` lines 1580–1610 (your changes) +- `src/formicos/surface/memory_store.py` lines 55–85 (your payload change) +- `src/formicos/surface/routes/api.py` (your new endpoint) + +Verify your changes don't break existing projection replay or Qdrant sync. 
diff --git a/docs/waves/wave_67/team_a_provenance.md b/docs/waves/wave_67/team_a_provenance.md new file mode 100644 index 0000000..d9e26fa --- /dev/null +++ b/docs/waves/wave_67/team_a_provenance.md @@ -0,0 +1,316 @@ +# Wave 67.5 - Team A: Provenance Chain and Browser Detail Surface + +**Wave:** 67.5 (surfaces) +**Track:** 5 - Provenance Chain on Projections +**Prerequisite:** Wave 67.0 landed and stable +**Dispatch note:** This track is a single vertical slice. Do not split the +backend and frontend work across separate coders unless staffing changes force it. + +--- + +## Mission + +The knowledge browser can show where an entry came from, but it still cannot +show how that entry evolved over time. Operators cannot answer: + +- when confidence changed +- whether the entry was merged or refined +- which operator actions touched it + +Your job: add an append-only `provenance_chain` to projection entries, expose +it through a dedicated knowledge API endpoint, and surface it in the browser +detail view. While you are in the browser, make the existing score bar visible +in the default list item body instead of hiding it inside the confidence hover. + +--- + +## Contract Blocker + +Operator approval is required before changing: + +- `docs/contracts/types.ts` +- `frontend/src/types.ts` + +Use snake_case to match the existing knowledge API payload style: + +```typescript +export interface ProvenanceChainItem { + event_type: string; + timestamp: string; + actor_id: string; + detail: string; + confidence_delta: number | null; +} +``` + +If you add a typed response interface for the new endpoint, mirror that in +both contract files in the same patch. 
+ +--- + +## Owned Files + +| File | Change | +|------|--------| +| `src/formicos/surface/projections.py` | Append provenance items from the relevant event handlers | +| `src/formicos/surface/routes/knowledge_api.py` | Add `GET /api/v1/knowledge/{item_id}/provenance` | +| `frontend/src/components/knowledge-browser.ts` | Fetch and render provenance timeline; move score bar into default card body | +| `frontend/src/types.ts` | Add `ProvenanceChainItem` mirror | +| `docs/contracts/types.ts` | Add `ProvenanceChainItem` contract mirror | +| `tests/unit/surface/test_provenance_chain.py` | New backend tests | + +--- + +## Do Not Touch + +- `core/events.py`, `core/types.py` - no new events and no model changes +- `knowledge_catalog.py` - Team B owns retrieval changes +- `memory_extractor.py`, `colony_manager.py`, `memory_store.py` - landed in 67.0 +- Any addon files - Team C owns the docs indexer + +--- + +## Repo Truth You Must Read First + +### `src/formicos/surface/projections.py` + +Relevant handlers already exist: + +- `_on_memory_entry_created()` +- `_on_memory_confidence_updated()` +- `_on_memory_entry_merged()` +- `_on_memory_entry_refined()` +- `_on_knowledge_entry_operator_action()` +- `_on_knowledge_entry_annotated()` + +Important repo-truth detail: + +- `MemoryEntryMerged` should update both the target and source entry chains +- if you only annotate the surviving target, the absorbed source entry loses + part of its lifecycle + +### `src/formicos/core/events.py` + +Read the event payloads before writing details: + +- `MemoryConfidenceUpdated` +- `MemoryEntryMerged` +- `MemoryEntryRefined` +- `KnowledgeEntryOperatorAction` +- `KnowledgeEntryAnnotated` + +### `frontend/src/components/knowledge-browser.ts` + +Repo truth today: + +- `_toggleDetail()` already fetches entry detail and relationships +- `_renderScoreBar()` already exists +- the score bar currently looks for `score_breakdown`, but search payloads may + still expose `_score_breakdown` +- the bar is 
rendered inside the confidence hover detail, not in the main card body + +Your UI work should stay compatible with both `score_breakdown` and +`_score_breakdown`. + +--- + +## Implementation Steps + +### Step 1: Add a small helper in `projections.py` + +Avoid repeating the same list-append logic in six handlers. Add a local helper +near the knowledge-entry handlers, for example: + +```python +def _append_provenance_item( + entry: dict[str, Any], + *, + event_type: str, + timestamp: str, + actor_id: str, + detail: str, + confidence_delta: float | None = None, +) -> None: + chain = entry.setdefault("provenance_chain", []) + chain.append({ + "event_type": event_type, + "timestamp": timestamp, + "actor_id": actor_id, + "detail": detail, + "confidence_delta": confidence_delta, + }) +``` + +Keep it append-only. Do not create separate shadow state. + +### Step 2: Instrument the relevant handlers + +Append a provenance item from these handlers: + +1. `MemoryEntryCreated` +2. `MemoryConfidenceUpdated` +3. `MemoryEntryMerged` +4. `MemoryEntryRefined` +5. `KnowledgeEntryOperatorAction` +6. 
`KnowledgeEntryAnnotated`

Suggested details:

- `MemoryEntryCreated`: `"Created by colony <colony_id>"`
- `MemoryConfidenceUpdated`: `"Confidence updated (<confidence_delta>)"`
- `MemoryEntryMerged` target: `"Merged entry <source_id> into this entry"`
- `MemoryEntryMerged` source: `"Merged into entry <target_id>"`
- `MemoryEntryRefined`: `"Refined via <refinement_type>"`
- `KnowledgeEntryOperatorAction`: `"Operator action: <action>"`
- `KnowledgeEntryAnnotated`: `"Annotation added"` plus tag when present

Use `actor_id` consistently:

- source colony id for colony-driven events when available
- operator `actor` for operator events
- empty string for maintenance/system events with no actor id

For `confidence_delta`, use a single numeric delta in posterior mean:

```python
old_mean = e.old_alpha / (e.old_alpha + e.old_beta)
new_mean = e.new_confidence
confidence_delta = round(new_mean - old_mean, 4)
```

Do not try to encode both alpha and beta deltas into the typed field. Put any
extra alpha/beta context into the human-readable `detail` string if useful.

### Step 3: Add the provenance endpoint

In `routes/knowledge_api.py`, add:

```text
GET /api/v1/knowledge/{item_id}/provenance
```

Return shape:

```json
{
  "entry_id": "mem-123",
  "chain": [...],
  "total": 6
}
```

Guidance:

- use the existing `_err_response()` helper
- return `KNOWLEDGE_ITEM_NOT_FOUND` when the entry is absent
- keep the endpoint read-only and projection-backed

### Step 4: Mirror the frontend contract

Add `ProvenanceChainItem` to:

- `docs/contracts/types.ts`
- `frontend/src/types.ts`

If you add a typed endpoint response, mirror that too.

### Step 5: Render the browser timeline

In `knowledge-browser.ts`:

1. add provenance cache state, for example:

```typescript
@state() private _provCache: Record<string, ProvenanceChainItem[]> = {};
```

2. add `_fetchProvenance(entryId)` alongside `_fetchRelationships()`
3. call it from `_toggleDetail()` when an entry expands
4.
render a timeline block in the expanded detail area

Suggested rendering:

- `timeAgo(item.timestamp)`
- event label from `event_type`
- detail text
- optional confidence delta badge such as `+0.07` / `-0.03`

Keep the UI compact. This is an audit trail, not a second full detail page.

### Step 6: Make score bars visible by default

Move the score bar into the main card body so it is visible without hovering.

Update `_renderScoreBar()` to read either field:

```typescript
const sb =
  e.score_breakdown ??
  ((e as Record<string, unknown>)._score_breakdown as
    Record<string, number> | undefined);
```

Do not add retrieval logic here. Team B owns the backend scoring changes.

---

## Tests

Create `tests/unit/surface/test_provenance_chain.py`.

Required tests:

1. `test_memory_entry_created_seeds_provenance_chain`
2. `test_memory_confidence_updated_appends_delta`
3. `test_memory_entry_merged_updates_target_and_source_chains`
4. `test_provenance_endpoint_returns_chain`

Optional fifth test if time allows:

5. `test_operator_annotation_appends_provenance_item`

You do not need frontend unit tests unless there is already a nearby pattern,
but the browser change should be exercised manually by code review and local run.

---

## Acceptance Gates

1. Relevant events append to `provenance_chain`
2. `MemoryEntryMerged` annotates both target and source entries
3. Provenance survives replay because it is projection-derived from events
4. `GET /api/v1/knowledge/{item_id}/provenance` returns the chain
5. Browser detail view shows a provenance timeline
6. Score bar is visible in the default result card body
7. `_renderScoreBar()` works with either `score_breakdown` or `_score_breakdown`
8. No new events and no core model changes

---

## Validation

Run before declaring done:

```bash
ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest
```

---

## Merge Order

This track is independent of Team C's addon work.
+ +Prefer Team B landing first because retrieval changes may improve the score-bar +payloads this browser work can display, but this track should not block on Team B. + +--- + +## Track Summary Template + +When done, report: + +1. Which handlers append provenance items +2. Whether `MemoryEntryMerged` updates both source and target chains +3. Which route file owns the provenance endpoint +4. Whether the browser now reads both `score_breakdown` and `_score_breakdown` +5. Any small audit fixes found within the owned files diff --git a/docs/waves/wave_67/team_b_feedback.md b/docs/waves/wave_67/team_b_feedback.md new file mode 100644 index 0000000..98481c3 --- /dev/null +++ b/docs/waves/wave_67/team_b_feedback.md @@ -0,0 +1,400 @@ +# Wave 67.0 — Team B: Domain Normalization + Outcome-Confidence Reinforcement + +**Tracks:** 2 (Domain Normalization), 3 (Rank Credit + ESS Cap) +**Mission:** Fix two knowledge feedback loop problems. (1) Domain tags drift +because extraction doesn't suggest existing domains. (2) Outcome confidence +gives equal credit to all retrieved entries regardless of rank, and alpha/beta +can grow unbounded. No new event types. No core model changes. + +--- + +## Coordination Context + +- `CLAUDE.md` defines the evergreen repo rules (4-layer architecture, + 69-event closed union, Pydantic v2, Beta posteriors). +- This prompt is the authority for Team B's scope. If `AGENTS.md` conflicts + with this prompt, this prompt wins for this dispatch. +- Team A works in parallel on Track 1 (projections.py hierarchy fields, + memory_store.py, hierarchy.py, routes/api.py, knowledge-browser.ts, + bootstrap script). No file overlap with Team B. +- **Merge order:** Team A merges first. Team B rebases on Team A's landing. + Team B's work is fully independent — rebase is for cleanliness, not + because of code dependencies. + +--- + +## ADR Reference + +- `docs/decisions/049-knowledge-hierarchy.md` — Context on hierarchy design. 
+ Team B does not implement hierarchy, but the domain normalization in Track 2 + prevents orphan hierarchy branches from forming. +- No dedicated ADR for Track 2 or Track 3. These are implementation + refinements within the existing knowledge metabolism framework (ADR-039). + +--- + +## Owned Files + +| File | Action | Est. Lines | +|------|--------|------------| +| `src/formicos/surface/memory_extractor.py` | Inject existing domain suggestions into extraction prompt | ~15 | +| `src/formicos/surface/colony_manager.py` | Rank-based credit + ESS cap in `_hook_confidence_update` | ~25 | +| `src/formicos/engine/scoring_math.py` | New `rescale_preserving_mean` helper | ~12 | +| `tests/unit/surface/test_domain_normalization.py` | **New file** — domain suggestion tests | ~40 | +| `tests/unit/engine/test_scoring_math_ess.py` | **New file** — ESS cap tests | ~50 | + +--- + +## Do Not Touch + +- `core/types.py` — No new fields on any model. +- `core/events.py` — No new events. The 69-event union is closed. +- `projections.py` — Team A owns hierarchy additions. +- `memory_store.py` — Team A owns Qdrant payload changes. +- `knowledge_catalog.py` — Retrieval changes are Wave 67.5 scope. +- `queen_runtime.py` — Queen orchestration, not in scope. +- `queen_tools.py` — Queen tools, not in scope. +- `routes/api.py` — Team A owns new endpoints. +- `knowledge-browser.ts` — Team A owns tree view. + +--- + +## Track 2: Domain Normalization at Extraction Time + +### Problem + +Domain tags drift without constraint. The same concept gets multiple names: +"python_testing", "python_test_patterns", "testing_python". The existing +`_normalize_domain()` (memory_extractor.py:31–33) handles case/whitespace +but not semantic equivalence. With hierarchy in place (Track 1), drift +creates orphan branches that should be the same node. + +### Implementation + +#### Step 1: Inject domain suggestions into extraction prompt + +In `memory_extractor.py`, `build_extraction_prompt()` (line 88). 
+ +The function signature (line 88–93) includes `existing_entries: list[dict[str, Any]] | None = None`. +These are entries retrieved from the knowledge catalog before extraction. + +After line 94 (the three-path branch for prompt construction), before the +prompt string is assembled, extract unique domain tags from existing entries +and inject them as guidance: + +```python +# Wave 67: domain normalization via existing entry suggestion +existing_domains: set[str] = set() +if existing_entries: + for e in existing_entries[:10]: + for d in e.get("domains", []): + existing_domains.add(d) + +# Add to prompt (in the domain field instruction section): +domain_hint = "" +if existing_domains: + sorted_domains = sorted(existing_domains)[:20] + domain_hint = ( + "\nUse one of these existing domain tags if applicable " + "(do not create synonyms): " + + ", ".join(sorted_domains) + ) +``` + +Append `domain_hint` to the prompt after the three-path branch (lines 133–205) +completes, near line 206 where `parts.append()` assembles the final prompt. +This ensures the hint applies to all three prompt paths. Look for where the +`domains` field schema is described — the hint should appear just before or +after that section. + +**Key constraints:** +- Cap at 20 domains to avoid prompt bloat +- Cap at 10 existing entries to limit iteration +- Keep the hint as guidance, not a hard constraint — if none of the existing + domains match, the LLM should still freely name a new one +- Do NOT modify `_normalize_domain()` or `_normalize_domains()` — those + functions are fine as-is + +#### Step 2: Verify existing_entries is populated + +Check the call sites of `build_extraction_prompt()`. Verify that +`existing_entries` is actually passed with real data. If it's always `None` +at the call site, the domain hint will never fire. Trace the caller chain +and confirm existing entries are retrieved before extraction. 
+ +If the call site doesn't pass existing entries, that's a pre-existing gap — +note it in your track summary but don't expand scope to fix it (that would +touch files outside your ownership). + +--- + +## Track 3: Outcome-Confidence Reinforcement with Rank Credit + +### Problem + +`_hook_confidence_update()` (colony_manager.py:1476) gives equal credit to +all accessed entries regardless of retrieval rank. The #1 result and the #10 +result get the same alpha/beta delta. This dilutes the reinforcement signal. + +Additionally, there is no effective sample size cap. Alpha+beta can grow +unbounded, making high-evidence entries increasingly resistant to confidence +updates over time — eventually they become immovable. + +### ADR-049 Reference + +The ESS cap at 150 is documented in ADR-049's `compute_branch_confidence` +function. Team B implements the same cap at the individual entry level in +the outcome confidence path. The math is identical: rescale alpha and beta +proportionally to cap total ESS while preserving the posterior mean. + +### Implementation + +#### Step 1: Add `rescale_preserving_mean()` to scoring_math.py + +In `src/formicos/engine/scoring_math.py` (79 lines total, `exploration_score` +at line 32). Add a new function: + +```python +def rescale_preserving_mean( + alpha: float, beta: float, max_ess: float = 150.0, +) -> tuple[float, float]: + """Rescale Beta parameters to cap effective sample size. + + Mathematically equivalent to exponential decay with gamma = 1 - 1/max_ess. + Default cap of 150 (not 100) lets high-evidence entries stabilize + without becoming immovable. 100 would be too aggressive per production + Thompson Sampling literature (Russo et al. recommend N_eff ≈ 200 for + nonstationary environments). + + Preserves the posterior mean: alpha/(alpha+beta) is unchanged. 
+ """ + ess = alpha + beta + if ess <= max_ess: + return alpha, beta + scale = max_ess / ess + return alpha * scale, beta * scale +``` + +This is Engine layer — pure computation, no Surface imports. The layer +boundary is correct: `engine/` may not import from `surface/`. + +#### Step 2: Rank-based credit assignment in colony_manager.py + +In `_hook_confidence_update()` (line 1476). The current code at line 1542: + +```python +delta_alpha = min(max(0.5 + quality_score, 0.5), 1.5) +``` + +And line 1562: + +```python +delta_beta = min(max(0.5 + failure_penalty, 0.5), 1.5) +``` + +These apply the same delta to every accessed entry. The access records in +`colony.knowledge_accesses` preserve item order — items within each access +dict maintain their ranked retrieval position. + +**Read the access record structure carefully** before modifying. Look at: +- How `knowledge_accesses` is populated (search for `KnowledgeAccessRecorded` + event handler in projections.py, line ~1566) +- What shape each access record has (it's a list of dicts, each with an + `"items"` key or similar — verify the actual field name) +- Whether item order corresponds to retrieval rank + +Once you understand the structure, apply geometric credit: + +```python +# Geometric credit: 0.7^rank (Position-Based Model examination probabilities) +# Yields [1.0, 0.7, 0.49, 0.34, 0.24, ...] — models declining attention +# better than harmonic 1/(rank+1) per production recommendation system +# findings (Udemy, Scribd). +credit = 0.7 ** rank # rank is 0-indexed position within the access items + +if succeeded: + base_delta = min(max(0.5 + quality_score, 0.5), 1.5) + delta_alpha = base_delta * credit +else: + base_delta = min(max(0.5 + failure_penalty, 0.5), 1.5) + delta_beta = base_delta * credit +``` + +**Rank tracking with dedup:** The existing loop (lines 1501–1506) has a +dedup guard that skips already-seen item IDs. Use `enumerate()` on the +items list within each trace. 
The raw enumerate index is the correct rank — +deduped items should still consume a rank slot since they occupied a +retrieval position. + +**Important:** The existing code iterates over accessed entries. Read the +code between lines 1476 and 1620 to understand the full loop structure +before modifying. The access records are structured: each trace has an +`"items"` list preserving retrieval order. + +#### Step 3: ESS cap after confidence update + +After computing `new_alpha` and `new_beta` (lines 1543, 1564), apply the +ESS cap before emitting the `MemoryConfidenceUpdated` event (line ~1573): + +```python +from formicos.engine.scoring_math import rescale_preserving_mean + +# Cap effective sample size at 150 +new_alpha, new_beta = rescale_preserving_mean(new_alpha, new_beta) +``` + +This import is legal: Surface may import from Engine. + +**Placement:** The actual code flow in the success path is: + +1. Line 1542: `delta_alpha` computed +2. Line 1543: `new_alpha = decayed_alpha + delta_alpha` +3. Lines 1547–1556: mastery restoration bonus added to `new_alpha` +4. Line 1565: `new_confidence` computed from `new_alpha / (new_alpha + new_beta)` +5. Line 1572: `MemoryConfidenceUpdated` event emitted +6. Line 1592: auto-promotion check + +The ESS cap must go AFTER step 3 (mastery restoration) but BEFORE step 4 +(confidence computation). Insert between line 1556 and line 1565. In the +failure path, insert between line 1564 (`new_beta` assignment) and line 1565. + +#### Step 4: Verify mastery restoration still works + +The mastery restoration logic (lines 1547–1556) adds a 20% gap-recovery +bonus when `current_alpha < peak_alpha * 0.5` for stable/permanent entries. +After ESS capping, `peak_alpha` tracking still needs to work correctly. + +Check that `peak_alpha` is tracked on the projection entry (it is — see +projections.py line 1677 in `_on_memory_confidence_updated`). 
The projection +handler sets `peak_alpha = max(peak, e.new_alpha)` — and since the ESS cap +is applied before emission, `e.new_alpha` is the **post-cap** value. This +means `peak_alpha` tracks the capped peak, not the theoretical uncapped peak. + +This is the correct behavior: mastery restoration checks +`decayed_alpha < peak_alpha * 0.5`, and since both the current value and the +peak are in the same capped space, the comparison remains meaningful. No +additional tracking needed. + +--- + +## Tests + +### Track 2 tests — `tests/unit/surface/test_domain_normalization.py` + +1. **`test_extraction_prompt_includes_existing_domains`** — + Call `build_extraction_prompt()` with `existing_entries` containing 3 + entries with domains `["python", "testing", "auth"]`. Verify the returned + prompt string contains "Use one of these existing domain tags" and lists + the domain names. + +2. **`test_extraction_prompt_caps_domains_at_20`** — + Pass entries with 30 unique domains. Verify only 20 appear in the prompt. + +3. **`test_extraction_prompt_no_domains_without_existing`** — + Call with `existing_entries=None`. Verify no domain hint appears. + +### Track 3 tests — `tests/unit/engine/test_scoring_math_ess.py` + +4. **`test_rescale_preserving_mean_under_cap`** — + Call `rescale_preserving_mean(10.0, 5.0)`. Verify returned unchanged + (ESS=15 < 150). + +5. **`test_rescale_preserving_mean_over_cap`** — + Call `rescale_preserving_mean(100.0, 80.0)`. Verify ESS is capped at + 150 and mean ratio is preserved: `100/180 ≈ new_alpha/(new_alpha+new_beta)`. + +6. **`test_rescale_preserving_mean_exact_cap`** — + Call `rescale_preserving_mean(75.0, 75.0)`. Verify returned unchanged + (ESS=150, exactly at cap). + +7. **`test_rank_credit_top_entry_gets_more`** — + Integration test: simulate a colony outcome with 3 accessed entries at + ranks 0, 1, 2. Verify rank-0 entry gets `1.0x` delta, rank-1 gets + `0.7x`, rank-2 gets `0.49x`. + +8. 
**`test_ess_cap_after_outcome_update`** — + Set an entry's alpha+beta to 145. Apply an outcome update. Verify the + result is capped at 150. + +9. **`test_auto_promotion_works_with_ess_cap`** — + Verify that auto-promotion (candidate → verified when alpha >= threshold) + still triggers correctly after ESS rescaling. + +--- + +## Acceptance Gates + +All must pass before declaring done: + +**Track 2:** +- [ ] Extraction prompt shows "Use one of these existing domain tags" when + existing entries have domains +- [ ] Domain hint caps at 20 domains +- [ ] No hint appears when `existing_entries` is None or empty +- [ ] No regression in extraction quality (domains still free-form if no + existing entries match) + +**Track 3:** +- [ ] `rescale_preserving_mean()` exists in `engine/scoring_math.py` +- [ ] ESS cap at 150 preserves posterior mean +- [ ] Top-ranked entries get stronger confidence reinforcement (0.7^rank) +- [ ] Alpha+beta never exceeds 150 after outcome update +- [ ] Mastery restoration still works correctly with capped entries +- [ ] Auto-promotion still triggers when alpha crosses threshold +- [ ] Co-occurrence reinforcement unchanged (line ~1618) +- [ ] No new event types (stays at 69) +- [ ] No changes to `core/types.py` + +--- + +## Validation + +Run the full CI suite before declaring done: + +```bash +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +``` + +All must pass clean. Target: 3654 + 9 = 3663+ tests (3 Track 2 + 6 Track 3). + +The `lint_imports.py` check is critical for Track 3: the `rescale_preserving_mean` +function is in Engine, imported by Surface. This is the correct direction +(Surface → Engine). If you accidentally create an Engine → Surface import, +`lint_imports.py` will catch it. 
+ +--- + +## Overlap Reread Rules + +After completing your work, reread: + +- `src/formicos/surface/memory_extractor.py` lines 88–130 (your Track 2 changes) +- `src/formicos/surface/colony_manager.py` lines 1476–1620 (your Track 3 changes) +- `src/formicos/engine/scoring_math.py` (your new function) + +Verify: +- Domain hint doesn't break existing prompt structure +- Rank credit doesn't change co-occurrence reinforcement logic +- ESS cap doesn't prevent mastery restoration or auto-promotion +- `peak_alpha` tracking in projections still records the true peak + +--- + +## Track Summary Template + +When done, report: + +``` +Track 2: Domain Normalization +- Files modified: [list] +- Tests added: [count] +- existing_entries populated at call site: [yes/no/partially — explain] + +Track 3: Rank Credit + ESS Cap +- Files modified: [list] +- Tests added: [count] +- Access record structure: [describe what you found] +- Mastery restoration verified: [yes/no] +- Auto-promotion verified: [yes/no] +- Additional bugs found/fixed (audit allowance): [list or none] +``` diff --git a/docs/waves/wave_67/team_b_retrieval.md b/docs/waves/wave_67/team_b_retrieval.md new file mode 100644 index 0000000..b0f3724 --- /dev/null +++ b/docs/waves/wave_67/team_b_retrieval.md @@ -0,0 +1,307 @@ +# Wave 67.5 - Team B: Two-Pass Retrieval with Personalized PageRank + +**Wave:** 67.5 (surfaces) +**Track:** 4 - Two-Pass Retrieval for Graph Proximity +**ADR:** `docs/decisions/050-two-pass-retrieval.md` (proposed - read before coding) +**Prerequisite:** Wave 67.0 landed and stable + +--- + +## Mission + +The `graph_proximity` signal is still dead weight in the standard retrieval +path. `_composite_key()` in `knowledge_catalog.py` hardcodes that term to +`0.0`, so non-thread retrieval never benefits from the knowledge graph even +though the weight exists. Only `_search_thread_boosted()` computes graph +scores today, and it does so with a 1-hop binary neighbor lookup. 
+ +Your job: make graph proximity real in both retrieval paths using +Personalized PageRank (PPR), with embedding-based entity seeding for the +standard path and a shared enrichment helper for both paths. + +--- + +## Owned Files + +| File | Change | +|------|--------| +| `src/formicos/adapters/knowledge_graph.py` | Add `match_entities_by_embedding()` and `personalized_pagerank()` | +| `src/formicos/surface/knowledge_catalog.py` | Add shared graph-scoring helpers and wire them into both retrieval paths | +| `tests/unit/surface/test_two_pass_retrieval.py` | New retrieval tests | + +--- + +## Do Not Touch + +- `core/types.py`, `core/events.py` - closed union, no changes +- `projections.py` - Team A owns provenance additions +- `colony_manager.py` - landed in 67.0 +- `memory_extractor.py` - landed in 67.0 +- `memory_store.py` - no Qdrant schema changes needed here +- Any frontend files - Team A owns the 67.5 browser work +- Any addon files - Team C owns the docs indexer + +--- + +## Repo Truth You Must Read First + +### `src/formicos/surface/knowledge_catalog.py` + +Read these paths before editing: + +- `_composite_key()` - the non-thread scorer still hardcodes `graph_proximity` + to `0.0` +- `_search_vector()` - the standard retrieval path that needs real graph scores +- `_search_thread_boosted()` - the thread path that already computes graph + scores inline and should be refactored to the shared helper + +Important constraint: + +- `_composite_key()` is a module-level function, not a method +- It only sees the item dict and weights +- So the clean pattern is to inject `_graph_proximity` onto each item before + sorting, matching existing `_thread_bonus` / `_pin_boost` behavior + +Repo-truth caveat for 67.5 coordination: + +- The knowledge browser score bar is already implemented +- The browser will be easier to wire if the non-thread path also emits + `_score_breakdown` metadata using the same signal names as the thread path +- Team A will handle the UI; do not edit 
frontend here + +### `src/formicos/adapters/knowledge_graph.py` + +Read these methods first: + +- `_embed_for_similarity()` - existing async/sync embedding helper +- `get_neighbors()` - existing 1-hop edge fetch you will reuse to build the + local adjacency list +- `search_entities()` - existing substring fallback + +Relevant data shape: + +- `kg_nodes`: `id, name, entity_type, summary, source_colony, workspace_id, created_at` +- `entry_kg_nodes` in projections maps `entry_id -> kg_node_id` + +--- + +## Implementation Steps + +### Step 1: Add embedding-based entity matching + +In `knowledge_graph.py`, add: + +```python +async def match_entities_by_embedding( + self, + query: str, + workspace_id: str, + *, + limit: int = 5, +) -> list[dict[str, Any]]: +``` + +Behavior: + +1. Try `_embed_for_similarity([query])` +2. If no embedding function is available, fall back to `search_entities()` +3. If embeddings are available: + - load workspace entities from `kg_nodes` + - embed `query` plus candidate `name + summary` strings + - compute cosine similarity + - return top-k `{id, name, entity_type, score}` sorted descending +4. 
Bound cost: + - if the workspace has more than ~500 entities, skip full embedding and + fall back to substring matching for this first version + +### Step 2: Add localized Personalized PageRank + +In `knowledge_graph.py`, add: + +```python +async def personalized_pagerank( + self, + seed_ids: list[str], + workspace_id: str, + *, + damping: float = 0.5, + iterations: int = 20, +) -> dict[str, float]: +``` + +Requirements: + +- Pure Python, no new dependencies +- Build a bounded local adjacency list by expanding outward from seeds up to + 3 hops with repeated `get_neighbors()` calls +- Use a restart-biased PPR update: + +```python +pr[v] = (1 - damping) * reset[v] + damping * incoming_mass +``` + +- Normalize max score to `1.0` +- Return `{entity_id: score}` +- Return `{}` on empty seeds or no reachable edges + +### Step 3: Add shared graph scoring on `KnowledgeCatalog` + +In `knowledge_catalog.py`, add: + +```python +async def _enrich_with_graph_scores( + self, + seed_entity_ids: list[str], + workspace_id: str, +) -> dict[str, float]: +``` + +Behavior: + +- Guard on missing KG adapter / projections +- Run `personalized_pagerank()` +- Reverse-map entity ids back to entry ids via `self._projections.entry_kg_nodes` +- Return `{entry_id: proximity_score}` + +Also add: + +```python +async def _compute_graph_scores( + self, + query: str, + workspace_id: str, +) -> dict[str, float]: +``` + +This helper should: + +1. call `match_entities_by_embedding()` +2. extract seed entity ids +3. 
call `_enrich_with_graph_scores()` + +### Step 4: Wire graph scoring into `_search_vector()` + +In `_search_vector()`: + +- start the graph work in parallel with the existing institutional and legacy + searches +- after merge + overlay application, inject `_graph_proximity` onto each item +- update `_composite_key()` so the `graph_proximity` weight reads from + `item.get("_graph_proximity", 0.0)` + +Pattern: + +```python +item["_graph_proximity"] = graph_scores.get(item.get("id", ""), 0.0) +``` + +If practical, also emit `_score_breakdown` parity on the non-thread results +using the same signal names as the thread path: + +```python +item["_score_breakdown"] = { + "semantic": ..., + "thompson": ..., + "freshness": ..., + "status": ..., + "thread": 0.0, + "cooccurrence": 0.0, + "graph_proximity": float(item.get("_graph_proximity", 0.0)), + "composite": ..., + "weights": dict(ws_weights), +} +``` + +That keeps Team A from needing retrieval-specific frontend branching. + +### Step 5: Refactor `_search_thread_boosted()` to the shared helper + +Replace the inline neighbor walk with: + +1. top-3 semantic seed entries +2. map those entries to KG node ids via `entry_kg_nodes` +3. call `_enrich_with_graph_scores()` + +Keep the rest of the thread-path ranking flow intact. The goal is: + +- one graph-scoring implementation +- continuous PPR scores instead of binary `1.0` / `0.0` + +--- + +## Tests + +Create `tests/unit/surface/test_two_pass_retrieval.py`. + +Required tests: + +1. `test_match_entities_by_embedding_returns_semantically_relevant` +2. `test_match_entities_falls_back_to_substring` +3. `test_personalized_pagerank_seed_nodes_highest` +4. `test_search_vector_populates_graph_proximity` +5. `test_search_thread_boosted_uses_shared_graph_enrichment` + +Strongly recommended sixth test: + +6. 
`test_search_vector_emits_score_breakdown_parity` + +What to verify: + +- embedding path sorts by cosine similarity +- substring fallback still works when embeddings are unavailable +- PPR favors seed / better-connected nodes over distant nodes +- non-thread results now carry non-zero `_graph_proximity` +- thread path no longer relies on the old inline 1-hop block + +--- + +## Acceptance Gates + +1. `_composite_key()` no longer hardcodes graph proximity to `0.0` +2. Standard retrieval computes real graph proximity scores +3. Entity matching from the query runs in parallel with vector search +4. Thread retrieval uses the shared graph-scoring helper +5. Thread retrieval upgrades from binary neighbor scores to continuous PPR scores +6. Standard-path results carry non-zero `_graph_proximity` when graph context exists +7. If `_score_breakdown` parity is emitted, the `graph_proximity` term is populated there too +8. Graceful degradation: if KG adapter is unavailable, graph scoring falls back to `0.0` +9. No new events, no new projection state, no Qdrant schema changes +10. Typical graph scoring stays comfortably below the existing retrieval budget + +--- + +## Validation + +Run before declaring done: + +```bash +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +``` + +The import lint matters here: + +- `knowledge_graph.py` is Adapters layer +- `knowledge_catalog.py` is Surface layer +- do not create Adapters -> Surface imports + +--- + +## Merge Order + +Team B should merge first among the 67.5 tracks because `_search_thread_boosted()` +is a shared retrieval seam. + +Teams A and C are otherwise independent and can merge after this track. + +--- + +## Track Summary Template + +When done, report: + +1. Which seed path worked: embedding, substring fallback, or both? +2. How you bounded the entity-matching cost +3. Whether non-thread `_score_breakdown` parity was added +4. Whether thread retrieval now uses the shared helper end-to-end +5. 
Any measured timing notes from tests or local instrumentation +6. Any low-risk audit fixes found inside the owned files diff --git a/docs/waves/wave_67/team_c_docs_index.md b/docs/waves/wave_67/team_c_docs_index.md new file mode 100644 index 0000000..1bd0fa4 --- /dev/null +++ b/docs/waves/wave_67/team_c_docs_index.md @@ -0,0 +1,309 @@ +# Wave 67.5 - Team C: Documentation Indexer Addon + +**Wave:** 67.5 (surfaces) +**Track:** 6 - Documentation Indexer Addon +**Prerequisite:** Wave 66 addon infrastructure is landed; Wave 67.0 is landed + +--- + +## Mission + +Operators already have semantic code search, but there is still no parallel +semantic path for project documentation. This track adds a new addon that: + +- indexes `.md`, `.rst`, `.txt`, and `.html` +- exposes `semantic_search_docs` and `reindex_docs` +- publishes a knowledge-tab status panel +- stores results in a separate `docs_index` collection + +Your job: build the addon as a new vertical slice by following the existing +`codebase-index` addon pattern as closely as possible. 
+ +Structural invariant: + +- documentation chunks belong in the addon-owned `docs_index` collection +- they do **not** get written into `memory_entries` +- distilled institutional knowledge may later be extracted from docs, but raw + corpus chunks remain outside the memory-confidence pipeline + +--- + +## Owned Files + +| File | Change | +|------|--------| +| `addons/docs-index/addon.yaml` | New manifest | +| `src/formicos/addons/docs_index/__init__.py` | New package marker | +| `src/formicos/addons/docs_index/indexer.py` | New chunker and reindex functions | +| `src/formicos/addons/docs_index/search.py` | New Queen tool handlers | +| `src/formicos/addons/docs_index/status.py` | New status-card endpoint | +| `tests/unit/addons/test_docs_index.py` | New addon tests | + +--- + +## Do Not Touch + +- `src/formicos/surface/addon_loader.py` - Wave 66 infrastructure already landed +- `src/formicos/surface/app.py` - addon route/panel mounting already landed +- `addons/codebase-index/` and `src/formicos/addons/codebase_index/` - use as reference only +- Any frontend files - panel rendering already exists +- `knowledge_catalog.py`, `projections.py`, `core/`, `engine/` - out of scope + +--- + +## Repo Truth You Must Read First + +Study these shipped references before writing code: + +- `addons/codebase-index/addon.yaml` +- `src/formicos/addons/codebase_index/indexer.py` +- `src/formicos/addons/codebase_index/search.py` +- `src/formicos/addons/codebase_index/status.py` +- `tests/unit/addons/test_codebase_index.py` + +Important repo-truth constraints: + +- addon loader, panel registration, and addon routes are already live +- you should not need loader/runtime changes for this track +- status panels currently work with simple `status_card` payloads +- there is no existing replayed "last indexed at" field for addon-local state +- this addon is a corpus index, not a new institutional-memory ingestion path + +So: + +- do not invent a new event or projection just to track last 
indexed time +- a truthful status card with collection counts and collection name is enough +- do not route raw doc chunks through `memory_entries` + +--- + +## Implementation Steps + +### Step 1: Add the manifest + +Create `addons/docs-index/addon.yaml`. + +Follow the codebase-index structure closely: + +```yaml +name: docs-index +version: "1.0.0" +description: "Semantic search over project documentation" +author: "formicos-core" + +tools: + - name: semantic_search_docs + description: "Search documentation by meaning" + handler: search.py::handle_semantic_search + parameters: + type: object + properties: + query: + type: string + top_k: + type: integer + file_pattern: + type: string + + - name: reindex_docs + description: "Rebuild or incrementally update the documentation index" + handler: search.py::handle_reindex + parameters: + type: object + properties: + changed_files: + type: array + items: + type: string + +config: + - key: doc_extensions + type: string + default: ".md,.rst,.txt,.html" + label: "File extensions to index" + - key: skip_dirs + type: string + default: "__pycache__,.git,node_modules,.venv,venv" + label: "Directories to skip" + +panels: + - target: knowledge + display_type: status_card + path: /status + handler: status.py::get_status + +routes: + - path: /status + handler: status.py::get_status + +triggers: + - type: manual + handler: indexer.py::incremental_reindex +``` + +Keep the first version manual-only. Do not add a cron trigger unless you find +an existing addon pattern that requires it for correctness. + +### Step 2: Build `indexer.py` + +Create `src/formicos/addons/docs_index/indexer.py`. 
+ +Required constants: + +```python +COLLECTION_NAME = "docs_index" +DOC_EXTENSIONS = frozenset({".md", ".rst", ".txt", ".html"}) +DEFAULT_SKIP_DIRS = frozenset({...}) +``` + +Define a chunk dataclass: + +```python +@dataclass +class DocChunk: + id: str + text: str + path: str + section: str + line_start: int + line_end: int +``` + +Implement: + +- `chunk_document(content, file_path, *, suffix)` +- `_chunks_to_docs(chunks)` +- `full_reindex(workspace_path, vector_port, *, doc_extensions=None, skip_dirs=None)` +- `incremental_reindex(workspace_path, vector_port, *, changed_files=None, doc_extensions=None, skip_dirs=None)` + +Chunking guidance: + +- Markdown: split on `#`, `##`, `###` headings +- RST: split on heading underline patterns +- TXT: split on blank-line-delimited sections / paragraphs +- HTML: split on `
<h1>` / `<h2>` / `<h3>` tags with a light regex heuristic
`test_handle_semantic_search_queries_docs_index` +4. `test_handle_reindex_indexes_docs_from_workspace` + +Strongly recommended fifth test: + +5. `test_status_endpoint_returns_status_card` + +Use `tests/unit/addons/test_codebase_index.py` as the reference pattern. + +--- + +## Acceptance Gates + +1. `addons/docs-index/addon.yaml` loads without manifest errors +2. `semantic_search_docs` searches the `docs_index` collection +3. `reindex_docs` performs full or incremental reindex through the addon tool handler +4. Manual trigger points at `indexer.py::incremental_reindex` +5. Knowledge tab can render the docs-index status panel through the existing addon panel system +6. Doc chunks preserve section context in metadata +7. `docs_index` remains separate from `code_index` +8. No loader/runtime/frontend changes are required for the happy path + +--- + +## Validation + +Run before declaring done: + +```bash +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +``` + +--- + +## Merge Order + +This track is independent of Teams A and B and can merge at any time after the +Wave 67.5 prompts are dispatched. + +--- + +## Track Summary Template + +When done, report: + +1. Which document formats were chunked in v1 +2. Whether the addon loaded without any loader/runtime changes +3. What metadata is stored on each `DocChunk` +4. Whether the status card stayed truthful without inventing new state +5. Any small audit fixes found inside the owned files diff --git a/docs/waves/wave_67/wave_67_plan.md b/docs/waves/wave_67/wave_67_plan.md new file mode 100644 index 0000000..cd06e55 --- /dev/null +++ b/docs/waves/wave_67/wave_67_plan.md @@ -0,0 +1,1413 @@ +# Wave 67: The Knowledge Architecture + +**Status:** 67.0 landed, 67.5 dispatch prep +**Predecessor:** Wave 66 (Addons as First-Class Software) +**Theme:** Give knowledge structure, integrity, and auditability. + +The thesis: hierarchy is the organizing principle that makes everything else +work. 
Domains become hierarchy nodes. Provenance becomes navigable through +the tree. The doc indexer's output slots into the hierarchy naturally. Ship +them together and they reinforce each other. + +## Contract Change Blocker (Wave 67.5) + +Track 5 requires operator approval to add `ProvenanceChainItem` to: + +- `docs/contracts/types.ts` +- `frontend/src/types.ts` + +Proposed interface: + +```typescript +interface ProvenanceChainItem { + event_type: string; + timestamp: string; + actor_id: string; + detail: string; + confidence_delta: number | null; +} +``` + +Use snake_case to match the existing knowledge API payload style. If Track 5 +adds a dedicated provenance endpoint response interface, mirror that in the +contract docs at the same time. + +Approve before 67.5 dispatch. Wave 67.0 is already landed and unaffected. + +--- + +## Scope Split: Wave 67.0 + Wave 67.5 + +Combined scope is too large for a single dispatch. Split at the natural seam: + +- **Wave 67.0** (foundation): Hierarchy data model, domain normalization, + outcome-confidence reinforcement. Changes the data model and feedback + loops. Three parallel teams. +- **Wave 67.5** (surfaces): Two-pass retrieval with graph proximity, provenance + chains, documentation indexer addon, UI surfaces. Builds on the 67.0 + data model. Three parallel teams. + +67.0 has landed and passed a polish pass. 67.5 is the active dispatch prep. +Both halves respect +the 69-event closed union -- all changes are projection-level enrichments. + +## Dependency Decision: Zero New Dependencies (Resolved) + +**UMAP+HDBSCAN rejected.** Entries already carry domain tags — there is +no structure discovery problem. What's needed is sub-clustering within +existing domains into topics, and an LLM does that better than HDBSCAN +because it produces human-readable topic names directly (vs "cluster 7" +that still needs LLM labeling). + +**Going forward (Track 2):** Extraction-time domain suggestion is the +real solution. 
Every new entry gets "use one of these existing domains +if applicable" in the prompt. Hierarchy converges organically. + +**Existing entries (bootstrap script):** Batch by existing domain tag +(~20 entries per batch), ask the LLM to identify 2-5 topic sub-clusters +within each domain, assign hierarchy paths. For 300 entries across 15 +domains, that's ~15 LLM calls — trivial cost, runs once. The script is +offline (`scripts/bootstrap_hierarchy.py`, not imported by runtime). + +No new dependencies. No approval gate. + +## ADR Requirements + +### ADR-049: Knowledge Hierarchy Data Model (Required) + +New projection-level fields on `memory_entries`: `hierarchy_path` and +`parent_id`. Qdrant payload gains a keyword-indexed `hierarchy_path` +field for filtered search within branches. Upward confidence aggregation +derives topic posteriors from children's evidence. + +**Why this needs an ADR:** Changes the knowledge data model, adds a new +Qdrant payload index, introduces aggregated confidence (new concept). +Affects retrieval, extraction, and UI. + +**Key decisions for the ADR:** +- Hierarchy lives on knowledge entry projections, NOT on the KG entity + model. KG tracks code-level entities (MODULE, TOOL, PERSON). Hierarchy + tracks knowledge organization (domain -> topic -> entry). Different + taxonomies. Keep them separate. +- Path format: `/domain/topic/` (no entry-level path segment; entries are + leaves). Example: `/engineering/auth/jwt-validation/`. +- `parent_id` points to a synthetic topic entry (or empty for root-level). +- No new event types. `hierarchy_path` is computed at projection time from + existing `MemoryEntryCreated` event data + extraction-time domain tags. + +### ADR-050: Two-Pass Retrieval (Required) + +Changes the retrieval algorithm: entity extraction from query via +embedding similarity, iterative Personalized PageRank (replacing BFS), +and shared graph scoring method across both retrieval paths. 
Currently +graph proximity is active only in `_search_thread_boosted()` (using +top-3 seed items from KG neighbor lookup). Extending to the standard +`_search_vector()` path with query-based entity extraction and PPR +is a meaningful algorithm change. ADR-050 proposed. + +## Algorithmic Design Notes (Research-Informed) + +These decisions are grounded in published research and production system +analysis. See the research reference document for full citations. + +**1. Materialized path over closure table (Track 1).** Benchmark data: +250x slower writes for closure table at 5,912 nodes. FormicOS's hierarchy +is shallow (3-4 levels), append-heavy, and rarely reparented. Materialized +path is the clear winner. Reparenting (if needed) is a single UPDATE with +string REPLACE. + +**2. Personalized PageRank over BFS (Track 4).** HippoRAG (NeurIPS 2024) +ablation: 1-hop BFS is *worse than no expansion* (R@5: 56.2 vs 59.2 +baseline), while PPR reaches 72.9. The difference: PPR weights neighbors +by graph topology instead of treating all 1-hop neighbors equally. For +FormicOS's graph sizes (<50K edges), iterative PPR in pure Python with +damping=0.5 converges in <20ms. No igraph dependency needed. + +**3. Geometric credit (0.7^rank) over harmonic (1/(rank+1)) (Track 3).** +The Position-Based Model (PBM) from production recommendation systems +models examination probability as a geometric decay: [1.0, 0.7, 0.49, +0.34, 0.24...]. This better captures declining attention patterns than +harmonic decay [1.0, 0.5, 0.33, 0.25...]. Validated by Udemy and Scribd +production deployments. + +**4. ESS cap at 150 (Track 3).** Mathematically equivalent to exponential +decay with gamma = 1 - 1/150 ≈ 0.993. Russo et al.'s TS tutorial +recommends N_eff ≈ 200 for nonstationary environments. 150 balances +stability with responsiveness — 100 would be too aggressive for entries +with genuine high evidence. + +**5. 
Topic nodes as synthetic projection entries (Track 1).** Derived +from children's hierarchy paths, not event-sourced. They exist in the +projection dict and in Qdrant (with LLM-generated topic summaries) but +don't require MemoryEntryCreated events. Replay-safe: projection rebuild +re-derives them from child entries. + +**6. Wave 68 note (not in scope).** Manus's todo.md pattern wastes ~33% +of actions on plan file updates. For the Queen's plan recitation: READ +the plan at context assembly time (cheap), only WRITE when +`propose_plan` creates/modifies a plan. Read-heavy, write-light. + +--- + +# WAVE 67.0: Foundation + +## Pre-existing State + +### Knowledge entry projection (projections.py) + +`ProjectionStore.memory_entries` (line 694) is a `dict[str, dict[str, Any]]` +keyed by entry ID. Each entry is a mutable dict populated by event handlers: + +**Core fields (from MemoryEntryCreated handler, lines 1585-1603):** +- `id`, `title`, `content`, `summary`, `status`, `domains` (list), + `tool_refs` (list), `source_colony_id`, `thread_id`, `scope`, + `created_at`, `entry_type`, `polarity`, `sub_type`, `decay_class` + +**Confidence fields (from MemoryConfidenceUpdated handler, lines 1661-1691):** +- `conf_alpha`, `conf_beta`, `confidence` (posterior mean), + `last_confidence_update`, `peak_alpha` (highest alpha ever, line 1677) + +**Missing fields (not yet present):** +- `hierarchy_path` -- does not exist +- `parent_id` -- does not exist +- `provenance_chain` -- does not exist (provenance is only on frontend + `KnowledgeProvenance` interface, not in projection) + +### MemoryEntry core model (core/types.py:383-446) + +Pydantic model with 20+ fields. 
Key fields for Wave 67: +- `domains: list[str]` (line 405) -- domain tags, currently free-form +- `conf_alpha` (line 415, default 5.0), `conf_beta` (line 420, default 5.0) +- `decay_class` (line 425) -- ephemeral | stable | permanent +- `sub_type` (line 432) -- technique | pattern | anti_pattern | trajectory | + decision | convention | learning | bug +- No `hierarchy_path` or `parent_id` fields on the core model + +### Domain normalization (memory_extractor.py) + +`_normalize_domain()` (line 31-33): lowercase, spaces/hyphens to underscores. +`_normalize_domains()` (line 36-45): applies to lists, deduplicates. + +`build_extraction_prompt()` (line 88-94): Three paths based on existing +entries and colony status. Domain handling is minimal -- line 214-215 sets +`primary_domain` to `task_class` parameter (defaults to "generic"). **No +semantic domain suggestion from existing entries.** The LLM chooses domains +independently. + +### Colony outcome confidence path (colony_manager.py) + +`_hook_confidence_update()` (line 1476): Already implemented. For each +knowledge item accessed by a completed colony: +- Success: `delta_alpha = clip(0.5 + quality_score, 0.5, 1.5)` (line 1542) +- Failure: `delta_beta = clip(0.5 + failure_penalty, 0.5, 1.5)` (line 1562) +- Mastery restoration: 20% gap-recovery bonus for stable/permanent entries + with >50% decay (lines 1547-1556) +- Emits `MemoryConfidenceUpdated` with `reason="colony_outcome"` (line 1573) +- Auto-promotes candidate -> verified when alpha >= threshold (line 1592) +- Reinforces co-occurrence weights between accessed entries (line 1618) + +**Gap identified:** The outcome confidence update does NOT use retrieval +rank for credit assignment. All accessed entries get the same delta +regardless of whether they were the #1 result or #10. The prompt says +an earlier version mentioned credit = 1/(rank+1) -- now superseded by +geometric 0.7^rank (Track 3). Either way, rank-based credit is new work. 
+ +### Quality score computation (colony_manager.py:284-320) + +`compute_quality_score()`: geometric mean of 5 weighted signals: +- `round_efficiency` (0.20), `convergence_score` (0.25), + `governance_score` (0.20), `stall_score` (0.15), + `productive_ratio` (0.20) +- Returns 0.0 on failure, range [0.20, 1.0] on success + +### ColonyOutcome projection (projections.py:88-118) + +`entries_accessed` computed from `colony.knowledge_accesses` (lines 1060-1065). +Each access records the items retrieved. `KnowledgeAccessRecorded` event +handler appends to `colony.knowledge_accesses` (line 1566). + +**The access records include retrieval order** -- items within each access +dict preserve their ranked position. This means rank-based credit +assignment is possible without new events. + +### Composite scoring (knowledge_catalog.py) + +Seven signals, weights from `knowledge_constants.py:33-41`: +``` +semantic: 0.38, thompson: 0.25, freshness: 0.10, status: 0.10, +thread: 0.07, cooccurrence: 0.04, graph_proximity: 0.06 +``` + +`_composite_key()` (line 263-304): Used by `_search_vector()` non-thread +path. **graph_proximity hardcoded to 0.0** at line 301 with comment: +"Wave 59.5: graph_proximity only has real values in _search_thread_boosted; +here it's always 0.0 to keep the weight dict consistent across both paths." + +`_search_thread_boosted()` (lines 472-710): graph proximity is ACTIVE. +Seeds from top-3 items by semantic score (lines 543-545). For each seed: +looks up KG node ID via `entry_kg_nodes` projection, calls +`kg_adapter.get_neighbors()`, reverse-maps neighbor node IDs to entry IDs, +sets `graph_scores[eid] = 1.0` (lines 540-585). + +### Knowledge graph adapter (adapters/knowledge_graph.py) + +Entity table: `kg_nodes` (id, name, entity_type, summary, source_colony, +workspace_id, created_at). Indexes on name, type, workspace_id. 
+ +Edge table: `kg_edges` (id, from_node, to_node, predicate, confidence, +valid_at, invalid_at, source_colony, source_round, workspace_id). +Bi-temporal validity. Predicates include `DERIVED_FROM`. + +`get_neighbors()` (lines 345-404): 1-hop only. `depth` parameter exists +but is **ignored**. Simple JOIN query, no recursive CTE. Returns edge +dicts with `from_node`/`to_node` fields. + +`search_entities()` (lines 443-466): Substring match on entity names +within a workspace. Returns list of `{id, name, entity_type, summary}`. + +**No BFS traversal capability.** No recursive CTE. Multi-hop discovery +would need to be added. + +### Scoring math (engine/scoring_math.py) + +`exploration_score()` (lines 32-75): Thompson Sampling via +`random.betavariate(alpha, beta)` (stochastic) or `alpha/(alpha+beta)` +(deterministic when `FORMICOS_DETERMINISTIC_SCORING=1`). Optional UCB +bonus. + +**No effective sample size capping.** Alpha+beta can grow unbounded. +The prompt mentions capping at ~100 -- this is new work. + +### Frontend knowledge interfaces (frontend/src/types.ts) + +`KnowledgeItemPreview` (lines 430-453): 20+ fields including `domains`, +`conf_alpha`, `conf_beta`, `score`, `score_breakdown`, `decay_class`, +`usage_count`, `thread_id`, `scope`. + +`KnowledgeItemDetail` (lines 488-492): Extends preview with `content`, +`provenance` (`KnowledgeProvenance`), `trust_rationale` (`TrustRationale`). + +`KnowledgeProvenance` (lines 468-479): source_colony_id, source_round, +source_agent, source_peer, is_federated, created_at, workspace_id, +thread_id, decay_class, forager_provenance. + +### Frontend knowledge browser (knowledge-browser.ts) + +Score bar (lines 884-902): Renders 7-signal stacked bar with color-coded +segments. Segment width proportional to weighted contribution. + +Entry detail (lines 1117-1127): Confidence bar, confidence summary, +trust panel, power panel. 
Score breakdown bar exists but is rendered +only on hover/expand of search results at standard/full tier. + +**No tree view.** Catalog is a flat list with filters (skill/experience, +domain dropdown, search). No hierarchy navigation. + +### Codebase-index addon (addons/codebase-index/) + +Chunking infrastructure in `indexer.py`: structural splitting on +function/class boundaries, sliding-window fallback. `CodeChunk` dataclass +with id (sha256), text, path, line_start, line_end. Converts to +`VectorDocument` for Qdrant. Collection name: `"code_index"`. + +Runtime context keys: `vector_port`, `workspace_root_fn`, `projections`, +`embed_fn`, `event_store`, `settings`. + +**No `addons/docs-index/` directory exists yet.** + +--- + +## Track 1: Knowledge Hierarchy with Materialized Paths + +### Problem + +Knowledge entries are flat. 300 entries across 15 domains have no +organization beyond free-form domain tags. The knowledge browser shows a +single scrollable list. There's no way to see "I have 40 entries about +auth, 12 about testing, 3 are contradicting" at a glance. Domain tags +drift -- "python_testing" vs "python_test_patterns" vs "testing_python" +all mean the same thing. + +### Fix + +**1. Add `hierarchy_path` and `parent_id` to entry projection.** + +**Storage model: materialized path.** For a shallow (3-4 level), append-heavy +hierarchy with 5K-50K entries, materialized path beats closure table +decisively. Benchmarks show closure table has 250x slower writes (O(depth) +rows per insert into junction table). Materialized path needs 1 table, no +joins for subtree queries, and trivial reparenting via +`UPDATE ... SET path = REPLACE(path, old_prefix, new_prefix)`. + +**Topic nodes are synthetic projection entries.** Topic nodes (e.g., +`/engineering/auth/`) are real entries in the `memory_entries` projection +dict with a synthetic `entry_type="topic"`. They don't require +`MemoryEntryCreated` events. 
They're derived from the hierarchy paths of +their children — on replay, the projection rebuild can re-derive them. +They exist in Qdrant with LLM-generated topic summaries as embeddings, +enabling filtered search within branches. + +In `projections.py`, `_on_memory_entry_created()` handler (lines 1585-1603): + +```python +# After line 1602 (data["scope"] = ...) +# Wave 67: hierarchy path from primary domain +domains = data.get("domains", []) +primary_domain = domains[0] if domains else "uncategorized" +data["hierarchy_path"] = f"/{_normalize_domain(primary_domain)}/" +data["parent_id"] = "" +``` + +Import `_normalize_domain` from `memory_extractor` (or inline the same +3-line logic to avoid cross-layer import -- prefer inline since projections +is Surface importing from Surface, which is allowed). + +The hierarchy path is initially flat: `/{domain}/`. Topic-level nesting +(`/{domain}/{topic}/`) comes from the extraction-time clustering or +operator-driven reclassification. Start simple, deepen later. + +**2. Add Qdrant payload field.** + +In `memory_store.py`, `sync_entry()`: when upserting to Qdrant, include +`hierarchy_path` in the payload metadata dict. Qdrant payload fields are +automatically indexed as keyword fields when present. + +Check `memory_store.py` for where payload is assembled -- the +`VectorDocument.metadata` dict. Add `"hierarchy_path": entry.get("hierarchy_path", "/")` +alongside existing metadata fields. + +**3. Projection-level upward confidence aggregation.** + +New function in `projections.py` (or a new `hierarchy.py` in surface/): + +```python +def compute_branch_confidence( + store: ProjectionStore, + path_prefix: str, +) -> dict[str, float]: + """Aggregate Beta confidence for entries under a hierarchy branch. + + Returns {"alpha": float, "beta": float, "count": int, "mean": float}. + Sum children's evidence, cap effective sample size at 150. 
+ """ + total_alpha = 0.0 + total_beta = 0.0 + count = 0 + for entry in store.memory_entries.values(): + hp = entry.get("hierarchy_path", "/") + if hp.startswith(path_prefix): + total_alpha += entry.get("conf_alpha", 5.0) - 5.0 # subtract prior + total_beta += entry.get("conf_beta", 5.0) - 5.0 + count += 1 + # Re-add single prior and cap + agg_alpha = 5.0 + total_alpha + agg_beta = 5.0 + total_beta + ess = agg_alpha + agg_beta + if ess > 150: + scale = 150.0 / ess + agg_alpha *= scale + agg_beta *= scale + mean = agg_alpha / (agg_alpha + agg_beta) if (agg_alpha + agg_beta) > 0 else 0.5 + return {"alpha": agg_alpha, "beta": agg_beta, "count": count, "mean": mean} +``` + +Called on-demand by the API, not on every event. No new projection state +needed -- it's a pure computation over existing data. + +**4. REST endpoint for hierarchy tree.** + +In `routes/api.py`, add: + +``` +GET /api/v1/workspaces/{id}/knowledge-tree +``` + +Returns a tree structure built from `memory_entries` hierarchy paths: + +```json +{ + "branches": [ + { + "path": "/engineering/", + "label": "engineering", + "entryCount": 42, + "confidence": {"alpha": 28.3, "beta": 12.1, "mean": 0.70}, + "children": [ + { + "path": "/engineering/auth/", + "label": "auth", + "entryCount": 12, + "confidence": {"alpha": 15.2, "beta": 4.8, "mean": 0.76}, + "children": [] + } + ] + } + ] +} +``` + +**5. Knowledge browser tree view.** + +Add a `SubView` option: `'catalog' | 'graph' | 'tree'`. New +`_renderTreeView()` method that fetches from the knowledge-tree endpoint +and renders a collapsible tree. Each branch shows: name, entry count, +aggregated confidence bar. Clicking a branch filters the catalog to that +path prefix. + +**6. Bootstrap script (offline, not imported by runtime).** + +`scripts/bootstrap_hierarchy.py`: LLM-only, zero new dependencies. 
+Groups existing entries by domain tag (~20 entries per batch), asks +the LLM to identify 2-5 topic sub-clusters within each domain, and +assigns `hierarchy_path` values. For 300 entries across 15 domains, +that's ~15 LLM calls — trivial cost, runs once. + +The script reads entries from the projection (via a REST endpoint or +direct SQLite read), computes topic assignments, and emits +`WorkspaceConfigChanged` events to persist the hierarchy path updates. +It is a one-time bootstrap tool. Going forward, extraction-time domain +suggestion (Track 2) keeps new entries aligned organically. + +### Files + +| File | Change | Lines | +|------|--------|-------| +| `src/formicos/surface/projections.py` | hierarchy_path/parent_id in MemoryEntryCreated handler | ~8 | +| `src/formicos/surface/memory_store.py` | Add hierarchy_path to Qdrant payload metadata | ~3 | +| `src/formicos/surface/hierarchy.py` | **new** -- branch confidence aggregation | ~40 | +| `src/formicos/surface/routes/api.py` | knowledge-tree endpoint | ~45 | +| `frontend/src/components/knowledge-browser.ts` | Tree subview, branch rendering, path filter | ~120 | +| `scripts/bootstrap_hierarchy.py` | **new** -- offline clustering script | ~100 | + +### Tests + +5 new: +- MemoryEntryCreated projection handler sets hierarchy_path from domains +- Qdrant payload includes hierarchy_path +- Branch confidence aggregation caps effective sample size at 150 +- GET knowledge-tree returns valid tree structure +- Tree view filters catalog when branch clicked + +### Acceptance Gates + +- Entry projections include `hierarchy_path` derived from primary domain +- Knowledge browser shows tree/catalog/graph toggle +- Tree view shows collapsible domain branches with entry counts +- Branch confidence aggregates from children, capped at ESS 150 +- Hierarchy paths survive replay (derived from existing event data) +- Qdrant filtered search by hierarchy_path works + +### Owner + +Team A. 
Merge first among tracks (other teams may depend on hierarchy_path). + +### Do Not Touch + +`core/types.py` (no new MemoryEntry fields -- hierarchy is projection-only), +`core/events.py` (no new events), `queen_runtime.py`, `queen_tools.py`, +`knowledge_catalog.py` (Team B owns retrieval), `colony_manager.py` +(Team B owns outcome path). + +--- + +## Track 2: Domain Normalization at Extraction Time + +### Problem + +Domain tags drift. The same concept gets multiple names: +"python_testing", "python_test_patterns", "testing_python", +"test_patterns". `_normalize_domain()` handles case/whitespace but not +semantic equivalence. With hierarchy in place, this creates orphan +branches that should be the same node. + +### Fix + +**1. Inject existing domain suggestions into extraction prompt.** + +In `memory_extractor.py`, `build_extraction_prompt()` (after line 94): + +Before building the prompt, query existing entries for the top-5 most +similar by embedding, pull their unique domain tags, and inject them +into the prompt: + +```python +# Wave 67: domain normalization via existing entry suggestion +existing_domains: set[str] = set() +if existing_entries: + for e in existing_entries[:10]: + for d in e.get("domains", []): + existing_domains.add(d) +# Add to prompt (after task context, before field schemas): +if existing_domains: + domain_hint = ( + "Use one of these existing domain tags if applicable " + "(do not create synonyms): " + + ", ".join(sorted(existing_domains)[:20]) + ) +``` + +This is ~15 lines in `build_extraction_prompt()`. The existing entries +are already passed to the function (parameter `existing_entries`); we +just need to extract and present their domains. + +**2. 
Add hierarchy path suggestion.** + +When Track 1 lands, extend the domain hint to include hierarchy paths: + +```python +if existing_domains: + domain_hint = ( + "Existing knowledge branches (use one if applicable, " + "do not create synonyms):\n" + + "\n".join(f" - {d}" for d in sorted(existing_domains)[:20]) + ) +``` + +This naturally aligns new entries with the existing hierarchy. + +### Files + +| File | Change | Lines | +|------|--------|-------| +| `src/formicos/surface/memory_extractor.py` | Domain hint injection in build_extraction_prompt | ~15 | + +### Tests + +2 new: +- Extraction prompt includes existing domains when entries provided +- Domain hint limits to 20 domains max + +### Acceptance Gates + +- Extraction prompt shows "Use one of these existing domain tags" when + existing entries have domains +- New extractions converge on existing domain names instead of creating + synonyms +- No regression in extraction quality (domains still free-form if no + existing entries match) + +### Owner + +Team B. Independent of Team A (domain hint doesn't require hierarchy_path). +But when Team A's hierarchy_path is present, domain normalization +prevents orphan branches from forming. + +### Do Not Touch + +`core/types.py`, `core/events.py`, `projections.py` (Team A owns), +`knowledge_catalog.py` (retrieval is Team B Wave 67.5 scope). + +--- + +## Track 3: Outcome-Confidence Reinforcement with Rank Credit + +### Problem + +`_hook_confidence_update()` (colony_manager.py:1476) gives equal +credit to all accessed entries regardless of retrieval rank. The #1 +result and the #10 result get the same alpha/beta delta. This dilutes +the reinforcement signal -- entries that were actually relevant (high +rank) should get stronger updates than entries that happened to be in +the result set but weren't central to the colony's work. + +Additionally, there is no effective sample size cap. 
Alpha+beta can grow +unbounded, making entries increasingly resistant to confidence updates +over time. + +### Fix + +**1. Rank-based credit assignment.** + +In `colony_manager.py`, `_hook_confidence_update()`: The access +records in `colony.knowledge_accesses` preserve item order. Use position +for credit: + +```python +# Current (line 1542): +delta_alpha = min(max(0.5 + quality_score, 0.5), 1.5) + +# New: geometric credit = 0.7^rank (Position-Based Model examination probs) +# Yields [1.0, 0.7, 0.49, 0.34, 0.24, ...] — models declining attention +# better than harmonic 1/(rank+1) per HippoRAG/Udemy production findings. +for rank, item in enumerate(access.get("items", [])): + credit = 0.7 ** rank + if succeeded: + base_delta = min(max(0.5 + quality_score, 0.5), 1.5) + delta_alpha = base_delta * credit + else: + base_delta = min(max(0.5 + failure_penalty, 0.5), 1.5) + delta_beta = base_delta * credit +``` + +**2. Effective sample size cap.** + +After computing `new_alpha` and `new_beta`, cap the effective sample +size at 150. This is mathematically equivalent to exponential decay +with gamma = 1 - 1/150 ≈ 0.993. Cap of 150 (not 100) lets +high-evidence entries stabilize without becoming immovable — 100 is +too aggressive per production Thompson Sampling literature (Russo et al. +recommend N_eff ≈ 200 for nonstationary environments; 150 balances +stability with responsiveness for FormicOS's update frequency). + +```python +ess = new_alpha + new_beta +if ess > 150.0: + scale = 150.0 / ess + new_alpha *= scale + new_beta *= scale +``` + +This preserves the mean (alpha/alpha+beta ratio) while preventing +posterior collapse. Add this to `_hook_confidence_update()` right +before emitting the `MemoryConfidenceUpdated` event. + +**3. Add `rescale_preserving_mean()` helper to scoring_math.py.** + +```python +def rescale_preserving_mean( + alpha: float, beta: float, max_ess: float = 150.0, +) -> tuple[float, float]: + """Rescale Beta parameters to cap effective sample size. 
+ + Equivalent to exponential decay with gamma = 1 - 1/max_ess. + Default 150 balances stability with responsiveness. + """ + ess = alpha + beta + if ess <= max_ess: + return alpha, beta + scale = max_ess / ess + return alpha * scale, beta * scale +``` + +This is Engine layer -- pure computation, no Surface imports. + +### Files + +| File | Change | Lines | +|------|--------|-------| +| `src/formicos/surface/colony_manager.py` | Rank credit + ESS cap in _hook_confidence_update | ~25 | +| `src/formicos/engine/scoring_math.py` | rescale_preserving_mean helper | ~12 | + +### Tests + +4 new: +- Rank 0 entry gets higher delta than rank 5 entry +- ESS cap rescales preserving mean ratio +- Auto-promotion still works after ESS cap +- rescale_preserving_mean returns unchanged when under cap + +### Acceptance Gates + +- Top-ranked retrieved entries get stronger confidence reinforcement +- Alpha+beta never exceeds 150 after outcome update +- Mean confidence ratio preserved after rescaling +- Mastery restoration still works correctly with capped entries +- Co-occurrence reinforcement unchanged + +### Owner + +Team B. Independent of Team A. Merge after Team A if hierarchy_path +is used in access records (it isn't -- independent). + +### Do Not Touch + +`core/events.py`, `core/types.py`, `projections.py` (Team A owns +hierarchy additions), `knowledge_catalog.py` (retrieval changes are +Wave 67.5), `memory_extractor.py` (Team B Track 2 owns extraction). + +--- + +## Team Assignment (Wave 67.0) + +| Team | Tracks | Rationale | +|------|--------|-----------| +| Team A (Hierarchy) | Track 1 | Heaviest track. Projection changes, Qdrant payload, tree API, tree view UI, bootstrap script. | +| Team B (Feedback) | Track 2, Track 3 | Domain normalization + outcome reinforcement. Both modify the knowledge feedback loop. Separate files, no conflicts. | + +Team A has more work. 
If a Team C is available, split Track 1's UI +(tree view in knowledge-browser.ts) from Track 1's backend (projections, +API, Qdrant payload) and assign Team C the frontend. + +## Merge Order (Wave 67.0) + +``` +Track 1 (hierarchy data model) -- merge first + | + +---> Track 2 (domain normalization) -- benefits from hierarchy_path + | + +---> Track 3 (rank credit + ESS cap) -- independent +``` + +Track 2 and Track 3 are independent of each other. Both can merge in +either order after Track 1. Track 3 is fully independent of Track 1 +(touches different files), but merge order ensures hierarchy_path is +available if we want to log it in confidence update events. + +--- + +# WAVE 67.5: Surfaces + +**Prerequisite:** Wave 67.0 merged and stable. + +## Pre-existing State (after 67.0) + +Entries have `hierarchy_path` on projections. Domain normalization is +active at extraction time. Outcome confidence uses rank credit with ESS +cap. The knowledge browser has a tree view. + +## Track 4: Two-Pass Retrieval for Graph Proximity + +### Problem + +The `graph_proximity` signal (weight 0.06) is dead weight in the +standard retrieval path. `_composite_key()` (knowledge_catalog.py:301) +hardcodes it to 0.0 with an explicit comment. Only +`_search_thread_boosted()` computes real graph scores, using top-3 +result items as KG seeds (lines 540-585). This means 6% of the composite +score is always zero for non-thread queries. + +The thread-boosted path's seed strategy (top-3 by semantic score) works +because thread context narrows the result set. For the general path, we +need a different seed strategy: extract entity names from the query +itself. + +### Algorithm Decision: PPR over BFS + +**HippoRAG's own ablation (NeurIPS 2024) shows simple 1-hop BFS is +worse than no expansion at all** (R@5: 56.2 for BFS vs 59.2 baseline +vs 72.9 for Personalized PageRank). BFS treats all neighbors equally; +PPR weights by graph topology, propagating activation through +high-connectivity paths. 
+ +For FormicOS's graph sizes (hundreds to low thousands of edges), +iterative PPR in pure Python is fast enough (<50ms). No igraph +dependency needed. The key parameter: **damping = 0.5** (not the +standard 0.85), which keeps the random walk tightly localized around +seed nodes — exactly what focused retrieval needs. + +### Algorithm Decision: Entity Embedding Seeds over Substring Matching + +The plan originally proposed fuzzy substring matching of query terms +against KG entity names. This is too crude — entity names are often +abbreviated or context-dependent. Better approach: embed the query, +search KG entity summaries via existing Qdrant infrastructure (entity +summaries are already stored on `kg_nodes`). This gives semantic +matching with zero new dependencies and better precision than string +containment checks. + +### Fix + +**1. Add `match_entities_by_embedding()` to the KG adapter.** + +In `adapters/knowledge_graph.py`, add a method that finds entities +semantically similar to the query: + +```python +async def match_entities_by_embedding( + self, query: str, workspace_id: str, *, limit: int = 5, +) -> list[dict[str, Any]]: + """Find KG entities semantically similar to query. + + Primary: compute cosine similarity between query embedding and + entity name/summary embeddings via existing Qdrant infrastructure. + Fallback: normalized substring overlap on entity names if no + embedding function is available. + + Returns [{id, name, entity_type, score}, ...] sorted by score. + """ +``` + +Uses the existing `kg_nodes` table. Falls back to normalized substring +matching if no embedding function is available. The entity name index +(`idx_kg_nodes_name`) keeps this fast. + +**2. Add `personalized_pagerank()` to the KG adapter.** + +```python +async def personalized_pagerank( + self, seed_ids: list[str], workspace_id: str, + *, damping: float = 0.5, iterations: int = 20, +) -> dict[str, float]: + """Iterative Personalized PageRank from seed entities. 
+ + Parameters + ---------- + damping : float + Probability of following an edge (0.5 = 50% restart). + Lower than standard PageRank's 0.85 to keep the walk + tightly localized around seeds. + iterations : int + Power iteration rounds. 20 is sufficient for convergence + on graphs under 50K edges. + + Returns {entity_id: proximity_score} normalized to [0, 1]. + """ + # 1. Build adjacency list from get_neighbors() for all reachable + # nodes within 3 hops of seeds (bounded expansion). + # 2. Initialize reset vector: uniform over seed_ids. + # 3. Power iteration: + # pr[v] = (1-damping) * reset[v] + damping * sum(pr[u]/degree[u]) + # 4. Normalize: max score -> 1.0. +``` + +Pure Python, ~30 lines. Uses existing `get_neighbors()` iteratively +to build the local adjacency list. No igraph, no networkx, no new deps. +For FormicOS's graph sizes this converges in <20ms. + +**3. Add `_enrich_with_graph_scores()` shared method.** + +In `knowledge_catalog.py`, extract a shared method from the existing +inline code in `_search_thread_boosted()` (lines 540-585): + +```python +async def _enrich_with_graph_scores( + self, + seed_entity_ids: list[str], + workspace_id: str, +) -> dict[str, float]: + """PPR walk from seed entities, return {entry_id: proximity_score}. + + Runs Personalized PageRank (damping=0.5, 20 iterations) from seeds. + Maps KG entity IDs back to knowledge entry IDs via + self._projections.entry_kg_nodes reverse lookup. + """ +``` + +Both `_search_vector()` and `_search_thread_boosted()` call this method. +The thread-boosted path continues to use top-3 items as seeds (via +entry_kg_nodes lookup). The standard path uses +`match_entities_by_embedding()` on the query text. + +**4. 
Wire into `_search_vector()` non-thread path.** + +Replace the hardcoded 0.0 at line 301 with actual graph scores: + +```python +# Before: + W.get("graph_proximity", 0.0) * 0.0 +# After: +query_entity_ids = [e["id"] for e in await self._kg_adapter.match_entities_by_embedding( + query, workspace_id, limit=5, +)] +graph_scores = await self._enrich_with_graph_scores( + query_entity_ids, workspace_id, +) +# ... then in _composite_key: ++ W.get("graph_proximity", 0.0) * graph_scores.get(entry_id, 0.0) +``` + +The entity matching and PPR walk run in parallel with the Qdrant +vector search via `asyncio.gather`. + +**5. Refactor `_search_thread_boosted()` to use shared method.** + +Replace lines 540-585 (inline graph neighbor discovery) with a call +to `_enrich_with_graph_scores()`. Keep the same seed strategy (top-3 +by semantic score -> entry_kg_nodes lookup). This reduces duplication +and upgrades the thread-boosted path from 1-hop BFS to PPR for free. + +### Files + +| File | Change | Lines | +|------|--------|-------| +| `src/formicos/adapters/knowledge_graph.py` | match_entities_by_embedding + personalized_pagerank | ~60 | +| `src/formicos/surface/knowledge_catalog.py` | _enrich_with_graph_scores shared method, wire into both paths | ~60 | + +### Tests + +5 new: +- match_entities_by_embedding finds semantically relevant entities +- personalized_pagerank returns topology-weighted scores (seed nodes highest) +- _search_vector non-thread path populates graph_proximity scores +- Graph proximity affects final ranking in standard retrieval +- _search_thread_boosted still works after refactor (upgraded to PPR) + +### Acceptance Gates + +- Standard retrieval (non-thread) computes real graph proximity scores +- Entity matching from query runs in parallel with Qdrant search +- PPR scores reflect graph topology (high-connectivity nodes rank higher) +- Thread-boosted path continues to work unchanged (refactored + upgraded) +- No performance regression: entity match + PPR < 50ms 
total +- Score breakdown shows non-zero graph_proximity in standard results + +### Owner + +Team B (retrieval team). This is the continuation of Team B's 67.0 work. + +### Do Not Touch + +`core/types.py`, `core/events.py`, `projections.py`, `colony_manager.py`, +`memory_extractor.py`, any frontend files. + +--- + +## Track 5: Provenance Chain on Projections + +### Problem + +Entry provenance is incomplete. The existing `KnowledgeProvenance` +metadata already shows source colony/peer plus temporal fields, but it +does not expose the full lifecycle. When was an entry's confidence updated? +Who merged it? Was it refined? Which operator acted on it? The entry detail +view can't answer +"how did this entry get to this state?" + +### Fix + +**1. Add `provenance_chain` to projection entries.** + +In `projections.py`, extend the following event handlers to append to +a provenance chain list on each entry: + +```python +# In each handler, after updating the entry: +chain = entry.setdefault("provenance_chain", []) +chain.append({ + "event_type": "MemoryEntryCreated", # or whichever event + "timestamp": str(event.timestamp), + "actor_id": str(getattr(event, "source_colony_id", "")), + "detail": "...", # human-readable summary + "confidence_delta": None, # float delta on confidence updates only +}) +``` + +Events to instrument: +- `MemoryEntryCreated` (line 1585) -- "Created by colony {id}" +- `MemoryConfidenceUpdated` (line 1661) -- "Confidence {old} -> {new}, reason: {reason}" +- `MemoryEntryMerged` (line 1820, target entry) -- "Merged with {source_id}" +- `MemoryEntryRefined` (line 1841) -- "Refined by {source}, refinement #{count}" +- `KnowledgeEntryOperatorAction` (grep for handler) -- "Operator: {action}" +- `KnowledgeEntryAnnotated` (grep for handler) -- "Annotated: {note}" + +Each handler adds ~5 lines. Total: ~30 lines across 6 handlers. + +**2. 
REST endpoint for entry provenance.** + +Prefer `routes/knowledge_api.py` so the new read endpoint lives beside the +existing `GET /api/v1/knowledge/{item_id}` detail route. + +``` +GET /api/v1/knowledge/{entry_id}/provenance +``` + +Returns the `provenance_chain` list from the projection entry. Simple +read from `projections.memory_entries[entry_id].get("provenance_chain", [])`. + +**3. Provenance timeline in entry detail view.** + +In `knowledge-browser.ts`, add `_renderProvenance(chain)` method to the +entry detail expanded view. Renders a vertical timeline with: +- Timestamp (formatted relative: "3 days ago") +- Event type icon/label +- Detail text +- Confidence delta (if present, shown as +0.3α / +0.2β) + +**4. Score breakdown default visibility.** + +Repo truth: `_renderScoreBar()` already exists, but the browser currently +renders it inside the confidence hover detail and the raw search payload may +carry `_score_breakdown` rather than `score_breakdown`. Track 5 should make +the bar visible in the main list item body and teach the component to read +either key so it can light up as soon as retrieval data is present. 
+ +### Files + +| File | Change | Lines | +|------|--------|-------| +| `src/formicos/surface/projections.py` | provenance_chain append in 6 event handlers | ~30 | +| `src/formicos/surface/routes/knowledge_api.py` | GET provenance endpoint | ~15 | +| `frontend/src/types.ts` | ProvenanceChainItem interface | ~8 | +| `docs/contracts/types.ts` | ProvenanceChainItem interface (mirror) | ~8 | +| `frontend/src/components/knowledge-browser.ts` | Provenance timeline + score bar default visibility | ~60 | + +### Tests + +3 new: +- MemoryEntryCreated adds provenance chain item to projection +- MemoryConfidenceUpdated appends to existing provenance chain +- GET provenance endpoint returns chain for existing entry + +### Acceptance Gates + +- Every relevant event appends to the provenance chain +- Entry detail view shows provenance timeline +- Provenance survives replay (fully event-sourced projection data) +- Score breakdown bar visible on search results by default +- No new event types added + +### Owner + +Team A. Independent of Teams B and C. + +### Do Not Touch + +`core/events.py`, `core/types.py`, `colony_manager.py`, +`knowledge_catalog.py`, `memory_extractor.py`, `memory_store.py`. + +--- + +## Track 6: Documentation Indexer Addon + +### Problem + +FormicOS knowledge comes from colony work -- but operators often have +existing documentation (architecture docs, runbooks, API references) +that should be searchable alongside extracted knowledge. Currently +there's no way to import documentation. The codebase-index addon indexes +code but not docs. + +### Fix + +**1. 
New addon at `addons/docs-index/`.** + +Manifest follows the codebase-index pattern: + +```yaml +name: docs-index +version: "1.0.0" +description: "Semantic search over project documentation" +author: "formicos-core" + +tools: + - name: semantic_search_docs + description: "Search documentation by meaning" + handler: search.py::handle_semantic_search + parameters: + type: object + properties: + query: + type: string + description: "Natural language search query" + top_k: + type: integer + description: "Number of results (default 10)" + file_pattern: + type: string + description: "Glob filter (e.g. '*.md')" + + - name: reindex_docs + description: "Rebuild documentation index" + handler: search.py::handle_reindex + parameters: + type: object + properties: + changed_files: + type: array + items: { type: string } + description: "Files to reindex (omit for full)" + +config: + - key: doc_extensions + type: string + default: ".md,.rst,.txt,.html" + label: "File extensions to index (comma-separated)" + - key: skip_dirs + type: string + default: "node_modules,.git,.venv,__pycache__" + label: "Directories to skip" + +panels: + - target: knowledge + display_type: status_card + path: /status + handler: status.py::get_status + +routes: + - path: /status + handler: status.py::get_status + +triggers: + - type: manual + handler: indexer.py::incremental_reindex +``` + +**2. Python package at `src/formicos/addons/docs_index/`.** + +Three modules following codebase-index pattern: + +`indexer.py`: Chunks documentation files on section headers (H1/H2/H3 +for Markdown, `===`/`---` for RST). Each chunk preserves parent section +title as metadata context. Uses same `VectorDocument` pattern as +codebase-index. 
+ +```python +COLLECTION_NAME = "docs_index" +DOC_EXTENSIONS = {".md", ".rst", ".txt", ".html"} + +@dataclass +class DocChunk: + id: str # sha256 of filepath:section_path + text: str + path: str # file path relative to workspace + section: str # parent section title + line_start: int + line_end: int +``` + +Chunking strategy: split on Markdown headers (`^#{1,3} `), preserve +the header as section context. For `.rst`, split on `===` / `---` +underlines. For `.txt`, split on blank-line-separated paragraphs. +For `.html`, split on `
<h1>` through `<h3>
` tags. + +`search.py`: Handler for `semantic_search_docs` and `reindex_docs` +tools. Same pattern as codebase-index/search.py. Uses `runtime_context` +for `vector_port` and `workspace_root_fn`. + +`indexer.py` should mirror the codebase-index addon shape: +`full_reindex()`, `incremental_reindex()`, and an optional +`on_scheduled_reindex()` wrapper if we decide to add a cron trigger later. + +`status.py`: Returns status_card with doc count, last indexed timestamp. + +**3. With hierarchy in place, imported docs create hierarchy nodes.** + +If Track 1 is landed, the indexer can set `hierarchy_path` on doc +chunks to match existing knowledge branches. For example, a doc at +`docs/auth/jwt-setup.md` gets `hierarchy_path: /auth/` if that branch +exists. This is optional and deferred to a polish pass. + +### Files + +| File | Change | Lines | +|------|--------|-------| +| `addons/docs-index/addon.yaml` | **new** manifest | ~45 | +| `src/formicos/addons/docs_index/__init__.py` | **new** empty | ~1 | +| `src/formicos/addons/docs_index/indexer.py` | **new** chunker | ~120 | +| `src/formicos/addons/docs_index/search.py` | **new** handlers | ~80 | +| `src/formicos/addons/docs_index/status.py` | **new** status card | ~30 | + +### Tests + +4 new: +- Markdown chunking splits on H1/H2/H3 boundaries +- Chunk metadata includes parent section title +- semantic_search_docs handler returns results from docs_index collection +- reindex_docs handler indexes .md files from workspace root + +### Acceptance Gates + +- `addons/docs-index/addon.yaml` loads without errors +- `semantic_search_docs` Queen tool searches documentation +- `reindex_docs` Queen tool rebuilds the doc index +- Knowledge tab shows docs-index status panel (via Wave 66 panel system) +- Doc chunks include section context in metadata +- Separate Qdrant collection (`docs_index`) from code_index + +### Owner + +Team C. Independent of retrieval and provenance changes. 
+ +### Do Not Touch + +`core/events.py`, `core/types.py`, `colony_manager.py`, +`knowledge_catalog.py`, `memory_extractor.py`. +`addons/codebase-index/` (parallel addon, don't modify). + +--- + +## Team Assignment (Wave 67.5) + +| Team | Tracks | Rationale | +|------|--------|-----------| +| Team A (Provenance) | Track 5 | Projection + route + frontend detail work is a coherent vertical slice and leaves retrieval/addon work untouched. | +| Team B (Retrieval) | Track 4 | Continues from 67.0 feedback work. Owns `knowledge_catalog.py` and KG adapter seams. | +| Team C (Docs Indexer) | Track 6 | New addon slice with isolated write set and established codebase-index pattern to follow. | + +Dispatch 67.5 as three bounded coder prompts, one per track. Do not split +Track 5 across separate frontend/backend coders unless staffing changes force it. + +## Merge Order (Wave 67.5) + +``` +Track 4 (two-pass retrieval) -- merge first (refactors shared code) + | +Track 5 (provenance) -- independent, merge any time + | +Track 6 (doc indexer) -- independent, merge any time +``` + +Track 5 and Track 6 are fully independent of each other and of Track 4. +Merge in any order. Track 4 merges first only because it refactors +`_search_thread_boosted()` which other developers should rebase against. + +--- + +## Post-67.5 Extension Contract (Reference Only, Not In Scope) + +Wave 67.5 is the foundation for future flexibility, but it should not absorb +that flexibility work directly. The structural rules below are the intended +follow-on contract for later waves. + +### 1. Distilled Memory vs Raw Corpora + +`memory_entries` remain the home for distilled institutional knowledge: +curated skills, experiences, and other replay-safe memory objects that +participate in Beta confidence evolution, Thompson Sampling, co-occurrence, +hierarchy, and provenance. 
+ +Raw corpora do **not** belong in `memory_entries`: + +- documentation chunks +- code chunks +- structured data rows or records +- large imported reference sets + +These should live in addon-owned indices such as `docs_index` and +`code_index`, with their own chunking and search logic. This keeps the +institutional memory pipeline high-signal and prevents raw chunk corpora from +polluting confidence evolution and composite retrieval scoring. + +### 2. Future Flexibility Comes from More Indexing Strategies, Not More Core Types + +The right extension path is new addon indexing strategies for new content +shapes, all feeding into the existing retrieval/tooling surface: + +- prose docs -> docs-index addon +- source code -> codebase-index addon +- structured datasets / schemas -> future data-index addon + +The wrong path is expanding the core knowledge model with custom entry types, +entry-specific retrieval rules, or per-shape confidence logic. Keep the core +memory model narrow; expand via addons. + +### 3. Capability Metadata Before Hardcoded Queen Routing + +When we add content-routing later, do not hardcode addon names or file-shape +rules into Queen prompt prose. + +Instead, extend addon manifests with declarative capability metadata such as: + +```yaml +capabilities: + content_kinds: ["markdown", "rst", "html"] + path_globs: ["docs/**", "*.md", "*.rst"] + search_tool: semantic_search_docs + reindex_tool: reindex_docs +``` + +The Queen can then route by inspecting installed addon capabilities rather +than by hardcoded addon-specific logic. This makes new indexers additive. + +### 4. Taxonomy Should Be Workspace-Scoped and Soft + +Wave 67.0's domain normalization is the correct starting point: suggest known +domains without rejecting new ones. If we add richer taxonomy later, it +should be workspace config and guidance, not hard validation. 
+ +Future shape: + +```yaml +knowledge_schema: + domains: [engineering, product, operations, security] + tag_dimensions: + language: [python, typescript, rust, go] + layer: [core, engine, adapters, surface] + priority: [critical, standard, exploratory] + aliases: + python_testing: testing + ts: typescript +``` + +Desired behavior: + +- prefer configured values +- canonicalize obvious aliases +- allow genuinely new values when nothing matches +- optionally flag drift for operator review + +Do not block memory extraction on schema misses. + +### 5. Queen-Mediated Addon Search, Not Automatic Colony Cross-Index Retrieval + +Colony retrieval should continue to search institutional memory through the +existing `memory_search` / knowledge catalog path. + +Do **not** auto-search every addon index during colony retrieval. That would: + +- inflate retrieval cost +- inject irrelevant corpus hits +- force incompatible scoring models into one composite ranker + +The intended future boundary is: + +- **Queen** searches addon-owned corpora when planning / deliberating +- **Queen** curates the relevant excerpts into colony task context +- **Colonies** continue using institutional-memory retrieval during work + +This keeps corpus routing in the orchestration layer and avoids conflating raw +corpus search with curated memory retrieval. + +### 6. Wave 68 Design-Note Hook + +After Wave 67.5 lands, write a small design note covering: + +- distilled memory vs raw corpora +- addon capability metadata for content routing +- soft workspace taxonomy +- Queen-mediated addon search during deliberation / plan composition + +That note should align with Wave 68 deliberation-frame work so addon indices +become part of Queen context assembly rather than automatic colony retrieval. 
+ +--- + +## What Wave 67 Does NOT Do + +- No session continuity (Wave 68) +- No Queen deliberation frame changes (Wave 68) +- No todo.md attention pattern (Wave 68) +- No dynamic context caps (Wave 68) +- No new event types (stays at 69) -- hierarchy and provenance are + projection-level enrichments +- No A2A outbound, no metering, no IDE/CLI, no multi-user +- No RL/self-evolution +- No hot-reload for addons +- No raw doc/code/data corpora in `memory_entries` +- No hardcoded Queen routing by addon name +- No hard validation of taxonomy values during extraction +- No automatic colony retrieval across addon-owned indices +- No hierarchy_path on the core MemoryEntry model (projection-only) +- No recursive CTE (iterative PPR via existing get_neighbors) +- No UMAP/HDBSCAN (LLM-only bootstrap, zero new dependencies) + +## Validation Commands + +```bash +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +``` + +All tracks must pass this. Target: 3670+ tests (16+ net new from 3654). 
+ +## Acceptance Criteria (Combined) + +**Wave 67.0:** +- Knowledge entries have `hierarchy_path` on projections +- Knowledge browser has tree view with collapsible domain branches +- Extraction prompt suggests existing domains to prevent drift +- Outcome confidence uses rank-based credit assignment +- Effective sample size capped at 150 (preserving mean) +- All hierarchy data replay-safe (no new events, no shadow state) + +**Wave 67.5:** +- Standard retrieval computes real graph proximity (not hardcoded 0.0) +- Entity extraction from query seeds Personalized PageRank walk +- Every knowledge entry has a provenance chain +- Entry detail shows provenance timeline +- Score breakdown visible by default on search results +- Documentation indexer addon indexes .md/.rst/.txt/.html +- `semantic_search_docs` Queen tool operational +- 3670+ tests passing +- CI: ruff clean, pyright clean, imports clean + +## Estimated Scope + +**Wave 67.0:** ~200 lines backend (projections, hierarchy, memory_store, +colony_manager, scoring_math, memory_extractor). ~120 lines frontend +(tree view). ~100 lines bootstrap script. 11 new tests. + +**Wave 67.5:** ~110 lines backend (KG adapter, knowledge_catalog, +projections, API). ~70 lines frontend (provenance timeline, score bar). +~275 lines new addon (docs-index). ~16 lines contract types. 12 new +tests. + +**Combined:** ~875 lines new/modified code. 23 new tests. 0 new events. diff --git a/docs/waves/wave_68/addon_extension_contract.md b/docs/waves/wave_68/addon_extension_contract.md new file mode 100644 index 0000000..06d55f0 --- /dev/null +++ b/docs/waves/wave_68/addon_extension_contract.md @@ -0,0 +1,46 @@ +# Addon Extension Contract: Capability Metadata + +**Type:** Compatibility note +**Date:** 2026-03-25 +**Authority:** Superseded by `docs/waves/wave_68/design_note.md` and +`docs/waves/wave_68/team_c_prompt.md` + +## Summary + +Wave 68 keeps the addon extension contract deliberately small and additive. 
+ +Three optional manifest fields are the core contract: + +```python +content_kinds: list[str] = Field(default_factory=list) +path_globs: list[str] = Field(default_factory=list) +search_tool: str = Field(default="") +``` + +These fields are enough to make addon coverage legible to the Queen without +adding a new registry, event type, or retrieval path. + +## Routing Rule + +The Queen should route by source coverage, not by hardcoded addon names. +That means: + +- use `content_kinds` to identify the corpus type +- use `path_globs` to narrow the match +- use `search_tool` to identify the primary search entry point +- when an addon already exposes an obvious refresh/index trigger or handler, + surface that in `list_addons()` text as routing guidance rather than adding + a new core type + +## Design Constraints + +- additive only; existing manifests must continue to parse unchanged +- `content_kinds` stays free-form to avoid core-type churn +- capability data must appear in the text returned by `list_addons()` +- routing behavior lives in the Queen prompt/runtime, not in a new registry + +## Out of Scope + +- changing the core knowledge retrieval model +- automatic cross-index retrieval +- hard validation of workspace taxonomy or content kinds diff --git a/docs/waves/wave_68/design_note.md b/docs/waves/wave_68/design_note.md new file mode 100644 index 0000000..2c3b4ee --- /dev/null +++ b/docs/waves/wave_68/design_note.md @@ -0,0 +1,82 @@ +# Wave 68 Design Note: Three Architectural Invariants + +**Date:** 2026-03-25 +**Status:** Locked - all Wave 68 tracks must respect these boundaries. + +## Invariant 1: `memory_entries` for distilled knowledge only + +The institutional memory pipeline (`memory_entries` projection, Qdrant +`memory` collection, Thompson Sampling, co-occurrence, confidence evolution) +is reserved for distilled knowledge: skills, experiences, patterns, +conventions, and bug reports. 
+ +**What does NOT enter `memory_entries`:** +- Session state (plan files, session summaries) +- Raw indexed corpora (code chunks, documentation chunks) +- Operational snapshots (colony logs, deployment state) + +**Why:** Entries in `memory_entries` receive Beta posteriors, participate in +Thompson Sampling retrieval, accumulate co-occurrence weights, and undergo +confidence reinforcement from colony outcomes. Mixing operational state into +this pipeline would pollute institutional knowledge with ephemeral noise, +inflate co-occurrence graphs, and make confidence scores meaningless. + +**How it applies to Wave 68:** +- Track 1 (plan attention): Plans live in `.formicos/plans/{thread_id}.md`, + read from disk, never enter `memory_entries`. +- Track 2 (session continuity): Session summaries live in + `.formicos/sessions/{thread_id}.md`, injected via file read, never enter + `memory_entries`. + +## Invariant 2: New content shapes enter through addon-owned indices + +Each content domain gets its own addon with its own Qdrant collection: +- Code chunks -> `codebase-index` addon -> `code_index` collection +- Doc chunks -> `docs-index` addon -> `docs_index` collection +- Future data/spec indexers -> their own addons and collections + +**The core retrieval model (`knowledge_catalog.py`) is not touched.** +It serves `memory_entries` only. Addon search tools handle their own +collections. + +**Why:** Knowledge entries carry rich metadata (Beta posteriors, decay +classes, provenance, co-occurrence, hierarchy paths) that corpus chunks do +not have. Mixing them in one retrieval pipeline would require either +degrading the metadata model or bolting on fake posteriors for chunks. +Separate indices keep each domain's retrieval semantics clean. + +**How it applies to Wave 68:** +- Track 5 (addon capability metadata): Addons declare `content_kinds`, + `path_globs`, and `search_tool` so the Queen can route across sources + without cross-index retrieval. 
+ +## Invariant 3: The Queen is the router/composer across sources + +Colonies search `memory_entries` via `memory_search`. The Queen searches +everything - knowledge, code, docs, external tools - via addon tools during +deliberation and plan composition, then injects curated results into colony +context via task descriptions. + +**No automatic cross-index retrieval.** The Queen decides what to search, +combines results, and passes relevant context to colonies. This keeps the +retrieval pipeline simple and gives the Queen explicit control over what +evidence informs each colony's work. + +Routing is explicit in two dimensions: + +- **search** - which source should answer this question? +- **refresh/index** - which addon should update coverage for this corpus? + +Source labels matter. The Queen should see institutional memory, code/doc +corpora, and workspace hints as different evidence classes, not as one blob. + +**How it applies to Wave 68:** +- Track 4 (deliberation frame): The frame includes source-labeled addon + coverage so the Queen knows which addons can search or refresh what + content. +- Track 5 (capability metadata): `list_addons` output includes + `content_kinds` and `search_tool` so the Queen can route queries. Existing + addon handlers/triggers can surface the refresh path without a new core + type. +- Track 6 (workspace taxonomy): Tags bias the Queen's routing decisions, + not the retrieval algorithm. They are soft hints, not hard validation. diff --git a/docs/waves/wave_68/team_a_prompt.md b/docs/waves/wave_68/team_a_prompt.md new file mode 100644 index 0000000..924d113 --- /dev/null +++ b/docs/waves/wave_68/team_a_prompt.md @@ -0,0 +1,441 @@ +# Wave 68 — Team A: Queen Memory & Planning + +**Theme:** The Queen remembers plans and sessions across restarts. + +## Context + +Read `docs/waves/wave_68/design_note.md` first. You are bound by all three +invariants. In particular: plans and session summaries are FILES, not +`memory_entries`. 
They never enter the knowledge pipeline.
+
+Read `CLAUDE.md` for hard constraints (event closed union, layer rules, etc.).
+Read `AGENTS.md` for baseline file ownership; where the root `AGENTS.md` is
+stale, the ownership lists in this prompt take precedence for this wave.
+
+## Your Files (exclusive ownership)
+
+- `src/formicos/surface/queen_tools.py` — `_propose_plan()` modification +
+  `mark_plan_step` new tool
+- `src/formicos/surface/queen_runtime.py` — `_build_thread_context()` plan
+  injection (BOTTOM of method, after workflow steps section ~line 1411) +
+  `respond()` session injection + `emit_session_summary()` new method
+- `src/formicos/surface/runtime.py` — shutdown hook wiring
+- `tests/unit/surface/test_plan_attention.py` — **new**
+- `tests/unit/surface/test_session_continuity.py` — **new**
+
+## Do Not Touch
+
+- `projections.py` — no projection field changes, no replay handlers
+- `core/types.py` — no EntrySubType additions
+- `core/events.py` — no new event types
+- `addon_loader.py` — Team C owns
+- `knowledge_catalog.py` — invariant 2 (no retrieval changes)
+- `colony_manager.py` — no colony lifecycle changes
+- `_build_messages()` in `queen_runtime.py` — Team B owns
+- Any frontend files
+- `ThreadProjection.active_plan` — **DO NOT USE.** It carries
+  `DelegationPlanPreview` from `ParallelPlanCreated` events
+  (projections.py:1915-1919, typed as `DelegationPlanPreview` in
+  frontend/src/types.ts:294). Using it for proposal-shaped data would
+  break the parallel planning UI.
+
+## Overlap Coordination
+
+- **Team C** will insert ~4 lines at the TOP of `_build_thread_context()`
+  (after line ~1356) for workspace tag injection. You insert at the BOTTOM
+  (after line ~1411) for plan injection. No conflict.
+- **Team B** touches `respond()` for deliberation frame injection and budget
+  threading. Your session injection goes in `respond()` AFTER memory
+  retrieval (same area as project_context injection, lines 795-815). 
+ Team B's deliberation detection goes INSIDE the tool loop (lines 968+). + Different code regions. + +--- + +## Track 1: Plan File Persistence + +### Problem + +The Queen proposes plans via `propose_plan` (queen_tools.py:3056-3176) but +immediately forgets them. The `active_plan` field on `ThreadProjection` +(projections.py:535) carries `DelegationPlanPreview` from +`ParallelPlanCreated` — it is NOT available for proposal plans. +`propose_plan` returns `(text, action_dict)` where the action dict has +`render: "proposal_card"`. The plan text scrolls out of context after +`_RECENT_WINDOW=10` messages (queen_runtime.py:146). For multi-step +threads, the Queen loses track of what it planned. + +### Implementation + +**1. Write plan file from `_propose_plan()`.** + +In `queen_tools.py`, `_propose_plan()` (line 3056): after building the +proposal dict (line 3170), write a structured plan file. + +```python +# After line 3170 (proposal dict built): +# Wave 68: persist plan to file for attention injection +try: + _data_dir = self._runtime.settings.system.data_dir + if isinstance(_data_dir, str) and _data_dir: + _plan_dir = Path(_data_dir) / ".formicos" / "plans" + _plan_dir.mkdir(parents=True, exist_ok=True) + # thread_id must be passed — add it as a parameter + _plan_path = _plan_dir / f"{thread_id}.md" + _plan_lines = [f"# Plan: {summary[:200]}", ""] + if recommendation: + _plan_lines.append(f"**Approach:** {recommendation}") + _plan_lines.append("") + if enriched_options: + _plan_lines.append("## Options") + for i, opt in enumerate(enriched_options, 1): + label = opt.get("label", f"Option {i}") + desc = opt.get("description", "") + _plan_lines.append(f"{i}. **{label}:** {desc}") + _plan_lines.append("") + _plan_lines.append("## Steps") + _plan_lines.append("*(No steps defined yet. 
Use mark_plan_step to add.)*") + _plan_path.write_text("\n".join(_plan_lines), encoding="utf-8") +except (OSError, TypeError): + pass # plan file is best-effort, not critical path +``` + +**Note:** `_propose_plan()` currently receives `inputs` and `workspace_id` +(line 3056-3059) but NOT `thread_id`. The dispatcher (queen_tools.py:167-206) +uses lambda wrapping: `"propose_plan": lambda i, w, t: self._propose_plan(i, w)` +which drops `t`. To get `thread_id`, change the lambda to pass `t` through +and add `thread_id: str` to the method signature: +```python +# In the handler registry (~line 185): +"propose_plan": lambda i, w, t: self._propose_plan(i, w, t), +``` +Then update the method signature: +```python +def _propose_plan(self, inputs, workspace_id, thread_id): +``` +Same pattern for `mark_plan_step` — register with the full `(i, w, t)` lambda. + +The `.formicos/` directory pattern is established: +- Backups: `queen_runtime.py:658` — `target.parent / ".formicos" / "backups"` +- Project context: `queen_runtime.py:799` — `Path(_data_dir) / ".formicos" / "project_context.md"` +- Colony manager: `colony_manager.py:676` — `Path(_ws_dir) / ".formicos" / "project_context.md"` + +Use the `data_dir` pattern from `queen_runtime.py:797`: +```python +_data_dir = self._runtime.settings.system.data_dir +``` + +**2. Add `mark_plan_step` Queen tool.** + +New tool spec in the tool specs list: + +```python +{ + "name": "mark_plan_step", + "description": ( + "Update a plan step's status. Call after spawning a colony for " + "a plan step or when a step completes/blocks." 
+ ), + "parameters": { + "type": "object", + "properties": { + "step_index": { + "type": "integer", + "description": "Zero-based step index in the plan" + }, + "status": { + "type": "string", + "enum": ["pending", "started", "completed", "blocked"], + "description": "New status for this step" + }, + "description": { + "type": "string", + "description": "Step description (required when adding a new step)" + }, + "colony_id": { + "type": "string", + "description": "Colony executing this step (optional)" + }, + "note": { + "type": "string", + "description": "Brief status note (optional)" + } + }, + "required": ["step_index", "status"] + } +} +``` + +Handler implementation: +1. Read the plan file from `.formicos/plans/{thread_id}.md` +2. Parse the `## Steps` section +3. Update or append the step at `step_index` +4. Write the file back +5. Return confirmation text + +The step format in the file: +```markdown +## Steps +- [0] [started] Implement auth module (colony abc12345) +- [1] [pending] Write integration tests +- [2] [completed] Update API docs — Done, merged. +``` + +Register `mark_plan_step` in the tool dispatch. Follow the same pattern as +other Queen tools: add to the tool specs list, add to the dispatch handler. +Also add it to the Queen's tool list in `caste_recipes.yaml` (line 203) — +coordinate with Team C who also modifies this file. Add `mark_plan_step` +to the existing comma-separated list. + +**3. 
Inject plan into `_build_thread_context()`.** + +In `queen_runtime.py`, `_build_thread_context()` (line 1347): after the +workflow steps section (which ends at line 1411), inject the plan file: + +```python +# Wave 68: inject plan file for persistent attention +try: + _data_dir = self._runtime.settings.system.data_dir + if isinstance(_data_dir, str) and _data_dir: + _plan_path = Path(_data_dir) / ".formicos" / "plans" / f"{thread_id}.md" + if _plan_path.is_file(): + _plan_text = _plan_path.read_text(encoding="utf-8")[:2000] + if _plan_text: + lines.append(f"\n{_plan_text}") +except (OSError, TypeError, AttributeError): + pass +``` + +Cap at 2000 chars. The plan file is read on every `respond()` call — this +is the "read-heavy, write-light" pattern. The file is the attention +mechanism. + +### Tests (`tests/unit/surface/test_plan_attention.py`) + +5 tests: + +1. **`test_propose_plan_writes_plan_file`** — Mock `_runtime.settings.system.data_dir` + to a temp directory. Call `_propose_plan()` with summary/options/recommendation. + Assert `.formicos/plans/{thread_id}.md` exists with correct content. + +2. **`test_mark_plan_step_updates_file`** — Write a plan file with steps section. + Call `mark_plan_step` handler with `step_index=0, status="completed"`. + Assert file updated correctly. + +3. **`test_mark_plan_step_adds_new_step`** — Call with a new `step_index` and + `description`. Assert step appended. + +4. **`test_build_thread_context_includes_plan`** — Write a plan file. Call + `_build_thread_context()`. Assert output contains plan summary. + +5. **`test_plan_injection_caps_at_2000_chars`** — Write an oversized plan file + (5000 chars). Assert injected text is truncated. + +--- + +## Track 2: Session Continuity via Files + +### Problem + +When the operator reopens a workspace, the Queen has no memory of what +happened in previous sessions. Thread context shows colony counts and +step statuses, but not what the Queen learned, what worked, or what was +abandoned. 
+ +### Implementation + +**1. `emit_session_summary()` method on `QueenRuntime`.** + +New method in `queen_runtime.py`: + +```python +async def emit_session_summary( + self, workspace_id: str, thread_id: str, +) -> None: + """Write a session summary file for later startup injection. + + Content assembled deterministically from projections — no LLM call. + File written to .formicos/sessions/{thread_id}.md. + """ + thread = self._runtime.projections.get_thread(workspace_id, thread_id) + if thread is None: + return + + lines: list[str] = [ + f"# Session Summary: {thread.name}", + f"**Thread:** {thread_id}", + f"**Status:** {thread.status}", + "", + ] + + # Plan state (from plan file, if exists) + try: + _data_dir = self._runtime.settings.system.data_dir + if isinstance(_data_dir, str) and _data_dir: + _plan_path = Path(_data_dir) / ".formicos" / "plans" / f"{thread_id}.md" + if _plan_path.is_file(): + _plan_text = _plan_path.read_text(encoding="utf-8")[:1000] + lines.append("## Active Plan") + lines.append(_plan_text) + lines.append("") + except (OSError, TypeError, AttributeError): + pass + + # Colony outcomes this session + lines.append("## Colony Activity") + lines.append( + f"- {thread.completed_colony_count} completed, " + f"{thread.failed_colony_count} failed, " + f"{thread.colony_count} total" + ) + + # Workflow step status + if thread.workflow_steps: + completed = sum(1 for s in thread.workflow_steps if s.get("status") == "completed") + pending = sum(1 for s in thread.workflow_steps if s.get("status") == "pending") + lines.append(f"- Workflow: {completed} steps completed, {pending} pending") + + # Last few Queen decisions (from conversation, last 5 queen messages) + queen_msgs = [m for m in thread.queen_messages if m.role == "queen"] + if queen_msgs: + lines.append("") + lines.append("## Recent Queen Activity") + for msg in queen_msgs[-5:]: + content = msg.content[:200] if hasattr(msg, "content") else "" + if content: + lines.append(f"- {content}") + + 
summary_text = "\n".join(lines) + + # Write to file + try: + _data_dir = self._runtime.settings.system.data_dir + if isinstance(_data_dir, str) and _data_dir: + _session_dir = Path(_data_dir) / ".formicos" / "sessions" + _session_dir.mkdir(parents=True, exist_ok=True) + _session_path = _session_dir / f"{thread_id}.md" + _session_path.write_text(summary_text, encoding="utf-8") + except (OSError, TypeError, AttributeError): + log.warning("session_summary.write_failed", + workspace_id=workspace_id, thread_id=thread_id) +``` + +**2. Always-inject session summary in `respond()`.** + +In `queen_runtime.py`, `respond()`: after the memory retrieval block +(lines 778-793) and project context block (lines 795-815), inject the +session summary file. **NOT gated on `if not thread.queen_messages`** — +always inject if the file exists. Cap at ~1000 tokens (~4000 chars). + +```python +# Wave 68: session continuity — always inject prior session summary +try: + _data_dir = self._runtime.settings.system.data_dir + if isinstance(_data_dir, str) and _data_dir: + _session_path = ( + Path(_data_dir) / ".formicos" / "sessions" / f"{thread_id}.md" + ) + if _session_path.is_file(): + _session_text = _session_path.read_text(encoding="utf-8")[:4000] + if _session_text: + # Insert after system prompts, before conversation history + _ss_insert = 0 + for _si, _sm in enumerate(messages): + if _sm.get("role") != "system": + _ss_insert = _si + break + else: + _ss_insert = len(messages) + messages.insert(_ss_insert, { + "role": "system", + "content": f"# Prior Session Context\n{_session_text}", + }) +except (OSError, TypeError, AttributeError): + pass +``` + +This mirrors the project_context injection pattern (queen_runtime.py:795-815) +exactly — same `data_dir` resolution, same try/except, same insert-after- +system-prompts logic. + +**3. Shutdown hook in `runtime.py`.** + +In `runtime.py`, find the shutdown/cleanup sequence. 
Add a call that +iterates active workspaces and their threads, calling +`queen_runtime.emit_session_summary()` for each thread with recent +activity (any `QueenMessage` in the last 30 minutes). + +Look for an existing `async def shutdown()` or `async def cleanup()` method +on the `Runtime` class. If none exists, add a method and wire it from the +application shutdown sequence in `app.py`. + +```python +# In Runtime shutdown sequence: +async def _emit_session_summaries(self) -> None: + """Emit session summaries for recently active threads on shutdown.""" + cutoff = datetime.now(UTC) - timedelta(minutes=30) + for ws_id, ws in self.projections.workspaces.items(): + for thread_id, thread in ws.threads.items(): + if not thread.queen_messages: + continue + # Check last message timestamp + last_msg = thread.queen_messages[-1] + ts = _parse_projection_timestamp( + last_msg.timestamp if hasattr(last_msg, "timestamp") else "" + ) + if ts and ts > cutoff: + try: + await self.queen.emit_session_summary(ws_id, thread_id) + except Exception: + log.warning("shutdown.session_summary_failed", + workspace_id=ws_id, thread_id=thread_id) +``` + +Note: `_parse_projection_timestamp` is already defined in `queen_runtime.py` +(line 112). You may need to extract it or duplicate it depending on where +the shutdown method lives. + +### Tests (`tests/unit/surface/test_session_continuity.py`) + +4 tests: + +1. **`test_emit_session_summary_writes_file`** — Mock projections with a + thread that has colonies and messages. Call `emit_session_summary()`. + Assert `.formicos/sessions/{thread_id}.md` exists with expected sections. + +2. **`test_session_injection_always_fires`** — Write a session file. Create + a thread with existing queen_messages (non-empty). Call `respond()` mock + path. Assert session summary appears in messages list. + +3. **`test_session_injection_caps_at_4000_chars`** — Write an oversized + session file. Assert injected text is truncated. + +4. 
**`test_session_summary_includes_plan_state`** — Write both a plan file + and call `emit_session_summary()`. Assert session summary references + the plan. + +--- + +## Acceptance Gates + +All gates must pass before declaring done: + +- [ ] `propose_plan` writes `.formicos/plans/{thread_id}.md` +- [ ] `mark_plan_step` reads/writes plan file, updates step status +- [ ] `_build_thread_context()` includes plan file content (capped at 2000 chars) +- [ ] Plan survives conversation compaction (10+ messages) +- [ ] Plan file does NOT touch `ThreadProjection.active_plan` +- [ ] Session summary writes to `.formicos/sessions/{thread_id}.md` on shutdown +- [ ] Session summary is NOT a `MemoryEntryCreated` event +- [ ] Session injection fires on every `respond()`, not gated on empty messages +- [ ] Session injection capped at ~4000 chars +- [ ] No new event types added (event count stays at 69) +- [ ] No changes to `projections.py` + +## Validation + +```bash +# Unit tests +pytest tests/unit/surface/test_plan_attention.py -v +pytest tests/unit/surface/test_session_continuity.py -v + +# Full CI +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +``` diff --git a/docs/waves/wave_68/team_b_prompt.md b/docs/waves/wave_68/team_b_prompt.md new file mode 100644 index 0000000..fc6cf21 --- /dev/null +++ b/docs/waves/wave_68/team_b_prompt.md @@ -0,0 +1,306 @@ +# Wave 68 - Team B: Queen Intelligence & Context Scaling + +**Theme:** The Queen reasons with structured, source-labeled evidence and +scales cleanly from small to large context models. + +## Context + +Read `docs/waves/wave_68/design_note.md` first. You are bound by all three +invariants. + +Read `CLAUDE.md` for hard constraints (event closed union, layer rules, etc.). +Read `AGENTS.md` for repo norms. This prompt overrides stale root `AGENTS.md` +for file ownership within this wave. 
+ +## Your Files (exclusive ownership) + +- `src/formicos/surface/queen_budget.py` - **new**, `QueenContextBudget` + + `compute_queen_budget()` +- `src/formicos/surface/queen_runtime.py` - budget threading in + `_build_messages()`, constant replacement in `respond()`, + `_build_deliberation_frame()` new helper, deliberation detection/injection +- `tests/unit/surface/test_queen_budget.py` - **new** +- `tests/unit/surface/test_deliberation_frame.py` - **new** + +## Do Not Touch + +- `src/formicos/surface/queen_tools.py` - Team A owns plan tools; Team C owns + `_list_addons()` and workspace tags tooling +- `src/formicos/surface/projections.py` +- `src/formicos/core/types.py` +- `src/formicos/core/events.py` +- `src/formicos/surface/knowledge_catalog.py` +- `src/formicos/surface/colony_manager.py` +- `_build_thread_context()` in `queen_runtime.py` - Team A owns bottom + insertion; Team C owns top insertion +- `config/caste_recipes.yaml` - Team C owns +- any frontend files + +## Overlap Coordination + +- Team A touches `respond()` for session-summary injection after the + project-context area. You own early budget computation and the + deliberation-frame injection path before the LLM call. +- Team C makes addon capability metadata visible. Your deliberation frame + should prefer that metadata once it exists, but must still work before + Team C lands. +- You own `_build_messages()` exclusively. 
+ +--- + +## Track 3: Dynamic Context Budget (ADR-051) + +### Problem + +`queen_runtime.py` still relies on seven hardcoded caps: + +| Constant | Current value | +|----------|---------------| +| `_THREAD_TOKEN_BUDGET` | 6000 tokens | +| `_RECENT_WINDOW` | 10 messages | +| `_QUEEN_TOOL_OUTPUT_CAP` | 2000 chars | +| `_QUEEN_MAX_TOOL_HISTORY_CHARS` | 16000 chars | +| project context cap | 2000 chars | +| tool memory join cap | 6000 chars | +| cloud routing threshold | 2000 tokens | + +These values are too small on large-context models and too rigid to express +how the Queen should use new context sources such as plans, session summaries, +tags, and deliberation frames. + +### Non-negotiable rule + +Every computed slot must use: + +```python +slot_value = max(current_default, proportional_value) +``` + +This is the no-regression guarantee. Proportional scaling may grow budgets. +It must never shrink behavior below current defaults. + +### Implementation + +**1. New module `src/formicos/surface/queen_budget.py`.** + +Add a small surface-only module: + +```python +from dataclasses import dataclass + + +@dataclass(frozen=True) +class QueenContextBudget: + system_prompt: int + memory_retrieval: int + project_context: int + thread_context: int + tool_memory: int + conversation_history: int +``` + +Budget fractions: + +- `system_prompt`: 15% +- `memory_retrieval`: 15% +- `project_context`: 10% +- `thread_context`: 20% +- `tool_memory`: 10% +- `conversation_history`: 30% + +Fallback floors must match current behavior exactly: + +- `system_prompt=2000` +- `memory_retrieval=1500` +- `project_context=500` +- `thread_context=1500` +- `tool_memory=4000` +- `conversation_history=6000` + +`compute_queen_budget(context_window, output_reserve)` should: + +- read `context_window` as the model's total context window +- subtract `output_reserve` +- compute each proportional slot +- return `max(fallback, proportional)` for every slot +- return the fallback object unchanged when 
`context_window` is missing, + invalid, or too small + +**2. Compute the budget in `respond()`.** + +Use the same model-registry lookup pattern already used by +`_queen_max_tokens()`. Read: + +- `rec.context_window` from `ModelRecord` +- output reserve from `_queen_max_tokens(workspace_id)` + +Then: + +```python +budget = compute_queen_budget(_ctx_window, _output_reserve) +``` + +Pass `budget` into `_build_messages()`. + +**3. Thread the budget through `_build_messages()`.** + +Change the signature to accept an optional budget object. If none is supplied, +use the fallback budget from `queen_budget.py`. + +Replace hardcoded caps in `_build_messages()`: + +- tool-memory join cap -> `budget.tool_memory * 4` +- any other local hardcoded limit there -> budget-backed equivalent + +**4. Replace the remaining hardcoded caps in `respond()`.** + +Replace: + +- project-context file slice -> `budget.project_context * 4` +- cloud routing threshold -> `budget.system_prompt` + +**5. Replace `_compact_thread_history()` inputs.** + +Preferred approach: + +```python +def _compact_thread_history( + queen_messages: list[Any], + token_budget: int = 6000, + recent_window: int = 10, +) -> list[dict[str, str]]: +``` + +Call it with: + +```python +_compact_thread_history( + thread.queen_messages, + token_budget=budget.conversation_history, + recent_window=max(5, budget.conversation_history // 600), +) +``` + +If you discover a lower-risk seam that preserves the same behavior, note it in +the summary, but do not fall back to the old constants for normal operation. + +**6. Add a lightweight debug log.** + +Emit one debug log showing model, context window, output reserve, and the final +slot allocation. Keep it small and deterministic. + +--- + +## Track 4: Deliberation Frame Assembly + +### Problem + +The Queen's `CLASSIFY -> DIRECT -> COLONY` flow is guided mostly by the system +prompt. 
On exploratory or open-ended operator messages +(`_DELIBERATION_RE` in `queen_intent_parser.py`), the Queen still lacks a +structured pre-LLM snapshot of: + +- institutional memory coverage +- recent colony outcomes +- addon-owned corpus coverage +- thread momentum +- active intelligence alerts + +The remaining routing weakness is that addon coverage can degrade into a tool +inventory. A strong router needs source-labeled evidence: what is institutional +memory, what is docs/code corpus coverage, and how each source is meant to be +used. + +### Implementation + +**1. Add `_build_deliberation_frame()`.** + +Create a helper on `QueenRuntime` that assembles a deterministic frame from +projections only. No LLM calls. No network. + +Use sections like: + +- `## Institutional Memory Coverage` +- `## Recent Colony Outcomes` +- `## Addon Corpus Coverage` +- `## Thread Progress` +- `## Active Alerts` + +For institutional memory, summarize top domains by entry count and average +confidence. + +For recent outcomes, summarize the latest few outcomes with success marker, +strategy, rounds, and cost. + +For addon coverage, prefer manifest-backed capability metadata once Team C +lands. The goal is routing signal, not just tool names. The frame should +ideally read like: + +```text +- docs-index: content documentation; files **/*.md, **/*.rst; search via search_docs +- codebase-index: content source_code; files **/*.py, **/*.ts; search via semantic_search_code +``` + +If the manifest or runtime seam for capability metadata is not yet present, +fall back gracefully to addon tool descriptions. Final truth pass should happen +after Team C lands so the corpus-coverage section reflects real capability +metadata. + +If an addon already exposes an obvious refresh/index trigger or handler, you +may surface it here too, but do not invent a new core contract from Team B. + +For alerts, reuse the existing proactive-intelligence path if available, but +guard it tightly. + +**2. 
Inject the frame before the LLM call.** + +In `respond()`, after building messages and before entering the tool loop, +check the latest operator message for `_DELIBERATION_RE`. + +If it matches: + +- build the deliberation frame +- cap it at `budget.thread_context * 4` chars (fallback 1500 chars if needed) +- insert it as a system message before the first non-system message + +This is pre-context for reasoning, routing, and planning. It is not a +post-response annotation. + +### Tests + +Create `tests/unit/surface/test_deliberation_frame.py` with at least: + +1. `test_frame_includes_domains_and_outcomes` +2. `test_frame_caps_at_budget` +3. `test_deliberation_triggers_on_exploratory_message` +4. `test_frame_empty_for_bare_workspace` +5. `test_frame_prefers_capability_metadata_when_available` + +The last test should mock addon manifests with `content_kinds`, `path_globs`, +and `search_tool`, then assert the frame labels addon corpus coverage by source +type rather than only by tool name. + +--- + +## Acceptance Gates + +- [ ] `queen_budget.py` exists with `QueenContextBudget` and `compute_queen_budget()` +- [ ] Every slot uses `max(fallback, proportional)` +- [ ] Fallback values match the current hardcoded defaults +- [ ] `context_window` is read from `ModelRecord.context_window` +- [ ] Output reserve comes from `_queen_max_tokens()` +- [ ] `_build_messages()` and `respond()` consume budget values instead of the old hardcoded caps +- [ ] Deliberation frame is assembled from projections only +- [ ] Deliberation frame is injected before the LLM call +- [ ] Addon coverage is source-labeled and prefers capability metadata when available +- [ ] Budget logging is present at debug level +- [ ] No new event types are added + +## Validation + +```bash +pytest tests/unit/surface/test_queen_budget.py -v +pytest tests/unit/surface/test_deliberation_frame.py -v + +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +``` diff --git 
a/docs/waves/wave_68/team_c_prompt.md b/docs/waves/wave_68/team_c_prompt.md new file mode 100644 index 0000000..8d2664e --- /dev/null +++ b/docs/waves/wave_68/team_c_prompt.md @@ -0,0 +1,255 @@ +# Wave 68 - Team C: Extension Contract & Routing + +**Theme:** Make the Queen a better router by surfacing corpus coverage +declaratively through addon metadata and workspace taxonomy. + +## Context + +Read `docs/waves/wave_68/design_note.md` first. You are bound by all three +invariants. In particular, invariant 3 says the Queen is the router/composer +across sources. + +Flexibility in this wave means the Queen can answer two questions without +hardcoded addon names: + +- which source should I search? +- which addon should I refresh/reindex? + +Read `CLAUDE.md` for hard constraints (event closed union, layer rules, etc.). +Read `AGENTS.md` for repo norms. This prompt overrides stale root +`AGENTS.md` for file ownership within this wave. + +## Your Files (exclusive ownership) + +- `src/formicos/surface/addon_loader.py` - `AddonManifest` field additions +- `addons/codebase-index/addon.yaml` - capability metadata +- `addons/docs-index/addon.yaml` - capability metadata +- `addons/git-control/addon.yaml` - capability metadata if truly meaningful +- `addons/proactive-intelligence/addon.yaml` - capability metadata if truly meaningful +- `src/formicos/surface/queen_tools.py` - `_list_addons()` text output + + `set_workspace_tags` +- `config/caste_recipes.yaml` - Queen routing rule + tool list updates +- `src/formicos/surface/queen_runtime.py` - small tag injection at the top of + `_build_thread_context()` +- `tests/unit/addons/test_addon_capability.py` - **new** +- `tests/unit/surface/test_workspace_taxonomy.py` - **new** + +## Do Not Touch + +- `src/formicos/surface/projections.py` +- `src/formicos/core/types.py` +- `src/formicos/core/events.py` +- `src/formicos/surface/knowledge_catalog.py` +- `src/formicos/surface/colony_manager.py` +- `_build_messages()` in `queen_runtime.py` 
- Team B owns +- `respond()` in `queen_runtime.py` - Teams A/B own +- any frontend files + +## Overlap Coordination + +- Team A inserts plan text at the bottom of `_build_thread_context()`. You only + insert tags near the top after the goal line. +- Team A adds `mark_plan_step` to the Queen tool list. You also touch + `caste_recipes.yaml` to add `set_workspace_tags` and routing guidance. +- Team B wants capability-backed addon coverage in the deliberation frame. + Your `list_addons()` text and manifest metadata are the routing truth it + should eventually prefer. + +--- + +## Track 5: Addon Capability Metadata + +### Problem + +`_list_addons()` currently exposes mostly tool and handler inventory. That tells +the Queen what exists, but not what each addon actually covers. A strong router +needs model-visible answers to: + +- what corpus type does this addon own? +- what files or paths does it cover? +- what is the primary search path? +- if the addon already supports refresh/reindex, how do I reach it? + +Without that, the Queen has to guess and burn tool turns learning the shape of +the system. + +### Implementation + +**1. Extend `AddonManifest` with three optional fields.** + +Add these fields in `addon_loader.py` with empty defaults: + +```python +content_kinds: list[str] = Field(default_factory=list) +path_globs: list[str] = Field(default_factory=list) +search_tool: str = Field(default="") +``` + +This stays additive and backward-compatible. + +Keep `content_kinds` free-form. Do not add a new enum for this wave. + +**2. Update real addon manifests.** + +Populate meaningful values for the addons that own corpora, especially: + +- `addons/codebase-index/addon.yaml` +- `addons/docs-index/addon.yaml` + +Only add metadata to `git-control` or `proactive-intelligence` if they truly +own a searchable corpus. Do not fabricate coverage. + +Use the actual search tool names defined in each manifest. + +**3. 
Make `_list_addons()` tell the Queen what matters.** + +Update `_list_addons()` so the **text string** returned to the model includes a +capability summary per addon, for example: + +```text +**docs-index**: Index and search workspace documentation + Content: documentation + Files: **/*.md, **/*.rst, **/*.txt + Search via: search_docs + Index via: incremental_reindex +``` + +Rules: + +- capability data must be in the first tuple element (text), not hidden in a + side dict +- group by addon, not by raw tool spec alone +- if an addon already exposes an obvious refresh/index trigger or handler, + surface that path in text as `Index via: ...` +- do not add a new manifest field for refresh/index unless you discover the + existing manifest cannot represent it at all + +This is the core routing seam for the Queen. + +**4. Add a Queen routing rule in `caste_recipes.yaml`.** + +Add a short system-prompt rule that says: + +- call `list_addons` when the operator asks to search, index, or learn from + content +- route by `content_kinds` and `path_globs` +- use `memory_search` only for institutional memory / experience / conventions +- use addon search tools for addon-owned corpora +- use the addon's surfaced refresh/index path for reindex requests +- if multiple addons match, prefer the narrowest `path_globs` or the + operator's explicit path +- treat workspace tags as hints, not hard constraints + +Also update the Queen tool count and add both `mark_plan_step` and +`set_workspace_tags` to the tool list. + +### Tests + +Create `tests/unit/addons/test_addon_capability.py` with at least: + +1. `test_manifest_parses_with_capability_fields` +2. `test_manifest_parses_without_capability_fields` +3. `test_list_addons_includes_capability_text` +4. `test_list_addons_includes_refresh_path_when_present` + +The last test should prove that the model-visible text distinguishes the +search path from the refresh/index path when the addon already exposes one. 
+ +--- + +## Track 6: Soft Workspace Taxonomy + +### Problem + +The Queen still lacks cheap, explicit workspace priors. Names are often too +opaque to tell whether a workspace is about auth, docs, Python, infra, or +something else. That makes routing and plan composition weaker than they need +to be. + +The fix here is **soft taxonomy**, not validation. Tags should steer the Queen, +not reject new concepts. + +### Correct event/projection contract + +`WorkspaceConfigChanged` uses: + +- `field` +- `old_value` +- `new_value` + +Do not use `key` / `value`. + +Workspace config is read from `ws.config`, not from a separate projection map. + +### Implementation + +**1. Add `set_workspace_tags`.** + +Add a Queen tool in `queen_tools.py` that: + +- accepts `tags: list[str]` +- normalizes to lowercase, stripped strings +- caps to 20 tags and 50 chars per tag +- emits `WorkspaceConfigChanged` with: + - `field="taxonomy_tags"` + - `old_value` from the existing config entry if present + - `new_value` as JSON + +Follow the exact event-emission pattern already used by the config tools in +`queen_tools.py`. Do not invent a new one. + +**2. Inject tags near the top of `_build_thread_context()`.** + +At the top of `_build_thread_context()`, right after the goal line, read: + +```python +ws.config.get("taxonomy_tags") +``` + +Parse the JSON string and render a single line like: + +```text +Tags: python, auth, web-api +``` + +This should be a small hint, not a giant block. + +**3. Add a gentle hint for brand-new tagless workspaces.** + +If the workspace has no tags and fewer than 3 threads, append a brief hint +about `set_workspace_tags`. The hint should disappear once tags exist or the +workspace is no longer brand new. + +### Tests + +Create `tests/unit/surface/test_workspace_taxonomy.py` with at least: + +1. `test_set_workspace_tags_emits_config_event` +2. `test_tags_normalized_and_capped` +3. `test_thread_context_includes_tags` +4. 
`test_auto_suggest_nudge_for_tagless_workspace` + +--- + +## Acceptance Gates + +- [ ] `AddonManifest` parses with and without the new optional fields +- [ ] Existing manifests remain backward-compatible +- [ ] Updated manifests contain real capability values for real corpus addons +- [ ] `_list_addons()` text includes `Content:`, `Files:`, and `Search via:` +- [ ] `_list_addons()` distinguishes search path from refresh/index path when present +- [ ] Queen system prompt includes routing guidance based on source coverage +- [ ] `set_workspace_tags` emits `WorkspaceConfigChanged` with `field` / `old_value` / `new_value` +- [ ] Tags are read from `ws.config` +- [ ] Tags are injected near the top of `_build_thread_context()` +- [ ] Tagless-workspace hint fires only for small/new workspaces +- [ ] No new event types are added + +## Validation + +```bash +pytest tests/unit/addons/test_addon_capability.py -v +pytest tests/unit/surface/test_workspace_taxonomy.py -v + +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +``` diff --git a/docs/waves/wave_68/wave_68_plan.md b/docs/waves/wave_68/wave_68_plan.md new file mode 100644 index 0000000..d71efc8 --- /dev/null +++ b/docs/waves/wave_68/wave_68_plan.md @@ -0,0 +1,175 @@ +# Wave 68: The Strategic Queen + +**Status:** Dispatch-ready packet +**Predecessor:** Wave 67.5 +**Theme:** Make the Queen stateful, context-aware, and a better router across +institutional memory and addon-owned corpora. + +## Packet Authority + +This file is the dispatch overview. The prompts and ADR are the authority for +implementation detail: + +- `docs/waves/wave_68/design_note.md` +- `docs/waves/wave_68/team_a_prompt.md` +- `docs/waves/wave_68/team_b_prompt.md` +- `docs/waves/wave_68/team_c_prompt.md` +- `docs/decisions/051-dynamic-context-caps.md` + +`docs/waves/wave_68/addon_extension_contract.md` is retained as a compact +compatibility note only. The old long-form Wave 68 plan is superseded. 
+ +## Locked Boundaries + +Wave 68 is intentionally flexible at the routing layer and conservative at the +memory layer. + +- `memory_entries` remain distilled institutional knowledge only. +- Raw corpora stay in addon-owned indices and keep their own retrieval rules. +- The Queen is the router/composer across sources; colonies still work from + injected context plus normal `memory_search`. +- Workspace taxonomy is soft guidance for routing, not hard validation. +- No new event types. No frontend work. No changes to core retrieval math. + +## Scope + +| Track | Outcome | Team | Dependency | +|------|---------|------|------------| +| 1 | Plan file persistence + `mark_plan_step` | A | None | +| 2 | Session continuity via `.formicos/sessions/` | A | Track 1 | +| 3 | Dynamic Queen context caps (ADR-051) | B | None | +| 4 | Deliberation frame with source-labeled routing context | B | Track 3; richer after Track 5 | +| 5 | Addon capability metadata + model-visible `list_addons` text | C | None | +| 6 | Soft workspace taxonomy + `set_workspace_tags` | C | Same packet as Track 5 | + +## What Flexibility Means In Wave 68 + +This wave does **not** broaden the core knowledge model. It makes the system +more adaptable by improving the Queen's control plane: + +- plans and session summaries are files, not memory entries +- addon manifests describe what each corpus index covers +- `list_addons` must surface that coverage in text the Queen can read +- the deliberation frame labels source types separately +- workspace tags bias routing without blocking novel concepts +- the Queen chooses when to search memory, search corpora, or refresh an index + +That is the right flexibility seam: better routing and composition without +polluting institutional memory. + +## Team Missions + +### Team A + +Own persistent Queen attention across sessions. 
+ +- write proposal plans to `.formicos/plans/{thread_id}.md` +- add `mark_plan_step` +- inject the plan at the bottom of `_build_thread_context()` +- write deterministic session summaries to `.formicos/sessions/{thread_id}.md` +- inject prior-session context on every `respond()` when the file exists + +Hard boundary: do **not** touch `ThreadProjection.active_plan`. That field is +already used for `DelegationPlanPreview`. + +### Team B + +Own adaptive context sizing and pre-LLM deliberation support. + +- add `queen_budget.py` +- compute budgets from `ModelRecord.context_window` +- reserve output with `_queen_max_tokens()` +- use `max(fallback, proportional)` so budgets never shrink below current + behavior +- build a deliberation frame that labels institutional memory separately from + addon-owned corpora and thread momentum + +Hard boundary: no replay changes, no retrieval changes, no frontend changes. + +### Team C + +Own declarative routing metadata and workspace routing hints. + +- extend `AddonManifest` with optional routing metadata +- update addon manifests with real capability values +- make `_list_addons()` return capability data in the text string the Queen + reads +- surface the primary search path and, when the addon already exposes one, the + primary refresh/index path +- add `set_workspace_tags` +- inject workspace tags near the top of `_build_thread_context()` +- add a Queen prompt rule that routes by source coverage instead of hardcoded + addon names + +Hard boundary: no projection or event changes; routing metadata must stay +backward-compatible. + +## Merge Order + +Recommended merge order: + +1. Team A Track 1 +2. Team B Track 3 +3. Team C Tracks 5 and 6 +4. Team A Track 2 +5. Team B Track 4 +6. Final truth pass across prompts/docs + +Notes: + +- Team B can build Track 4 before Team C lands, but final acceptance should + happen after Track 5 so the addon coverage section reflects real capability + metadata instead of only tool inventory. 
+- Team A and Team C both touch `_build_thread_context()` and + `config/caste_recipes.yaml`, but the insertions are intentionally disjoint. + +## Global Do Not Touch + +- `src/formicos/core/events.py` +- `src/formicos/core/types.py` +- `src/formicos/surface/projections.py` +- `src/formicos/surface/knowledge_catalog.py` +- frontend files + +Wave 68 is a control-plane wave, not a schema-expansion wave. + +## Acceptance Focus + +- no use of `ThreadProjection.active_plan` for proposal-shaped data +- no `MemoryEntryCreated` path for plans or session summaries +- `context_window` read from `ModelRecord.context_window` +- output reserve derived from `_queen_max_tokens()` +- budget math uses `max(fallback, proportional)` in every slot +- deliberation frame is injected before the LLM call +- deliberation frame labels source types, especially addon-owned corpora +- `_list_addons()` text includes `content_kinds`, `path_globs`, search path, + and any primary refresh/index path already exposed by the addon +- `set_workspace_tags` uses `field` / `old_value` / `new_value` and reads + from `ws.config` +- no new event types + +## Validation + +```bash +pytest tests/unit/surface/test_plan_attention.py -v +pytest tests/unit/surface/test_session_continuity.py -v +pytest tests/unit/surface/test_queen_budget.py -v +pytest tests/unit/surface/test_deliberation_frame.py -v +pytest tests/unit/addons/test_addon_capability.py -v +pytest tests/unit/surface/test_workspace_taxonomy.py -v + +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +``` + +## Success Condition + +Wave 68 succeeds if the Queen becomes more capable without becoming blurrier: + +- it remembers plans and sessions without polluting memory +- it scales gracefully from small to large context models +- it sees the workspace more clearly before deciding +- it routes across memory, docs, code, and future corpora with explicit source + boundaries + +That gives you a more flexible knowledge system by 
strengthening the Queen, +not by loosening the core memory model. diff --git a/docs/waves/wave_69/team_a_prompt.md b/docs/waves/wave_69/team_a_prompt.md new file mode 100644 index 0000000..f4ad4f9 --- /dev/null +++ b/docs/waves/wave_69/team_a_prompt.md @@ -0,0 +1,465 @@ +# Wave 69 — Team A: Enriched Queen Chat + +**Theme:** The Queen chat shows what happened, not just that something +happened. + +## Context + +Read `docs/waves/wave_69/wave_69_plan.md` first. This is a rendering wave — +the backend data already exists. Your job is to surface it in the chat. + +Read `CLAUDE.md` for hard constraints. Read `docs/design-system-v4.md` for +the Void Protocol design system — every new component must follow it. + +## Your Files (exclusive ownership) + +### Frontend +- `frontend/src/components/queen-chat.ts` — inline progress cards, + consulted-sources chips, diff preview enrichment, plan progress bar +- `frontend/src/components/colony-progress-card.ts` — **new**, reactive + inline colony progress card +- `frontend/src/components/consulted-sources.ts` — **new**, citation chip + strip +- `frontend/src/types.ts` — new type additions only (additive) +- `frontend/src/state/store.ts` — colony state subscription helpers + (additive) + +### Backend +- `src/formicos/surface/queen_runtime.py` — `consulted_entries` metadata + on QueenMessage emission (small addition) +- `src/formicos/surface/runtime.py` — `retrieve_relevant_memory()` return + type change only (return `(str, list)` tuple instead of `str`) +- `src/formicos/surface/routes/api.py` — thread plan read endpoint (small + addition) + +### Tests +- `tests/unit/surface/test_plan_read_endpoint.py` — **new** + +## Do Not Touch + +- `frontend/src/components/knowledge-browser.ts` — Team B owns +- `frontend/src/components/knowledge-view.ts` — Team B owns +- `frontend/src/components/settings-view.ts` — Team C owns +- `frontend/src/components/model-registry.ts` — Team C owns +- `frontend/src/components/addons-view.ts` — Team C owns +- 
`src/formicos/surface/projections.py` — no projection changes +- `src/formicos/core/events.py` — no new events +- `src/formicos/core/types.py` — no type changes +- `src/formicos/surface/knowledge_catalog.py` — no retrieval changes +- `config/caste_recipes.yaml` — stable from Wave 68 + +## Overlap Coordination + +- Team B may add search result types to `types.ts`. Team C does not touch + types. All additions are additive — no conflicts. +- Team B may add state to `store.ts` for search results. Your colony + subscription additions are in a different area. Additive. +- `formicos-app.ts` — Team C may adjust nav items. You do not touch the + nav. No conflict. + +--- + +## Track 1: Inline Colony Progress Cards + +### Problem + +When the Queen spawns a colony, the operator must navigate to the colony +detail tab to see what's happening. The chat shows a preview card (spawn +intent) and later a result card (completion), but nothing in between. The +operator can't see progress without leaving the chat. + +### Data already available + +The store already processes these WebSocket events in real time: + +- `ColonySpawned` (store.ts:247) — creates tree node with initial state +- `RoundStarted` (store.ts:369) — updates `colony.round` +- `RoundCompleted` (store.ts:374) — pushes to `convergenceHistory` +- `ColonyCompleted` / `ColonyFailed` — terminal state + +The colony tree node carries: `round`, `maxRounds`, `status`, `caste`, +`strategy`, `convergenceHistory[]`, `cost`. + +### Implementation + +**1. 
New component `colony-progress-card.ts`.**
+
+A reactive Lit component that subscribes to store updates for a specific
+colony and renders inline progress:
+
+```typescript
+@customElement('fc-colony-progress')
+export class ColonyProgressCard extends LitElement {
+  @property() colonyId = '';
+  @property() task = '';
+  private _unsub?: () => void;
+
+  connectedCallback() {
+    super.connectedCallback();
+    this._unsub = store.subscribe(() => this.requestUpdate());
+  }
+
+  disconnectedCallback() {
+    super.disconnectedCallback();
+    this._unsub?.();
+  }
+
+  render() {
+    const node = findNode(store.state.tree, this.colonyId);
+    if (!node) return nothing;
+    // ... render progress
+  }
+}
+```
+
+This follows the exact subscription pattern from `colony-detail.ts`
+(lines 285–307): subscribe on connect, unsubscribe on disconnect,
+`requestUpdate()` on store change.
+
+**Render states:**
+
+- **Running:** Glass card with progress bar (round N / maxRounds), caste
+  badge, strategy label, cost accumulator. If `convergenceHistory` has
+  data, show a 3-line sparkline (tiny inline SVG, ~40x16px).
+- **Completed:** Transition to compact result: success/failure indicator,
+  files changed count, cost, quality score. Use `fc-dot` for status.
+- **Failed:** Same compact result with danger styling.
+
+Style with Void Protocol tokens. Card should be narrow — same width as
+a text message, not full-width like preview cards.
+
+**2. Mount progress cards in `queen-chat.ts`.**
+
+In the message render dispatch (queen-chat.ts:212–345), after a
+`preview_card` message that carries `meta.tool === 'spawn_colony'` and
+`meta.colony_id`, insert a `<fc-colony-progress>` card. The progress card
+renders below the preview card and updates reactively as the colony runs.
+
+When the colony completes, the Queen emits a `result_card` message. At
+that point the progress card can transition to its compact completed state
+or be replaced by the result card. Prefer transition — it's less jarring. 
+ +**Detection logic:** When iterating messages, look for preview_card +messages where `meta?.tool === 'spawn_colony'` or where the action dict +from `queen_tools.py` includes `colony_id`. The colony ID is available +in `meta.colony_id` (see queen_tools.py spawn return at line 1536: +`{"tool": "spawn_colony", "colony_id": colony_id}`). + +For `spawn_parallel`, the meta carries `colony_ids: string[]`. Render +one progress card per colony, or a grouped progress card that shows all +colonies in the parallel group with individual progress rows. + +**3. Handle terminal state transition.** + +When a `result_card` message arrives for the same `colonyId`, the +progress card should stop updating. Check if a later message in the +array has `render === 'result_card'` and matching `meta.colonyId`. If +so, render the progress card in its completed compact state (not the +full running state). + +--- + +## Track 2: Consulted Sources + +### Problem + +When the Queen's response is informed by knowledge entries (via the +deliberation frame from Wave 68 Track 4, or via `memory_search` tool +results), the operator can't see which knowledge was consulted. The Queen +says "based on prior experience" but doesn't link to the actual entry. + +### Correct framing + +This is "consulted sources," not "citations." The deliberation frame +injects knowledge entries as context before the LLM call. The Queen +doesn't explicitly cite them — they were available during reasoning. The +UI should reflect this honestly: "Consulted Knowledge" or "Sources +Available," not "References" or "Citations." + +### Implementation + +**1. Backend: emit `consulted_entries` on QueenMessage metadata.** + +In `queen_runtime.py`, in `respond()`, after the deliberation frame is +built (lines 1047–1070), record the knowledge entry IDs that were +injected. 
The deliberation frame's `_build_deliberation_frame()` method +assembles institutional memory coverage from projections — it has access +to the top domains and their entry counts. + +Add to the response metadata: + +```python +# After deliberation frame injection and before the LLM call +_consulted: list[dict[str, Any]] = [] +``` + +Populate `_consulted` from real knowledge entries only: + +1. **Memory retrieval results** — `respond()` calls + `self._runtime.retrieve_relevant_memory()` (queen_runtime.py:892) + which delegates to `catalog.search()` (runtime.py:1183) and returns + a **formatted string**, not the structured results list. + + To capture structured entries, either: + - **(preferred)** Have `retrieve_relevant_memory()` return a + `(text, items)` tuple instead of just the string. The items list + is already built at runtime.py:1183 — return it alongside the + formatted text. Update the call site in `respond()` to unpack both. + - **(alternative)** Call `catalog.search()` directly in `respond()` + before `retrieve_relevant_memory()`, but this duplicates the search. + + With the structured results available, extract the top 5: + ```python + for _item in _memory_items[:5]: + _consulted.append({ + "id": _item.get("id", ""), + "title": _item.get("title", "")[:80], + "confidence": round(_item.get("confidence", 0.5), 2), + }) + ``` + + **Note:** `retrieve_relevant_memory()` lives in `runtime.py` (line + 1163), not in `queen_runtime.py`. You own changes in + `queen_runtime.py` but need a small edit to `runtime.py` for the + return type change. This is the only runtime.py touch — keep it + minimal. + +Do **not** fabricate synthetic consulted entries like `__deliberation__` +or attach fake `confidence: 1.0` summaries. If you want to preserve the +fact that a deliberation frame was present, that belongs in a separate +label or boolean metadata field, not in the consulted-entry list. 
+
+Attach to the QueenMessage emission:
+
+```python
+if _consulted:
+    # Add to the meta dict of the response QueenMessage
+    _response_meta["consulted_entries"] = _consulted
+```
+
+The exact insertion point: `_emit_queen_message()` is called at the end
+of the tool loop / response generation. The meta dict is already passed
+through. Add `consulted_entries` to it.
+
+**2. Frontend: `consulted-sources.ts` component.**
+
+A horizontal strip of clickable chips rendered below a Queen message
+when `meta.consulted_entries` is present.
+
+Each chip shows:
+- Entry title (truncated to ~40 chars)
+- Confidence indicator: `fc-dot` with status mapping
+  (`confidence >= 0.7` → loaded/green, `>= 0.4` → pending/gold,
+  else → error/red)
+- Click navigates to the knowledge browser detail view for that entry ID
+
+Only render chips for real entry IDs. If a separate summary label exists,
+render it as plain muted text, not as a pseudo-entry chip.
+
+**3. Mount in `queen-chat.ts`.**
+
+In the text message render path (queen-chat.ts:329–344), after the
+message text, check `m.meta?.consulted_entries`. If present and non-empty,
+render `<fc-consulted-sources>`.
+
+---
+
+## Track 3: Inline Diff Preview
+
+### Problem
+
+When a colony produces file changes (via `edit_file`), the result card
+shows success/failure and cost, but not what changed. The operator must
+navigate to workspace browser to see the diff.
+
+### Data already available
+
+- `EditProposalMeta` (types.ts:244–249) carries `filePath`, `diff`,
+  `reason`, `colonyId`. This is already rendered by `fc-edit-proposal`.
+- `ResultCardMeta` (types.ts:212–226) carries `colonyId` but not diff
+  data directly.
+
+### Implementation
+
+**1. Enhance `fc-result-card` with a diff summary section.**
+
+When a result card's colony produced file changes, show a compact diff
+summary below the result stats. The diff data is not currently on
+`ResultCardMeta` — it would need to come from the colony's artifact
+output. 
+ +**Simpler approach:** The `edit_proposal` card type already renders +diffs inline via `fc-edit-proposal`. For result cards, add a small +"Files Changed" badge showing the count. Clicking expands to show the +file list. Each file entry links to the workspace browser. + +The colony's file changes are tracked in the colony tree node — the +store processes `ArtifactCreated` events. Check whether the tree node +carries artifact/file data. If not, the simplest path is a small API +call: `GET /api/v1/colonies/{id}/transcript` already returns the full +transcript which includes file operations. + +**2. For `edit_proposal` cards: already handled.** + +The `fc-edit-proposal` component already renders inline diff. No changes +needed. This track is about enriching `result_card` — the post-completion +summary. + +**Scope note:** Keep this lightweight. A "Files: 3 changed" badge with +expandable file list is sufficient. Full inline diff rendering is +already handled by `edit_proposal` cards. Don't duplicate that work. + +--- + +## Track 4: Plan Progress Bar + +### Problem + +Wave 68 Track 1 persists plans to `.formicos/plans/{thread_id}.md`. The +Queen reads them for attention injection. But the operator can't see the +plan state at a glance without reading through conversation history. + +### Implementation + +**1. Backend: thread plan read endpoint.** + +Add a small GET endpoint in `routes/api.py`: + +```python +GET /api/v1/workspaces/{workspace_id}/threads/{thread_id}/plan +``` + +Read `.formicos/plans/{thread_id}.md` from the data directory. 
Parse the +`## Steps` section into structured data: + +```python +# Return shape: +{ + "exists": true, + "title": "Plan: Implement auth module", + "approach": "Use OAuth2 with JWT tokens", + "steps": [ + {"index": 0, "status": "completed", "description": "Set up OAuth provider", "colony_id": "abc123", "note": "Done, merged."}, + {"index": 1, "status": "started", "description": "Write integration tests", "colony_id": "def456"}, + {"index": 2, "status": "pending", "description": "Update API docs"} + ] +} +``` + +If no plan file exists, return `{"exists": false}`. + +The step format in the file (from Wave 68 Team A prompt): +```markdown +## Steps +- [0] [started] Implement auth module (colony abc12345) +- [1] [pending] Write integration tests +- [2] [completed] Update API docs — Done, merged. +``` + +Parse with a simple regex. This is a read-only endpoint — the Queen +writes plans via `propose_plan` and `mark_plan_step` tools. + +**2. Frontend: plan progress bar in `queen-chat.ts`.** + +Below the thread tabs and above the message list, render a persistent +plan progress bar when the active thread has a plan. + +On thread switch or mount, fetch the plan: +```typescript +const res = await fetch(`/api/v1/workspaces/${wsId}/threads/${threadId}/plan`); +const plan = await res.json(); +``` + +If `plan.exists`: +- Render a slim horizontal bar (glass card, 36px height) with: + - Plan title (truncated) + - Step indicators: small circles in a row. Green filled = completed, + accent ring = started, dim = pending, red = blocked. + - Step count: "2/5 completed" +- Clicking the bar expands to show the full step list with descriptions. + +Poll or refresh the plan on each new QueenMessage (the plan may update +when `mark_plan_step` is called). A simple fetch on message arrival is +sufficient — this is not a real-time subscription, it's a read-heavy +file that changes infrequently. + +**3. Write `tests/unit/surface/test_plan_read_endpoint.py`.** + +4 tests: +1. 
`test_plan_endpoint_returns_parsed_steps` — write a plan file with + steps, hit the endpoint, assert structured response. +2. `test_plan_endpoint_no_file_returns_not_exists` — no plan file, + assert `{"exists": false}`. +3. `test_plan_endpoint_parses_colony_ids` — step with `(colony abc123)`, + assert `colony_id` field populated. +4. `test_plan_endpoint_handles_malformed_gracefully` — garbage file + content, assert no crash, returns partial data. + +--- + +## Track 5: AG-UI Compatible Event Shapes (Tail Item) + +### Problem + +AG-UI defines standard event types for agent interaction. The existing +WebSocket events carry equivalent data but in custom shapes. A thin +compatibility layer would make future AG-UI client integration easier. + +### Priority + +This is a **tail item**. Only implement if Tracks 1–4 land cleanly. +Do not let this slow the card work. It is an internal adapter, not +user-facing. + +### Implementation + +Create `frontend/src/agui-compat.ts` (~50 lines). Map: + +| FormicOS Event | AG-UI Shape | Notes | +|----------------|-------------|-------| +| `ColonySpawned` | `STEP_STARTED` | `stepId = colonyId` | +| `ColonyCompleted` / `ColonyFailed` | `STEP_FINISHED` | `status = success/error` | +| `QueenToolCallCompleted` (if exists) | `TOOL_CALL_END` | Tool name + result | +| Thread state delta | `STATE_DELTA` | Partial state update | + +Export mapping functions, not a transport layer. The colony progress +card can optionally consume these mapped shapes internally. + +--- + +## Empty States + +First-run quality matters for a product surface. Handle: + +- **Empty thread (no messages):** Show a centered prompt hint: + "Ask the Queen anything — she'll plan, delegate, and track." +- **No active plan:** Plan progress bar hidden (not empty-state shown). +- **No colonies spawned yet:** No progress cards (nothing to show). +- **Colony completed with no file changes:** Result card shows stats + only, no diff section. 
+ +--- + +## Acceptance Gates + +- [ ] Colony progress cards render inline and update reactively +- [ ] Progress cards transition cleanly on colony completion +- [ ] Consulted-sources chips appear below Queen responses when entries + were available +- [ ] Chips are labeled "Consulted Knowledge," not "Citations" +- [ ] Chips link to knowledge browser detail view +- [ ] Result cards show "Files Changed" count when applicable +- [ ] Plan progress bar renders below thread tabs when plan exists +- [ ] Plan read endpoint returns structured step data +- [ ] No new event types added +- [ ] No projection changes +- [ ] All new components follow Void Protocol design system +- [ ] `prefers-reduced-motion` respected on animations + +## Validation + +```bash +npm run build +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +pytest tests/unit/surface/test_plan_read_endpoint.py -v +``` diff --git a/docs/waves/wave_69/team_b_prompt.md b/docs/waves/wave_69/team_b_prompt.md new file mode 100644 index 0000000..95accbc --- /dev/null +++ b/docs/waves/wave_69/team_b_prompt.md @@ -0,0 +1,573 @@ +# Wave 69 — Team B: Unified Knowledge Search + +**Theme:** Search box first, tree view behind a toggle. One question, +answers from everywhere — institutional memory, documentation, codebase — +labeled by source. + +## Context + +Read `docs/waves/wave_69/wave_69_plan.md` first. This is a rendering wave. +The backend search capabilities already exist — this wave wires them to a +unified frontend surface with one new REST endpoint for fan-out. + +Read `CLAUDE.md` for hard constraints. Read `docs/design-system-v4.md` for +the Void Protocol design system. 
+ +## Your Files (exclusive ownership) + +### Frontend +- `frontend/src/components/knowledge-browser.ts` — search-first redesign, + detail mode toggle, quick filter pills +- `frontend/src/components/knowledge-search-results.ts` — **new**, + source-grouped search result cards +- `frontend/src/types.ts` — search result types (additive only) + +### Backend +- `src/formicos/surface/routes/knowledge_api.py` — unified search endpoint + +### Tests +- `tests/unit/surface/test_unified_search.py` — **new** + +## Do Not Touch + +- `frontend/src/components/queen-chat.ts` — Team A owns +- `frontend/src/components/settings-view.ts` — Team C owns +- `frontend/src/components/model-registry.ts` — Team C owns +- `frontend/src/components/addons-view.ts` — Team C owns +- `src/formicos/surface/knowledge_catalog.py` — no retrieval changes +- `src/formicos/surface/projections.py` — no projection changes +- `src/formicos/core/events.py` — no new events +- `src/formicos/core/types.py` — no type changes +- `src/formicos/addons/codebase_index/` — do not modify addon code +- `src/formicos/addons/docs_index/` — do not modify addon code +- `config/caste_recipes.yaml` — stable from Wave 68 + +## Overlap Coordination + +- Team A adds types to `types.ts`. Your additions are different types + (search results). Additive, no conflict. +- Team A adds state to `store.ts`. You may add search result state. + Different areas, additive. +- `formicos-app.ts` — Team C may adjust nav items. You don't touch the + nav. The knowledge tab already exists. No conflict. +- `knowledge-view.ts` — You may need minor changes here if the tab + container structure changes for the search-first layout. Coordinate: + Team C does not touch this file. + +--- + +## Track 6: Unified Search Endpoint + +### Problem + +The frontend has no single API call that searches across institutional +memory AND addon-owned indices. 
The existing `GET /api/v1/knowledge/search` +(knowledge_api.py:73) only queries `knowledge_catalog.search()`, which +hits institutional memory (vector store + legacy skill bank). Addon search +tools (`semantic_search_code`, `semantic_search_docs`) are only callable +via Queen tool dispatch or direct addon handler invocation. + +### Implementation + +**1. New endpoint in `knowledge_api.py`.** + +``` +GET /api/v1/workspaces/{workspace_id}/search?q=...&sources=memory,docs,code +``` + +Parameters: +- `q` (required): natural language query string +- `sources` (optional): comma-separated list of source types to search. + Default: all available. Canonical values for this endpoint are + `memory`, `docs`, and `code`. Map those stable UI tokens to addon + capability metadata (`documentation`, `source_code`) internally. +- `limit` (optional): max results per source. Default 10, max 20. + +**2. Fan-out logic.** + +The endpoint needs access to: +- `knowledge_catalog` — for institutional memory search +- `runtime` — for addon handler resolution + +Fan out in parallel using `asyncio.gather`: + +```python +async def unified_search(request: Request) -> JSONResponse: + workspace_id = request.path_params["workspace_id"] + query = request.query_params.get("q", "") + if not query: + return _err_response("QUERY_REQUIRED") + sources_param = request.query_params.get("sources", "") + limit = min(int(request.query_params.get("limit", "10")), 20) + + results: list[dict] = [] + tasks: list[Coroutine] = [] + + # Always search institutional memory unless excluded + requested = set(sources_param.split(",")) if sources_param else None + if requested is None or "memory" in requested: + tasks.append(_search_memory(query, workspace_id, limit)) + + # Search addon indices based on capability metadata. + # Use the actual app-state seam, not a fictional runtime.addon_registry. 
+ regs = getattr(request.app.state, "addon_registrations", []) + if regs: + for reg in regs: + manifest = reg.manifest + if not manifest.search_tool: + continue + if requested is not None: + want_docs = "docs" in requested + want_code = "code" in requested + kinds = set(manifest.content_kinds or []) + if not ( + (want_docs and "documentation" in kinds) + or (want_code and "source_code" in kinds) + ): + continue + tasks.append( + _search_addon(reg, query, workspace_id, limit) + ) + + gathered = await asyncio.gather(*tasks, return_exceptions=True) + for batch in gathered: + if isinstance(batch, list): + results.extend(batch) + return JSONResponse({"results": results, "total": len(results)}) +``` + +**3. Memory search helper.** + +```python +async def _search_memory(query, workspace_id, limit): + items = await knowledge_catalog.search( + query=query, workspace_id=workspace_id, top_k=limit, + ) + return [ + { + "source": "memory", + "source_label": "Institutional Memory", + "id": it.get("id", ""), + "title": it.get("title", ""), + "snippet": (it.get("summary") or it.get("content_preview") or "")[:200], + "score": round(it.get("score", 0), 4), + "metadata": { + "confidence": round(it.get("confidence", 0.5), 2), + "status": it.get("status", ""), + "domains": it.get("domains", []), + "sub_type": it.get("sub_type", ""), + }, + } + for it in items + ] +``` + +**4. Addon search helper.** + +Addon search handlers follow the signature in +`addons/codebase_index/search.py` and `addons/docs_index/search.py`: + +```python +async def handle_semantic_search( + inputs: dict[str, Any], + workspace_id: str, + thread_id: str, + *, + runtime_context: dict[str, Any] | None = None, +) -> str +``` + +They return a formatted markdown string, not structured data. The unified +search endpoint must parse the markdown output into result items. 
+ +```python +async def _search_addon(reg, query, workspace_id, limit): + manifest = reg.manifest + addon_name = manifest.name + + # manifest.search_tool is the tool NAME (e.g., "semantic_search_code"). + # The handler REFERENCE lives in the matching tool spec's .handler field + # (e.g., "search.py::handle_semantic_search"). + # + # Two ways to call the handler: + # + # (a) Look up the tool name in manifest.tools to get the handler + # reference, then resolve via addon_loader._resolve_handler() + # and pass reg.runtime_context manually. + # + # (b) The tool handlers are already registered as wrapped callables + # in the Queen's tool dispatcher during addon loading + # (addon_loader.py:262). Those wrappers have runtime_context + # baked into the closure. If you have access to the queen's + # tool dispatcher, call it directly. + # + # For the REST endpoint, path (a) is cleaner because the route + # doesn't hold a Queen reference: + + tool_spec = next( + (t for t in manifest.tools if t.name == manifest.search_tool), + None, + ) + if tool_spec is None: + return [] + + from formicos.surface.addon_loader import _resolve_handler + try: + handler = _resolve_handler(addon_name, tool_spec.handler) + except Exception: + return [] + + # The registration object stores the runtime_context that was + # injected during loading (addon_loader.py:224). + rc = getattr(reg, "runtime_context", {}) or {} + + raw = await handler( + {"query": query, "top_k": limit}, + workspace_id, "", # thread_id not needed for search + runtime_context=rc, + ) + + # Parse markdown results into structured items + return _parse_addon_results( + raw, addon_name, manifest.content_kinds, limit, + ) +``` + +The addon search results come back as markdown like: +``` +**path/to/file.py:10-25** (score: 0.832) +\`\`\` +code content here +\`\`\` +``` + +Parse with a simple regex to extract path, line range, score, and content +snippet. 
Each result becomes: + +```python +{ + "source": addon_name, # e.g., "codebase-index" + "source_label": manifest.description or addon_name, + "id": f"{addon_name}:{path}:{line_start}", + "title": path, + "snippet": content[:200], + "score": parsed_score, + "metadata": { + "file_path": path, + "line_range": f"{line_start}-{line_end}", + "content_kinds": manifest.content_kinds, + }, +} +``` + +**5. Runtime context construction.** + +Check how `queen_tools.py` builds `runtime_context` when calling addon +handlers. Replicate the same pattern. The key fields are typically: +- `vector_port` — for vector search +- `data_dir` — workspace data directory +- `workspace_id` + +Look at `addon_loader.py` handler resolution (lines 86–125) for the +pattern. The tool registry wrapper injects `runtime_context` if the +handler signature accepts it. + +**Important:** The addon handler resolution and `runtime_context` wiring +must match the existing pattern. Do not invent a new calling convention. +Read `queen_tools.py`'s addon tool dispatch to find the exact seam. + +**6. Ranking within source, not across sources.** + +Do NOT sort all results by raw score across sources. Memory scores, code +search scores, and doc search scores are not comparable — they come from +different scoring functions with different distributions. + +Return results grouped by source. Each source group is sorted by score +descending (within that source). The frontend renders groups separately. + +The response shape: + +```json +{ + "results": [ + {"source": "memory", "source_label": "Institutional Memory", ...}, + {"source": "memory", ...}, + {"source": "codebase-index", "source_label": "Index and search...", ...}, + {"source": "docs-index", "source_label": "Index and search...", ...} + ], + "total": 15 +} +``` + +Results are ordered: all memory results first (sorted by score), then +each addon group (sorted by score). The frontend groups by `source` for +display. 
+ +### Tests + +Create `tests/unit/surface/test_unified_search.py` with at least: + +1. `test_unified_search_returns_memory_results` — mock knowledge_catalog, + query, assert memory results in response with correct shape. +2. `test_unified_search_fans_out_to_addons` — mock addon registry with + a search_tool, assert addon handler called through the real + addon-registration seam. +3. `test_unified_search_filters_by_source_param` — request + `?sources=memory`, assert addon handlers not called. +4. `test_unified_search_parses_addon_markdown` — feed markdown output, + assert structured result items extracted. +5. `test_unified_search_handles_addon_failure_gracefully` — addon handler + raises, assert memory results still returned. +6. `test_results_grouped_by_source` — assert results arrive in source + groups, not interleaved by raw score. + +--- + +## Track 7: Search-First Knowledge UI + +### Problem + +The knowledge browser (knowledge-browser.ts) defaults to a flat catalog +list with tree view, score breakdown bars, Beta posteriors, and provenance +timelines. That's a power-user tool. An end user wants a search box. + +### Current state of knowledge-browser.ts + +The component already has: +- `_queryText` state for search input +- `_filterType` for skill/experience filtering +- `_sortBy` for newest/confidence/relevance +- `_threadFilter` for scope filtering +- Sub-view modes: `catalog` | `graph` | `tree` +- Entry card rendering with confidence display, badges, relationships +- Score breakdown bar with 7 segments + +The search input exists but it queries the existing +`/api/v1/knowledge/search` endpoint (institutional memory only). The +results render in the same detailed card format. + +### Implementation + +**1. Redesign the default view.** + +Replace the current default (catalog list) with a search-first layout: + +- **Large centered search box** at the top. Prominent, full-width within + the content area. Placeholder: "Search knowledge, docs, code..." 
+ Style: glass card background, 14px body font, accent border on focus. +- **Quick stats below the search box** (before any search): entry count, + domain count, addon index status (if available from addon health data). + One line, muted text, `var(--f-mono)`. +- **Results area** below, initially empty. Shows source-grouped results + after a search. + +**2. Wire to unified search endpoint.** + +On search input (debounced, 300ms), call: +``` +GET /api/v1/workspaces/${wsId}/search?q=${query}&limit=10 +``` + +Group results by `source` field and render in labeled sections: + +- **"From Institutional Memory"** — memory entries with: + - Title (linked to detail view) + - Content snippet (2 lines max) + - Confidence indicator: `fc-dot` with tier mapping + (`>= 0.7` → loaded, `>= 0.4` → pending, else → error). + Show "High" / "Medium" / "Low" text label, NOT alpha/beta numbers. + - Domain badges + - Status badge (verified/candidate/active) + +- **"From Documentation"** — doc results with: + - File path as title (linked to workspace browser) + - Section name if available + - Content snippet (2–3 lines) + - Score indicator (simple bar, not 7-segment breakdown) + +- **"From Codebase"** — code results with: + - File path + line range as title + - Code snippet in monospace (`var(--f-mono)`) + - Language indicator if detectable from file extension + +Each section is a glass card. Section headers use `var(--f-display)`, +11px, `var(--v-fg-muted)`, uppercase. + +If a source returns no results, omit that section (don't show "No results +from Documentation"). + +**3. New component `knowledge-search-results.ts`.** + +Extract the search result rendering into its own component to keep +`knowledge-browser.ts` manageable. Props: + +```typescript +@property({ type: Array }) results: UnifiedSearchResult[] = []; +@property() activeWorkspaceId = ''; +``` + +Renders the source-grouped cards. 
Emits `entry-selected` custom event +when a memory entry is clicked (so the parent can switch to detail view). +Emits `file-selected` custom event when a doc/code result is clicked +(so the parent can navigate to workspace browser). + +--- + +## Track 8: Progressive Disclosure Toggle + +### Problem + +The existing tree view, score breakdown bars, Beta posteriors, provenance +timeline are valuable for the builder but overwhelming for new users. + +### Implementation + +**1. Add a "Detail Mode" toggle.** + +In the knowledge browser header area (where sub-view mode buttons +currently live), add a toggle switch labeled "Detail Mode" or a single +icon toggle (magnifying glass → list icon). + +- **Off (default):** Search-first view from Track 7. Entry cards show + simple confidence indicator (high/medium/low + `fc-dot`), title, + snippet, domain badges. No score breakdown bar, no Beta numbers, no + provenance timeline. +- **On:** Full power-user view. The existing catalog/tree/graph modes + become available. Score bars, Beta posteriors, provenance timelines, + relationships — everything currently rendered. + +Store the toggle state in component local state (`@state()`). Do not +persist it — default to off on every page load. This reinforces +"simple by default." + +**2. Conditional rendering in entry cards.** + +When rendering entry cards, check the detail mode state: + +```typescript +${this._detailMode ? this._renderDetailCard(entry) : this._renderSimpleCard(entry)} +``` + +`_renderSimpleCard` is the new simplified rendering. +`_renderDetailCard` is the existing `_renderEntryCard` logic (renamed). + +--- + +## Track 9: Quick Filters + +### Problem + +The current knowledge browser has dropdown-based filtering. Dropdowns +are discoverable but slow. Filter pills are more tactile and show the +active filter state at a glance. + +### Implementation + +**1. 
Filter pill strip below the search box.** + +Three pill groups, horizontally arranged: + +- **Source:** All | Memory | Docs | Code + - "All" is default (no filter). Others filter the unified search + `sources` parameter. + - Only show pills for sources that actually exist. If no docs-index + addon is installed, don't show "Docs." + +- **Domain:** Dynamically populated from knowledge hierarchy top-level + branches. Show up to 6 domain pills. If more than 6, show a "More..." + pill that opens a dropdown. + - Domain filtering is client-side: filter `results` by + `metadata.domains` containing the selected domain. + +- **Status:** All | Verified | Candidate + - Client-side filter on `metadata.status`. + +**2. Pill styling.** + +Use the `fc-pill` atom if one exists, otherwise create simple pill +styles: + +```css +.filter-pill { + padding: 4px 10px; + border-radius: 12px; + font-family: var(--f-mono); + font-size: 10px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.08em; + background: var(--v-surface); + border: 1px solid var(--v-border); + cursor: pointer; + transition: all 0.15s; +} +.filter-pill[active] { + background: var(--v-accent-muted); + border-color: var(--v-accent); + color: var(--v-accent-bright); +} +``` + +**3. Filter state.** + +Filters persist during the session (component state). Changing a filter +re-triggers the search with updated parameters. Source filter changes the +`sources` query param. Domain and status filters are applied client-side +to the fetched results. + +--- + +## Empty States + +- **No search query yet:** Show quick stats + gentle prompt: "Search + across all knowledge sources." +- **Search with no results:** "No results for '{query}'" with suggestion + to try broader terms or check addon index status. +- **No knowledge entries at all (fresh workspace):** "No knowledge yet. + Start a conversation with the Queen to build institutional memory." +- **Addon index not available:** Don't show that source's pill. 
If all
+ addon indices are unavailable, search falls back to memory only.
+
+---
+
+## Types to Add (in `types.ts`)
+
+```typescript
+/** Wave 69: unified search result from /search endpoint. */
+export interface UnifiedSearchResult {
+  source: string; // 'memory' | 'codebase-index' | 'docs-index'
+  source_label: string; // human-readable source name
+  id: string; // entry ID or composite key
+  title: string;
+  snippet: string;
+  score: number;
+  metadata: Record<string, unknown>; // source-specific metadata
+}
+```
+
+---
+
+## Acceptance Gates
+
+- [ ] Unified search endpoint returns source-labeled results
+- [ ] Endpoint fans out to memory + addon indices in parallel
+- [ ] Addon handler failures don't break the entire search
+- [ ] Results are ranked within source, not across sources
+- [ ] Search-first UI is the default knowledge browser view
+- [ ] Results grouped by source with clear section labels
+- [ ] Confidence shown as high/medium/low, not alpha/beta numbers
+- [ ] Detail mode toggle switches between simple and power-user view
+- [ ] Detail mode defaults to off on page load
+- [ ] Quick filter pills for source, domain, status
+- [ ] Source pills reflect actually-installed addons
+- [ ] All new components follow Void Protocol design system
+- [ ] No changes to retrieval algorithms or scoring math
+- [ ] No new event types
+
+## Validation
+
+```bash
+npm run build
+ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest
+pytest tests/unit/surface/test_unified_search.py -v
+```
diff --git a/docs/waves/wave_69/team_c_prompt.md b/docs/waves/wave_69/team_c_prompt.md
new file mode 100644
index 0000000..f9df164
--- /dev/null
+++ b/docs/waves/wave_69/team_c_prompt.md
@@ -0,0 +1,385 @@
+# Wave 69 — Team C: Unified Settings & System Awareness
+
+**Theme:** One settings surface, organized by concern. Read from multiple
+backends, write only where a write path already exists. The operator can
+see at a glance what the system knows and how it's configured. 
+ +## Context + +Read `docs/waves/wave_69/wave_69_plan.md` first. This is a rendering wave. +Every setting shown already has a backend source — this wave reorganizes +the frontend to present them coherently. + +Read `CLAUDE.md` for hard constraints. Read `docs/design-system-v4.md` for +the Void Protocol design system. + +## Your Files (exclusive ownership) + +### Frontend +- `frontend/src/components/settings-view.ts` — complete redesign as card + sections +- `frontend/src/components/system-overview.ts` — **new**, capability + summary header + +### Tests +- No backend tests (zero backend changes) + +## Do Not Touch + +- `frontend/src/components/queen-chat.ts` — Team A owns +- `frontend/src/components/knowledge-browser.ts` — Team B owns +- `frontend/src/components/knowledge-view.ts` — Team B owns +- `frontend/src/types.ts` — Teams A/B own additions +- `src/formicos/surface/routes/api.py` — Teams A/B own additions +- `src/formicos/surface/projections.py` +- `src/formicos/core/events.py` +- `src/formicos/core/types.py` +- `config/caste_recipes.yaml` +- `src/formicos/surface/knowledge_catalog.py` + +## Overlap Coordination + +- `formicos-app.ts` — You may adjust the nav labels or tab routing if + the settings surface subsumes some existing tabs. Teams A and B do not + touch the nav. Coordinate: do not remove tabs that other teams' + components depend on. Prefer adding deep-links over removing tabs. +- `frontend/src/state/store.ts` — You read from existing state, do not + add new state. No conflict with Teams A/B. 
+ +--- + +## Critical Constraint: Read/Write Asymmetry + +Current config lives in multiple backends: + +| Data | Backend | Write path | +|------|---------|------------| +| Governance (strategy, max rounds, budget, convergence, autonomy) | operator override history (`ConfigSuggestionOverridden`) | `POST /api/v1/workspaces/{id}/config-overrides` | +| Taxonomy tags | `WorkspaceConfigChanged` events | No dedicated settings REST write path confirmed; existing Queen tool path only | +| Model registry | `config/formicos.yaml` + model registry YAML | Partial policy PATCH exists at `/api/v1/models/{address}`; registry facts remain read-mostly | +| Caste model assignments | operator override history / caste policy surface | `POST /api/v1/workspaces/{id}/config-overrides` if the existing component already uses it; otherwise read-only | +| Caste recipes (tool lists, system prompts) | `config/caste_recipes.yaml` | `POST /api/v1/castes/{caste_id}` | +| Addon config | Addon manifest + `WorkspaceConfigChanged` | `PUT /api/v1/addons/{addon_name}/config` | + +**Rule:** Do NOT unify the backends. The UI reads from all sources and +presents them as unified. Writes go to the correct backend per section. +If a section has no write path today, present it clearly as **read-only** +with a muted label ("Read-only — managed by config file" or similar). + +Specifically: +- Model registry entries (provider, context window, cost): **read-only**. + Managed by `config/formicos.yaml` or model discovery. A partial model-policy + PATCH exists, but re-exposing that editor is out of scope for this wave. +- Caste recipes (system prompts, tool lists): **read-only** display in + settings. The existing `POST /api/v1/castes/{caste_id}` endpoint exists + but is admin-level. Do not expose inline editing for system prompts. +- Governance config: **editable**. Existing write path works. +- Taxonomy tags: **display-first** in Wave 69. 
Unless you confirm an existing + direct settings write path, show the tags clearly and add a muted hint like + "Set via Queen" rather than inventing a new endpoint here. +- Addon config params: **editable** only through the existing addon config + route, not through config-overrides. + +--- + +## Track 10: Redesigned Settings Page + +### Current state of `settings-view.ts` + +The component currently renders (settings-view.ts): +- Colony governance: strategy dropdown, max rounds input, budget input, + convergence threshold slider, autonomy level selector +- Protocol status: MCP/AG-UI/A2A with connection indicators +- Retrieval diagnostics: embedded `` component +- Save/revert controls + +State: `_editStrategy`, `_editMaxRounds`, `_editBudget`, +`_editConvergence`, `_editAutonomy`, `_saving`, `_saveMsg`, +`_diagTiming`, `_diagCounts`, `_diagEmbedModel`, `_diagEmbedDim`, +`_diagSearchMode`. + +Write path: `POST /api/v1/workspaces/{id}/config-overrides`, which records +operator overrides. Treat this as an override/editorial surface, not as a +generic workspace-config writer. + +### Implementation + +**1. Restructure as a single scrollable page with card sections.** + +Each section is a glass card (Void Protocol): + +```css +.settings-card { + background: var(--v-surface); + border: 1px solid var(--v-border); + border-radius: 10px; + padding: 16px 20px; + margin-bottom: 12px; +} +.settings-card h3 { + font-family: var(--f-display); + font-size: 13px; + font-weight: 600; + color: var(--v-fg); + margin: 0 0 12px 0; +} +``` + +**2. Card sections in order:** + +#### A. Workspace Identity + +- **Workspace name** — display from store state. Unless you confirm an + existing direct write path, keep this read-only in Wave 69. +- **Taxonomy tags** — from `ws.config.taxonomy_tags`. Render as tag pills. + If you confirm an existing direct settings write path, you may make them + editable. 
Otherwise keep them read-only in this wave and show a subtle
+  hint that tags are set through the Queen/tooling path, not through this
+  card.
+- **Project description** — link to the workspace browser's project
+  context editor (`.formicos/project_context.md`). Don't duplicate the
+  editor — show a preview (first 2 lines) with an "Edit in Workspace"
+  link.
+
+#### B. Models
+
+- **Model registry table** — read from store's `runtimeConfig.models.registry`.
+  Columns: Model Name, Provider, Context Window, Max Output, Status.
+  Provider shown as a colored dot + label.
+  Context window formatted as "128K" / "200K".
+  **Read-only in this view.** Show muted label: "Managed by configuration
+  file / model policy routes."
+- **Caste model assignments** — 5-column grid showing which model is
+  assigned to Queen/Coder/Reviewer/Researcher/Archivist. Read from
+  caste recipes in store. This IS editable via config-overrides if the
+  existing model-registry component already has that write path. Check
+  `model-registry.ts` for the cascade save logic and replicate it. If
+  no save logic exists, show as read-only.
+
+#### C. Knowledge
+
+- **Domain summary** — count of entries per top-level domain. Read from
+  knowledge store state. Simple bar chart or just number + domain name
+  pairs.
+- **Addon index status** — for each addon with index/search capability data
+  already present on the addon summary payload, show:
+  name, content type, chunk count (if available from addon status),
+  health indicator (`fc-dot`), and a "Reindex" button that calls
+  `POST /api/v1/addons/{addon_name}/trigger` with the reindex handler.
+  If capability metadata is not already in the addon summary payload,
+  fall back to description + status + trigger availability. Do not add a
+  backend expansion here.
+- **Retrieval diagnostics** — embed the existing
+  `<retrieval-diagnostics>` component. It already renders timing,
+  counts, and embedding config.
+
+#### D. 
Governance + +- Keep the existing governance controls from `settings-view.ts`: + strategy dropdown, max rounds, budget, convergence threshold, + autonomy level. +- Restyle as inline controls within the card (not a separate form). +- Save behavior: instant save on change (debounced 500ms), not a + separate save button. Show a subtle green checkmark that fades after + 1.5s on successful save. + +Read the existing `_saveSettings()` method — it already POSTs to +config-overrides. Wire the same logic to `change` events on each +control. + +#### E. Protocols + +- Keep the existing MCP/AG-UI/A2A status display. +- Restyle as a card section with status indicators. +- **Read-only.** Protocol status is runtime state, not config. + +#### F. Addons + +- **Addon summary cards** — for each installed addon, show: name, + version, health status (`fc-dot`), description, tool count, handler + count, and capability summary when those fields are already present on + the addon summary payload. +- This is a summary view, not the full addons-view. The existing + addons-view tab stays for deep-dive (tool call counts, handler + errors, manual triggers, panel rendering). +- If an addon has config params defined in the addon summary/config payload, + show them as editable fields. Write via the existing + `PUT /api/v1/addons/{addon_name}/config` route. + +--- + +## Track 11: System Capability Summary + +### Problem + +The operator has no quick way to see what the system knows and can do. +They must visit multiple tabs to piece together: how many knowledge +entries, what addons are installed, how many tools the Queen has, what +models are available. + +### Implementation + +**1. New component `system-overview.ts`.** + +A compact header rendered at the top of the settings page (above the +first card section). 
One or two lines of summary text:
+
+```
+38 Queen tools · 4 addons · 3 providers · 847 knowledge entries
+across 12 domains · Code index: 2,341 chunks · Docs index: 489 chunks
+```
+
+Data sources (all already in store state or fetchable):
+- Queen tool count: hardcoded 38 (from caste_recipes) or read from
+  caste recipe data in store
+- Addon count: `runtimeConfig` or `/api/v1/addons` response
+- Provider count: count unique providers in model registry
+- Knowledge entry count + domain count: from knowledge store state or
+  `/api/v1/knowledge?limit=1` total field
+- Addon index chunk counts: from addon status endpoints if available,
+  or omit if not readily available
+
+Style: `var(--f-mono)`, 10.5px, `var(--v-fg-muted)`, single line
+wrapping allowed. Separator: centered dot (·). No card wrapper — just
+text above the first card.
+
+**2. Mount in `settings-view.ts`.**
+
+At the top of the render method, before the first card section:
+
+```typescript
+html`<system-overview></system-overview>`
+```
+
+---
+
+## Track 12: Inline Editing Details
+
+### Implementation
+
+**1. Instant save with visual feedback.**
+
+Replace the current save/revert button pattern with instant save:
+
+```typescript
+private _saveTimeout?: number;
+
+private _onControlChange(field: string, value: unknown) {
+  clearTimeout(this._saveTimeout);
+  this._saveTimeout = window.setTimeout(() => {
+    this._saveField(field, value);
+  }, 500);
+}
+
+private async _saveField(field: string, value: unknown) {
+  const resp = await fetch(
+    `/api/v1/workspaces/${this.workspaceId}/config-overrides`,
+    {
+      method: 'POST',
+      headers: {'Content-Type': 'application/json'},
+      body: JSON.stringify({
+        dimension: 'governance',
+        original: {},
+        overridden: {[field]: value},
+        reason: 'Settings update',
+      }),
+    },
+  );
+  if (resp.ok) {
+    this._showSaveIndicator(field);
+  }
+}
+```
+
+**2. 
Save indicator.** + +A small green checkmark (✓) that appears next to the control and fades +out after 1.5s: + +```css +.save-indicator { + color: var(--v-success); + font-size: 12px; + opacity: 0; + transition: opacity 0.15s; +} +.save-indicator[visible] { + opacity: 1; +} +``` + +**3. Validation.** + +Client-side only: +- Budget: must be positive number +- Max rounds: 1–50 +- Convergence: 0.80–1.00 +- Tags: lowercase strings, max 50 chars each, max 20 tags + +Show inline validation errors below the control in +`var(--v-danger)` color, 10px `var(--f-mono)`. + +--- + +## Navigation Decision + +The settings surface now shows addon summary, model registry info, and +protocol status. These overlap with the existing Addons and Models tabs. + +**Recommendation:** Keep both tabs but adjust their purpose: +- **Settings tab:** Overview + config editing. Shows summary cards for + addons, models, knowledge. The config surface. +- **Addons tab:** Deep-dive into addon operations. Tool call counts, + handler errors, manual trigger buttons, panel rendering. The ops + surface. +- **Models tab:** Deep-dive into model operations. Slot utilization, + VRAM meters, cloud spend, per-model detail cards. The ops surface. + +Do not remove tabs. Adjust tab labels if needed for clarity: +- "Settings" → "Settings" (unchanged) +- "Models" → "Models" (unchanged, or "Model Ops" if you want to + distinguish) +- "Addons" → "Addons" (unchanged, or "Addon Ops") + +If you adjust labels, update the `NAV` array in `formicos-app.ts` +(line 29–37). + +--- + +## Empty States + +- **Fresh workspace, no config:** All governance fields show defaults. + Tags section shows "No tags yet" with a text input to add the first + one. +- **No addons installed:** Addons card shows "No addons installed." +- **No knowledge entries:** Knowledge section shows "0 entries." + Domain summary is empty. +- **Model registry unavailable:** Models card shows "Model registry + not available" with read-only label. 
+ +--- + +## Acceptance Gates + +- [ ] Single scrollable settings page with card sections +- [ ] System capability summary at the top +- [ ] Workspace identity card with honest tag handling (editable only if a confirmed write path exists; otherwise clear read-only presentation) +- [ ] Models card with registry table (read-only) and caste assignments +- [ ] Knowledge card with domain summary and addon index status +- [ ] Governance card with instant-save inline editing +- [ ] Protocol card with status indicators (read-only) +- [ ] Addons summary card with capability metadata when already available, otherwise coherent fallback summary +- [ ] Read-only sections clearly labeled as read-only +- [ ] No backend changes +- [ ] No new event types +- [ ] All new components follow Void Protocol design system +- [ ] Existing addons and models tabs preserved +- [ ] Validation on editable fields +- [ ] `prefers-reduced-motion` respected + +## Validation + +```bash +npm run build +npm run lint # if lint config exists +``` diff --git a/docs/waves/wave_69/wave_69_plan.md b/docs/waves/wave_69/wave_69_plan.md new file mode 100644 index 0000000..25c8f82 --- /dev/null +++ b/docs/waves/wave_69/wave_69_plan.md @@ -0,0 +1,118 @@ +# Wave 69: The Product Surface + +**Status:** Dispatch-ready packet +**Predecessor:** Wave 68 +**Theme:** Make FormicOS usable by someone who isn't the builder. Take the +rich backend (hierarchy, provenance, PPR retrieval, deliberation frames, +plan persistence, session continuity, addon capabilities, workspace +taxonomy) and make it visible through progressive disclosure. + +## Packet Authority + +This file is the dispatch overview. The prompts are the authority for +implementation detail: + +- `docs/waves/wave_69/team_a_prompt.md` +- `docs/waves/wave_69/team_b_prompt.md` +- `docs/waves/wave_69/team_c_prompt.md` + +## Locked Boundaries + +Wave 69 is a rendering wave, not an architecture wave. + +- No new event types. No new projection fields. 
+- No changes to retrieval algorithms or scoring math. +- No new Queen tools. Wave 68 added the last batch. +- No addon development. No MCP integration changes. +- Backend changes are limited to: one new REST endpoint (unified search), + one new REST endpoint (thread plan read), small metadata additions + to `QueenMessage` emission, and a return-type change in + `retrieve_relevant_memory()` to expose structured results. +- All new components follow `docs/design-system-v4.md` (Void Protocol). + +## Scope + +| Track | Outcome | Team | Dependency | +|-------|---------|------|------------| +| 1 | Inline colony progress cards in Queen chat | A | None | +| 2 | Consulted-sources chips on Queen responses | A | None | +| 3 | Inline diff preview on result/edit cards | A | None | +| 4 | Plan progress bar below chat tabs | A | Track 5 (plan read endpoint) | +| 5 | Thread plan read endpoint | A | None (backend) | +| 6 | Unified search endpoint (memory + addon indices) | B | None (backend) | +| 7 | Search-first knowledge UI with source-labeled results | B | Track 6 | +| 8 | Progressive disclosure toggle (detail mode) | B | None | +| 9 | Quick filters (source, domain, status pills) | B | None | +| 10 | Redesigned settings-view with card sections | C | None | +| 11 | System capability summary header | C | None | +| 12 | Inline editing with correct backend routing | C | None | + +## Team Missions + +### Team A — Enriched Queen Chat + +Own the operator's primary interaction surface. Make the Queen chat show +what happened, not just that something happened. Inline colony progress, +consulted sources, diff previews, plan progress. One small backend addition: +thread plan read endpoint + consulted-entry metadata on QueenMessage. + +### Team B — Unified Knowledge Search + +Own the knowledge discovery surface. Search box first, tree view behind +a toggle. One backend endpoint that fans out to memory + addon indices +in parallel and returns source-labeled results. 
Results ranked within +source, grouped by source — no cross-source raw score sorting. + +### Team C — Unified Settings & System Awareness + +Own trust, config, and system legibility. One scrollable settings page +with card sections. Read from multiple backends, write only where an +existing endpoint/config path already exists. Read-only sections presented +clearly as read-only. + +## Merge Order + +All three teams can develop in parallel. No blocking dependencies between +teams. Recommended merge order: + +1. Team A (most complex, touches the most files) +2. Team B (search endpoint + UI) +3. Team C (reorganization of existing surfaces) + +## Global Do Not Touch + +- `src/formicos/core/events.py` +- `src/formicos/core/types.py` +- `src/formicos/surface/projections.py` +- `src/formicos/surface/knowledge_catalog.py` (retrieval math) +- `src/formicos/engine/` (any file) +- `config/caste_recipes.yaml` (Wave 68 just stabilized this) + +## Design Standard + +Every new component follows `docs/design-system-v4.md` (Void Protocol): + +- Glass cards: `background: var(--v-surface)`, `border: 1px solid var(--v-border)`, `border-radius: 10px` +- Font stack: `var(--f-display)` for headings, `var(--f-body)` for text, `var(--f-mono)` for data/labels +- Accent: `var(--v-accent)` for interactive elements +- Confidence: `fc-dot` with status mapping (high=loaded, medium=pending, low=error) +- Animations: 0.15s transitions, `prefers-reduced-motion` respected + +## Validation + +```bash +npm run build # frontend must build clean +npm run lint # if lint config exists +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +``` + +## Success Condition + +Wave 69 succeeds if someone who isn't the builder can open FormicOS and: + +- See what the Queen is doing without navigating to colony detail tabs +- Search across all knowledge sources from one box and understand where + results came from +- Find and change any setting without knowing which backend stores it +- 
Understand at a glance what the system knows, what tools it has, and + how it's configured diff --git a/docs/waves/wave_70/team_a_prompt.md b/docs/waves/wave_70/team_a_prompt.md new file mode 100644 index 0000000..4494782 --- /dev/null +++ b/docs/waves/wave_70/team_a_prompt.md @@ -0,0 +1,675 @@ +# Wave 70 — Team A: MCP Bridge + +**Theme:** The Queen can use any tool ecosystem the operator connects. + +## Context + +Read `docs/waves/wave_70/wave_70_plan.md` first. Read `CLAUDE.md` for hard +constraints. + +FormicOS is already an MCP **server** (19 tools at `/mcp` via FastMCP). +This wave adds MCP **client** capability: connecting to remote MCP servers +and exposing their tools to the Queen through the existing addon +infrastructure. + +**Key insight:** The addon system already handles tool registration, health +monitoring, capability metadata, and Queen routing. An MCP bridge is just +an addon whose tools are resolved from a remote server instead of local +Python handlers. The existing `register_addon()` → `tool_registry` → +Queen tool dispatch pipeline works unchanged. 
+ +## Your Files (exclusive ownership) + +### New addon +- `addons/mcp-bridge/addon.yaml` — **new**, bridge manifest +- `src/formicos/addons/mcp_bridge/__init__.py` — **new** +- `src/formicos/addons/mcp_bridge/client.py` — **new**, MCP client + connection, tool listing, tool calling +- `src/formicos/addons/mcp_bridge/discovery.py` — **new**, dynamic tool + discovery and registration + +### Surface +- `src/formicos/surface/queen_tools.py` — `discover_mcp_tools` new Queen + tool +- `src/formicos/surface/addon_loader.py` — small extension: bridge-aware + registration path for dynamic tools +- `config/caste_recipes.yaml` — add `discover_mcp_tools` to Queen tool + list, update tool count + +### Tests +- `tests/unit/addons/test_mcp_bridge.py` — **new** + +## Do Not Touch + +- `src/formicos/surface/mcp_server.py` — the MCP server is unrelated +- `src/formicos/surface/projections.py` +- `src/formicos/core/events.py` +- `src/formicos/core/types.py` +- `src/formicos/surface/queen_runtime.py` — Teams B/C own +- `src/formicos/surface/self_maintenance.py` — Team C owns +- `frontend/` — no frontend changes this wave + +## Overlap Coordination + +- Team B adds `propose_project_milestone` and `complete_milestone` to + `queen_tools.py` and `caste_recipes.yaml`. You add `discover_mcp_tools`. + Both are additive to different sections. No conflict. +- Team C adds `check_autonomy_budget` to `queen_tools.py`. Same: + additive, different section. +- All three teams touch `caste_recipes.yaml` to update the tool list. + The changes are additive (append tool names to the array, increment + the count). Merge last team's changes carefully. + +--- + +## Track 1: MCP Bridge Addon + +### Problem + +The Queen has ~38 built-in tools and 4 addons. With Docker MCP Toolkit, +the operator could give her git, filesystem, GitHub, databases, Slack — +anything with an MCP server. But there's no way to connect to external +MCP servers. 
+ +### Architecture + +The MCP bridge is an **addon** — it fits into the existing addon loader +pipeline. But unlike static addons (codebase-index, docs-index), bridge +tools are resolved dynamically from a remote MCP server. + +Two design options: +- **(a) Static manifest with generic proxy tools** — the addon.yaml + declares `mcp_call_tool` and `mcp_list_tools` as generic proxy tools. + The Queen calls `mcp_call_tool(server_url, tool_name, args)` for any + remote tool. Simple but the Queen sees 2 tools, not N specific tools. +- **(b) Dynamic registration** — the bridge connects at startup (or on + demand), fetches tool specs from the remote server, and registers each + as a named addon tool. The Queen sees `git_commit`, `git_push`, + `read_file` etc. as first-class tools in her tool list. + +**Use option (a) as the base, with option (b) as an enhancement via +Track 2.** The generic proxy is the safety net; dynamic discovery is the +power feature. + +### Implementation + +**1. Addon manifest: `addons/mcp-bridge/addon.yaml`.** + +```yaml +name: mcp-bridge +version: "1.0.0" +description: "Connect to remote MCP servers and call their tools" +author: "formicos-core" + +content_kinds: + - external_tools +search_tool: "" + +config: + - key: servers + type: string + default: "[]" + label: "JSON array of MCP server configs: [{name, url, transport}]" + - key: request_timeout_s + type: integer + default: 30 + label: "Tool call timeout in seconds" + - key: max_retries + type: integer + default: 2 + label: "Connection retry attempts" + +tools: + - name: mcp_call_tool + description: "Call a tool on a connected MCP server" + handler: client.py::handle_call_tool + parameters: + type: object + properties: + server: + type: string + description: "MCP server name (from configured servers list)" + tool_name: + type: string + description: "Name of the tool on the remote server" + arguments: + type: object + description: "Arguments to pass to the tool" + required: ["server", 
"tool_name"] + + - name: mcp_list_remote_tools + description: "List available tools on a connected MCP server" + handler: client.py::handle_list_tools + parameters: + type: object + properties: + server: + type: string + description: "MCP server name (omit to list all servers)" + +triggers: + - type: manual + handler: discovery.py::manual_refresh +``` + +**2. Client module: `src/formicos/addons/mcp_bridge/client.py`.** + +Uses FastMCP 2.14.5's Client class (already in dependencies): + +```python +from __future__ import annotations + +import asyncio +import json +import logging +from typing import Any + +from fastmcp.client import Client + +log = logging.getLogger(__name__) + + +# -- Connection pool ---------------------------------------------------------- + +# Module-level cache: server_name → Client instance +_connections: dict[str, Client] = {} +_connection_health: dict[str, dict[str, Any]] = {} + + +async def _get_client( + server_name: str, + *, + runtime_context: dict[str, Any] | None = None, +) -> Client | None: + """Get or create a Client for the named server. + + Server configs live in the addon's workspace config under the 'servers' + key: a JSON array of {name, url, transport?}. 
+ """ + if server_name in _connections: + client = _connections[server_name] + if client.is_connected(): + return client + # Stale connection — remove and reconnect + try: + await client.close() + except Exception: + pass + del _connections[server_name] + + # Resolve server config + ctx = runtime_context or {} + servers_raw = _resolve_server_config(server_name, ctx) + if servers_raw is None: + return None + + url = servers_raw.get("url", "") + timeout = int(servers_raw.get("timeout", 30)) + + try: + client = Client(url, timeout=timeout) + await client.initialize() + _connections[server_name] = client + _connection_health[server_name] = { + "status": "connected", + "error_count": 0, + "last_error": None, + } + log.info("mcp_bridge.connected", server=server_name, url=url) + return client + except Exception as exc: + _record_error(server_name, str(exc)) + log.warning( + "mcp_bridge.connect_failed", + server=server_name, url=url, error=str(exc), + ) + return None + + +def _resolve_server_config( + server_name: str, ctx: dict[str, Any], +) -> dict[str, Any] | None: + """Look up server config from addon workspace config.""" + # The 'servers' config param holds a JSON array + settings = ctx.get("settings") + if settings is None: + return None + # Try workspace config first, then addon config default + servers_json = "" + projections = ctx.get("projections") + if projections is not None: + # Check each workspace for addon config + for ws in projections.workspaces.values(): + raw = ws.config.get("mcp-bridge:servers", "[]") + if raw and raw != "[]": + servers_json = raw + break + if not servers_json: + servers_json = "[]" + try: + servers = json.loads(servers_json) if isinstance(servers_json, str) else servers_json + except (json.JSONDecodeError, TypeError): + return None + for s in servers: + if s.get("name") == server_name: + return s + return None + + +def _record_error(server_name: str, error: str) -> None: + health = _connection_health.get(server_name, { + "status": 
"error", "error_count": 0, "last_error": None, + }) + health["error_count"] = health.get("error_count", 0) + 1 + health["last_error"] = error + health["status"] = "error" if health["error_count"] >= 3 else "degraded" + _connection_health[server_name] = health + + +# -- Tool handlers ------------------------------------------------------------ + +async def handle_call_tool( + inputs: dict[str, Any], + workspace_id: str, + thread_id: str, + *, + runtime_context: dict[str, Any] | None = None, +) -> str: + """Call a tool on a remote MCP server.""" + server = inputs.get("server", "") + tool_name = inputs.get("tool_name", "") + arguments = inputs.get("arguments") or {} + + if not server or not tool_name: + return "Error: 'server' and 'tool_name' are required." + + client = await _get_client(server, runtime_context=runtime_context) + if client is None: + health = _connection_health.get(server, {}) + return ( + f"Error: Cannot connect to MCP server '{server}'. " + f"Status: {health.get('status', 'unknown')}. 
" + f"Last error: {health.get('last_error', 'none')}" + ) + + try: + result = await client.call_tool(tool_name, arguments) + # Reset error count on success + if server in _connection_health: + _connection_health[server]["error_count"] = 0 + _connection_health[server]["status"] = "connected" + # Serialize result + if hasattr(result, "content"): + # CallToolResult has .content list + parts = [] + for item in result.content: + if hasattr(item, "text"): + parts.append(item.text) + else: + parts.append(str(item)) + return "\n".join(parts) + return str(result) + except Exception as exc: + _record_error(server, str(exc)) + return f"Error calling {tool_name} on {server}: {exc}" + + +async def handle_list_tools( + inputs: dict[str, Any], + workspace_id: str, + thread_id: str, + *, + runtime_context: dict[str, Any] | None = None, +) -> str: + """List available tools on a remote MCP server.""" + server = inputs.get("server", "") + + if not server: + # List all known servers and their health + lines = ["## Connected MCP Servers"] + for name, health in _connection_health.items(): + status = health.get("status", "unknown") + errors = health.get("error_count", 0) + lines.append(f"- **{name}**: {status} ({errors} errors)") + if not _connection_health: + lines.append("No MCP servers connected.") + return "\n".join(lines) + + client = await _get_client(server, runtime_context=runtime_context) + if client is None: + return f"Cannot connect to MCP server '{server}'." + + try: + tools = await client.list_tools() + lines = [f"## Tools on '{server}' ({len(tools)} tools)"] + for tool in tools: + desc = getattr(tool, "description", "") or "" + lines.append(f"- **{tool.name}**: {desc[:100]}") + return "\n".join(lines) + except Exception as exc: + return f"Error listing tools on {server}: {exc}" +``` + +**Important implementation notes:** + +- `Client(url)` accepts a URL string directly — FastMCP auto-selects + the transport (StreamableHTTP, SSE, or stdio) based on the URL scheme. 
+- `client.call_tool(name, args)` returns a `CallToolResult` with a + `.content` list of content items (text, images, etc.). +- `client.list_tools()` returns `list[mcp.types.Tool]` with `.name`, + `.description`, `.inputSchema`. +- The connection pool is module-level. This is fine for a single-process + server. Connections are lazy — created on first use. +- Health tracking mirrors `AddonRegistration.health_status` pattern: + 0 errors = connected, 1–2 = degraded, 3+ = error. + +**3. `__init__.py`** — empty or minimal: + +```python +"""MCP Bridge addon — connect to remote MCP servers.""" +``` + +--- + +## Track 2: Dynamic Tool Discovery + +### Problem + +With Track 1, the Queen calls `mcp_call_tool(server="git", tool_name="commit", arguments={...})`. This works but the Queen must first call +`mcp_list_remote_tools` to learn what tools exist, then construct the +proxy call. Ideally, the Queen would see `git:commit`, `git:push` etc. +as first-class tools in her tool list. + +### Implementation + +**1. Queen tool: `discover_mcp_tools`.** + +Add to `queen_tools.py` — a Queen tool (not an addon tool) that: + +1. Connects to the named MCP server via the bridge client +2. Fetches the tool list +3. Dynamically registers each remote tool as a namespaced addon tool +4. 
Returns a summary of discovered tools + +```python +async def _discover_mcp_tools(self, inputs, workspace_id, thread_id): + """Connect to an MCP server and register its tools dynamically.""" + server_name = inputs.get("server_name", "") + server_url = inputs.get("server_url", "") + if not server_name: + return ("Error: server_name is required.", None) + + from formicos.addons.mcp_bridge.discovery import discover_and_register + result = await discover_and_register( + server_name=server_name, + server_url=server_url, + tool_dispatcher=self._tool_dispatcher, + runtime_context=self._addon_runtime_context, + ) + return (result, None) +``` + +Tool spec: +```python +{ + "name": "discover_mcp_tools", + "description": ( + "Connect to a remote MCP server and register its tools. " + "After discovery, the server's tools appear as callable tools." + ), + "parameters": { + "type": "object", + "properties": { + "server_name": { + "type": "string", + "description": "Short name for this server (e.g., 'git', 'github')" + }, + "server_url": { + "type": "string", + "description": "MCP server URL (e.g., 'http://localhost:8808/mcp')" + } + }, + "required": ["server_name"] + } +} +``` + +**2. 
Discovery module: `src/formicos/addons/mcp_bridge/discovery.py`.** + +```python +async def discover_and_register( + server_name: str, + server_url: str = "", + tool_dispatcher: Any = None, + runtime_context: dict[str, Any] | None = None, +) -> str: + """Connect to MCP server, fetch tools, register as addon tools.""" + from formicos.addons.mcp_bridge.client import ( + _get_client, _connection_health, + ) + + # If URL provided, temporarily store in config + if server_url: + # Direct connection without config lookup + from fastmcp.client import Client + try: + client = Client(server_url, timeout=30) + await client.initialize() + except Exception as exc: + return f"Failed to connect to {server_url}: {exc}" + else: + client = await _get_client(server_name, runtime_context=runtime_context) + + if client is None: + return f"Cannot connect to MCP server '{server_name}'." + + try: + tools = await client.list_tools() + except Exception as exc: + return f"Failed to list tools on '{server_name}': {exc}" + + if not tools: + return f"Server '{server_name}' has no tools." 
+ + registered = [] + if tool_dispatcher is not None: + handlers = getattr(tool_dispatcher, "_handlers", {}) + addon_specs = getattr(tool_dispatcher, "_addon_tool_specs", []) + + for tool in tools: + namespaced = f"{server_name}:{tool.name}" + + # Create a closure that calls this specific tool + async def _make_handler( + _client: Any, _tool_name: str, _server: str, + ): + async def _handler( + inputs: dict, workspace_id: str, thread_id: str, + **kwargs: Any, + ) -> str: + from formicos.addons.mcp_bridge.client import ( + handle_call_tool, + ) + return await handle_call_tool( + {"server": _server, "tool_name": _tool_name, + "arguments": inputs}, + workspace_id, thread_id, + runtime_context=kwargs.get("runtime_context"), + ) + return _handler + + handler = await _make_handler(client, tool.name, server_name) + handlers[namespaced] = handler + + # Add to addon tool specs for Queen visibility + desc = getattr(tool, "description", "") or "" + schema = getattr(tool, "inputSchema", {}) or {} + addon_specs.append({ + "name": namespaced, + "description": f"[{server_name}] {desc}"[:200], + "parameters": schema, + }) + registered.append(namespaced) + + lines = [ + f"Discovered {len(tools)} tools on '{server_name}'.", + f"Registered {len(registered)} tools:", + ] + for name in registered: + lines.append(f" - {name}") + return "\n".join(lines) +``` + +**Critical notes on dynamic registration:** + +- Namespaced as `server_name:tool_name` (e.g., `git:commit`) to avoid + collisions with built-in tools. +- Registered into `_handlers` dict (same dict addon_loader.py uses at + line 262). This makes them callable via the normal tool dispatch path. +- Added to `_addon_tool_specs` list (same list queen_tools.py reads at + line 1411). This makes them visible in the Queen's LLM tool list. +- The handler closure delegates to `handle_call_tool` — all remote calls + go through the same health-tracked code path. + +**3. 
Manual refresh trigger in `discovery.py`.** + +```python +async def manual_refresh( + inputs: dict[str, Any], + workspace_id: str, + thread_id: str, + *, + runtime_context: dict[str, Any] | None = None, +) -> str: + """Trigger handler for manual MCP bridge refresh.""" + # Re-discover all configured servers + from formicos.addons.mcp_bridge.client import _connection_health + results = [] + for server_name in list(_connection_health.keys()): + result = await discover_and_register( + server_name=server_name, + runtime_context=runtime_context, + ) + results.append(result) + return "\n\n".join(results) if results else "No servers configured." +``` + +--- + +## Track 3: Connection Health + Graceful Degradation + +### Problem + +Remote MCP servers can be down, slow, or returning unexpected shapes. The +Queen needs to know when a bridge is unhealthy so she doesn't waste tool +turns on broken connections. + +### Implementation + +**1. Health already tracked in `_connection_health` dict (Track 1).** + +Extend `handle_list_tools` to include health data in the text output: + +```text +## Tools on 'git' (12 tools) — Status: connected +- **git:commit**: Create a git commit +- **git:push**: Push commits to remote +... + +## Tools on 'github' (8 tools) — Status: degraded (2 errors) +- **github:create_issue**: ... +``` + +**2. Queen-visible health in `_list_addons()` output.** + +The existing `_list_addons()` in `queen_tools.py` (Wave 68 Track 5) +already shows capability metadata per addon. For the mcp-bridge addon, +the text should include connected server health: + +```text +**mcp-bridge**: Connect to remote MCP servers and call their tools + Content: external_tools + Servers: git (connected, 12 tools), github (degraded, 8 tools) +``` + +To achieve this, modify `_list_addons()` to check for mcp-bridge +specifically: if the addon name is `mcp-bridge`, append server health +summary from `client._connection_health` to the text output. 
This is +a small addition (~10 lines) to the existing method. + +**3. Graceful degradation in `handle_call_tool`.** + +Already handled in Track 1: if the client can't connect, the handler +returns an error string (not an exception). The Queen sees the error +and can decide to use a different approach. + +Add one enhancement: if a server has `status == "error"` (3+ errors), +`handle_call_tool` should return a short-circuit error without attempting +connection: + +```python +if server in _connection_health: + health = _connection_health[server] + if health.get("status") == "error": + return ( + f"MCP server '{server}' is in error state " + f"({health.get('error_count', 0)} consecutive errors). " + f"Last error: {health.get('last_error', 'unknown')}. " + f"Use mcp_list_remote_tools to check status or wait for " + f"automatic recovery." + ) +``` + +**4. Automatic recovery.** + +After 5 minutes since the last error, reset `error_count` to 0 on the +next connection attempt. This allows the bridge to recover without +manual intervention. Add a `last_error_time` field to the health dict +and check elapsed time in `_get_client`. + +### Tests + +Create `tests/unit/addons/test_mcp_bridge.py` with at least: + +1. `test_handle_call_tool_returns_result` — mock FastMCP Client, assert + result text returned. +2. `test_handle_call_tool_connection_failure` — mock Client that raises, + assert error string returned (not exception). +3. `test_handle_list_tools_returns_tool_names` — mock Client.list_tools, + assert formatted tool list. +4. `test_health_degrades_on_errors` — call handle_call_tool 3 times with + failures, assert health status transitions: connected → degraded → error. +5. `test_health_recovers_after_success` — after error state, successful + call resets error count. +6. `test_discover_registers_tools` — mock Client.list_tools returning + tool specs, assert tools registered in handlers dict and addon_specs + list. +7. 
`test_namespaced_tool_names` — assert discovered tools use + `server:tool` format. +8. `test_error_state_short_circuits` — server in error state, assert + handle_call_tool returns immediately without connection attempt. + +--- + +## Acceptance Gates + +- [ ] `addons/mcp-bridge/addon.yaml` exists and loads via addon discovery +- [ ] `mcp_call_tool` calls remote MCP server tools via FastMCP Client +- [ ] `mcp_list_remote_tools` shows available tools per server +- [ ] `discover_mcp_tools` Queen tool registers remote tools dynamically +- [ ] Dynamically registered tools appear in Queen's tool list +- [ ] Dynamically registered tools are callable via normal dispatch +- [ ] Connection health tracked: connected → degraded → error +- [ ] Error state short-circuits without connection attempt +- [ ] Health auto-recovers after 5 minutes +- [ ] `_list_addons()` text includes server health for mcp-bridge +- [ ] No new event types +- [ ] No frontend changes +- [ ] All tests pass + +## Validation + +```bash +pytest tests/unit/addons/test_mcp_bridge.py -v +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +``` diff --git a/docs/waves/wave_70/team_b_prompt.md b/docs/waves/wave_70/team_b_prompt.md new file mode 100644 index 0000000..bc05113 --- /dev/null +++ b/docs/waves/wave_70/team_b_prompt.md @@ -0,0 +1,450 @@ +# Wave 70 — Team B: Project Intelligence + +**Theme:** The Queen maintains awareness across threads via a persistent +project plan. + +## Context + +Read `docs/waves/wave_70/wave_70_plan.md` first. Read `CLAUDE.md` for hard +constraints. + +FormicOS threads are isolated conversations. The Queen has session summaries +(`.formicos/sessions/{thread_id}.md`) and thread-scoped plans +(`.formicos/plans/{thread_id}.md`), but nothing that spans threads. When an +operator starts a new thread, the Queen loses project-level context. + +Wave 68 introduced `propose_plan` and `mark_plan_step` for thread-scoped +plans. 
This wave adds a **project-level** plan that persists across threads, +tracks milestones, and orients every new conversation. + +**Key insight:** Project plans are files, not memory entries, not events. +They live at `.formicos/project_plan.md` (one per data directory). The +Queen reads and writes them via tools. Injection follows the existing +pattern from session summaries and project context injection in `respond()`. + +## Your Files (exclusive ownership) + +### Surface +- `src/formicos/surface/queen_tools.py` — `propose_project_milestone` and + `complete_milestone` new Queen tools (additive to handler registry) +- `src/formicos/surface/queen_runtime.py` — project plan injection in + `respond()` (small addition alongside existing injection points) +- `config/caste_recipes.yaml` — add `propose_project_milestone` and + `complete_milestone` to Queen tool list, update tool count + +### Tests +- `tests/unit/surface/test_project_plan.py` — **new** + +## Do Not Touch + +- `src/formicos/surface/addon_loader.py` — Team A owns +- `src/formicos/surface/self_maintenance.py` — Team C owns +- `src/formicos/surface/projections.py` +- `src/formicos/core/events.py` +- `src/formicos/core/types.py` +- `src/formicos/engine/` — any file +- `frontend/` — no frontend changes this wave + +## Overlap Coordination + +- Team A adds `discover_mcp_tools` to `queen_tools.py` and + `caste_recipes.yaml`. You add `propose_project_milestone` and + `complete_milestone`. Both are additive to different sections. No conflict. +- Team C adds `check_autonomy_budget` to `queen_tools.py`. Same: additive, + different section. +- All three teams touch `caste_recipes.yaml` to append tool names. The + changes are additive. Merge last team's changes carefully. + +--- + +## Track 4: Project-Level Plan Persistence + +### Problem + +The operator may work on a multi-week project across many threads. 
Each +thread has its own plan, but there's no persistent artifact that tracks +project-level milestones, goals, and status across all threads. + +### File format + +The project plan lives at: +``` +{data_dir}/.formicos/project_plan.md +``` + +This is a single file per FormicOS data directory. Format: + +```markdown +# Project Plan + +**Goal:** Build the auth module with OAuth2 support + +**Updated:** 2026-03-26T14:30:00Z + +## Milestones + +- [0] [completed] Set up OAuth provider integration + Thread: abc123 | Completed: 2026-03-24 + Note: Using Auth0 as primary provider + +- [1] [active] Implement token refresh flow + Thread: def456 + +- [2] [pending] Write integration test suite + +- [3] [pending] Deploy to staging +``` + +### Implementation + +The project plan file is read and written by Queen tools only. It is not an +event, not a projection, not a memory entry. It follows the same file-based +pattern as thread plans (`.formicos/plans/{thread_id}.md`) and session +summaries (`.formicos/sessions/{thread_id}.md`). + +**No new infrastructure needed.** The `_data_dir` resolution pattern is +already established in `queen_tools.py` at line 3256 (`propose_plan`) and +`queen_runtime.py` at line 842 (session summary write). Reuse the same +`self._runtime.settings.system.data_dir` path. + +--- + +## Track 5: `propose_project_milestone` + `complete_milestone` Queen Tools + +### Problem + +The Queen needs to create and update project milestones. Two new tools: +one to propose a milestone (or initialize the project plan), one to mark +a milestone as completed. + +### Implementation + +**1. 
`propose_project_milestone` tool handler in `queen_tools.py`.** + +Add to the handler registry (around line 198, alongside `mark_plan_step`): + +```python +"propose_project_milestone": lambda i, w, t: self._propose_project_milestone(i, w, t), +"complete_milestone": lambda i, w, t: self._complete_milestone(i, w, t), +``` + +**Handler: `_propose_project_milestone`** + +```python +def _propose_project_milestone( + self, + inputs: dict[str, Any], + workspace_id: str, + thread_id: str, +) -> tuple[str, dict[str, Any] | None]: + """Add a milestone to the project plan. Creates the plan file if needed.""" + goal = inputs.get("goal", "") + milestone = inputs.get("milestone", "") + if not milestone: + return ("Error: milestone description is required.", None) + + try: + _data_dir = self._runtime.settings.system.data_dir + if not isinstance(_data_dir, str) or not _data_dir: + return ("No data directory configured.", None) + + _plan_path = Path(_data_dir) / ".formicos" / "project_plan.md" + _plan_path.parent.mkdir(parents=True, exist_ok=True) + + now = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ") + + if _plan_path.is_file(): + text = _plan_path.read_text(encoding="utf-8") + else: + # Initialize new project plan + _goal = goal or "Project plan" + text = ( + f"# Project Plan\n\n" + f"**Goal:** {_goal}\n\n" + f"**Updated:** {now}\n\n" + f"## Milestones\n" + ) + + # Parse existing milestones to find next index + lines = text.split("\n") + max_index = -1 + for line in lines: + m = _re.match(r"^- \[(\d+)\]", line) + if m: + max_index = max(max_index, int(m.group(1))) + + next_index = max_index + 1 + new_line = f"- [{next_index}] [pending] {milestone}" + if thread_id: + new_line += f"\n Thread: {thread_id}" + + # Update timestamp + text = _re.sub( + r"\*\*Updated:\*\* .*", + f"**Updated:** {now}", + text, + ) + + # Update goal if provided and plan was just created + if goal and "**Goal:**" in text: + text = _re.sub( + r"\*\*Goal:\*\* .*", + f"**Goal:** {goal}", + text, + ) + + # 
Append milestone + if text.endswith("\n"): + text += f"\n{new_line}\n" + else: + text += f"\n\n{new_line}\n" + + _plan_path.write_text(text, encoding="utf-8") + + return ( + f"Added project milestone [{next_index}]: {milestone}", + {"tool": "propose_project_milestone", "index": next_index}, + ) + except (OSError, TypeError) as exc: + return (f"Failed to write project plan: {exc}", None) +``` + +**Handler: `_complete_milestone`** + +```python +def _complete_milestone( + self, + inputs: dict[str, Any], + workspace_id: str, + thread_id: str, +) -> tuple[str, dict[str, Any] | None]: + """Mark a project milestone as completed.""" + index = inputs.get("index") + note = inputs.get("note", "") + if index is None: + return ("Error: milestone index is required.", None) + + try: + _data_dir = self._runtime.settings.system.data_dir + if not isinstance(_data_dir, str) or not _data_dir: + return ("No data directory configured.", None) + + _plan_path = Path(_data_dir) / ".formicos" / "project_plan.md" + if not _plan_path.is_file(): + return ("No project plan exists. 
Use propose_project_milestone first.", None) + + text = _plan_path.read_text(encoding="utf-8") + lines = text.split("\n") + now = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ") + + found = False + for i, line in enumerate(lines): + m = _re.match(r"^- \[(\d+)\] \[(\w+)\] (.*)$", line) + if m and int(m.group(1)) == index: + desc = m.group(3) + lines[i] = f"- [{index}] [completed] {desc}" + # Add completion metadata on next line + completion_line = f" Completed: {now[:10]}" + if note: + completion_line += f" | {note}" + # Check if next line is indented metadata + if i + 1 < len(lines) and lines[i + 1].startswith(" "): + lines.insert(i + 1, completion_line) + else: + lines.insert(i + 1, completion_line) + found = True + break + + if not found: + return (f"Milestone [{index}] not found in project plan.", None) + + # Update timestamp + text = "\n".join(lines) + text = _re.sub( + r"\*\*Updated:\*\* .*", + f"**Updated:** {now}", + text, + ) + + _plan_path.write_text(text, encoding="utf-8") + + return ( + f"Milestone [{index}] marked as completed.", + {"tool": "complete_milestone", "index": index}, + ) + except (OSError, TypeError) as exc: + return (f"Failed to update project plan: {exc}", None) +``` + +**Tool specs — add to `_queen_tools()` list** (before the `*self._addon_tool_specs` spread at line 1411): + +```python +# Wave 70 Track 5: project-level milestone management +{ + "name": "propose_project_milestone", + "description": ( + "Add a milestone to the project plan. Creates the plan if " + "it doesn't exist. Use for tracking cross-thread goals." + ), + "parameters": { + "type": "object", + "properties": { + "milestone": { + "type": "string", + "description": "Milestone description", + }, + "goal": { + "type": "string", + "description": ( + "Project goal (used when creating a new plan)" + ), + }, + }, + "required": ["milestone"], + }, +}, +{ + "name": "complete_milestone", + "description": ( + "Mark a project milestone as completed." 
+ ), + "parameters": { + "type": "object", + "properties": { + "index": { + "type": "integer", + "description": "Milestone index number to complete", + }, + "note": { + "type": "string", + "description": "Completion note (optional)", + }, + }, + "required": ["index"], + }, +}, +``` + +**2. Update `caste_recipes.yaml`.** + +Add `"propose_project_milestone"` and `"complete_milestone"` to the Queen +tools array (line 207). Update the comment if it mentions tool count. + +--- + +## Track 6: Project Plan Injection on Startup + Thread Creation + +### Problem + +When the Queen starts a new thread or responds in any thread, she should +be aware of the project plan. Without injection, she won't know what +milestones exist or which are active. + +### Implementation + +**1. Inject project plan in `respond()` in `queen_runtime.py`.** + +The existing injection pattern is at lines 931–953 (project context) and +lines 955–983 (session summary). Add a project plan injection block +immediately after the project context injection: + +```python +# Wave 70 Track 6: inject project plan for cross-thread awareness +try: + _data_dir_pp = self._runtime.settings.system.data_dir + if isinstance(_data_dir_pp, str) and _data_dir_pp: + _pp_path = Path(_data_dir_pp) / ".formicos" / "project_plan.md" + if _pp_path.is_file(): + _pp_text = _pp_path.read_text( + encoding="utf-8", + )[:budget.project_context * 4] + if _pp_text: + _pp_insert = 0 + for _ppi, _ppm in enumerate(messages): + if _ppm.get("role") != "system": + _pp_insert = _ppi + break + else: + _pp_insert = len(messages) + messages.insert(_pp_insert, { + "role": "system", + "content": ( + "# Project Plan (cross-thread)\n" + f"{_pp_text}" + ), + }) +except (AttributeError, TypeError, OSError): + pass +``` + +**Key decisions:** +- Uses `budget.project_context` token budget (same as project context). + The project plan shares this budget — it's injected alongside project + context, not as a separate allocation. 
+- Injected after system prompts, before conversation. Same pattern as + all other injections. +- Labeled `(cross-thread)` so the Queen knows this spans threads. +- If no project plan file exists, nothing is injected. + +**2. No special "thread creation" handling needed.** + +The injection happens on every `respond()` call. When a new thread is +created, the first `respond()` call will inject the project plan. The Queen +will see the active milestones and can orient the conversation. + +--- + +## Tests + +Create `tests/unit/surface/test_project_plan.py`: + +1. `test_propose_project_milestone_creates_file` — no existing plan file, + call handler, assert file created with correct format, milestone at + index 0 with status `pending`. + +2. `test_propose_project_milestone_appends_to_existing` — existing plan + with 2 milestones, call handler, assert new milestone at index 2. + +3. `test_complete_milestone_updates_status` — existing plan with 3 + milestones, complete index 1, assert status changed to `completed` + with completion date. + +4. `test_complete_milestone_missing_index` — complete a non-existent + index, assert error message returned. + +5. `test_complete_milestone_no_plan_file` — no plan file exists, assert + error message referencing `propose_project_milestone`. + +6. `test_propose_project_milestone_sets_goal` — provide `goal` param, + assert `**Goal:**` line contains the goal text. + +7. `test_project_plan_injection_reads_file` — create a plan file, mock + the respond() path, verify the plan text appears in injected messages. + +**Test setup pattern:** Mock `self._runtime.settings.system.data_dir` to +point to a tmp directory. Use `tmp_path` fixture for file I/O. Follow the +pattern from existing queen_tools tests (mock Runtime with minimal stubs). 
+ +--- + +## Acceptance Gates + +- [ ] `.formicos/project_plan.md` created on first `propose_project_milestone` +- [ ] Milestones appended with sequential indices +- [ ] `complete_milestone` updates status from any state to `completed` +- [ ] Completion includes date and optional note +- [ ] Project plan injected in `respond()` on every call when file exists +- [ ] Injection labeled `(cross-thread)` for Queen awareness +- [ ] No injection overhead when no plan file exists +- [ ] Tools visible in Queen's tool list via `caste_recipes.yaml` +- [ ] No new event types +- [ ] No projection changes +- [ ] No frontend changes +- [ ] All tests pass + +## Validation + +```bash +pytest tests/unit/surface/test_project_plan.py -v +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +``` diff --git a/docs/waves/wave_70/team_c_prompt.md b/docs/waves/wave_70/team_c_prompt.md new file mode 100644 index 0000000..eb0532b --- /dev/null +++ b/docs/waves/wave_70/team_c_prompt.md @@ -0,0 +1,632 @@ +# Wave 70 — Team C: Autonomy Guardrails + +**Theme:** The Queen earns trust through a track record, checks her budget +before acting, and escalates high-impact work. + +## Context + +Read `docs/waves/wave_70/wave_70_plan.md` first. Read `CLAUDE.md` for hard +constraints. + +FormicOS already has autonomy infrastructure: + +- **`MaintenanceDispatcher`** (`self_maintenance.py`) with 3 levels: + `suggest`, `auto_notify`, `autonomous`. Daily budget tracking via + `_daily_spend` dict, reset at UTC midnight (line 243). Per-caste cost + estimates (line 38): researcher=$0.08, archivist=$0.05, coder=$0.12/round. +- **`MaintenancePolicy`** (`core/types.py`) with `autonomy_level`, + `auto_actions`, `max_maintenance_colonies`, `daily_maintenance_budget`. +- **`BudgetEnforcer`** (`runtime.py:1661`) with workspace hard stop at 100%, + soft warn at 80%, downgrade at 90%. +- **`BudgetSnapshot`** (`projections.py:289`) tracking `total_cost`, + `model_usage`, `api_cost`. 
+- **`ColonyOutcome`** (`projections.py:89`) with `succeeded`, `total_rounds`, + `total_cost`, `quality_score`, `caste_composition`, `strategy`. +- **Earned autonomy rule** (`proactive_intelligence/rules.py:1237`) — + recommendation-only: ≥5 follow-throughs + >70% rate triggers promotion + insight, ≥3 kills + >50% negative rate triggers demotion insight. + +This wave extends these foundations with three new capabilities: daily cost +caps visible to the Queen, blast radius estimation for autonomous dispatch, +and graduated autonomy scoring from outcome history. + +## Your Files (exclusive ownership) + +### Surface +- `src/formicos/surface/self_maintenance.py` — daily cost cap enforcement, + blast radius estimator, autonomy scoring integration +- `src/formicos/surface/queen_tools.py` — `check_autonomy_budget` new Queen + tool (additive to handler registry) +- `config/caste_recipes.yaml` — add `check_autonomy_budget` to Queen tool + list + +### Tests +- `tests/unit/surface/test_autonomy_guardrails.py` — **new** + +## Do Not Touch + +- `src/formicos/surface/queen_runtime.py` — Team B owns +- `src/formicos/surface/addon_loader.py` — Team A owns +- `src/formicos/surface/projections.py` — no projection changes +- `src/formicos/core/events.py` — no new events +- `src/formicos/core/types.py` — no type changes (extend `MaintenancePolicy` + via workspace config, not model changes) +- `src/formicos/engine/` — any file +- `frontend/` — no frontend changes this wave +- `src/formicos/addons/proactive_intelligence/rules.py` — the earned + autonomy rule stays recommendation-only; this wave adds a scoring + function that the rule can reference, but the rule file itself is not + modified + +## Overlap Coordination + +- Team A adds `discover_mcp_tools` to `queen_tools.py` and + `caste_recipes.yaml`. Team B adds `propose_project_milestone` and + `complete_milestone`. You add `check_autonomy_budget`. All additive to + different sections. No conflict. 
+- All three teams touch `caste_recipes.yaml` to append tool names. The + changes are additive. Merge last team's changes carefully. + +--- + +## Track 7: Daily Cost Budget with Cap + +### Problem + +The `MaintenanceDispatcher` has a `daily_maintenance_budget` in the policy, +but the **Queen** has no visibility into how much of that budget remains. +She dispatches colonies without knowing if the workspace is approaching its +daily cost limit. The operator has no way to set a daily cap that applies +to all Queen-initiated work (not just maintenance). + +### Implementation + +**1. Queen tool: `check_autonomy_budget` in `queen_tools.py`.** + +Add to the handler registry (around line 198): + +```python +"check_autonomy_budget": lambda i, w, t: self._check_autonomy_budget(i, w, t), +``` + +Handler: + +```python +def _check_autonomy_budget( + self, + inputs: dict[str, Any], + workspace_id: str, + thread_id: str, +) -> tuple[str, dict[str, Any] | None]: + """Show the Queen her remaining daily budget and autonomy status.""" + # Get maintenance policy + import json as _json + + ws = self._runtime.projections.workspaces.get(workspace_id) + if ws is None: + return ("Workspace not found.", None) + + raw_policy = ws.config.get("maintenance_policy") + policy = MaintenancePolicy() + if raw_policy is not None: + try: + data = _json.loads(raw_policy) if isinstance(raw_policy, str) else raw_policy + policy = MaintenancePolicy(**data) + except Exception: + pass + + # Get daily spend from MaintenanceDispatcher + dispatcher = getattr(self._runtime, "maintenance_dispatcher", None) + daily_spend = 0.0 + if dispatcher is not None: + dispatcher._reset_daily_budget_if_needed() + daily_spend = dispatcher._daily_spend.get(workspace_id, 0.0) + + budget_limit = policy.daily_maintenance_budget + remaining = max(0.0, budget_limit - daily_spend) + + # Get workspace total budget info + budget = ws.budget + total_cost = budget.total_cost if budget else 0.0 + + # Get active maintenance colony count + 
active_maintenance = 0 + if dispatcher is not None: + active_maintenance = dispatcher._count_active_maintenance_colonies( + workspace_id, + ) + + lines = [ + "## Autonomy Budget Status", + "", + f"**Autonomy level:** {policy.autonomy_level}", + f"**Daily budget:** ${budget_limit:.2f}", + f"**Spent today:** ${daily_spend:.2f}", + f"**Remaining:** ${remaining:.2f}", + f"**Active maintenance colonies:** {active_maintenance}" + f" / {policy.max_maintenance_colonies} max", + "", + f"**Workspace total cost:** ${total_cost:.2f}", + ] + + if policy.auto_actions: + lines.append( + f"**Auto-dispatch categories:** {', '.join(policy.auto_actions)}" + ) + else: + lines.append("**Auto-dispatch categories:** none") + + if remaining <= 0: + lines.append("") + lines.append( + "⚠ Daily budget exhausted. No autonomous dispatch until " + "midnight UTC reset." + ) + elif remaining < budget_limit * 0.2: + lines.append("") + lines.append( + f"⚠ Budget running low ({remaining / budget_limit:.0%} remaining)." + ) + + return ("\n".join(lines), None) +``` + +Tool spec — add to `_queen_tools()` list: + +```python +# Wave 70 Track 7: autonomy budget visibility +{ + "name": "check_autonomy_budget", + "description": ( + "Check daily autonomy budget status: remaining budget, " + "active maintenance colonies, and autonomy level." + ), + "parameters": { + "type": "object", + "properties": {}, + }, +}, +``` + +**2. Update `caste_recipes.yaml`.** + +Add `"check_autonomy_budget"` to the Queen tools array (line 207). + +**3. Budget-aware dispatch gate in `MaintenanceDispatcher`.** + +The existing `evaluate_and_dispatch()` (self_maintenance.py:57) already +checks `budget_remaining` against `insight.suggested_colony.estimated_cost`. +This is sufficient. No changes needed to the dispatch gate itself. + +The new value is that the Queen can **proactively check** her budget before +deciding to spawn colonies, rather than only discovering budget exhaustion +when a maintenance dispatch is skipped silently. 
+ +--- + +## Track 8: Blast Radius Estimator + +### Problem + +When the Queen considers dispatching work autonomously, she has no way to +estimate the impact scope. A "rename a variable" task has low blast radius; +a "refactor the auth module" task has high blast radius. The Queen should +escalate high-impact work to the operator. + +### Implementation + +**1. `estimate_blast_radius()` function in `self_maintenance.py`.** + +A pure function that scores the estimated impact of a proposed task. No +LLM calls. Uses heuristic signals only. + +```python +@dataclass +class BlastRadiusEstimate: + """Estimated scope and impact of a proposed autonomous action.""" + + score: float # 0.0 (trivial) to 1.0 (high impact) + level: str # "low", "medium", "high" + factors: list[str] # human-readable explanations + recommendation: str # "proceed", "notify", "escalate" + + +def estimate_blast_radius( + task: str, + caste: str = "coder", + max_rounds: int = 3, + strategy: str = "sequential", + workspace_id: str = "", + projections: ProjectionStore | None = None, +) -> BlastRadiusEstimate: + """Estimate the blast radius of a proposed autonomous dispatch. + + Uses deterministic heuristics only. No LLM calls. 
+ """ + score = 0.0 + factors: list[str] = [] + + # Factor 1: task length as proxy for complexity + task_len = len(task) + if task_len > 500: + score += 0.2 + factors.append("Long task description (complex scope)") + elif task_len > 200: + score += 0.1 + factors.append("Medium-length task description") + + # Factor 2: caste risk profile + caste_risk = { + "coder": 0.3, # writes files + "reviewer": 0.1, # read-only + "researcher": 0.1, # read-only + "archivist": 0.05, # knowledge-only + } + risk = caste_risk.get(caste, 0.2) + score += risk + if risk >= 0.3: + factors.append(f"Caste '{caste}' can modify files") + + # Factor 3: round count as proxy for complexity + if max_rounds > 5: + score += 0.15 + factors.append(f"High round budget ({max_rounds} rounds)") + elif max_rounds > 3: + score += 0.05 + + # Factor 4: strategy + if strategy == "stigmergic": + score += 0.1 + factors.append("Stigmergic strategy (multi-agent, harder to predict)") + + # Factor 5: keyword signals in task text + high_risk_keywords = [ + "delete", "remove", "drop", "migrate", "refactor", + "rename", "replace all", "database", "schema", "deploy", + "production", "auth", "security", "permission", + ] + task_lower = task.lower() + matched = [kw for kw in high_risk_keywords if kw in task_lower] + if matched: + score += 0.15 * min(len(matched), 3) + factors.append(f"High-risk keywords: {', '.join(matched[:3])}") + + # Factor 6: prior outcome history for this caste/strategy + if projections and workspace_id: + stats = projections.outcome_stats(workspace_id) + for stat in stats: + if stat["strategy"] == strategy and caste in stat.get("caste_mix", ""): + if stat["success_rate"] < 0.5 and stat["total"] >= 3: + score += 0.2 + factors.append( + f"Low historical success rate for {strategy}/{caste}: " + f"{stat['success_rate']:.0%}" + ) + break + + # Clamp score + score = min(1.0, max(0.0, score)) + + # Determine level and recommendation + if score >= 0.6: + level = "high" + recommendation = "escalate" + elif 
score >= 0.3: + level = "medium" + recommendation = "notify" + else: + level = "low" + recommendation = "proceed" + + return BlastRadiusEstimate( + score=round(score, 2), + level=level, + factors=factors, + recommendation=recommendation, + ) +``` + +**2. Integrate with `evaluate_and_dispatch()`.** + +Before spawning a maintenance colony, call `estimate_blast_radius()` and +skip dispatch if the recommendation is `"escalate"`: + +```python +# In evaluate_and_dispatch(), before _spawn_maintenance_colony(): +from formicos.surface.self_maintenance import estimate_blast_radius + +estimate = estimate_blast_radius( + task=sc.task, + caste=sc.caste, + max_rounds=sc.max_rounds, + strategy=sc.strategy, + workspace_id=workspace_id, + projections=self._runtime.projections, +) + +if estimate.recommendation == "escalate": + log.info( + "maintenance.blast_radius_escalation", + workspace_id=workspace_id, + category=insight.category, + score=estimate.score, + factors=estimate.factors, + ) + continue # Skip this insight, leave for operator +``` + +For `auto_notify` level, also skip dispatch when recommendation is +`"notify"` — the insight is already surfaced in the briefing. For +`autonomous` level, proceed on `"notify"` but skip on `"escalate"`. + +**3. Make blast radius available to `check_autonomy_budget` tool.** + +When the Queen checks her budget, she can also see the blast radius +estimate for a proposed task. 
Add an optional `task` parameter to +`check_autonomy_budget`: + +```python +# In the tool spec: +"task": { + "type": "string", + "description": ( + "Optional task description to estimate blast radius" + ), +}, +``` + +If `task` is provided, append the blast radius estimate to the output: + +```python +task_text = inputs.get("task", "") +if task_text: + estimate = estimate_blast_radius( + task=task_text, + workspace_id=workspace_id, + projections=self._runtime.projections, + ) + lines.extend([ + "", + "## Blast Radius Estimate", + f"**Score:** {estimate.score} ({estimate.level})", + f"**Recommendation:** {estimate.recommendation}", + ]) + for factor in estimate.factors: + lines.append(f" - {factor}") +``` + +--- + +## Track 9: Graduated Autonomy Scoring + +### Problem + +The earned autonomy rule in `proactive_intelligence/rules.py` uses simple +thresholds (≥5 follow-throughs, >70% rate). It generates promotion/demotion +insights but has no continuous scoring function. A graduated score would +give the Queen and operator a clearer picture of where trust stands. + +### Implementation + +**1. `compute_autonomy_score()` function in `self_maintenance.py`.** + +A pure function that computes a 0–100 trust score from outcome history and +operator behavior. This is a **read-only computation** — it does not +change autonomy levels. The existing earned autonomy rule remains the +recommendation mechanism. + +```python +@dataclass +class AutonomyScore: + """Graduated autonomy trust score from outcome history.""" + + score: int # 0–100 + grade: str # "A", "B", "C", "D", "F" + components: dict[str, float] # breakdown + recommendation: str # human-readable + + +def compute_autonomy_score( + workspace_id: str, + projections: ProjectionStore, +) -> AutonomyScore: + """Compute graduated autonomy trust score from outcome history. 
+
+    Components:
+    - success_rate (40%): fraction of successful colonies
+    - volume (20%): log-scaled colony count (caps at 50 colonies)
+    - cost_efficiency (20%): avg cost vs budget (lower is better)
+    - operator_trust (20%): follow-through rate minus kill rate
+    """
+    components: dict[str, float] = {}
+
+    # Success rate
+    outcomes = [
+        o for o in projections.colony_outcomes.values()
+        if o.workspace_id == workspace_id
+    ]
+    if not outcomes:
+        return AutonomyScore(
+            score=0,
+            grade="F",
+            components={"success_rate": 0, "volume": 0,
+                        "cost_efficiency": 0, "operator_trust": 0},
+            recommendation="No outcome history. Start with supervised dispatch.",
+        )
+
+    successes = sum(1 for o in outcomes if o.succeeded)
+    success_rate = successes / len(outcomes)
+    components["success_rate"] = round(success_rate, 2)
+
+    # Volume (log-scaled, caps at 50)
+    import math
+    volume = min(1.0, math.log(1 + len(outcomes)) / math.log(51))
+    components["volume"] = round(volume, 2)
+
+    # Cost efficiency: avg cost relative to estimated budget
+    # Lower cost per colony = higher score
+    avg_cost = sum(o.total_cost for o in outcomes) / len(outcomes)
+    # Normalize: $0 = 1.0, $0.50 = 0.5, $5.00 = ~0.09
+    cost_efficiency = 1.0 / (1.0 + avg_cost * 2)
+    components["cost_efficiency"] = round(cost_efficiency, 2)
+
+    # Operator trust: follow-through vs kills
+    behavior = getattr(projections, "operator_behavior", None)
+    operator_trust = 0.5  # neutral baseline
+    if behavior is not None:
+        total_acted = sum(behavior.suggestion_categories_acted_on.values())
+        total_kills = len(behavior.kill_records)
+        total_signals = total_acted + total_kills
+        if total_signals > 0:
+            operator_trust = total_acted / total_signals
+    components["operator_trust"] = round(operator_trust, 2)
+
+    # Weighted score
+    raw = (
+        success_rate * 0.40
+        + volume * 0.20
+        + cost_efficiency * 0.20
+        + operator_trust * 0.20
+    )
+    score = int(round(raw * 100))
+    score = max(0, min(100, score))
+
+    # Grade
+    if score >= 80:
+        grade = "A"
+ elif score >= 65: + grade = "B" + elif score >= 50: + grade = "C" + elif score >= 35: + grade = "D" + else: + grade = "F" + + # Recommendation + if score >= 80: + recommendation = ( + "Strong track record. Consider promoting to autonomous level." + ) + elif score >= 65: + recommendation = ( + "Good track record. Auto-notify with expanded categories " + "is appropriate." + ) + elif score >= 50: + recommendation = ( + "Mixed results. Auto-notify with limited categories recommended." + ) + elif score >= 35: + recommendation = ( + "Below average. Suggest-only mode recommended until outcomes improve." + ) + else: + recommendation = ( + "Poor track record. Suggest-only mode recommended. Review " + "recent colony failures." + ) + + return AutonomyScore( + score=score, + grade=grade, + components=components, + recommendation=recommendation, + ) +``` + +**2. Include autonomy score in `check_autonomy_budget` output.** + +At the end of the `_check_autonomy_budget` handler, add: + +```python +# Autonomy score +from formicos.surface.self_maintenance import compute_autonomy_score + +auto_score = compute_autonomy_score( + workspace_id, self._runtime.projections, +) +lines.extend([ + "", + "## Autonomy Score", + f"**Score:** {auto_score.score}/100 (Grade: {auto_score.grade})", + f"**Recommendation:** {auto_score.recommendation}", +]) +for component, value in auto_score.components.items(): + lines.append(f" - {component}: {value}") +``` + +This gives the Queen a single tool call that shows budget status, blast +radius estimate (if task provided), and autonomy score — everything she +needs to decide whether to act autonomously or escalate. + +--- + +## Tests + +Create `tests/unit/surface/test_autonomy_guardrails.py`: + +1. `test_estimate_blast_radius_low` — simple task, researcher caste, 2 + rounds → score < 0.3, level "low", recommendation "proceed". + +2. 
`test_estimate_blast_radius_high` — long task with "delete" + "database" + keywords, coder caste, 8 rounds, stigmergic → score ≥ 0.6, level "high", + recommendation "escalate". + +3. `test_estimate_blast_radius_medium` — moderate task, coder caste, 3 + rounds → score between 0.3 and 0.6, level "medium". + +4. `test_estimate_blast_radius_uses_outcome_history` — mock projections + with low success rate for coder/sequential, assert score increases. + +5. `test_compute_autonomy_score_no_outcomes` — empty outcomes → score 0, + grade "F". + +6. `test_compute_autonomy_score_perfect` — all successes, high volume, low + cost, positive operator trust → score ≥ 80, grade "A". + +7. `test_compute_autonomy_score_mixed` — 50% success, moderate volume → + score in C/D range. + +8. `test_check_autonomy_budget_tool_returns_status` — mock runtime with + policy and dispatcher, call handler, assert output includes budget + remaining and autonomy level. + +9. `test_blast_radius_blocks_dispatch` — mock MaintenanceDispatcher with + a high-risk insight, assert colony is NOT spawned when blast radius + recommends escalation. + +10. `test_daily_budget_exhausted_message` — set daily_spend equal to + budget, call `check_autonomy_budget`, assert "exhausted" message. + +**Test setup pattern:** Mock `Runtime` with projections containing +`ColonyOutcome` entries and operator behavior records. Follow existing +patterns in `tests/unit/surface/` for runtime mocking. 
+ +--- + +## Acceptance Gates + +- [ ] `check_autonomy_budget` Queen tool returns daily budget status +- [ ] Budget output includes remaining amount, active colonies, autonomy level +- [ ] Budget exhaustion shown clearly when daily limit reached +- [ ] `estimate_blast_radius()` returns scored estimate without LLM calls +- [ ] Blast radius uses 6 heuristic factors (task length, caste risk, + rounds, strategy, keywords, outcome history) +- [ ] High blast radius blocks autonomous dispatch in `evaluate_and_dispatch()` +- [ ] `compute_autonomy_score()` returns 0–100 score from outcome history +- [ ] Score includes 4 weighted components (success, volume, efficiency, trust) +- [ ] Autonomy score included in `check_autonomy_budget` output +- [ ] No changes to earned autonomy rule in proactive_intelligence +- [ ] No new event types +- [ ] No projection changes +- [ ] No type changes to `MaintenancePolicy` +- [ ] No frontend changes +- [ ] All tests pass + +## Validation + +```bash +pytest tests/unit/surface/test_autonomy_guardrails.py -v +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +``` diff --git a/docs/waves/wave_70/wave_70_plan.md b/docs/waves/wave_70/wave_70_plan.md new file mode 100644 index 0000000..5699bd2 --- /dev/null +++ b/docs/waves/wave_70/wave_70_plan.md @@ -0,0 +1,35 @@ +# Wave 70: Superseded By Split Packet + +**Status:** Superseded for dispatch + +The original single-wave Wave 70 packet is no longer the authority. +It has been split into two sequential packets: + +- `docs/waves/wave_70_0/wave_70_0_plan.md` +- `docs/waves/wave_70_5/wave_70_5_plan.md` + +## Why It Was Split + +The original packet mixed two different jobs: + +- backend/control-plane flexibility +- operator-visible product surface wiring + +After Wave 69, shipping major new capabilities with no operator-facing +surface would have felt like a product regression. 
The split keeps the +backend work sharp and generic in `70.0`, then finishes the trust/visibility +surface in `70.5`. + +## Authority + +Use these docs for dispatch: + +- `docs/waves/wave_70_0/team_a_prompt.md` +- `docs/waves/wave_70_0/team_b_prompt.md` +- `docs/waves/wave_70_0/team_c_prompt.md` +- `docs/waves/wave_70_5/team_a_prompt.md` +- `docs/waves/wave_70_5/team_b_prompt.md` +- `docs/waves/wave_70_5/team_c_prompt.md` + +The original prompts in `docs/waves/wave_70/` are retained only as an +archival first draft. diff --git a/docs/waves/wave_70_0/team_a_prompt.md b/docs/waves/wave_70_0/team_a_prompt.md new file mode 100644 index 0000000..cf32d91 --- /dev/null +++ b/docs/waves/wave_70_0/team_a_prompt.md @@ -0,0 +1,181 @@ +# Wave 70.0 - Team A: MCP Bridge Substrate + +**Theme:** Give the Queen a generic, healthy bridge into external MCP tool +ecosystems without hardcoding special-case routing. + +## Context + +Read `docs/waves/wave_70_0/wave_70_0_plan.md` first. This is a backend packet. +Your job is to land the MCP bridge substrate and the machine-readable health +seams that `70.5` will later surface. + +Read `CLAUDE.md` for hard constraints. + +### Key seams to read before coding + +- `addon_loader.py` — `register_addon()` (line 198), `AddonRegistration` + (line 170, fields: `health_status` property at 188, `tool_call_counts`, + `handler_error_count`), tool wrapper (lines 239–262) +- `queen_tools.py` — `_list_addons()` (lines 4027–4053, already generic — + iterates manifests, no addon-name branching), `_addon_tool_specs` (line 164, + populated from `app.py:787`) +- `queen_runtime.py` — deliberation frame addon coverage (lines 1456–1495) +- `routes/api.py` — `/api/v1/addons` (lines 1295–1344, returns status from + `reg.health_status`, tools with callCount, handlers, triggers, panels, config) +- FastMCP is `>=3.0,<4.0` (pyproject.toml line 13). Server import: + `from fastmcp import FastMCP`. Client: `from fastmcp.client import Client`. 
+ Verify the Client class API against FastMCP 3.x docs — it may differ from + 2.x examples online. + +## Your Files (exclusive ownership) + +- `addons/mcp-bridge/addon.yaml` — **new** +- `src/formicos/addons/mcp_bridge/__init__.py` — **new** +- `src/formicos/addons/mcp_bridge/client.py` — **new** +- `src/formicos/addons/mcp_bridge/discovery.py` — **new** +- `src/formicos/surface/addon_loader.py` +- `src/formicos/surface/queen_tools.py` — `discover_mcp_tools` handler only +- `src/formicos/surface/queen_runtime.py` — deliberation frame addon coverage + section only (lines 1456–1495) +- `src/formicos/surface/routes/api.py` — additive fields in the existing + `/api/v1/addons` handler (lines 1295–1344) only +- `config/caste_recipes.yaml` — tool list only +- `tests/unit/addons/test_mcp_bridge.py` — **new** + +## Do Not Touch + +- frontend files +- `src/formicos/surface/projections.py` +- `src/formicos/core/events.py` +- `src/formicos/core/types.py` +- `src/formicos/surface/self_maintenance.py` - Team C owns +- project-plan parsing or budget code - Team B owns + +## Overlap Coordination + +- Team B and Team C also add tools to `queen_tools.py` and entries to + `caste_recipes.yaml`. Keep your changes additive. +- Team B and Team C also touch `routes/api.py` to add endpoints. You only own + the existing addon-summary route section, not new endpoint definitions. +- In `queen_runtime.py`, you only touch the addon-coverage part of the + deliberation frame (lines 1456–1495). Team B owns project-plan injection. + Team C does not touch this file. + +--- + +## Track 1: MCP Bridge Addon Core + +### Goal + +Add a new `mcp-bridge` addon that can connect to remote MCP servers and call +their tools through the existing addon/tool infrastructure. + +### Requirements + +- use FastMCP `>=3.0` Client (already in deps — `from fastmcp.client import Client`). 
+ Verify the 3.x Client API before coding; the constructor, `list_tools()`, and + `call_tool()` signatures may differ from 2.x blog posts +- keep the bridge as an addon, not new core architecture +- support multiple configured servers +- cache connections and track per-server health +- degrade gracefully when a server is unavailable + +### Implementation Notes + +- the bridge may store server configuration using the existing addon config + path even if the underlying value is persisted as JSON; `70.5` will provide + a real UI abstraction on top of it +- export one structured helper from `client.py`, for example + `get_bridge_health()`, that returns connection health by server name +- do not bury health only in log text; `70.5` needs machine-readable status + +--- + +## Track 2: Dynamic Tool Discovery + +### Goal + +Let the Queen discover MCP tools without hardcoding them into FormicOS. + +### Requirements + +- add `discover_mcp_tools` to `queen_tools.py` +- use the bridge to list remote tools and expose them in a FormicOS-friendly + way +- if dynamic registration is supported cleanly, use it +- if not, keep the generic proxy path as the safety net + +### Rule + +The bridge must still work if discovery fails. Discovery is a power feature, +not a single point of failure. + +--- + +## Track 3: Generic Bridge Health Exposure + +### Goal + +Make bridge state visible generically to both the Queen and future UI work. + +### Requirements + +**1. `_list_addons()` enhancement** (lines 4027–4053) + +The method is already generic — it iterates manifests without name checks. +Do **not** add `if addon_name == "mcp-bridge"` branching. Instead: + +- define a capability-based protocol: if an addon registration exposes a + `get_bridge_health` callable (or similar) in its `runtime_context`, include + a short health summary in `_list_addons()` text +- keep the logic capability-based, not addon-name-based + +**2. 
Deliberation frame** (lines 1456–1495 in `queen_runtime.py`) + +In the addon coverage section of the Queen deliberation frame, surface MCP +bridge status in a compact form when available: + +- connected server count +- unhealthy server count +- discovered remote tool count if known + +This is for Queen reasoning, not UI polish. + +**3. `/api/v1/addons` summary** (lines 1295–1344 in `routes/api.py`) + +The endpoint already returns per-addon: name, version, description, status, +lastError, tools (with callCount), handlers, triggers, panels, config. +Expand the payload additively so `70.5` can consume bridge health: + +- add a `bridgeHealth` (or equivalent) structured field when an addon + exposes bridge health through the capability protocol above +- no hardcoded UI-specific formatting + +## Tests + +Create `tests/unit/addons/test_mcp_bridge.py` with at least: + +1. bridge connects to configured server +2. bridge health reports disconnected/error states cleanly +3. discovery handles unavailable server gracefully +4. `_list_addons()` includes generic bridge health text without name-based branching +5. 
addon summary payload exposes bridge health additively when available + +## Acceptance Gates + +- [ ] `mcp-bridge` addon exists and registers cleanly +- [ ] remote tool calls work through the addon path +- [ ] `discover_mcp_tools` lands as an additive Queen tool +- [ ] bridge health is structured and reusable +- [ ] `_list_addons()` uses generic logic, not `addon_name == "mcp-bridge"` +- [ ] deliberation frame can see MCP bridge status +- [ ] `/api/v1/addons` exposes bridge health additively +- [ ] no frontend changes +- [ ] no new event types + +## Validation + +```bash +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +pytest tests/unit/addons/test_mcp_bridge.py -v +``` diff --git a/docs/waves/wave_70_0/team_b_prompt.md b/docs/waves/wave_70_0/team_b_prompt.md new file mode 100644 index 0000000..0549122 --- /dev/null +++ b/docs/waves/wave_70_0/team_b_prompt.md @@ -0,0 +1,229 @@ +# Wave 70.0 - Team B: Project Intelligence Substrate + +**Theme:** Give the Queen a project-wide planning substrate that spans threads +and has its own stable parser, endpoint, and context budget. + +## Context + +Read `docs/waves/wave_70_0/wave_70_0_plan.md` first. This is backend work. +Do not build the UI card here; build the contracts that `70.5` will render. + +Read `CLAUDE.md` for hard constraints. + +### Key seams to read before coding + +- `queen_budget.py` — current 6 slots: `system_prompt` 15%, `memory_retrieval` + 15%, `project_context` 10%, `thread_context` 20%, `tool_memory` 10%, + `conversation_history` 30%. Fallbacks: 2000/1500/500/1500/4000/6000 tokens. + `QueenContextBudget` is a frozen dataclass (line 44). No `project_plan` + slot exists yet. 
+- `queen_runtime.py` `respond()` injection points (in order): + - memory retrieval (lines 903–920, budget: `memory_retrieval`) + - project context (lines 931–953, budget: `project_context` at line 939) + - session summary (lines 955–983, **hardcoded** 4000 chars — not yet + budget-backed) + - thread context (lines 985–996, budget: `thread_context`) + - briefing (lines 998–1065, summary-capped) + - deliberation frame (lines 1069–1092, budget: `thread_context`) + - Plan file in `_build_thread_context()` (line 1779, **hardcoded** 2000 + chars — not yet budget-backed) +- `queen_tools.py` — `_propose_plan()` (line 3131), `_mark_plan_step()` + (line 3292), `_STEP_RE` regex (line 3288: + `r"^- \[(\d+)\] \[(\w+)\] (.*)$"`). Plan file path: + `{data_dir}/.formicos/plans/{thread_id}.md` +- `routes/api.py` — thread plan endpoint already exists at line 1715: + `GET /api/v1/workspaces/{workspace_id}/threads/{thread_id}/plan`. + No project-plan endpoint exists yet. +- `docs/decisions/051-dynamic-context-caps.md` — documents current 6-slot + structure. Must be updated to reflect the new 7-slot structure. + +## Your Files (exclusive ownership) + +- `src/formicos/surface/project_plan.py` — **new**, shared parser/helper +- `src/formicos/surface/queen_tools.py` — `propose_project_milestone` and + `complete_project_milestone` handlers only (add to handler registry near + line 198 and tool spec list before `*self._addon_tool_specs` at line 1411) +- `src/formicos/surface/queen_runtime.py` — project-plan injection block only. + Insert immediately after the project context block (lines 931–953), before + the session summary block (lines 955–983). Team A owns the deliberation + frame section (lines 1456–1495); do not touch it. 
+- `src/formicos/surface/queen_budget.py` — add `project_plan` slot +- `src/formicos/surface/routes/api.py` — `GET /api/v1/project-plan` only + (add to the route table near the existing workspace endpoints, lines + 1600–1720) +- `docs/decisions/051-dynamic-context-caps.md` — update to match new slots +- `config/caste_recipes.yaml` — append tool names to Queen tools array + (line 207) +- `tests/unit/surface/test_project_plan.py` — **new** + +## Do Not Touch + +- frontend files +- `src/formicos/surface/addon_loader.py` - Team A owns +- `src/formicos/surface/self_maintenance.py` - Team C owns +- `src/formicos/surface/projections.py` +- `src/formicos/core/events.py` +- `src/formicos/core/types.py` + +## Overlap Coordination + +- Team A and Team C also add tools to `queen_tools.py`. Keep your tool changes + additive and self-contained. +- Team A and Team C also touch `routes/api.py`. You only own the project-plan + endpoint section. +- In `queen_runtime.py`, you own project-plan injection only (insert between + lines 953–955). Team A owns the deliberation frame addon coverage section + (lines 1456–1495). Team C does not touch this file. + +--- + +## Track 4: Shared Project Plan Helper + +### Goal + +Stop duplicating plan-file parsing logic across tools, runtime injection, and +API routes. + +### Implementation + +Create `src/formicos/surface/project_plan.py` as the single source of truth +for: + +- resolving the project plan path +- parsing markdown into structured milestones +- rendering parsed plan back into compact Queen context text +- updating `Updated:` timestamps consistently + +Suggested return shape: + +```python +{ + "exists": True, + "goal": "...", + "updated": "...", + "milestones": [ + { + "index": 0, + "status": "completed", + "description": "...", + "thread_id": "...", + "completed_at": "...", + "note": "...", + } + ], +} +``` + +Use this helper everywhere in this track. No duplicated regex parsing in +multiple files. 
+ +--- + +## Track 5: Milestone Tools + Read Endpoint + Budget Slot + +### Requirements + +**1. Two explicit Queen tools** + +Use explicit names: + +- `propose_project_milestone` +- `complete_project_milestone` + +Avoid the ambiguous `complete_milestone` name. We already have other +step/milestone concepts in the system. + +These tools should: + +- create `.formicos/project_plan.md` if needed +- append/update milestones through the shared helper +- stamp the active `thread_id` when relevant +- keep the file append-only in spirit, even if the markdown file is rewritten + +**2. `GET /api/v1/project-plan`** + +Add a read endpoint that returns structured JSON from the shared helper. + +Why this endpoint exists in `70.0`: + +- `70.5` must not parse markdown in the browser +- `70.5` project-plan UI should be almost pure rendering work + +**3. Dedicated context budget** + +The project plan must **not** share the `project_context` slot. + +Update `queen_budget.py` so the Queen gets a dedicated `project_plan` +allocation. The current slots sum to 1.0 across 6 fields. Adding a 7th +requires rebalancing. Recommended split (changes marked): + +- `system_prompt`: 15% (unchanged) +- `memory_retrieval`: 15% (unchanged) +- `project_context`: 10% (unchanged) +- `project_plan`: 5% (**new**) +- `thread_context`: 15% (**was 20% — reduced by 5%**) +- `tool_memory`: 10% (unchanged) +- `conversation_history`: 30% (unchanged) + +This trades 5% of thread context for the project plan slot. Thread context +still gets the `max(fallback=1500, proportional)` guarantee, so on large +context windows the absolute allocation stays high. + +Add the new field to `QueenContextBudget` (frozen dataclass, line 44), +`_FRACTIONS` (line 24), and `_FALLBACKS` (line 33). Recommended fallback +floor for `project_plan`: 400 tokens. + +Keep the `max(fallback, proportional)` rule. + +Update ADR-051 so the budget doc matches the code truth. 
+ +--- + +## Track 6: Project Plan Injection + +### Goal + +Make the Queen project-aware on startup and in new conversations. + +### Requirements + +- inject the parsed project plan into Queen context as its own system message + block, following the same insertion pattern as project context (lines 931–953) +- cap it with the dedicated `project_plan` budget: + `[:budget.project_plan * 4]` (chars-per-token ratio matches existing usage) +- use the shared helper to render the compact context form +- keep this separate from `project_context.md` and separate from thread plans +- label the injected block `# Project Plan (cross-thread)` so the Queen + knows this spans threads + +This is additional context, not a replacement for the workspace project +context file. + +## Tests + +Create `tests/unit/surface/test_project_plan.py` with at least: + +1. parser returns structured milestones from markdown +2. milestone tools create/update the plan file correctly +3. `GET /api/v1/project-plan` returns helper-derived JSON +4. malformed markdown is handled gracefully +5. Queen budget includes a dedicated `project_plan` slot +6. 
project-plan injection uses the project-plan budget, not `project_context` + +## Acceptance Gates + +- [ ] `project_plan.py` exists as the single parser/helper +- [ ] milestone tools use explicit project-plan names +- [ ] `GET /api/v1/project-plan` returns structured JSON +- [ ] project plan has its own Queen context budget +- [ ] ADR-051 is updated to match the new slot structure +- [ ] project-plan injection is separate from `project_context.md` +- [ ] no frontend changes +- [ ] no new event types + +## Validation + +```bash +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +pytest tests/unit/surface/test_project_plan.py -v +``` diff --git a/docs/waves/wave_70_0/team_c_prompt.md b/docs/waves/wave_70_0/team_c_prompt.md new file mode 100644 index 0000000..9e95fa9 --- /dev/null +++ b/docs/waves/wave_70_0/team_c_prompt.md @@ -0,0 +1,212 @@ +# Wave 70.0 - Team C: Autonomy Trust Substrate + +**Theme:** Give the Queen honest daily-budget, blast-radius, and earned-trust +contracts so `70.5` can render autonomy as something the operator can inspect. + +## Context + +Read `docs/waves/wave_70_0/wave_70_0_plan.md` first. This packet is backend. +Do not build the settings card or proposal UI here; land the trust contracts. + +Read `CLAUDE.md` for hard constraints. + +### Key seams to read before coding + +- `self_maintenance.py` — `MaintenanceDispatcher.__init__()` (line 52), + `evaluate_and_dispatch()` (line 57), `_daily_spend` dict (line 54, + workspace_id→USD), `_reset_daily_budget_if_needed()` (line 243, UTC + midnight reset), `_get_policy()` (line 213, reads from + `ws.config["maintenance_policy"]`), `_count_active_maintenance_colonies()` + (line 229), `_spawn_maintenance_colony()` (line 250). Per-caste cost + estimates at line 38: researcher=$0.08, archivist=$0.05, coder=$0.12. 
+- `core/types.py` — `AutonomyLevel` (line 851: suggest/auto_notify/autonomous), + `MaintenancePolicy` (line 857: `autonomy_level`, `auto_actions: list[str]`, + `max_maintenance_colonies: int = 2`, `daily_maintenance_budget: float = 1.0`) +- `projections.py` — `ColonyOutcome` (line 89: `succeeded`, `total_rounds`, + `total_cost`, `quality_score`, `strategy`, `caste_composition`, 18 fields + total), `outcome_stats()` (line 736: returns `[{strategy, caste_mix, total, + success_rate, avg_rounds, avg_cost}]`), `OperatorBehaviorProjection` (line + 170: `suggestion_categories_acted_on`, `kill_records`, `feedback_by_domain`, + `kills_by_strategy`) +- `proactive_intelligence/rules.py` — `_rule_earned_autonomy()` (line 1237): + read-only, promotion ≥5 follow-throughs, demotion ≥3 kills or >50% negative + feedback, 7-day cooldown. Stays untouched. +- `queen_tools.py` — no existing autonomy/budget Queen tool. Proposal metadata + is built at line 3241 (the `action` dict in `_propose_plan()`). Blast-radius + truth should be attached here, not in `queen_runtime.py`. +- `routes/api.py` — existing budget endpoint: + `GET /api/v1/workspaces/{id}/budget` (line 959). No autonomy-status endpoint + exists yet. + +## Your Files (exclusive ownership) + +- `src/formicos/surface/self_maintenance.py` — blast radius estimator, + autonomy scoring, dispatch gate integration +- `src/formicos/surface/queen_tools.py` — `check_autonomy_budget` handler, + blast-radius metadata on `_propose_plan()` action dict +- `src/formicos/surface/routes/api.py` — + `GET /api/v1/workspaces/{id}/autonomy-status` only +- `config/caste_recipes.yaml` — tool list only +- `tests/unit/surface/test_autonomy_guardrails.py` — **new** + +## Do Not Touch + +- `src/formicos/surface/queen_runtime.py` — Team A owns addon coverage, + Team B owns project-plan injection. You have no changes here. 
+- frontend files +- `src/formicos/surface/addon_loader.py` — Team A owns +- `src/formicos/surface/project_plan.py` and `queen_budget.py` — Team B owns +- `src/formicos/surface/projections.py` +- `src/formicos/core/events.py` +- `src/formicos/core/types.py` +- `src/formicos/addons/proactive_intelligence/rules.py` — the earned + autonomy rule stays untouched + +## Overlap Coordination + +- Team A and Team B also add tools to `queen_tools.py`. Keep your additions + additive and scoped to autonomy. +- Team A and Team B also touch `routes/api.py`. You only own the + `autonomy-status` endpoint section. + +--- + +## Track 7: Daily Autonomy Budget Truth + +### Goal + +Let the Queen and future UI see a stable daily autonomy budget contract. + +### Requirements + +- add `check_autonomy_budget` as an additive Queen tool +- expose daily spend, cap, remaining budget, and active autonomous work +- reuse existing maintenance/budget infrastructure rather than replacing it + +The tool is for Queen introspection; `70.5` will use the endpoint below for UI. + +--- + +## Track 8: Blast Radius + Proposal Metadata + +### Goal + +Make autonomous risk machine-readable before the UI exists. + +### Requirements + +Add a deterministic blast-radius estimator in `self_maintenance.py` with: + +- numeric score +- level (`low` / `medium` / `high`) +- factors list +- recommendation (`proceed` / `notify` / `escalate`) + +Then attach blast-radius truth to proposal metadata. The attachment point +is the `action` dict in `_propose_plan()` (queen_tools.py line 3241): + +```python +action: dict[str, Any] = { + "tool": "propose_plan", + "render": "proposal_card", + "proposal": proposal, + # Wave 70: add blast-radius truth for 70.5 rendering + "blast_radius": { ... }, +} +``` + +Also integrate blast radius into `evaluate_and_dispatch()` as a dispatch +gate: skip colonies where the estimator recommends `"escalate"`. For +`auto_notify` level, also skip on `"notify"`. 
For `autonomous` level, +proceed on `"notify"` but skip on `"escalate"`. + +`70.5` should not need to recompute anything in the browser. + +Use additive metadata fields only. No new events. + +--- + +## Track 9: Autonomy Score + Status Endpoint + +### Goal + +Turn earned autonomy from an internal recommendation into a stable read +contract. + +### Requirements + +**1. Scoring** + +Compute an autonomy score from replay-derived/workspace-derived history using: + +- success rate +- follow-through count +- operator interrupts / kills +- recent budget behavior +- maybe recent quality trend if already easy to access + +Keep it deterministic and inspectable. Return components, not just one number. + +**2. Endpoint** + +Add: + +```text +GET /api/v1/workspaces/{id}/autonomy-status +``` + +Suggested shape: + +```json +{ + "level": "auto_notify", + "score": 74, + "grade": "B", + "daily_budget": 5.0, + "daily_spend": 1.6, + "remaining": 3.4, + "components": { + "follow_through": 0.8, + "success_rate": 0.72, + "budget_discipline": 0.9, + "operator_interrupt_rate": 0.15 + }, + "recent_actions": [ + { + "task": "...", + "blast_radius": 0.28, + "recommendation": "proceed", + "outcome": "completed" + } + ] +} +``` + +This endpoint is the `70.5` autonomy card contract. + +## Tests + +Create `tests/unit/surface/test_autonomy_guardrails.py` with at least: + +1. `check_autonomy_budget` returns stable budget truth +2. blast-radius estimator produces expected levels/factors on representative tasks +3. autonomy score is deterministic from mocked outcome history +4. `GET /api/v1/workspaces/{id}/autonomy-status` returns the expected shape +5. 
proposal metadata carries blast-radius truth additively + +## Acceptance Gates + +- [ ] `check_autonomy_budget` lands as an additive Queen tool +- [ ] blast radius is deterministic and structured +- [ ] proposal metadata carries blast-radius/autonomy truth for `70.5` +- [ ] autonomy scoring is structured, not opaque +- [ ] `GET /api/v1/workspaces/{id}/autonomy-status` exists and returns stable JSON +- [ ] no frontend changes +- [ ] no new event types + +## Validation + +```bash +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +pytest tests/unit/surface/test_autonomy_guardrails.py -v +``` diff --git a/docs/waves/wave_70_0/wave_70_0_plan.md b/docs/waves/wave_70_0/wave_70_0_plan.md new file mode 100644 index 0000000..48c3ef3 --- /dev/null +++ b/docs/waves/wave_70_0/wave_70_0_plan.md @@ -0,0 +1,131 @@ +# Wave 70.0: Operational Flexibility + +**Status:** Dispatch-ready packet +**Predecessor:** Wave 69 +**Theme:** Land the backend/control-plane substrate for MCP access, +project-level intelligence, and earned autonomy without forcing half-finished +UI into the same merge window. + +## Packet Authority + +This file is the dispatch overview. The prompts are the authority for +implementation detail: + +- `docs/waves/wave_70_0/team_a_prompt.md` +- `docs/waves/wave_70_0/team_b_prompt.md` +- `docs/waves/wave_70_0/team_c_prompt.md` + +## Locked Boundaries + +- No new event types. No changes to the closed event union. +- No new projection fields. +- No retrieval/scoring math changes except the Queen context budget carve-out + needed for project-plan injection. +- No operator-surface buildout in this packet beyond additive REST/status/meta + contracts that `70.5` will consume. +- Every new capability must land with a stable machine-readable seam: + endpoint, metadata payload, or addon summary field. `70.5` should not need + to parse backend internals. 
+ +## Scope + +| Track | Outcome | Team | +|------|---------|------| +| 1 | MCP bridge addon core + remote tool calls | A | +| 2 | Dynamic MCP tool discovery + `discover_mcp_tools` | A | +| 3 | Generic bridge health exposure for Queen/addon summaries | A | +| 4 | Project plan parser/helper + milestone tools | B | +| 5 | `GET /api/v1/project-plan` + dedicated `project_plan` budget slot | B | +| 6 | Project-plan injection for new conversations | B | +| 7 | Daily autonomy budget visibility + `check_autonomy_budget` | C | +| 8 | Blast radius estimator + proposal metadata | C | +| 9 | Autonomy scoring + `GET /api/v1/workspaces/{id}/autonomy-status` | C | + +## Team Missions + +### Team A - MCP Bridge Substrate + +Own the external-tool seam: + +- bridge remote MCP servers into the addon system +- expose discovered tools to the Queen cleanly +- report bridge health generically, without hardcoded addon-name checks +- make bridge status visible through machine-readable backend seams that + `70.5` can consume + +### Team B - Project Intelligence Substrate + +Own the project-wide planning seam: + +- store and update one project plan per data root +- parse it from a single shared helper, not ad hoc regexes everywhere +- give the Queen a dedicated project-plan context budget +- expose a read endpoint that `70.5` can render directly + +### Team C - Autonomy Trust Substrate + +Own the autonomy/trust seam: + +- daily budget truth +- blast radius estimation +- graduated autonomy scoring +- stable proposal/status metadata and one read endpoint for `70.5` + +## Merge Order + +All three teams can develop in parallel. Recommended merge order: + +1. Team B +2. Team C +3. Team A + +Why: + +- Team B changes the Queen context budget contract and project-plan endpoint. +- Team C adds the autonomy-status contract and proposal metadata. +- Team A is backend-heavy but mostly orthogonal, and its addon-summary/health + shaping can merge last once the core runtime contracts are stable. 
+ +## Shared Seams + +- `src/formicos/surface/queen_tools.py` is shared by all three teams: + additive tool handlers only. Each team adds handlers to `_handlers` dict + (near line 198) and tool specs before `*self._addon_tool_specs` (line 1411). + All additions are self-contained — no team modifies another team's handlers. +- `src/formicos/surface/routes/api.py` is shared by all three teams: + additive route sections only. Team A expands the existing `/api/v1/addons` + handler. Teams B and C add new endpoints to the route table (lines 1600–1720). +- `src/formicos/surface/queen_runtime.py` is shared by Teams A and B only: + Team A owns the deliberation frame addon coverage section (lines 1456–1495), + Team B owns the project-plan injection block (insert between lines 953–955). + Team C does not touch this file. +- `config/caste_recipes.yaml` is shared by all three teams: + tool list only. Append new tool names to the Queen tools array (line 207). + No system-prompt rewrite in this packet. Merge order matters — last team + to merge should verify all tools are present and the count is correct. 
+
+## Acceptance Focus
+
+- no hardcoded `if addon_name == "mcp-bridge"` routing
+- bridge health exposed through reusable backend seams
+- project plan has a dedicated Queen context budget, not shared with
+  `project_context.md`
+- `GET /api/v1/project-plan` returns structured data from a shared parser
+- `GET /api/v1/workspaces/{id}/autonomy-status` returns structured trust data
+- proposal metadata carries blast-radius/autonomy truth for `70.5`
+- no operator-surface work required to validate the backend packet
+
+## Validation
+
+```bash
+ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest
+```
+
+## Success Condition
+
+Wave 70.0 succeeds if `70.5` can be mostly frontend work:
+
+- no UI needs to parse markdown files directly
+- no UI needs to inspect runtime internals directly
+- no UI needs hardcoded addon-name heuristics for MCP health
+- all three new capabilities are exposed through stable contracts
diff --git a/docs/waves/wave_70_5/team_a_prompt.md b/docs/waves/wave_70_5/team_a_prompt.md
new file mode 100644
index 0000000..13fd7a2
--- /dev/null
+++ b/docs/waves/wave_70_5/team_a_prompt.md
@@ -0,0 +1,114 @@
+# Wave 70.5 - Team A: MCP Settings UX
+
+**Theme:** Turn the 70.0 MCP bridge contract into a usable operator surface.
+
+## Context
+
+Read `docs/waves/wave_70_5/wave_70_5_plan.md` first. Assume `70.0` landed:
+
+- `/api/v1/addons` can expose bridge health additively
+- addon config routes exist
+- MCP bridge stores server config through the addon config path
+
+You are building the leaf UI component only. Team C owns `settings-view.ts`.
+
+### Key seams to read before coding
+
+- `settings-view.ts` — card sections use `.settings-card` class
+  (`background: var(--v-surface)`, `border: 1px solid var(--v-border)`,
+  `border-radius: 10px`, `padding: 16px 20px`). Each section is a private
+  `_render*Card()` method mounted in `render()`. Team C will mount your
+  card; you just export a self-contained `<fc-mcp-servers-card>`.
+- `addons-view.ts` — read-only addon status/trigger view (lines 1–221).
+  Not the right place for MCP config. Your card goes in settings.
+- `routes/api.py` — addon config: `GET /api/v1/addons/mcp-bridge/config?workspace_id=...`
+  and `PUT /api/v1/addons/{addon_name}/config` (line 1664). Addon summary:
+  `GET /api/v1/addons` (line 1295).
+- `styles/shared.ts` — Void Protocol tokens: `--v-surface`, `--v-border`,
+  `--v-fg`, `--v-fg-muted`, `--v-accent`, `--v-success`, `--v-danger`,
+  `--f-mono`. Glass card: `.glass` class (line 82).
+
+## Your Files (exclusive ownership)
+
+- `frontend/src/components/mcp-servers-card.ts` — **new**
+- `frontend/src/styles/shared.ts` or atoms only if a tiny additive style atom is needed
+- `frontend/src/components/addons-view.ts` only if a small deep-link/help affordance is needed
+
+## Do Not Touch
+
+- `frontend/src/components/settings-view.ts` - Team C owns
+- `frontend/src/components/queen-overview.ts` - Team B owns
+- `frontend/src/components/proposal-card.ts` - Team C owns
+- backend files unless a tiny addon-summary payload expansion is truly required
+
+## Overlap Coordination
+
+- Team C will mount `<fc-mcp-servers-card>` inside settings.
+- Keep the component self-contained: let it fetch its own data rather than
+  requiring shared store changes.
+
+---
+
+## Goal
+
+Build a real MCP server management card so the operator never has to type raw
+JSON to configure bridge servers.
+
+## Requirements
+
+### Data sources
+
+Use existing `70.0` contracts:
+
+- `GET /api/v1/addons`
+- `GET /api/v1/addons/mcp-bridge/config?workspace_id=...`
+- `PUT /api/v1/addons/mcp-bridge/config`
+
+If bridge health is present in addon summaries, render it. If not, fall back
+to config-only presentation without inventing runtime inspection in the browser.
+ +### UI + +Create `fc-mcp-servers-card` as a glass-card section that renders: + +- connected servers list +- server name +- URL +- health dot/status +- discovered tool count if available +- last connected / last error if available +- Add Server inline form +- Disconnect / Remove actions + +### Write behavior + +The UI may still persist the server list through the addon config route, but it +must present it as structured fields: + +- `name` +- `url` +- optional transport/options if supported + +No raw JSON textarea. + +### Empty states + +- no bridge installed +- bridge installed but no servers configured +- server configured but unhealthy + +## Acceptance Gates + +- [ ] `fc-mcp-servers-card` exists as a self-contained component +- [ ] no raw JSON entry in the operator UI +- [ ] reads from existing addon/bridge contracts +- [ ] writes through existing addon config route +- [ ] health is shown honestly when available +- [ ] component is ready for Team C to mount without store work + +## Validation + +```bash +npm run build +npm run lint # if lint config exists +``` diff --git a/docs/waves/wave_70_5/team_b_prompt.md b/docs/waves/wave_70_5/team_b_prompt.md new file mode 100644 index 0000000..62b59d9 --- /dev/null +++ b/docs/waves/wave_70_5/team_b_prompt.md @@ -0,0 +1,87 @@ +# Wave 70.5 - Team B: Project Visibility + +**Theme:** Make the project plan visible at the workspace level without asking +the operator to open a file or a thread. + +## Context + +Read `docs/waves/wave_70_5/wave_70_5_plan.md` first. Assume `70.0` landed +`GET /api/v1/project-plan`. + +You are building a pure rendering slice. Do not parse markdown in the browser. + +### Key seams to read before coding + +- `queen-overview.ts` — already has plan card infrastructure: + `_renderActivePlans()` (lines 511–570) renders per-thread plans with + group progress bars. Your project-plan card is workspace-scoped (not + thread-scoped) and should mount separately, near the budget panel + (around lines 210–220). 
Existing card mount pattern: `.glass` cards in + grid containers. +- `styles/shared.ts` — `.glass` class (line 82), Void Protocol tokens. + +## Your Files (exclusive ownership) + +- `frontend/src/components/project-plan-card.ts` — **new** +- `frontend/src/components/queen-overview.ts` — mount point only + +## Do Not Touch + +- `frontend/src/components/settings-view.ts` — Team C owns +- `frontend/src/components/proposal-card.ts` — Team C owns +- backend files + +## Goal + +Add a compact Project Plan card to the Queen overview. This is distinct from +the existing per-thread plan rendering in `_renderActivePlans()`. + +## Requirements + +### Data source + +Fetch: + +```text +GET /api/v1/project-plan +``` + +Use the response as-is. No frontend parsing, no inferred state. + +### UI + +Create `fc-project-plan-card` and render: + +- plan goal +- updated timestamp +- milestone checklist +- status chips (`pending`, `active`, `completed`) +- thread links when present +- completion dates / notes when present + +Mount it in `queen-overview.ts`. + +### Visibility rules + +- hide the card entirely when no project plan exists +- keep the card compact and dashboard-like, not full-document rendering + +### Empty/error states + +- no project plan +- endpoint unavailable + +## Acceptance Gates + +- [ ] `fc-project-plan-card` exists +- [ ] `queen-overview.ts` mounts it +- [ ] data comes only from `GET /api/v1/project-plan` +- [ ] no frontend markdown parsing +- [ ] milestone status/thread/date truth is preserved + +## Validation + +```bash +npm run build +npm run lint # if lint config exists +``` diff --git a/docs/waves/wave_70_5/team_c_prompt.md b/docs/waves/wave_70_5/team_c_prompt.md new file mode 100644 index 0000000..2264ad2 --- /dev/null +++ b/docs/waves/wave_70_5/team_c_prompt.md @@ -0,0 +1,131 @@ +# Wave 70.5 - Team C: Trust Integration + +**Theme:** Integrate the new trust surfaces into existing product UI with one +owner for settings and one owner for proposal-card truth. 
+
+## Context
+
+Read `docs/waves/wave_70_5/wave_70_5_plan.md` first. Assume `70.0` landed:
+
+- `GET /api/v1/workspaces/{id}/autonomy-status`
+- proposal metadata can carry blast-radius truth
+- Team A provides `<fc-mcp-servers-card>`
+
+You own the integration surfaces:
+
+- `settings-view.ts`
+- `proposal-card.ts`
+
+### Key seams to read before coding
+
+- `settings-view.ts` — 6 existing card sections, each a `_render*Card()`
+  method mounted at lines 391–408. Mount your new cards after the existing
+  ones. Card class: `.settings-card` (lines 30–36). Data fetching in
+  `connectedCallback()` (line 218).
+- `proposal-card.ts` — `ProposalData` type (types.ts line 236: `summary`,
+  `options`, `questions?`, `recommendation?`). Render structure: summary →
+  options → questions → recommendation → bottom-actions. Blast-radius
+  section goes between recommendation (line 178) and bottom-actions (line
+  180). If `action.blast_radius` is absent, render unchanged.
+- `types.ts` — extend `ProposalData` or add a sibling `BlastRadiusData`
+  interface for the metadata that `70.0` Team C attaches.
+- `system-overview.ts` — compact one-line summary (line 59). Queen tool
+  count is **hardcoded at 38** (line 34). After 70.0 adds 3+ new tools,
+  this needs updating — either hardcode the new count or compute it
+  dynamically from the model registry.
+
+## Your Files (exclusive ownership)
+
+- `frontend/src/components/autonomy-card.ts` — **new**
+- `frontend/src/components/settings-view.ts`
+- `frontend/src/components/proposal-card.ts`
+- `frontend/src/components/system-overview.ts` — tool count update
+- `frontend/src/types.ts` — additive type additions only
+
+## Do Not Touch
+
+- `frontend/src/components/mcp-servers-card.ts` — Team A owns
+- `frontend/src/components/project-plan-card.ts` and `queen-overview.ts` — Team B owns
+- backend files unless a tiny presentational payload shim is absolutely required
+
+## Overlap Coordination
+
+- Team A delivers a self-contained MCP card.
You mount it; you do not rewrite it.
+- Keep settings integration centralized here so `settings-view.ts` has one owner.
+
+---
+
+## Track 3: Autonomy Card + Settings Integration
+
+### Goal
+
+Make autonomy visible and trustworthy from the settings surface.
+
+### Requirements
+
+Create `fc-autonomy-card` that fetches:
+
+```text
+GET /api/v1/workspaces/{id}/autonomy-status
+```
+
+Render:
+
+- autonomy level
+- trust score (0-100) plus grade
+- daily budget/spend/remaining
+- recent autonomous actions table (last 5)
+- blast radius score/recommendation when available in recent actions
+
+Then mount both:
+
+- `<fc-mcp-servers-card>`
+- `<fc-autonomy-card>`
+
+inside `settings-view.ts`.
+
+### Settings rules
+
+- do not create a new view
+- keep the existing settings layout language from Wave 69
+- present autonomy as inspectable truth, not marketing copy
+
+---
+
+## Track 4: Proposal-Card Blast Radius
+
+### Goal
+
+When proposal metadata includes blast-radius/autonomy truth, render it inline
+where the operator makes the decision.
+
+### Requirements
+
+Enhance `proposal-card.ts` so that when metadata includes blast-radius fields,
+the card shows:
+
+- blast radius score
+- level (`low` / `medium` / `high`)
+- top factors
+- recommendation (`proceed` / `notify` / `escalate`)
+
+This is additive. If the metadata is absent, render the existing card unchanged.
+ +## Acceptance Gates + +- [ ] `fc-autonomy-card` exists and is self-contained +- [ ] `settings-view.ts` mounts both the MCP and autonomy cards +- [ ] `proposal-card.ts` renders blast-radius truth when `action.blast_radius` present +- [ ] `proposal-card.ts` renders unchanged when blast-radius absent +- [ ] `system-overview.ts` tool count reflects new 70.0 tools +- [ ] `types.ts` extended with blast-radius type (additive only) +- [ ] no new nav/view sprawl +- [ ] one owner for `settings-view.ts` +- [ ] trust data comes from `70.0` contracts, not browser recomputation + +## Validation + +```bash +npm run build +npm run lint # if lint config exists +``` diff --git a/docs/waves/wave_70_5/wave_70_5_plan.md b/docs/waves/wave_70_5/wave_70_5_plan.md new file mode 100644 index 0000000..d925d75 --- /dev/null +++ b/docs/waves/wave_70_5/wave_70_5_plan.md @@ -0,0 +1,100 @@ +# Wave 70.5: Operator Trust Surface + +**Status:** Dispatch-ready packet +**Predecessor:** Wave 70.0 +**Theme:** Finish the UI wiring for the 70.0 contracts so MCP access, +project intelligence, and autonomy are visible, editable where appropriate, +and trustworthy to the operator. + +## Packet Authority + +This file is the dispatch overview. The prompts are the authority for +implementation detail: + +- `docs/waves/wave_70_5/team_a_prompt.md` +- `docs/waves/wave_70_5/team_b_prompt.md` +- `docs/waves/wave_70_5/team_c_prompt.md` + +## Locked Boundaries + +- No new event types. +- No new projection fields. +- No new core autonomy/MCP/project-plan logic in this packet unless a tiny + payload expansion is absolutely required for rendering. +- No new nav surfaces. Use existing settings, overview, and proposal surfaces. +- One owner for `settings-view.ts`: Team C. 
+ +## Scope + +| Track | Outcome | Team | +|------|---------|------| +| 1 | MCP Servers card and config UX | A | +| 2 | Project Plan overview card | B | +| 3 | Autonomy card + settings integration | C | +| 4 | Proposal-card blast-radius rendering | C | + +## Team Missions + +### Team A - MCP Settings UX + +Own the MCP server management leaf component. Build a real form-based surface +over the `70.0` bridge/config contracts. Do not own `settings-view.ts`. + +### Team B - Project Visibility + +Own the workspace-level project-plan card in the Queen overview. Use the +`70.0` endpoint; do not re-implement parsing in the browser. + +### Team C - Trust Integration + +Own the settings integration and proposal-card truth surface: + +- mount Team A's MCP card +- mount the autonomy card +- render blast-radius truth in proposal cards + +## Merge Order + +Recommended merge order: + +1. Team A +2. Team B +3. Team C + +Why: + +- Team A provides the MCP card that Team C mounts. +- Team B is independent. +- Team C is the integrator for `settings-view.ts` and proposal-card polish. + +## Known Housekeeping + +- `system-overview.ts` hardcodes the Queen tool count at 38 (line 34). + Wave 70.0 adds at least 3 new tools. Team C owns this file and must + update the count (or compute it dynamically). 
+ +## Acceptance Focus + +- no raw JSON MCP configuration in the operator UI +- no frontend markdown parsing of project plans +- autonomy/trust shown from `70.0` endpoint data, not recomputed in the browser +- `settings-view.ts` has one owner (Team C) +- every `70.0` capability has an operator-visible seam in `70.5` +- `types.ts` additions are additive only (Team C owns blast-radius type) + +## Validation + +```bash +npm run build +npm run lint # if lint config exists +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +``` + +## Success Condition + +Wave 70.5 succeeds if the operator can: + +- connect and inspect MCP servers without typing raw JSON +- see the workspace project plan from the overview surface +- inspect autonomy level, trust score, and budget remaining in settings +- understand blast radius directly on proposal cards diff --git a/docs/waves/wave_71/wave_71_plan.md b/docs/waves/wave_71/wave_71_plan.md new file mode 100644 index 0000000..7874103 --- /dev/null +++ b/docs/waves/wave_71/wave_71_plan.md @@ -0,0 +1,51 @@ +# Wave 71: The Operational Loop + +**Status:** Split dispatch packet +**Predecessor:** Wave 70.0 + 70.5 + +Wave 71 is split into two sequential packets: + +- `docs/waves/wave_71_0/wave_71_0_plan.md` +- `docs/waves/wave_71_5/wave_71_5_plan.md` + +## Why It Is Split + +This wave is really two jobs: + +- give the Queen durable, multi-day operational coherence +- make that operational state legible and steerable for the operator + +`71.0` lands the backend/control-plane substrate first: + +- Queen journal +- operating procedures +- durable action queue +- scheduled operational sweeps +- cross-artifact continuation/sync summary + +`71.5` then gives those capabilities a real home in the product surface: + +- dedicated Operations tab +- action inbox and review controls +- journal panel +- procedures editor + +## Authority + +Use these docs for dispatch: + +- `docs/waves/wave_71_0/design_note.md` +- 
`docs/waves/wave_71_0/team_a_prompt.md` +- `docs/waves/wave_71_0/team_b_prompt.md` +- `docs/waves/wave_71_0/team_c_prompt.md` +- `docs/waves/wave_71_5/team_a_prompt.md` +- `docs/waves/wave_71_5/team_b_prompt.md` +- `docs/waves/wave_71_5/team_c_prompt.md` + +## Thesis + +Wave 71 makes autonomy operational rather than episodic. The Queen should be +able to pick up a project after hours or days away, explain what she has been +doing, show what is waiting on the operator, and follow workspace-specific +operating procedures without smearing operational state into +`memory_entries`. diff --git a/docs/waves/wave_71_0/design_note.md b/docs/waves/wave_71_0/design_note.md new file mode 100644 index 0000000..30291c3 --- /dev/null +++ b/docs/waves/wave_71_0/design_note.md @@ -0,0 +1,81 @@ +# Wave 71 Design Note: Operational State + +Wave 71 adds an operational layer above plans, session summaries, and +proactive intelligence. That layer exists to keep the Queen coherent across +days while staying legible to the operator. These invariants apply to both +71.0 (substrate) and 71.5 (surface). + +## Invariants + +1. Operational state is not institutional memory. + +- Do not store journals, procedures, or queued actions in `memory_entries`. +- `memory_entries` remain distilled reusable knowledge. +- Operational state is file-backed working memory and audit history. + +2. The live approval system stays event-sourced. + +- Use the existing `ApprovalRequested`, `ApprovalGranted`, and + `ApprovalDenied` path for pending human gates. +- The new action ledger is a durable audit/history layer, not a second + approval authority. + +3. Existing artifacts remain primary sources. + +- Thread plans stay in `.formicos/plans/{thread_id}.md`. +- Session summaries stay in `.formicos/sessions/{thread_id}.md`. +- Project plan stays in `.formicos/project_plan.md`. +- The new operational layer references and synthesizes them; it does not + replace them. + +4. 
Every autonomous action must leave an audit trail. + +- Proposed, executed, rejected, and self-rejected actions must be queryable. +- If the operator asks "what happened while I was away?", the answer should be + recoverable without replaying internals by hand. + +5. The action queue is generic, not maintenance-specific. + +- `actions.jsonl` is the universal operational inbox for future action kinds: + continuation, knowledge review, workflow-template proposal, + procedure suggestion, and similar items. +- The durable action `kind` is the semantic authority for routing and UI. +- Any legacy approval transport details are implementation detail, not product + semantics. + +## New File-Backed Operational Layer + +Use workspace-scoped operational files: + +- `.formicos/operations/{workspace_id}/queen_journal.md` +- `.formicos/operations/{workspace_id}/operating_procedures.md` +- `.formicos/operations/{workspace_id}/actions.jsonl` + +This keeps operational state separate from global project files while still +letting the operator open and edit the human-facing artifacts directly. + +## Queen Context Contract + +The Queen should read concise operational context on every response: + +- operating procedures +- recent journal tail +- prior session summary +- project plan +- compact continuation/sync summary + +This context must be budgeted explicitly. Hardcoded char caps are acceptable +only as fallback floors, not as the primary policy. + +The operational summary should also expose operator-availability truth now: + +- last operator activity timestamp +- idle duration / active-vs-idle signal +- whether a continuation candidate is ready, blocked, or review-only + +## Wave Boundary + +Wave 71.0 lands the substrate and machine-readable seams. +Wave 71.5 turns them into a dedicated Operations surface. +Wave 72 can then hang knowledge review and workflow-template proposals off the +same action queue instead of inventing another operator workflow. 
diff --git a/docs/waves/wave_71_0/team_a_prompt.md b/docs/waves/wave_71_0/team_a_prompt.md new file mode 100644 index 0000000..01e3511 --- /dev/null +++ b/docs/waves/wave_71_0/team_a_prompt.md @@ -0,0 +1,189 @@ +# Wave 71.0 - Team A: Operational Memory + +**Theme:** Give the Queen a durable working-memory layer that stays separate +from institutional memory and is readable by both runtime and operator. + +## Context + +Read these first: + +- `docs/waves/wave_71_0/design_note.md` +- `docs/waves/wave_71_0/wave_71_0_plan.md` +- `CLAUDE.md` + +### Key seams to read before coding + +- `queen_budget.py` — current 7 slots: `system_prompt` 15%, `memory_retrieval` + 15%, `project_context` 10%, `project_plan` 5%, `thread_context` 15%, + `tool_memory` 10%, `conversation_history` 30%. Fallbacks: 2000/1500/500/400/ + 1500/4000/6000 tokens. `_FRACTIONS` at line 24, `_FALLBACKS` at line 33, + `QueenContextBudget` frozen dataclass at line 45. +- `queen_runtime.py` `respond()` (line 859) injection order: + - memory retrieval (lines 895–929, budget: `memory_retrieval`) + - project context (lines 931–953, budget: `project_context` at line 939) + - project plan (lines 955–980, budget: `project_plan` at line 967) + - session summary (lines 982–1010, **hardcoded `[:4000]` at line 993** — + not budget-backed, this is what you fix) + - thread context (lines 1012–1023) + - briefing (lines 1025–1094) + - deliberation frame (lines 1096–1119, budget: `thread_context`) +- `queen_runtime.py` `emit_session_summary()` (line 764) — writes to + `.formicos/sessions/{thread_id}.md`. This is where to add journal hook. +- `project_plan.py` — existing shared helper pattern to follow: `load_*()`, + `render_for_queen()`, workspace-scoped file paths. +- `routes/api.py` — workspace endpoints at lines 1717–1826. New journal/ + procedures endpoints should go after the forager block (line 1769). + +Current repo truth: + +- No shared helper for operational files exists yet. 
+- No `.formicos/operations/` directory structure exists yet. + +## Your Files (exclusive ownership) + +- `src/formicos/surface/operational_state.py` - **new** +- `src/formicos/surface/queen_runtime.py` +- `src/formicos/surface/queen_budget.py` +- `src/formicos/surface/routes/api.py` - journal/procedures endpoints only +- `docs/decisions/051-dynamic-context-caps.md` +- `tests/unit/surface/test_operational_state.py` - **new** + +## Do Not Touch + +- `src/formicos/surface/self_maintenance.py` - Team B owns +- `src/formicos/surface/app.py` - Team B owns +- `src/formicos/surface/project_plan.py` - read only +- `src/formicos/surface/projections.py` +- `src/formicos/core/events.py` +- frontend files + +## Overlap Coordination + +- Team B will import your helper for journal notes and operational paths. +- Team C will consume your helper for journal/procedure reads and may add a + separate continuity block to `queen_runtime.py`. +- In `routes/api.py`, you only own the journal/procedure endpoints. Team B and + Team C add other `/operations/...` endpoints. 
+ +--- + +## Track 1: Shared Operational-State Helper + +Create `src/formicos/surface/operational_state.py` as the single source of +truth for workspace-scoped operational files: + +- `.formicos/operations/{workspace_id}/queen_journal.md` +- `.formicos/operations/{workspace_id}/operating_procedures.md` + +Required helpers: + +- resolve the workspace ops directory +- load/save operating procedures +- append a journal entry +- read a journal tail for UI/runtime use +- render compact procedures/journal text for Queen injection +- if clean, provide one structured helper for appending a rule under a + markdown heading so future procedure suggestions do not need ad hoc text + surgery + +Rules: + +- journal stays append-only in spirit +- procedures are editable and overwriteable +- keep helpers deterministic and file-backed +- do not route any of this through `memory_entries` + +--- + +## Track 2: Journal + Procedures in Queen Context + +### Requirements + +1. Inject operating procedures into the Queen context as a dedicated system + block when the file exists. Insert after the project plan block (line 980) + and before the session summary block (line 982). Follow the same + `[:budget.operating_procedures * 4]` pattern used by project plan at + line 967. + +2. Inject a compact journal tail into the Queen context as a dedicated system + block when entries exist. Insert immediately after the procedures block. + Cap with `[:budget.queen_journal * 4]`. + +3. Stop using the hardcoded session-summary `[:4000]` cap at line 993. + Replace with `[:budget.thread_context * 4]` to match the existing + budget-backed pattern. + +4. Add a deterministic journal append hook that other teams can reuse. At + minimum, journal entries should be written for: + +- session summary emission +- major Queen response milestones when easy to capture +- operator-facing operational notes from other tracks via helper import + +Do not turn the journal into a verbose transcript dump. 
It should read like a
+working log, not chat history.
+
+---
+
+## Track 3: Budget + Endpoints
+
+### ADR / budget update
+
+Update `queen_budget.py` and ADR-051 so the budget explicitly includes:
+
+- `operating_procedures`
+- `queen_journal`
+
+Recommended split (changes marked):
+
+- `system_prompt`: 15% (unchanged)
+- `memory_retrieval`: 13% (**was 15% — reduced by 2%**)
+- `project_context`: 8% (**was 10% — reduced by 2%**)
+- `project_plan`: 5% (unchanged)
+- `operating_procedures`: 5% (**new**)
+- `queen_journal`: 4% (**new**)
+- `thread_context`: 13% (**was 15% — reduced by 2%**)
+- `tool_memory`: 9% (**was 10% — reduced by 1%**)
+- `conversation_history`: 28% (**was 30% — reduced by 2%**)
+
+This trades 9% across five existing slots for two new slots, keeping
+conversation_history as the largest allocation. No single slot loses more
+than 2 absolute points.
+
+Keep `max(fallback, proportional)`. Recommended fallback floors for new
+slots: `operating_procedures` 400 tokens, `queen_journal` 300 tokens.
+Do not shrink existing fallback floors below current truth.
+
+### Endpoints
+
+Add additive endpoints:
+
+- `GET /api/v1/workspaces/{workspace_id}/queen-journal`
+- `GET /api/v1/workspaces/{workspace_id}/operating-procedures`
+- `PUT /api/v1/workspaces/{workspace_id}/operating-procedures`
+
+The journal endpoint can return a compact tail by default plus optional full
+text. Keep the shape simple and machine-readable. 
+ +--- + +## Acceptance Gates + +- [ ] `operational_state.py` exists and is the canonical helper +- [ ] operational files are workspace-scoped under `.formicos/operations/` +- [ ] procedures and journal inject into Queen context through explicit budget + slots +- [ ] session-summary injection no longer uses a hardcoded 4000-char cap +- [ ] ADR-051 matches the new budget truth +- [ ] journal/procedures endpoints exist and are stable +- [ ] no new event types +- [ ] no `memory_entries` usage for operational artifacts + +## Validation + +```bash +pytest tests/unit/surface/test_operational_state.py -v +ruff check src/ +pyright src/ +python scripts/lint_imports.py +``` diff --git a/docs/waves/wave_71_0/team_b_prompt.md b/docs/waves/wave_71_0/team_b_prompt.md new file mode 100644 index 0000000..c2c8704 --- /dev/null +++ b/docs/waves/wave_71_0/team_b_prompt.md @@ -0,0 +1,239 @@ +# Wave 71.0 - Team B: Action Loop + +**Theme:** Turn proactive insights and autonomy guardrails into a real, +durable action queue instead of a mix of silent skips and transient logs. + +## Context + +Read these first: + +- `docs/waves/wave_71_0/design_note.md` +- `docs/waves/wave_71_0/wave_71_0_plan.md` +- `CLAUDE.md` + +### Key seams to read before coding + +- `self_maintenance.py` — `evaluate_and_dispatch()` at line 281. Blast radius + gate at lines 325–354: if recommendation == "escalate" → skips dispatch; + if level == "medium" and auto_notify → skips dispatch. `suggest` autonomy + level returns empty at line 293. Skipped insights are currently logged but + not persisted. `run_proactive_dispatch()` at line 560. +- `app.py` — maintenance loop at lines 892–920. **Default interval: 86400s + (24 hours)** via `FORMICOS_MAINTENANCE_INTERVAL_S` env var (line 889). + `_maint_dispatcher` is a **local variable** in the lifespan closure + (line 885), **not** on `app.state` or `runtime`. 
You must wire it to + `app.state.maintenance_dispatcher` so your approve/reject route handlers + can call dispatch machinery. +- `core/events.py` — `ApprovalRequested` (line 397), `ApprovalGranted` + (line 407), `ApprovalDenied` (line 414). `ApprovalType` enum in + `core/types.py` (line 261): `budget_increase`, `cloud_burst`, + `tool_permission`, `expense`. None of these types cover maintenance + actions — see note below. +- `routes/api.py` — workspace endpoints at lines 1717–1826. New + `/operations/...` endpoints should go after the forager block (line 1769) + and before the Knowledge CRUD block (line 1771). +- `runtime.py` — `approve()` at line 864 and `deny()` at line 870 emit + `ApprovalGranted`/`ApprovalDenied` events. + +Current repo truth: + +- There is no action ledger yet. +- The existing approval system handles live pending approvals only — no + durable history, no rejection reasons. +- Suggest-only and skipped proactive work vanishes into logs. + +## Your Files (exclusive ownership) + +- `src/formicos/surface/action_queue.py` - **new** +- `src/formicos/surface/self_maintenance.py` +- `src/formicos/surface/app.py` +- `src/formicos/surface/routes/api.py` - action queue endpoints only +- `tests/unit/surface/test_action_queue.py` - **new** + +## Do Not Touch + +- `src/formicos/surface/operational_state.py` - Team A owns +- `src/formicos/surface/queen_budget.py` - Team A owns +- `src/formicos/surface/queen_runtime.py` - Teams A/C own +- `src/formicos/surface/projections.py` +- `src/formicos/core/events.py` +- frontend files + +## Overlap Coordination + +- Reuse Team A's journal helper for action audit notes when useful. +- Team C may queue continuation/sync proposals through your helper. Keep the + queue helper generic enough for both proactive-intelligence and + continuation-type actions. +- In `routes/api.py`, you only own the action-list/approve/reject endpoints. 
+ +--- + +## Track 4: Durable Action Queue Ledger + +Create `src/formicos/surface/action_queue.py` as the canonical durable action +ledger: + +- `.formicos/operations/{workspace_id}/actions.jsonl` + +Each action record should be stable and queryable. Suggested fields: + +- `action_id` +- `created_at` +- `updated_at` +- `created_by` +- `status` (`pending_review`, `approved`, `rejected`, `executed`, + `self_rejected`, `failed`) +- `kind` (`maintenance`, `continuation`, `sync`, etc.) +- `source_category` +- `source_ref` +- `title` +- `detail` +- `rationale` +- `payload` +- `thread_id` when relevant +- `estimated_cost` +- `blast_radius` +- `confidence` +- `requires_approval` +- `approval_request_id` when a live approval exists +- `executed_at` +- `operator_reason` for rejection or manual override notes + +Keep the queue file append-friendly and easy to rewrite safely if status +updates need it. + +Design rule: + +- `kind` is the semantic authority for routing and UI. +- Do not make future surfaces infer semantics from `ApprovalType`. +- `payload` should be generic enough that Wave 72 can add knowledge-review, + workflow-template, and procedure-suggestion items without replacing the + ledger shape. + +### Size management + +Over weeks of autonomous operation, `actions.jsonl` will grow unbounded. Add +a `compact_action_log()` helper: when the file exceeds 1000 lines, archive +older entries to `actions.{date}.jsonl.gz` and keep only the last 500 entries +in the active file. Call it at the start of each operational sweep. This is +a one-time helper, not ongoing complexity — without it, file reads for the +queue listing will degrade over weeks. + +--- + +## Track 5: Approval-Backed Operator Review + +Do **not** invent a second approval mechanism. + +The action queue is the durable audit/history layer. The existing approval +event path remains the live gating mechanism. 
Not every queued action needs +an approval event — only actions that require operator sign-off before +execution. For those, use `ApprovalType.expense` (the closest existing fit +for "spend budget on autonomous work"). Do not add new values to the +`ApprovalType` enum in `core/types.py` — that is a Core layer type and +requires operator approval to extend. + +Important: + +- the UI and API should surface queue `kind`, not `ApprovalType`, as the user- + facing action meaning +- `ApprovalType` here is transport compatibility only + +Use the existing approval event path for live pending approvals, but back it +with the new action ledger and richer endpoints: + +- `GET /api/v1/workspaces/{workspace_id}/operations/actions` +- `POST /api/v1/workspaces/{workspace_id}/operations/actions/{action_id}/approve` +- `POST /api/v1/workspaces/{workspace_id}/operations/actions/{action_id}/reject` + +Requirements: + +1. Approve/reject endpoints update the durable action ledger. +2. If an action created a live approval request, approval resolution must emit + the existing `ApprovalGranted` / `ApprovalDenied` path. +3. Reject endpoint accepts an optional reason and stores it in the ledger. +4. Approve endpoint dispatches the queued work through the existing runtime / + maintenance machinery, not by duplicating colony-spawn logic in the route. + Access the dispatcher via `request.app.state.maintenance_dispatcher` — + you must wire this in `app.py` (see key seams above). + +The queue endpoint should support basic filtering by status. + +Frontload these now: + +- `status` filter +- `kind` filter +- `limit` +- aggregate counts by status and kind in the response envelope + +--- + +## Track 6: 30-Minute Operational Sweep + +The existing maintenance loop runs consolidation services + proactive dispatch +on a 24-hour default cadence (lines 892–920). The operational queue needs a +faster cadence for queue processing and status checks without disrupting +consolidation. 
+ +### Implementation + +Add a **second** `asyncio.create_task` in `app.py` alongside the existing +`_maintenance_loop`. Call it `_operational_sweep_loop`. Default interval: +1800 seconds via `FORMICOS_OPS_SWEEP_INTERVAL_S` env var. Do not change +the existing 24-hour consolidation cadence. + +Requirements: + +1. The sweep loop checks the action queue for pending work and processes + approved actions through the maintenance dispatcher. + Keep the existing longer maintenance cadence for consolidation services. + +2. Change maintenance behavior so proactive insights do not simply disappear: + +- low-risk autonomous work may still execute immediately when policy allows +- medium/high-risk work should become queued actions +- suggest-only work should become queued actions +- explicitly skipped work should be recorded as `self_rejected` with reason + +3. Queue records should carry the originating briefing signal, blast-radius + estimate, and estimated cost. + +4. Where helpful, append concise journal notes through Team A's helper. + +The goal is a real detect -> queue -> review/execute -> audit loop. + +## Tests + +Create `tests/unit/surface/test_action_queue.py` with at least: + +1. queue appends and reads action records correctly +2. status transitions (pending → approved → executed, pending → rejected) +3. approve/reject endpoints update the ledger and return structured JSON +4. `compact_action_log()` archives old entries when threshold exceeded +5. **end-to-end operational loop**: proactive insight with medium blast radius + → queued as `pending_review` → approved via endpoint → dispatched through + maintenance machinery → journal note written. This is the "does the + operational loop actually loop?" test. +6. 
self-rejected actions are recorded with reason when suggest-only or + blast-radius gates block dispatch + +## Acceptance Gates + +- [ ] `action_queue.py` exists as the canonical action ledger +- [ ] queue uses existing approval events for live gating +- [ ] action history survives restart because it is file-backed +- [ ] operator can approve/reject with a reason through endpoints +- [ ] operational sweeps run on a 30-minute default cadence +- [ ] JSONL size management prevents unbounded growth +- [ ] proactive work is queued or logged, not silently dropped +- [ ] no new event types + +## Validation + +```bash +pytest tests/unit/surface/test_action_queue.py -v +ruff check src/ +pyright src/ +python scripts/lint_imports.py +``` diff --git a/docs/waves/wave_71_0/team_c_prompt.md b/docs/waves/wave_71_0/team_c_prompt.md new file mode 100644 index 0000000..2215613 --- /dev/null +++ b/docs/waves/wave_71_0/team_c_prompt.md @@ -0,0 +1,175 @@ +# Wave 71.0 - Team C: Coherence Coordinator + +**Theme:** Synthesize project plan, thread plans, session summaries, recent +outcomes, and queued actions into one compact operational model that both the +Queen and the future UI can read. + +## Context + +Read these first: + +- `docs/waves/wave_71_0/design_note.md` +- `docs/waves/wave_71_0/wave_71_0_plan.md` +- `CLAUDE.md` + +### Key seams to read before coding + +- `project_plan.py` — existing shared helper pattern. `load_project_plan()` + returns `{"exists": bool, "goal": str, "milestones": [...]}`. + `render_for_queen()` returns compact text. Follow this pattern for thread + plans. +- `queen_tools.py` — `_propose_plan()` at line 3205 writes thread plans to + `.formicos/plans/{thread_id}.md`. `_STEP_RE` regex at line 3390: + `r"^- \[(\d+)\] \[(\w+)\] (.*)$"`. This is the format you must parse. +- `queen_runtime.py` — `emit_session_summary()` at line 764 writes session + summaries to `.formicos/sessions/{thread_id}.md`. 
Session file contains: + plan excerpt (`[:1000]`), colony activity, step status, recent Queen + messages. `_build_thread_context()` at line 1736 reads thread plans with + a hardcoded `[:2000]` cap at line 1824. +- `queen_runtime.py` `respond()` injection order — your continuity-summary + block goes after briefing (line 1094) and before deliberation frame + (line 1096). Team A adds procedures/journal blocks between lines 980–982. +- Team A's post-rebalance budget: `thread_context` will be 13% (was 15%). + Your continuity block should use `[:budget.thread_context * 2]` (half the + thread-context allocation, ~2600 chars on a 32K model) as a conservative + cap. This is intentionally smaller than a full slot — if the summary needs + more space, it belongs in a dedicated slot in a future wave. + +Current repo truth: + +- No shared thread-plan parser/helper exists yet. +- No endpoint answers "what should we continue next?" + +## Your Files (exclusive ownership) + +- `src/formicos/surface/thread_plan.py` - **new** +- `src/formicos/surface/operations_coordinator.py` - **new** +- `src/formicos/surface/queen_runtime.py` - continuity-summary block only +- `src/formicos/surface/routes/api.py` - operations summary endpoint only +- `tests/unit/surface/test_operations_coordinator.py` - **new** + +## Do Not Touch + +- `src/formicos/surface/operational_state.py` - Team A owns +- `src/formicos/surface/action_queue.py` - Team B owns +- `src/formicos/surface/self_maintenance.py` - Team B owns +- `src/formicos/surface/queen_budget.py` - Team A owns +- `src/formicos/surface/projections.py` +- `src/formicos/core/events.py` +- frontend files + +## Overlap Coordination + +- You may read Team A and Team B helpers, but do not take ownership of them. +- If you decide a continuation/sync issue should become a queued action, route + it through Team B's action-queue helper instead of emitting direct Queen + messages or direct autonomous dispatch. 
+- In `queen_runtime.py`, you only own a compact continuity-summary block. + +--- + +## Track 7: Shared Thread-Plan Helper + +Create `src/formicos/surface/thread_plan.py` as the canonical helper for +reading `.formicos/plans/{thread_id}.md`. + +Keep it simple: + +- resolve thread-plan path +- parse a step list and coarse statuses +- expose compact summary data + +Do not try to replace the plan-file format. This helper exists so the +coordinator and API route stop doing ad hoc markdown scraping. + +--- + +## Track 8: Operations Coordinator + +Create `src/formicos/surface/operations_coordinator.py`. + +It should inspect: + +- project plan +- thread plans +- session summaries +- recent colony outcomes +- queued actions summary + +From those, derive: + +- `continuation_candidates` +- `sync_issues` +- `recent_progress` +- compact counts for pending review / stalled work / active milestones +- operator-availability signals (`last_operator_activity_at`, + `idle_for_minutes`, `operator_active`) + +Examples of useful findings: + +- a milestone is still pending but its thread plan is fully complete +- a thread has pending steps, no active colony, and recent successful context +- a thread had failures last session and probably needs operator review +- there is a backlog of queued actions but no clear active milestone owner + +This coordinator is a synthesis layer, not a second source of truth. + +--- + +## Track 9: Summary Endpoint + Queen Cue + +Add: + +- `GET /api/v1/workspaces/{workspace_id}/operations/summary` + +Suggested shape: + +```json +{ + "workspace_id": "ws_123", + "pending_review_count": 2, + "active_milestone_count": 1, + "last_operator_activity_at": "2026-03-26T18:10:00Z", + "idle_for_minutes": 47, + "operator_active": false, + "continuation_candidates": [], + "sync_issues": [], + "recent_progress": [] +} +``` + +Also add a compact `# Operational Loop Summary` system block in +`queen_runtime.py` using the coordinator output. 
+ +Rules: + +- keep it short and operational +- do not dump full plans or full journal text here +- cap it with `[:budget.thread_context * 2]` (half the thread-context + allocation, conservative by design — see key seams above) + +Shape the continuation candidates for future automation now. Each candidate +should already carry a concise `blocked_reason` or `ready_for_autonomy` style +signal so Wave 72 does not need a contract rewrite just to distinguish +"interesting" from "actually executable." + +If it is cheap and clean, queue continuation/sync proposals through Team B's +helper. Do not directly auto-dispatch them in this packet. + +## Acceptance Gates + +- [ ] `thread_plan.py` exists as the canonical thread-plan helper +- [ ] `operations_coordinator.py` synthesizes real artifacts +- [ ] `GET /api/v1/workspaces/{workspace_id}/operations/summary` exists +- [ ] Queen gets a compact operational continuity cue +- [ ] no duplicate source of truth for plans or approvals +- [ ] no new event types + +## Validation + +```bash +pytest tests/unit/surface/test_operations_coordinator.py -v +ruff check src/ +pyright src/ +python scripts/lint_imports.py +``` diff --git a/docs/waves/wave_71_0/wave_71_0_plan.md b/docs/waves/wave_71_0/wave_71_0_plan.md new file mode 100644 index 0000000..103a37f --- /dev/null +++ b/docs/waves/wave_71_0/wave_71_0_plan.md @@ -0,0 +1,146 @@ +# Wave 71.0: Operational Coherence Substrate + +**Status:** Dispatch-ready packet +**Predecessor:** Wave 70.5 +**Theme:** Give the Queen durable working memory, a real asynchronous action +loop, and a coherent cross-artifact model of what is in flight. + +## Packet Authority + +Use these docs: + +- `docs/waves/wave_71_0/design_note.md` +- `docs/waves/wave_71_0/team_a_prompt.md` +- `docs/waves/wave_71_0/team_b_prompt.md` +- `docs/waves/wave_71_0/team_c_prompt.md` + +## Locked Boundaries + +- No new event types. +- Do not add new projection fields unless an existing surface is impossible + without one. 
+- Keep operational state out of `memory_entries`. +- Reuse the existing approval event path for live gating. +- Land stable backend seams so `71.5` is mostly frontend work. + +## Scope + +| Track | Outcome | Team | +|------|---------|------| +| 1 | Shared operational-state helper + file layout | A | +| 2 | Queen journal + operating procedures + budgeted injection | A | +| 3 | Budget rebalance (7 → 9 slots) + journal/procedures endpoints | A | +| 4 | Durable action queue ledger | B | +| 5 | Approval-backed operator review endpoints | B | +| 6 | 30-minute operational sweeps on top of maintenance loop | B | +| 7 | Shared thread-plan helper | C | +| 8 | Operations coordinator (cross-artifact synthesis) | C | +| 9 | Operations summary endpoint + compact Queen continuity cue | C | + +## Team Missions + +### Team A - Operational Memory + +Own the new operational artifacts: + +- journal +- operating procedures +- helper functions and endpoints for reading/editing them +- Queen context injection for multi-day continuity + +### Team B - Action Loop + +Own the asynchronous action cycle: + +- queue proposed actions durably through a generic typed action envelope +- route approvals through the existing governance path +- turn periodic proactive sweeps into queued/executed/rejected work + +### Team C - Coherence Coordinator + +Own the synthesis layer: + +- inspect project plan, thread plans, session summaries, outcomes, and queue +- identify continuation candidates and sync issues +- give both the Queen and `71.5` a compact operational summary, including + operator-idle truth + +## Merge Order + +Recommended merge order: + +1. Team A +2. Team B +3. Team C + +Why: + +- Team A defines the new operational file layer and context budget seams. +- Team B builds on that layer for the durable action queue. +- Team C synthesizes across the new queue plus the existing plan/session + artifacts. 
+ +## Shared Seams + +- `src/formicos/surface/routes/api.py` is shared by all three teams: + additive endpoint sections only. Workspace endpoints live at lines + 1717–1826. New `/operations/...` endpoints should go after the forager + block (line 1769) and before the Knowledge CRUD block (line 1771). +- `src/formicos/surface/queen_runtime.py` is shared by Teams A and C. + `respond()` starts at line 859. Current injection order in `respond()`: + memory retrieval (895–929), project context (931–953), project plan + (955–980), session summary (982–1010, **hardcoded `[:4000]` at line 993**), + thread context (1012–1023), briefing (1025–1094), deliberation frame + (1096–1119). Team A owns procedures/journal injection (insert after + project plan, before session summary — lines 980–982). Team C owns + a compact continuity-summary block (insert after briefing, before + deliberation — lines 1094–1096). Team A also fixes session summary to use + budget instead of the hardcoded cap. +- `src/formicos/surface/app.py` is Team B only in this packet. + `_maint_dispatcher` is currently a local variable in the lifespan closure + (line 885), **not** on `app.state` or `runtime`. Team B must wire it to + `app.state.maintenance_dispatcher` so approve/reject routes can dispatch. + Maintenance loop is at lines 892–920 with a 24-hour default interval + (line 889: `FORMICOS_MAINTENANCE_INTERVAL_S`). +- `src/formicos/surface/queen_budget.py` and `docs/decisions/051-dynamic-context-caps.md` + are Team A only in this packet. Current 7 slots and fractions at lines + 24–32; `QueenContextBudget` frozen dataclass at line 45. + +## Out Of Scope + +- knowledge review queue +- workflow template extraction +- full autonomous "resume work without asking" behavior +- new nav or polished UI surface + +Those are natural follow-ons once this packet lands. 
+ +## Acceptance Focus + +- operational artifacts have one canonical file layout +- procedures and journal are injected into Queen context explicitly +- session-summary injection no longer relies on a hardcoded 4000-char cap +- medium/high-risk proactive work enters a durable queue instead of vanishing + into logs +- action queue is generic enough for future action kinds without schema churn +- operator approval/rejection can carry a reason and survive restarts +- continuation candidates come from real artifacts, not prompt-only heuristics +- operations summary exposes last-operator-activity / idle signals for later + continuation work +- `71.5` can render the operational loop from endpoints instead of scraping + markdown or runtime internals + +## Validation + +```bash +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +``` + +## Success Condition + +Wave 71.0 succeeds if the Queen can answer, deterministically and durably: + +- what we were doing +- what is waiting on the operator +- what I should do next +- what standing procedures I should follow diff --git a/docs/waves/wave_71_5/team_a_prompt.md b/docs/waves/wave_71_5/team_a_prompt.md new file mode 100644 index 0000000..352ad68 --- /dev/null +++ b/docs/waves/wave_71_5/team_a_prompt.md @@ -0,0 +1,106 @@ +# Wave 71.5 - Team A: Operations Shell + +**Theme:** Give the operational loop a first-class home in the product +surface. + +## Context + +Read these first: + +- `docs/waves/wave_71_5/wave_71_5_plan.md` +- `docs/waves/wave_71_0/design_note.md` +- `CLAUDE.md` + +### Key seams to read before coding + +- `formicos-app.ts` — `ViewId` type union at line 27. `NAV` array at lines + 29–37 (7 entries). Nav grid at line 62: `repeat(7, minmax(72px, auto))` — + must become `repeat(8, ...)`. Responsive breakpoint at line 239: + `repeat(5, ...)` — verify 8 tabs fit or adjust. `navTab()` at line 322. + View routing switch in the render method. 
+- Void Protocol tokens and `sharedStyles` in `../styles/shared.js` — follow + existing card/layout patterns from `settings-view.ts`. + +Assume `71.0` landed: + +- `GET /api/v1/workspaces/{workspace_id}/operations/summary` +- `GET /api/v1/workspaces/{workspace_id}/operations/actions` +- `GET /api/v1/workspaces/{workspace_id}/queen-journal` +- `GET /api/v1/workspaces/{workspace_id}/operating-procedures` +- `PUT /api/v1/workspaces/{workspace_id}/operating-procedures` + +## Your Files (exclusive ownership) + +- `frontend/src/components/operations-view.ts` - **new** +- `frontend/src/components/formicos-app.ts` +- `frontend/src/components/system-overview.ts` only if a tiny badge/summary + tweak is needed + +## Do Not Touch + +- `frontend/src/components/operations-inbox.ts` - Team B owns +- `frontend/src/components/queen-journal-panel.ts` - Team C owns +- `frontend/src/components/operating-procedures-editor.ts` - Team C owns +- `frontend/src/components/operations-summary-card.ts` - Team C owns +- backend files unless a tiny presentational payload shim is unavoidable + +## Overlap Coordination + +- You are the only owner of `operations-view.ts`. +- Team B and Team C deliver leaf components; mount them, do not rewrite them. + +--- + +## Track 1: Operations Tab + Nav Integration + +Add a new top-level `Operations` tab to the app shell. + +Requirements: + +- add the tab in `formicos-app.ts` +- mount `fc-operations-view` +- keep the current nav legible; do not create a second side rail +- if useful, surface a pending-review badge on the Operations tab using the + summary endpoint + +This is the right moment to add a new surface. Do not bury the operational +loop inside Settings. + +--- + +## Track 2: Operations View Shell + +Create `fc-operations-view` as the integrator for Wave 71.5. 
+ +Suggested layout: + +- top summary/header row +- left column: Team B action inbox +- right column: Team C summary card, journal panel, procedures editor + +Requirements: + +- fetch and pass the active workspace ID cleanly +- use the existing design system and current visual language +- give the page strong empty states: + - no pending actions + - no journal yet + - no procedures written yet + +Do not parse backend markdown or derive your own operational summary in the +browser. Use the `71.0` seams. + +## Acceptance Gates + +- [ ] `Operations` appears as a top-level nav item +- [ ] `fc-operations-view` exists and is the only shell owner +- [ ] Team B and Team C components mount cleanly inside it +- [ ] a pending-review badge is shown if clean and cheap +- [ ] no duplicate queue UI scattered across multiple tabs + +## Validation + +```bash +npm run build +npm run lint # if lint config exists +``` diff --git a/docs/waves/wave_71_5/team_b_prompt.md b/docs/waves/wave_71_5/team_b_prompt.md new file mode 100644 index 0000000..9ada5bc --- /dev/null +++ b/docs/waves/wave_71_5/team_b_prompt.md @@ -0,0 +1,109 @@ +# Wave 71.5 - Team B: Action Review + +**Theme:** Turn the backend action queue into a real operator inbox with +review, reasoning, and continuation control. + +## Context + +Read these first: + +- `docs/waves/wave_71_5/wave_71_5_plan.md` +- `docs/waves/wave_71_0/design_note.md` +- `CLAUDE.md` + +### Key seams to read before coding + +- `proposal-card.ts` — existing blast-radius rendering pattern (score, level + pill, recommendation pill, factors list). Follow this visual language for + action items that carry blast-radius metadata. +- `approval-queue.ts` — existing live-pending widget. Small and + overview-friendly. The new inbox is the richer replacement for the + Operations tab; the approval queue stays as-is for the Queen overview. +- Wave 72 is expected to add new action kinds. 
Build this inbox so new kinds + can slot in without changing the overall component shape. +- Void Protocol tokens in `../styles/shared.js` — use existing design system. + +## Your Files (exclusive ownership) + +- `frontend/src/components/operations-inbox.ts` - **new** +- `frontend/src/components/approval-queue.ts` only if a tiny compact refresh is + useful for consistency + +## Do Not Touch + +- `frontend/src/components/operations-view.ts` - Team A owns +- Team C operational-memory components +- backend files unless a tiny payload shim is unavoidable + +## Overlap Coordination + +- Build a self-contained leaf component. Team A will mount it. +- If you touch `approval-queue.ts`, keep it compact and overview-friendly. + The rich workflow belongs in `operations-inbox.ts`. + +--- + +## Track 2: Operations Inbox + +Create `fc-operations-inbox`. + +It should fetch: + +- `GET /api/v1/workspaces/{workspace_id}/operations/actions` + +And render sections such as: + +- Pending Review +- Recent Automatic Actions +- Deferred / Self-Rejected +- Continuation Suggestions + +Do not hardcode the inbox around continuation-only semantics. The component +should fundamentally render by action `status` and `kind`, with +continuation-specific decoration as just one variant. + +Each item should show: + +- title +- rationale +- action kind +- source category +- blast radius +- estimated cost +- confidence +- thread / milestone context when present + +--- + +## Approve / Reject Workflow + +Use the new action endpoints: + +- `POST .../approve` +- `POST .../reject` + +Requirements: + +- approving is one click +- rejecting captures an optional reason in the UI +- reason entry should be lightweight, not a heavy modal workflow +- do not recompute blast radius or autonomy in the browser +- do not use legacy `ApprovalType` labels as the operator-facing action meaning + +If an item is already executed or rejected, render it as history, not as an +actionable card. 
+ +## Acceptance Gates + +- [ ] `fc-operations-inbox` exists and is self-contained +- [ ] pending, recent, and deferred states render clearly +- [ ] rejection reason can be entered and submitted +- [ ] continuation items are visible in the same inbox, not hidden elsewhere +- [ ] no browser-side recomputation of queue semantics + +## Validation + +```bash +npm run build +npm run lint # if lint config exists +``` diff --git a/docs/waves/wave_71_5/team_c_prompt.md b/docs/waves/wave_71_5/team_c_prompt.md new file mode 100644 index 0000000..e2f9c40 --- /dev/null +++ b/docs/waves/wave_71_5/team_c_prompt.md @@ -0,0 +1,113 @@ +# Wave 71.5 - Team C: Operational Memory Surfaces + +**Theme:** Make the Queen's working memory and standing procedures visible and +editable without making the operator open raw files unless they want to. + +## Context + +Read these first: + +- `docs/waves/wave_71_5/wave_71_5_plan.md` +- `docs/waves/wave_71_0/design_note.md` +- `CLAUDE.md` + +### Key seams to read before coding + +- `settings-view.ts` — existing card pattern (`.settings-card` class, read-only + labels, inline editing). Follow this for the procedures editor save UX. +- `autonomy-card.ts` — existing self-contained component pattern with fetch + + render + empty state. Follow this for all three components. +- Void Protocol tokens in `../styles/shared.js` — use existing design system. 
+ +Assume `71.0` landed: + +- `GET /api/v1/workspaces/{workspace_id}/queen-journal` +- `GET /api/v1/workspaces/{workspace_id}/operating-procedures` +- `PUT /api/v1/workspaces/{workspace_id}/operating-procedures` +- `GET /api/v1/workspaces/{workspace_id}/operations/summary` + +## Your Files (exclusive ownership) + +- `frontend/src/components/queen-journal-panel.ts` - **new** +- `frontend/src/components/operating-procedures-editor.ts` - **new** +- `frontend/src/components/operations-summary-card.ts` - **new** + +## Do Not Touch + +- `frontend/src/components/operations-view.ts` - Team A owns +- `frontend/src/components/operations-inbox.ts` - Team B owns +- backend files unless a tiny payload shim is unavoidable + +## Overlap Coordination + +- Build self-contained leaf components that Team A can mount directly. +- Keep the operational summary card concise; the inbox already handles action + detail. + +--- + +## Track 3A: Journal Panel + +Create `fc-queen-journal-panel`. + +Requirements: + +- fetch the Queen journal from the `71.0` endpoint +- default to recent entries, with a simple "load more" or refresh affordance +- present entries as an operational log, not as chat bubbles +- include good empty-state copy when no journal exists yet + +This panel is for "what happened while I was away?" + +--- + +## Track 3B: Operating Procedures Editor + +Create `fc-operating-procedures-editor`. + +Requirements: + +- fetch current procedures +- allow direct text editing +- save via `PUT /api/v1/workspaces/{workspace_id}/operating-procedures` +- show subtle save state and failure state +- include a helpful empty template for first-time users + +This editor is the standing-policy surface for autonomy. Treat it as important. + +--- + +## Track 3C: Operations Summary Card + +Create `fc-operations-summary-card`. 
+ +Use the operations summary endpoint to show compact, high-signal state: + +- pending review count +- active milestone count +- operator idle / active state +- top continuation candidate +- top sync issue, if any +- recent progress snippet + +If the endpoint exposes counts by action kind, show them compactly. This helps +Wave 72 add knowledge-review and procedure-suggestion actions without +redesigning the summary card. + +Do not duplicate the full inbox or the full journal here. This is the +at-a-glance orientation card. + +## Acceptance Gates + +- [ ] journal panel exists and reads from the journal endpoint +- [ ] procedures editor exists and writes through the procedures endpoint +- [ ] summary card exists and reads from the operations summary endpoint +- [ ] all three components have strong empty states +- [ ] no raw markdown parsing in the browser + +## Validation + +```bash +npm run build +npm run lint # if lint config exists +``` diff --git a/docs/waves/wave_71_5/wave_71_5_plan.md b/docs/waves/wave_71_5/wave_71_5_plan.md new file mode 100644 index 0000000..7a4e6c8 --- /dev/null +++ b/docs/waves/wave_71_5/wave_71_5_plan.md @@ -0,0 +1,106 @@ +# Wave 71.5: Mission Control Surface + +**Status:** Dispatch-ready packet +**Predecessor:** Wave 71.0 +**Theme:** Turn the operational loop into a dedicated operator surface instead +of scattering it across Settings, Overview, and chat. + +## Packet Authority + +Use these docs: + +- `docs/waves/wave_71_0/design_note.md` — invariants that apply to both halves +- `docs/waves/wave_71_5/team_a_prompt.md` +- `docs/waves/wave_71_5/team_b_prompt.md` +- `docs/waves/wave_71_5/team_c_prompt.md` + +## Locked Boundaries + +- One new top-level surface: `Operations`. +- One owner for `operations-view.ts`: Team A. +- Use `71.0` endpoints and files; do not re-derive backend state in the browser. +- Keep existing Queen overview and Settings concise; the new operational loop + should live primarily in the Operations tab. 
+ +## Scope + +| Track | Outcome | Team | +|------|---------|------| +| 1 | Operations tab shell + nav integration | A | +| 2 | Action inbox/history + continuation review | B | +| 3 | Journal panel + procedures editor + ops summary card | C | + +## Team Missions + +### Team A - Operations Shell + +Own the new tab and layout. Mount Team B and Team C leaf components. Do not +rebuild their internals. + +### Team B - Action Review + +Own the operator inbox: + +- pending actions +- recent automatic actions +- continuation proposals +- future action kinds without an inbox redesign +- approve/reject workflow with reasons + +### Team C - Operational Memory Surfaces + +Own the human-readable operational surfaces: + +- journal panel +- procedures editor +- compact operational summary card + +## Merge Order + +Recommended merge order: + +1. Team B +2. Team C +3. Team A + +Why: + +- Team B and Team C create the leaf components. +- Team A integrates them in one Operations surface and nav path. + +## Known Housekeeping + +- `formicos-app.ts` line 62: `grid-template-columns: repeat(7, ...)` — must + become `repeat(8, ...)` for the new Operations tab. +- `formicos-app.ts` line 239: responsive breakpoint `repeat(5, ...)` — verify + 8 tabs still fit or adjust breakpoint. +- `ViewId` type union (line 27) needs `'operations'` added. +- `NAV` array (lines 29–37) needs the Operations entry. 
+ +## Acceptance Focus + +- one clear home for the operational loop +- pending actions are reviewable without scanning chat +- inbox semantics come from action `kind` and `status`, not legacy approval + labels +- rejection reasons are captured in the UI +- operator can inspect/edit procedures directly +- operator can see what happened while away via journal + summary +- browser does not parse markdown plans or runtime internals directly + +## Validation + +```bash +npm run build +npm run lint # if lint config exists +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +``` + +## Success Condition + +Wave 71.5 succeeds if the operator can open one tab and immediately answer: + +- what is waiting on me +- what happened while I was away +- what the Queen plans to continue next +- what standing procedures are currently governing autonomous behavior diff --git a/docs/waves/wave_72/design_note.md b/docs/waves/wave_72/design_note.md new file mode 100644 index 0000000..7219926 --- /dev/null +++ b/docs/waves/wave_72/design_note.md @@ -0,0 +1,52 @@ +# Wave 72 Design Note + +Four invariants govern this wave. + +## 1. New action kinds, not new mechanisms or new schedulers + +Knowledge review items, continuation proposals, workflow template proposals, +and procedure suggestions all flow through the existing action queue +(`action_queue.py`). The inbox (`operations-inbox.ts`) already renders by +`kind` via `_kindClass()`. + +Do not create a second queue, a second inbox, or a second background loop for +these features. `app.py` should remain the single scheduler seam for the +30-minute operational cadence. New work plugs into that loop as pure helper +functions. + +The existing status machine (`pending_review` / `approved` / `rejected` / +`executed` / `self_rejected` / `failed`) applies to every new kind unchanged. + +## 2. Knowledge review is NOT automatic correction + +The system surfaces entries for review. The operator decides. 
The system +never autonomously invalidates, edits, or deletes knowledge entries. Even +at `autonomous` level, knowledge mutations require human judgment. + +This is the "human-in-the-loop for what the system believes" invariant. +Autonomy applies to work continuation and maintenance dispatch. It does +not apply to what the system considers true. + +## 3. Prefer existing product seams over inventing parallel UI flows + +Wave 72 should reuse and expose real surfaces that already exist: + +- use the existing workspace ingest backend instead of inventing a second + "docs upload" pipeline +- use the existing approve/reject action queue contract instead of a new + review inbox contract +- use the real maintenance-policy / workspace-config path instead of fake + settings saves + +If an existing flow is hidden or unreachable, surface it in the active UI. +Do not build a duplicate. + +## 4. Polish items are blockers, not nice-to-haves + +The trigger wiring bug (both `docs-index` and `codebase-index` addon.yaml +manifests point manual triggers at `indexer.py::incremental_reindex` instead +of `search.py::handle_reindex`), the model-selection filtering gap, the lack +of visible document ingest on the active knowledge surface, and the Settings +structural inversion all make the product feel broken to a new user. + +They ship in this wave. diff --git a/docs/waves/wave_72/team_a_prompt.md b/docs/waves/wave_72/team_a_prompt.md new file mode 100644 index 0000000..8214794 --- /dev/null +++ b/docs/waves/wave_72/team_a_prompt.md @@ -0,0 +1,279 @@ +# Wave 72 - Team A: Knowledge Governance + +Theme: surface knowledge quality problems in the Operations inbox, and make +document ingest visible on the active Knowledge surface. 
+ +## Read First + +- `docs/waves/wave_72/wave_72_plan.md` +- `docs/waves/wave_72/design_note.md` +- `docs/waves/wave_72_polish_reference.md` +- `CLAUDE.md` + +## Repo Truth You Must Start From + +- The active Knowledge tab is `frontend/src/components/knowledge-browser.ts`. +- There is already a working upload/ingest flow in + `frontend/src/components/knowledge-view.ts`, but `formicos-app.ts` does not + render that component. +- The existing workspace ingest backend already exists in + `src/formicos/surface/routes/colony_io.py`: + - `POST /api/v1/workspaces/{workspace_id}/ingest` + - `GET /api/v1/workspaces/{workspace_id}/files` +- Manual knowledge entry already exists in `knowledge-browser.ts`. +- Both addon trigger bugs are real: + - `addons/docs-index/addon.yaml` manual trigger points at + `indexer.py::incremental_reindex` + - `addons/codebase-index/addon.yaml` manual trigger points at + `indexer.py::incremental_reindex` + - both should point to `search.py::handle_reindex` +- `knowledge_entry_usage` is a separate dict in `projections.py`; it is not + embedded on the memory entry objects themselves. +- Team B owns `app.py` and the operational sweep call order. + +## Key Seams To Read Before Coding + +- `src/formicos/surface/app.py` + Read `_operational_sweep_loop()`. Team B owns the scheduler, but your + scanner runs from that loop. +- `src/formicos/surface/self_maintenance.py` + Read `run_proactive_dispatch()`, `_queue_insight()`, and the blast-radius / + autonomy helpers so your scan output matches the existing action style. +- `src/formicos/surface/action_queue.py` + Read `create_action()`, `append_action()`, `read_actions()`, + `update_action()`, and `list_actions()`. +- `src/formicos/surface/projections.py` + Read `memory_entries`, `knowledge_entry_usage`, and `colony_outcomes`. +- `src/formicos/surface/routes/api.py` + Read the existing action endpoints and knowledge CRUD endpoints. 
+- `src/formicos/surface/routes/knowledge_api.py` + Read: + - `POST /api/v1/knowledge/{entry_id}/feedback` + - operator action endpoint for `invalidate` / `reinstate` +- `src/formicos/surface/routes/colony_io.py` + Read `ingest_workspace_file()` and `upload_workspace_files()`. +- `frontend/src/components/operations-inbox.ts` + Read `_kindClass()` and the current approve/reject flow. +- `frontend/src/components/knowledge-browser.ts` + Read the active Knowledge tab rendering. +- `frontend/src/components/knowledge-view.ts` + Read the existing `Upload & Ingest` UI and reuse it. +- `addons/docs-index/addon.yaml` +- `addons/codebase-index/addon.yaml` +- `src/formicos/addons/docs_index/search.py` +- `src/formicos/addons/codebase_index/search.py` + +## Your Files + +- `src/formicos/surface/knowledge_review.py` - new +- `src/formicos/surface/routes/api.py` - additive review endpoint only +- `frontend/src/components/operations-inbox.ts` - add `knowledge_review` card +- `frontend/src/components/knowledge-health-card.ts` - new +- `frontend/src/components/knowledge-browser.ts` - visible ingest/reindex flow +- `addons/docs-index/addon.yaml` - trigger fix +- `addons/codebase-index/addon.yaml` - trigger fix +- `tests/unit/surface/test_knowledge_review.py` - new + +Read but do not own: + +- `frontend/src/components/knowledge-view.ts` +- `src/formicos/surface/routes/colony_io.py` +- `src/formicos/surface/app.py` + +## Do Not Touch + +- `src/formicos/surface/knowledge_catalog.py` +- `src/formicos/surface/projections.py` +- `src/formicos/surface/queen_runtime.py` +- `frontend/src/components/settings-view.ts` +- `frontend/src/components/formicos-app.ts` +- `frontend/src/components/caste-editor.ts` + +## Overlap Rules + +- Team B owns the scheduler in `app.py`. You provide pure helper functions. +- Team C adds different inbox kinds in `operations-inbox.ts`. Coordinate, but + do not reopen each other's rendering logic. +- Team C owns Settings / top-nav polish. 
Keep your UI work inside the active + Knowledge tab and the inbox. + +## Track 1: Knowledge Review Scanner + +Create `src/formicos/surface/knowledge_review.py` with a pure scan function: + +```python +async def scan_knowledge_for_review( + data_dir: str, + workspace_id: str, + projections: ProjectionStore, + *, + briefing_insights: list[dict[str, object]] | None = None, +) -> int: + """Queue review actions for entries that need human attention.""" +``` + +Requirements: + +- queue `kind="knowledge_review"` actions only +- use existing action queue helpers +- dedupe against existing pending review actions for the same `entry_id` +- do not mutate knowledge directly + +Review criteria: + +1. Outcome-correlated failures +- entry accessed by at least 3 colonies +- more than 50% of those colonies failed + +2. Contradictions +- reuse existing contradiction insight logic +- do not generate a second full briefing inside the same sweep cycle +- preferred seam: Team B passes the already-generated briefing insights into + your scanner + +3. Stale authority +- high-confidence entry +- old `last_accessed` +- not `decay_class="permanent"` + +4. Unconfirmed machine-generated entries +- influential entries with no operator-confirmed provenance signal + +Payload should include enough detail for the inbox to explain why the entry +was flagged: + +- `entry_id` +- `title` +- `content_preview` +- `review_reason` +- `confidence` +- `access_count` +- failure stats when applicable + +Tests: + +1. failure-correlated entry queues review action +2. contradiction insight becomes review action +3. stale authority queues review action +4. unconfirmed machine-generated entry queues review action +5. permanent entries are excluded from stale review +6. 
dedupe skips an existing pending review for the same entry + +## Track 2: Review Processing + +Add a dedicated endpoint: + +`POST /api/v1/workspaces/{workspace_id}/operations/actions/{action_id}/review` + +Body: + +```json +{ "decision": "confirm" | "invalidate", "reason": "..." } +``` + +Processing rules: + +- `confirm` + Reuse the same replay-safe confidence path as + `POST /api/v1/knowledge/{entry_id}/feedback` with positive operator feedback, + then mark the action executed. +- `invalidate` + Reuse the existing operator overlay invalidation path from + `knowledge_api.py`, then mark the action executed. +- `edit` + stays on the existing `PUT /api/v1/knowledge/{entry_id}` path from the + Knowledge tab UI; do not invent a second edit backend. + +Important: + +- do not invent a brand-new raw mutation scheme if a replay-safe operator + action already exists elsewhere +- the action queue item is the workflow wrapper, not the source of truth for + knowledge mutation semantics + +## Track 3: Inbox Rendering + Health Card + +In `frontend/src/components/operations-inbox.ts`: + +- add `knowledge_review` to `_kindClass()` +- add a card rendering branch for `knowledge_review` +- show: + - entry title + - preview + - review reason + - confidence + - usage stats + - failure stats when present +- actions: + - `Confirm` + - `Edit` + - `Invalidate` + +Create `frontend/src/components/knowledge-health-card.ts`. + +Use real routes: + +- `GET /api/v1/knowledge?workspace={id}&limit=200` +- `GET /api/v1/workspaces/{id}/operations/actions?kind=knowledge_review` + +Show: + +- total entries +- pending review count +- average confidence +- top domains +- stale review count +- contradiction review count + +Mount the health card in `knowledge-browser.ts`, near the active Knowledge +header/search surface. Keep it compact. + +## Track 4: Visible Upload And Ingest + Trigger Fix + +This track is about surfacing an existing capability that is currently hidden. 
+ +Do this in `knowledge-browser.ts`: + +- add an `Upload & Ingest` control by porting/reusing the existing logic from + `knowledge-view.ts` +- add a `Refresh Library` or equivalent status refresh +- add a small reindex group: + - `Reindex Docs` + - `Reindex Code` +- show inline success/failure status for ingest and reindex operations + +Use existing backend seams: + +- `POST /api/v1/workspaces/{workspace_id}/ingest` +- `GET /api/v1/workspaces/{workspace_id}/files` +- `POST /api/v1/addons/docs-index/trigger` +- `POST /api/v1/addons/codebase-index/trigger` + +Do not invent: + +- a second upload pipeline +- fake addon `/status` routes +- a second knowledge-ingest backend + +Fix both addon manifests: + +- `addons/docs-index/addon.yaml` -> `search.py::handle_reindex` +- `addons/codebase-index/addon.yaml` -> `search.py::handle_reindex` + +## Acceptance Gates + +- knowledge review actions queue from the operational sweep +- contradiction reuse does not require a second full briefing pass +- review decisions use replay-safe existing knowledge mutation paths +- inbox renders `knowledge_review` cards cleanly +- knowledge health card is visible on the active Knowledge tab +- the active Knowledge tab has visible `Upload & Ingest` +- both manual addon triggers point at `handle_reindex` +- reindex controls work from the Knowledge tab + +## Validation + +```bash +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +npm run build +``` diff --git a/docs/waves/wave_72/team_b_prompt.md b/docs/waves/wave_72/team_b_prompt.md new file mode 100644 index 0000000..a10ef8a --- /dev/null +++ b/docs/waves/wave_72/team_b_prompt.md @@ -0,0 +1,223 @@ +# Wave 72 - Team B: Autonomous Continuation + +Theme: make the Queen continue work coherently across sessions and idle time, +using the existing action queue and approval machinery. 
+ +## Read First + +- `docs/waves/wave_72/wave_72_plan.md` +- `docs/waves/wave_72/design_note.md` +- `CLAUDE.md` + +## Repo Truth You Must Start From + +- `app.py` currently has two relevant loops: + - `_maintenance_loop()` for consolidation services and proactive dispatch + - `_operational_sweep_loop()` for fast-cadence action processing +- Wave 72 should not leave continuation logic split across parallel schedulers. +- `build_operations_summary()` in `operations_coordinator.py` already computes: + - continuation candidates + - operator activity + - idle time +- `approve_action()` already knows how to execute queued work when the action + payload includes `suggested_colony`. +- `maintenance_policy` lives in `ws.config`, not in a separate projection map. + +## Key Seams To Read Before Coding + +- `src/formicos/surface/app.py` + You own scheduler integration and call order. +- `src/formicos/surface/operations_coordinator.py` + Read `build_operations_summary()`. +- `src/formicos/surface/self_maintenance.py` + Read: + - `run_proactive_dispatch()` + - `estimate_blast_radius()` + - `compute_autonomy_score()` + - maintenance policy loading from `ws.config` +- `src/formicos/surface/action_queue.py` + Read `create_action()`, `append_action()`, `read_actions()`, + `update_action()`, and status constants. +- `src/formicos/surface/routes/api.py` + Read `approve_action()` and `reject_action()`. +- `src/formicos/surface/queen_runtime.py` + Read `respond()` and the existing continuity/session-summary injection order. +- `src/formicos/surface/operational_state.py` + Read `append_journal_entry()`. 
+ +## Your Files + +- `src/formicos/surface/continuation.py` - new +- `src/formicos/surface/app.py` - scheduler integration +- `src/formicos/surface/queen_runtime.py` - warm-start cue +- `tests/unit/surface/test_autonomous_continuation.py` - new + +Read but do not own: + +- `src/formicos/surface/operations_coordinator.py` +- `src/formicos/surface/self_maintenance.py` +- `src/formicos/surface/action_queue.py` + +## Do Not Touch + +- `src/formicos/surface/knowledge_catalog.py` +- `src/formicos/surface/projections.py` +- `frontend/src/components/operations-inbox.ts` +- `frontend/src/components/settings-view.ts` +- `frontend/src/components/formicos-app.ts` + +## Overlap Rules + +- Team A provides `scan_knowledge_for_review(...)`. +- Team C provides `extract_workflow_patterns(...)` and + `detect_operator_patterns(...)`. +- You own the scheduler in `app.py` and wire the background call order. + +## Track 5: Continuation Proposals + +Create `src/formicos/surface/continuation.py` with: + +```python +async def queue_continuation_proposals( + data_dir: str, + workspace_id: str, + projections: ProjectionStore, + dispatcher: MaintenanceDispatcher, +) -> int: + """Queue continuation actions for work that is ready to resume.""" +``` + +Rules: + +1. Read candidates from `build_operations_summary(...)`. +2. Respect operator activity: + - if the operator has interacted recently, do not queue or execute + continuation work +3. Read maintenance/autonomy policy from `ws.config["maintenance_policy"]`. +4. Estimate blast radius with the existing helper. +5. Respect daily budget before auto-executing anything. +6. Dedupe pending continuation actions by `thread_id`. 
+ +Important implementation rule: + +- continuation actions should reuse the existing `approve_action()` seam by + including a `payload.suggested_colony` +- do not invent a second approval/execution mechanism if the existing one can + dispatch the colony for you + +Suggested payload shape: + +```python +payload = { + "thread_id": candidate["thread_id"], + "description": candidate["description"], + "priority": candidate.get("priority", "medium"), + "blast_radius_score": blast.score, + "blast_radius_level": blast.level, + "suggested_colony": { + "task": candidate["description"], + "caste": "coder", + "strategy": "sequential", + "max_rounds": 3, + }, +} +``` + +That keeps queue review, manual approval, and automatic execution on one +shared contract. + +## Track 6: Consolidate The Background Scheduler + +You own the background cadence in `app.py`. + +Wave 72 should have one clear operational sweep: + +1. `run_proactive_dispatch()` — capture returned briefing insights +2. Team A `scan_knowledge_for_review(...)` — pass briefing insights via + `briefing_insights` kwarg so contradiction detection reuses the briefing + instead of re-generating it +3. `queue_continuation_proposals(...)` +4. `execute_idle_continuations(...)` +5. Team C `extract_workflow_patterns(...)` +6. Team C `detect_operator_patterns(...)` +7. existing approved-action processing / compaction + +If you move `run_proactive_dispatch()` into `_operational_sweep_loop()`, then +the old daily `_maintenance_loop()` should stay responsible only for the +consolidation services. Do not run proactive dispatch in both places. + +This is the main structural cleanup in the wave. + +## Track 7: Cross-Session Warm Start And Idle Execution + +In `queen_runtime.py`, enrich the first Queen response of a returning session +with a continuation cue. 
+ +Requirements: + +- use the same continuation candidate source as the sweep +- keep it as a proposal, not an automatic dispatch +- cap the hint block to the top few candidates +- place it after the existing continuity/session-summary context, not before + +The output should nudge the Queen toward: + +- "here is what was in progress" +- "here is the most promising next step" +- "confirm or redirect me" + +Even at `autonomous`, the first turn after the operator returns should stay +proposal-first. + +Idle-time execution also lives in this track. Add to `continuation.py`: + +```python +async def execute_idle_continuations( + data_dir: str, + workspace_id: str, + projections: ProjectionStore, + dispatcher: MaintenanceDispatcher, + *, + max_per_sweep: int = 1, +) -> int: + """Execute low-risk continuation actions during operator idle time.""" +``` + +Guard rails: + +1. workspace autonomy level is `autonomous` +2. operator idle time exceeds the configured threshold +3. no pending-review actions of any kind exist +4. blast radius remains low at execution time +5. daily budget still has capacity + +Execution rules: + +- limit to 1 continuation per sweep cycle +- journal every autonomous continuation +- update action status cleanly +- increment daily spend + +## Config Guidance + +If you need an idle threshold setting, keep it in workspace config / maintenance +policy, not in a new sidecar file or a second settings system. + +Do not invent a new persistence mechanism for this. 
+ +## Acceptance Gates + +- continuation actions are queued through the action queue +- queued continuation actions reuse `approve_action()` via `suggested_colony` +- recent operator activity blocks continuation dispatch +- blast radius and budget gate auto-execution +- the background scheduler has one clear ownership/call order in `app.py` +- proactive dispatch is not duplicated across two loops +- warm start surfaces continuation opportunities on the first returning turn +- idle-time continuation work journals what it did + +## Validation + +```bash +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +``` diff --git a/docs/waves/wave_72/team_c_prompt.md b/docs/waves/wave_72/team_c_prompt.md new file mode 100644 index 0000000..74a406e --- /dev/null +++ b/docs/waves/wave_72/team_c_prompt.md @@ -0,0 +1,268 @@ +# Wave 72 - Team C: Workflow Learning, Product Polish, And Docs + +Theme: learn from successful work, codify operator preferences, and fix the +product surfaces that still feel cluttered or underpowered. + +## Read First + +- `docs/waves/wave_72/wave_72_plan.md` +- `docs/waves/wave_72/design_note.md` +- `docs/waves/wave_72_polish_reference.md` +- `CLAUDE.md` + +## Repo Truth You Must Start From + +- `settings-view.ts` is structurally inverted: mostly read-only inventory, very + little real control. +- `config-overrides` is not a trustworthy generic settings-persistence seam for + product polish. Do not build fake saves on top of it. +- model policy is only partially editable today + (`PATCH /api/v1/models/{address}`), and model add/hide are still missing. +- selection surfaces still expose models that are `no_key` / unavailable unless + the frontend filters them out. +- the top nav is overcrowded and centered. +- the protocol badges look interactive but are not. +- Team B owns `app.py` and calls your workflow-learning helpers from the sweep. 
+ +## Key Seams To Read Before Coding + +- `src/formicos/surface/workflow_learning.py` - you will create this +- `src/formicos/surface/operational_state.py` + Read `append_procedure_rule()`, `load_procedures()`, and `save_procedures()`. +- `src/formicos/surface/action_queue.py` + Read queue creation and status update helpers. +- `src/formicos/surface/routes/api.py` + Read: + - `approve_action()` / `reject_action()` + - `update_model_policy()` + - `get_autonomy_status()` + - addon config routes +- `src/formicos/surface/model_registry_view.py` +- `src/formicos/core/types.py` + Read `ModelRecord` and `MaintenancePolicy`. +- `src/formicos/surface/mcp_server.py` + Read `set_maintenance_policy()` / `get_maintenance_policy()` so your HTTP + route, if added, mirrors the existing contract instead of inventing a new one. +- `frontend/src/components/operations-inbox.ts` +- `frontend/src/components/settings-view.ts` +- `frontend/src/components/model-registry.ts` +- `frontend/src/components/caste-editor.ts` +- `frontend/src/components/formicos-app.ts` + +## Your Files + +- `src/formicos/surface/workflow_learning.py` - new +- `src/formicos/surface/operational_state.py` - additive helper only +- `src/formicos/surface/routes/api.py` - additive model-admin / + maintenance-policy routes if needed +- `src/formicos/surface/model_registry_view.py` - additive hidden-field support +- `src/formicos/core/types.py` - additive model field only if needed +- `frontend/src/components/operations-inbox.ts` - new action kinds +- `frontend/src/components/settings-view.ts` - writable-first restructure +- `frontend/src/components/model-registry.ts` - model admin +- `frontend/src/components/caste-editor.ts` - model filtering +- `frontend/src/components/formicos-app.ts` - nav cleanup + badge fix +- `CLAUDE.md` +- `docs/AUTONOMOUS_OPERATIONS.md` - new +- `tests/unit/surface/test_workflow_learning.py` - new + +## Do Not Touch + +- `src/formicos/surface/knowledge_catalog.py` +- 
`src/formicos/surface/projections.py` +- `src/formicos/surface/queen_runtime.py` +- `src/formicos/surface/app.py` +- `frontend/src/components/knowledge-browser.ts` + +## Overlap Rules + +- Team A owns active Knowledge-tab ingest/reindex work. +- Team B owns the scheduler in `app.py`. +- You provide pure learning helpers and UI/admin surfaces. + +## Track 8: Workflow Pattern Recognition + +Create `src/formicos/surface/workflow_learning.py` with a deterministic pattern +extractor that proposes `kind="workflow_template"` actions. + +Requirements: + +- derive patterns from successful outcome history +- keep matching deterministic +- dedupe against existing learned templates and pending workflow-template + actions +- queue proposals through the existing action queue only + +Keep the pattern heuristic simple: + +- similar caste set +- same strategy +- clear repeated task-shape overlap +- repeated success across multiple threads + +When approved, save a learned template using the existing template manager. + +Important: + +- approval should extend the existing approve-action flow, not create a second + template-approval mechanism + +## Track 9: Procedure Suggestions + +Add a second detector in `workflow_learning.py` that proposes +`kind="procedure_suggestion"` actions from repeated operator behavior. + +Use conservative heuristics only. Good starting signals: + +- repeated rejection of autonomous work on a shared keyword/domain +- repeated "review after coding" patterns +- repeated testing-after-change behavior + +When approved, append the rule through `append_procedure_rule()`. + +Keep the logic explainable. The inbox card should be able to say why the system +noticed the pattern. + +## Track 10: Product Surface Polish + +### A. Make Settings writable-first + +Restructure `frontend/src/components/settings-view.ts` around real operator +controls: + +1. Workspace +2. Budgeting +3. Governance +4. Model defaults / selection policy +5. 
Integrations + +Collapse or move read-only diagnostics: + +- system overview +- protocol inventory +- addon summary inventory +- full model inventory table +- retrieval diagnostics + +Do not leave Settings as a second dashboard. + +### B. Use a real persistence seam for budgeting and autonomy + +If there is no clean HTTP route yet, add a small +`GET/PUT /api/v1/workspaces/{workspace_id}/maintenance-policy` pair that mirrors +the JSON contract already used by the MCP maintenance-policy helpers. + +Use that route for: + +- autonomy level +- daily maintenance budget +- max maintenance colonies +- auto-action policy if surfaced + +Do not fake these saves through `config-overrides`. + +For workspace taxonomy tags: + +- if there is no clean dedicated write seam, keep them read-only this wave +- do not invent a fake save path just to make the card editable + +### C. Add model admin, not just model filtering + +This wave should close the biggest model-lifecycle gaps called out in the +polish reference. + +Add a bounded model-admin surface in `model-registry.ts`: + +- `Add Model` flow +- `Hide / Unhide` model +- `Show unavailable` toggle +- existing policy edit remains + +Recommended backend shape: + +- extend `ModelRecord` with additive `hidden: bool = False` — this is an + additive field with a default; it does not affect the event union, replay + safety, or existing serialization. No ADR required. +- surface `hidden` through `model_registry_view.py` +- extend `PATCH /api/v1/models/{address}` to allow `hidden` +- add `POST /api/v1/models` to append a new registry entry and persist it + +Selection rules: + +- default selectors must hide models that are: + - `hidden` + - `no_key` + - `unavailable` + - `error` +- admin views may still show them + +`caste-editor.ts` must honor that default filtering. + +### D. 
Clean up the top nav + +In `formicos-app.ts`: + +- left-align the main workflow tabs +- split primary vs secondary destinations +- keep `Queen`, `Knowledge`, `Workspace`, and `Operations` visually primary + +This should feel like a product nav, not a debug toolbar. + +### E. Fix the protocol badges + +Either: + +- make them real controls that navigate to the relevant Integrations / + protocol details surface + +or: + +- visually demote them so they no longer look clickable + +Do not leave animated pseudo-buttons that do nothing. + +## Track 11: Documentation Refresh + +### CLAUDE.md + +Refresh it to match the post-Wave-72 reality: + +- operational layer / action queue / procedures / journal +- workflow learning additions +- product-surface truth +- current tool count and main seams + +### docs/AUTONOMOUS_OPERATIONS.md + +Write the operator guide for: + +- autonomy levels +- budgeting and maintenance policy +- operations inbox +- continuation behavior +- knowledge review +- workflow-template proposals +- procedure suggestions +- journal / procedures + +Write for someone operating the system, not for someone reading ADRs. 
+ +## Acceptance Gates + +- workflow-template proposals appear through the action queue +- approved workflow-template actions create learned templates +- procedure suggestions append to operating procedures on approval +- Settings is clearly writable-first +- budget/autonomy persistence uses a real route, not fake config-overrides saves +- the Models tab can add and hide models +- default model selectors hide hidden / no-key / unavailable models +- the nav is visually simplified +- the protocol badges are either functional or clearly passive +- `CLAUDE.md` and `docs/AUTONOMOUS_OPERATIONS.md` reflect the shipped system + +## Validation + +```bash +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +npm run build && npm run lint +``` diff --git a/docs/waves/wave_72/wave_72_plan.md b/docs/waves/wave_72/wave_72_plan.md new file mode 100644 index 0000000..dedfb16 --- /dev/null +++ b/docs/waves/wave_72/wave_72_plan.md @@ -0,0 +1,185 @@ +# Wave 72: The Self-Governing System + +Status: dispatch-ready packet +Predecessor: Wave 71.0 + 71.5 +Theme: the system improves its own knowledge, learns from its own patterns, +continues work autonomously, and stays legible to the operator. + +## Packet Authority + +Use these docs: + +- `docs/waves/wave_72/design_note.md` - four invariants +- `docs/waves/wave_72/team_a_prompt.md` +- `docs/waves/wave_72/team_b_prompt.md` +- `docs/waves/wave_72/team_c_prompt.md` +- `docs/waves/wave_72_polish_reference.md` - repo-truth polish reference + +## Locked Boundaries + +- No new event types. +- No retrieval/scoring changes. +- No `knowledge_catalog.py` changes. +- No new projection fields beyond what existing handlers already write. +- No new Qdrant collections or schema changes. +- No automatic knowledge correction. +- `app.py` remains the single scheduler seam for background work. +- Everything hangs off the existing action queue, operational state, and + maintenance/autonomy infrastructure from Waves 70 and 71. 
+ +## Scope + +| Track | Outcome | Team | +|-------|---------|------| +| 1 | Knowledge review scanning | A | +| 2 | Knowledge review processing | A | +| 3 | Knowledge health surface | A | +| 4 | Trigger fix + visible document ingest on active Knowledge tab | A | +| 5 | Continuation proposals | B | +| 6 | Scheduler consolidation | B | +| 7 | Cross-session warm start + idle execution | B | +| 8 | Workflow pattern recognition | C | +| 9 | Procedure suggestions | C | +| 10 | Product surface polish | C | +| 11 | Documentation refresh | C | + +## Team Missions + +### Team A - Knowledge Governance + +Own the knowledge review lifecycle end to end: + +- scan for problematic entries +- queue `knowledge_review` actions +- let the operator confirm / edit / invalidate +- expose knowledge health and visible document ingest on the active Knowledge + tab + +Also own the addon trigger fix and the active-surface ingest/reindex polish. + +### Team B - Autonomous Continuation + +Own the Queen's ability to continue work coherently across sessions and during +idle time. + +This includes: + +- continuation proposals +- warm-start continuation cues +- idle-time execution guard rails +- the single background scheduler order in `app.py` + +### Team C - Workflow Learning + Product Polish + Documentation + +Own: + +- workflow-template proposals +- procedure suggestions +- writable-first Settings +- budget/autonomy persistence +- model admin +- nav cleanup +- docs refresh + +## Shared Seams + +### `app.py` operational sweep + +Owner: Team B + +Teams A and C provide pure helper functions. Team B wires the order. + +Required order in the operational sweep: + +1. `run_proactive_dispatch()` — capture briefing insights +2. Team A `scan_knowledge_for_review(...)` — receives briefing insights +3. Team B `queue_continuation_proposals(...)` +4. Team B `execute_idle_continuations(...)` +5. Team C `extract_workflow_patterns(...)` +6. Team C `detect_operator_patterns(...)` +7. 
existing approved-action processing / compaction + +If proactive dispatch moves into `_operational_sweep_loop()`, the old daily +maintenance loop should remain responsible only for consolidation services. +Do not run the same proactive dispatch in both loops. + +### `operations-inbox.ts` + +Shared by Teams A and C. + +- Team A adds `knowledge_review` +- Team C adds `workflow_template` +- Team C adds `procedure_suggestion` + +Different `kind` values. Match the existing card/render pattern. + +### `routes/api.py` + +Shared across teams, additive only. + +- Team A: review processing endpoint +- Team C: maintenance-policy route if needed +- Team C: model-admin route(s) if needed +- Team B: only add an endpoint if continuation truly cannot ride the existing + `approve_action()` contract + +### High-Value Verified Seams + +These are the facts worth preserving across prompts: + +- `build_operations_summary()` in `operations_coordinator.py` is the + continuation candidate source. +- `knowledge_entry_usage` is separate from `memory_entries`. +- `approve_action()` already executes actions that carry + `payload.suggested_colony`. +- the live active Knowledge tab is `knowledge-browser.ts` +- the existing upload/ingest flow is in `knowledge-view.ts`, which is not the + active Knowledge tab +- both addon manual triggers are miswired to `incremental_reindex` +- `maintenance_policy` lives in `ws.config` +- `config-overrides` is not a trustworthy generic settings persistence seam + +## Merge Order + +``` +Team B (scheduler + continuation) - merges first +Team A (knowledge governance) - merges second +Team C (learning + polish) - merges third +``` + +All three can build in parallel, but Team B owns the scheduler seam and should +land before final integration. Team C's docs refresh is the literal last merge. 
+ +## What Wave 72 Does Not Do + +- no new event types +- no retrieval or scoring redesign +- no new knowledge storage layer +- no auto-correction of institutional memory +- no multi-user support + +## Validation + +```bash +ruff check src/ && pyright src/ && python scripts/lint_imports.py && pytest +npm run build && npm run lint +``` + +## Success Condition + +Wave 72 succeeds if: + +- the system surfaces review-worthy knowledge through the action queue +- the operator can confirm, edit, or invalidate from the inbox flow +- the Queen proposes continuation naturally when a session resumes +- low-risk continuation work can execute while the operator is away +- successful multi-step patterns become reusable workflow-template proposals +- operator behavior can become procedure suggestions +- the active Knowledge tab has visible `Upload & Ingest` plus working reindex + controls +- Settings is clearly writable-first +- budget/autonomy controls persist through a real route +- default model selectors hide hidden / no-key / unavailable models +- the Models admin surface can add and hide specific models +- addon reindex triggers actually work +- `CLAUDE.md` and `docs/AUTONOMOUS_OPERATIONS.md` reflect the real system diff --git a/docs/waves/wave_72_5/team_a_prompt.md b/docs/waves/wave_72_5/team_a_prompt.md new file mode 100644 index 0000000..bd0d4d7 --- /dev/null +++ b/docs/waves/wave_72_5/team_a_prompt.md @@ -0,0 +1,198 @@ +# Wave 72.5 Team A: Topbar Cleanup + Cost Popover + +## Mission + +Remove decorative protocol badges and the always-on connection dot from the +topbar. Make the `$X.XX spent` display clickable — it opens a budget popover +showing all money-related settings in one place. The topbar should be clean: +logo, nav, cost button, approval badge. 
+ +## Owned files + +- `frontend/src/components/formicos-app.ts` — topbar layout, protocol bar, cost display, connection indicator + +### Do not touch + +- `settings-view.ts` (Team C) +- `addons-view.ts` (Team B) +- `routes/api.py` (Team B) +- `addon_loader.py` (Team C) +- `view_state.py` (Team B) +- `types.ts` (Team B) + +## Repo truth (read before coding) + +1. **formicos-app.ts lines 749-768** — `renderProtocolBar(ps)` renders 3 protocol items + (MCP, AG-UI, A2A) as `proto-item` divs with status dots, labels, and detail text. + Called at line 390. Each shows: label, status dot, detail (tool count / event count / + A2A endpoint). This data is NOT shown elsewhere in equivalent detail — the Settings + Protocols section only shows name + status pill (no counts, no endpoints). Team C is + responsible for adding equivalent detail to Settings before this removal is safe. + +2. **formicos-app.ts lines 736-747** — `_renderConnectionIndicator(conn)` renders a + green/yellow/red dot with optional label. Called at line 392. When connected, it + renders a green dot with no label — pure visual noise. When disconnected, the app + already visibly stops working (colonies freeze, WS messages stop), so the dot adds + no information the operator doesn't already have. + +3. **formicos-app.ts lines 388-399** — `topbar-right-wrap` contains: protocol bar, + `$X.XX spent` text, connection indicator, and approval badge. The approval badge + stays — it's useful and actionable (navigates to Queen on click). + +4. **formicos-app.ts lines 52-57** — topbar CSS: 3-column grid + `minmax(0,1fr) auto minmax(0,1fr)`. + +5. **formicos-app.ts line 350** — cost calculation: + `colonies.reduce((a,c) => a + ((c as any).cost ?? 0), 0)` + +6. **Protocol status data** lives in `store.state.protocolStatus`. It's already consumed + by `settings-view.ts` in the Integrations card (`_renderProtocolsSummary()` at + line 676). The Settings tab is the correct home for protocol diagnostics. 
Team C will + enhance it to show the same detail level (tool count, event count, endpoint) that the + topbar badges currently show, so nothing is lost. + +7. **Budget data sources** the popover needs: + - **Total cost across all colonies**: already computed at line 350. + - **Default budget per colony**: `store.state.runtimeConfig?.governance?.defaultBudgetPerColony` + (default $1.00 from formicos.yaml:561, editable in Settings governance card). + - **Daily maintenance budget + autonomy status**: NOT in the WebSocket snapshot. The + `` component fetches this lazily from + `GET /api/v1/workspaces/{id}/autonomy-status`. The popover should either embed that + component or fetch the same endpoint. The maintenance policy itself is at + `GET /api/v1/workspaces/{id}/maintenance-policy`. + - The workspace ID for the fetch: `store.state.tree?.[0]?.id` (first workspace) or + track the selected workspace if multiple exist. + +## Track 1: Remove protocol badges + +1. **Delete `renderProtocolBar()`** (lines 749-768) entirely. +2. **Remove its call** at line 390: `${this.renderProtocolBar(s.protocolStatus)}` +3. **Delete all `.proto-*` CSS rules** — search the static styles block for `proto-bar`, + `proto-item`, `proto-label`, `proto-detail`. Remove them all. +4. Protocol data (`store.state.protocolStatus`) stays in the store — Settings still uses it. + +## Track 2: Remove the connection indicator + +1. **Delete `_renderConnectionIndicator()`** (lines 736-747). +2. **Remove its call** at line 392: `${this._renderConnectionIndicator(s.connection)}` +3. **Delete `.conn-indicator`, `.conn-dot`, `.conn-label` CSS rules** from the static + styles block. +4. Connection state stays in the store — the reconnection logic is unaffected. + +## Track 3: Clickable cost display with budget popover + +Replace the plain `$X.XX spent` text with a clickable element that toggles a popover. 
+ +### State additions + +```typescript +@state() private _showBudgetPopover = false; +@state() private _policyData: { daily_maintenance_budget: number; autonomy_level: string } | null = null; +@state() private _autonomyData: { grade: string; level: string; budget_spent: number; budget_total: number } | null = null; +``` + +### The cost element + +```html + { + e.stopPropagation(); + this._showBudgetPopover = !this._showBudgetPopover; + if (this._showBudgetPopover) this._fetchBudgetData(); +}}> + ${formatCost(totalCost)} spent + +``` + +Style `cost-btn`: cursor pointer, subtle hover highlight (`rgba(232,88,26,0.1)`), +border-radius 6px, padding 4px 8px. Make it look tappable. + +### The popover + +Absolutely positioned below/left of the cost button. Rendered conditionally: + +```html +${this._showBudgetPopover ? html` +
<div class="popover-backdrop" @click=${() => { this._showBudgetPopover = false; }}></div>
+
+ ${this._renderBudgetPopover(totalCost)} +
+` : nothing} +``` + +**Popover CSS:** +- `position: absolute; top: 100%; right: 0; margin-top: 6px;` +- `background: var(--v-recessed); border: 1px solid var(--v-border);` +- `border-radius: 10px; padding: 16px; min-width: 260px; z-index: 100;` +- `box-shadow: 0 8px 32px rgba(0,0,0,0.4);` +- Backdrop: `position: fixed; inset: 0; z-index: 99;` (transparent, catches click-outside) + +**Popover content** — 4 rows, compact: + +| Row | Label | Value | Source | +|-----|-------|-------|--------| +| 1 | Total spent | `$X.XX` | `totalCost` (already computed) | +| 2 | Per-colony cap | `$Y.YY` | `store.state.runtimeConfig?.governance?.defaultBudgetPerColony` | +| 3 | Daily maintenance | `$A.AA / $B.BB` | Fetched from autonomy-status endpoint | +| 4 | Autonomy | `Grade F · suggest` | Fetched from autonomy-status endpoint | + +Each row: label on left (10px mono, dim), value on right (12px mono, accent for costs). +Compact vertical spacing (8px gap between rows). + +Below the rows, a subtle link: ` this.navTab('settings')}>All budget settings →` that navigates to the Settings tab. + +### Data fetching + +```typescript +private async _fetchBudgetData() { + const wsId = store.state.tree?.[0]?.id; + if (!wsId) return; + try { + const resp = await fetch(`/api/v1/workspaces/${wsId}/autonomy-status`); + if (resp.ok) this._autonomyData = await resp.json(); + } catch { /* popover shows what it has */ } +} +``` + +Cache the result — don't re-fetch on every open. Clear cache when store updates +(in the store subscription callback, set `_autonomyData = null`). + +### The resulting topbar-right + +```html +
<div class="topbar-right-wrap">
+ + ${formatCost(totalCost)} spent + + ${budgetPopover} +
+ ${approvalBadge} +
+``` + +## Validation + +```bash +cd frontend && npm run build && npm run lint +``` + +Verify in the running stack at http://localhost:8080: +- Topbar has no protocol badges +- Topbar has no green/red connection dot +- `$X.XX spent` is clickable and shows the budget popover +- Popover shows all 4 rows with real data +- Click-outside closes the popover +- "All budget settings →" link navigates to Settings + +## Acceptance criteria + +- [ ] `renderProtocolBar()` method and CSS deleted +- [ ] `_renderConnectionIndicator()` method and CSS deleted +- [ ] No protocol badges or connection dot visible in topbar +- [ ] `$X.XX spent` is clickable with hover highlight +- [ ] Budget popover shows: total spent, per-colony cap, daily maintenance budget/remaining, autonomy grade+level +- [ ] Popover closes on click-outside +- [ ] Popover includes "All budget settings →" link to Settings tab +- [ ] Topbar is clean: logo + nav + cost button + approval badge only +- [ ] Protocol status data still flows to Settings (no store changes) +- [ ] Frontend builds and lints clean diff --git a/docs/waves/wave_72_5/team_b_prompt.md b/docs/waves/wave_72_5/team_b_prompt.md new file mode 100644 index 0000000..b78382b --- /dev/null +++ b/docs/waves/wave_72_5/team_b_prompt.md @@ -0,0 +1,462 @@ +# Wave 72.5 Team B: Fix Addon Triggers + Interactive Addons + Metadata Expansion + +## Mission + +The addon trigger endpoint is broken — it calls handlers without required positional +arguments. Fix the backend. Then expand the addon metadata pipeline so the frontend has +the data it needs for interactive features (config editing, tool testing). Finally, +transform the addons tab from a read-only diagnostic dashboard into an interactive +operator surface. 
+ +## Owned files + +- `src/formicos/surface/routes/api.py` — `trigger_addon()` endpoint (lines 1427-1491) +- `src/formicos/surface/view_state.py` — `_build_addons()` (lines 47-97) +- `frontend/src/types.ts` — `AddonToolSummary`, `AddonSummary` interfaces (lines 681-717) +- `frontend/src/components/addons-view.ts` — full component (221 lines) + +### Do not touch + +- `formicos-app.ts` (Team A) +- `addon_loader.py` (Team C) +- `settings-view.ts` (Team C) +- Addon source files (`src/formicos/addons/*/`) +- Addon manifests (`addons/*/addon.yaml`) + +## Repo truth (read before coding) + +### The trigger bug + +1. **api.py lines 1471-1477** — the trigger endpoint calls handlers like this: + ```python + if accepts_ctx: + result = await handler_fn(runtime_context=reg.runtime_context) + else: + result = await handler_fn() + ``` + +2. **But handler signatures expect 3 positional args.** Both `codebase_index/search.py:94` + and `docs_index/search.py:94` have: + ```python + async def handle_reindex( + inputs: dict[str, Any], + workspace_id: str, + thread_id: str, + *, + runtime_context: dict[str, Any] | None = None, + ) -> str: + ``` + +3. **The error:** `handle_reindex() missing 3 required positional arguments: 'inputs', 'workspace_id', and 'thread_id'` + +4. **How Queen tool dispatch does it right** — in `addon_loader.py` lines 239-260, the + `_tool_wrapper` closure receives `(inputs, workspace_id, thread_id)` from the tool + dispatch pipeline and passes them through to the real handler. The trigger endpoint + bypasses this wrapper — it resolves the raw handler function via `_resolve_handler()` + and calls it directly, which means it must provide those positional args itself. + +### The metadata gap + +5. **Current `AddonToolSummary`** in `types.ts:681-684` has only: + ```typescript + { name: string; description: string; callCount: number; } + ``` + Missing: `handler` (needed for trigger endpoint call), `parameters` (JSON schema, + needed for "Try" form generation). + +6. 
**Current `AddonSummary`** in `types.ts:707-717` has no `config` field. The addon + manifest declares `config: list[AddonConfigParam]` with fields + `key`, `type` (boolean|string|integer|cron|select), `default`, `label`, `options` + (addon_loader.py:52-59), but none of this reaches the frontend via the snapshot. + +7. **`_build_addons()`** in `view_state.py:60-66` only emits `name`, `description`, + `callCount` per tool. It does not emit `handler` or `parameters`. + +8. **Addon config endpoints exist** — `GET /api/v1/addons/{name}/config?workspace_id=X` + at api.py:1495 returns schema + current values. `PUT /api/v1/addons/{name}/config` + saves values. These were shipped in Wave 66 but never wired into the frontend. + +### The addons view + +9. **addons-view.ts** is a 221-line component: sidebar list + detail panel. Detail shows + name, version, status, description, error badge, tools table (name/desc/calls), + handlers table (event/lastFired/errors), triggers table (type/schedule/button). + Everything is read-only except the broken "Trigger Now" button. + +10. **`_fireTrigger()`** at line 201-220 sends `POST /api/v1/addons/${addonName}/trigger` + with body `{ handler: manualTrigger?.handler ?? '' }`. No workspace_id, no inputs. + +## Track 1: Fix the trigger endpoint (BLOCKER — do this first) + +The handler signature convention for addon tools is `(inputs, workspace_id, thread_id, *, runtime_context=None)`. The trigger endpoint must detect this and provide the args. 
+ +**In `trigger_addon()` at api.py line 1463**, after resolving `handler_fn` and checking +`accepts_ctx`, also detect whether the handler expects positional args: + +```python +import inspect # already imported above + +sig = inspect.signature(handler_fn) +positional_params = [ + p for p in sig.parameters.values() + if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) + and p.name not in ('self', 'cls', 'runtime_context') +] +has_tool_args = len(positional_params) >= 3 + +inputs = body.get("inputs", {}) +workspace_id = body.get("workspace_id", "") +thread_id = body.get("thread_id", "") + +try: + if has_tool_args and accepts_ctx: + result = await handler_fn( + inputs, workspace_id, thread_id, + runtime_context=reg.runtime_context, + ) + elif has_tool_args: + result = await handler_fn(inputs, workspace_id, thread_id) + elif accepts_ctx: + result = await handler_fn(runtime_context=reg.runtime_context) + else: + result = await handler_fn() +``` + +This replaces the existing try block at lines 1471-1477. The rest of the function +(error handling, trigger_fire_times recording, response) stays the same. + +**Frontend side** — in `_fireTrigger()` at addons-view.ts line 209, add `workspace_id`: +```typescript +body: JSON.stringify({ + handler: manualTrigger?.handler ?? '', + workspace_id: store.state.tree?.[0]?.id ?? '', +}), +``` + +**Test this immediately** after implementing. Trigger the codebase-index reindex from +the Addons tab — it should return a success message instead of the positional args error. + +## Track 2: Expand addon metadata pipeline + +The frontend needs tool parameters and config schema to build interactive forms. Expand +the data pipeline in two places. + +### 2a. Expand `view_state.py` `_build_addons()` + +Add `handler` and `parameters` to tool summaries, and add `config` schema to addon +summaries. 
At view_state.py lines 60-66, change the tools list: + +```python +"tools": [ + { + "name": t.name, + "description": t.description, + "handler": t.handler, # NEW + "parameters": t.parameters, # NEW — JSON schema dict + "callCount": reg.tool_call_counts.get(t.name, 0), + } + for t in manifest.tools +], +``` + +After the `"lastError"` field (line 95), add: + +```python +"config": [ + { + "key": c.key, + "type": c.type, + "default": c.default, + "label": c.label, + "options": c.options, + } + for c in manifest.config +], +``` + +### 2b. Expand `types.ts` interfaces + +Update `AddonToolSummary` (types.ts:681): +```typescript +export interface AddonToolSummary { + name: string; + description: string; + handler: string; // NEW + parameters: Record; // NEW — JSON schema + callCount: number; +} +``` + +Add a new interface: +```typescript +export interface AddonConfigParam { + key: string; + type: 'boolean' | 'string' | 'integer' | 'cron' | 'select'; + default: any; + label: string; + options: string[]; +} +``` + +Update `AddonSummary` (types.ts:707): +```typescript +export interface AddonSummary { + name: string; + version: string; + description: string; + tools: AddonToolSummary[]; + handlers: AddonHandlerSummary[]; + triggers: AddonTriggerSummary[]; + panels: AddonPanelSummary[]; + config: AddonConfigParam[]; // NEW + status: 'healthy' | 'degraded' | 'error'; + lastError: string | null; +} +``` + +### 2c. Coordinate with Team C + +Team C is adding `hidden: bool` and `disabled: bool` to addon data. Those fields will +also need to appear in the snapshot and types. If Team C adds them to `AddonRegistration` +and `AddonManifest`, you add them to `_build_addons()` and `types.ts`. Merge order: +**Team B lands first** (metadata expansion), then Team C adds their fields on top. + +## Track 3: Interactive tool testing ("Try It") + +Add a "Try" button to each row in the tools table. When clicked, expand an inline form +below the row. 
+ +### State additions + +```typescript +@state() private _tryingTool: string | null = null; // tool name being tested +@state() private _tryInputs: Record = {}; // current form values +@state() private _tryResult: string = ''; // result text +@state() private _tryLoading = false; // loading indicator +``` + +### Tools table modification + +In the tools table (lines 144-156), add a "Try" column header and button per row: + +```html +NameDescriptionCalls +${addon.tools.map(t => html` + + ${t.name} + ${t.description} + ${t.callCount} + + + + + ${this._tryingTool === t.name ? html` + ${this._renderTryForm(addon, t)} + ` : nothing} +`)} +``` + +### The try form + +`_renderTryForm(addon: AddonSummary, tool: AddonToolSummary)`: + +1. Parse `tool.parameters` JSON schema. For each property in + `tool.parameters.properties` (if it exists): + - `string` type → `` + - `integer`/`number` type → `` + - `boolean` type → `` + - `array`/`object` type → ``; + } + // Default: string + return html` { + this._tryInputs = { ...this._tryInputs, [key]: (e.target as HTMLInputElement).value }; + }}>`; + } + + private async _runTryTool(addon: AddonSummary, tool: AddonToolSummary) { + this._tryLoading = true; + this._tryResult = ''; + try { + const resp = await fetch(`/api/v1/addons/${addon.name}/trigger`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + handler: tool.handler, + inputs: this._tryInputs, + workspace_id: store.state.tree?.[0]?.id ?? '', + }), + }); + const data = await resp.json(); + this._tryResult = resp.ok + ? (data.result ?? 'ok') + : `Error: ${data.error ?? resp.statusText}`; + } catch (e) { + this._tryResult = `Error: ${e}`; + } + this._tryLoading = false; + } + + // ---- Track 4: Config editing ---- + + private _renderConfigSection(addon: AddonSummary) { + if (!addon.config || addon.config.length === 0) return nothing; + return html` +
<div class="config-title">Configuration</div>
+
+ ${addon.config.map(c => this._renderConfigField(c))} + + ${this._configSaved ? html`Saved` : nothing} +
+ `; + } + + private _renderConfigField(param: AddonConfigParam) { + const label = param.label || param.key; + const val = this._configValues[param.key] ?? param.default; + + if (param.type === 'boolean') { + return html` +
+ +
`; + } + if (param.type === 'select') { + return html` +
+ + +
`; + } + if (param.type === 'integer') { + return html` +
+ + { + this._configValues = { ...this._configValues, [param.key]: Number((e.target as HTMLInputElement).value) }; + }}> +
`; + } + if (param.type === 'cron') { + return html` +
+ + { + this._configValues = { ...this._configValues, [param.key]: (e.target as HTMLInputElement).value }; + }}> +
`; + } + // Default: string + return html` +
+ + { + this._configValues = { ...this._configValues, [param.key]: (e.target as HTMLInputElement).value }; + }}> +
`; + } + + private async _fetchConfig(addonName: string) { + this._configLoading = true; + this._configValues = {}; + try { + const wsId = store.state.tree?.[0]?.id ?? ''; + const resp = await fetch( + `/api/v1/addons/${addonName}/config?workspace_id=${encodeURIComponent(wsId)}`, + ); + if (resp.ok) { + const data = await resp.json(); + this._configValues = data.values ?? {}; + } + } catch { /* ignore */ } + this._configLoading = false; + } + + private async _saveConfig(addonName: string) { + this._configSaving = true; + this._configSaved = false; + try { + await fetch(`/api/v1/addons/${addonName}/config`, { + method: 'PUT', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + workspace_id: store.state.tree?.[0]?.id ?? '', + values: this._configValues, + }), + }); + this._configSaved = true; + setTimeout(() => { this._configSaved = false; }, 2000); + } catch { /* ignore */ } + this._configSaving = false; + } + + // ---- Trigger Now ---- + + private async _fireTrigger(addonName: string) { + this._triggerStatus = 'Triggering...'; + try { + const addon = this._addons.find(a => a.name === addonName); + const manualTrigger = addon?.triggers.find(t => t.type === 'manual'); + const resp = await fetch(`/api/v1/addons/${addonName}/trigger`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + handler: manualTrigger?.handler ?? '', + workspace_id: store.state.tree?.[0]?.id ?? '', + }), + }); + const data = await resp.json(); + if (resp.ok) { + this._triggerStatus = `Triggered: ${data.result ?? 'ok'}`; + } else { + this._triggerStatus = `Error: ${data.error ?? 
resp.statusText}`; + } + } catch (e) { + this._triggerStatus = `Error: ${e}`; + } + } +} diff --git a/frontend/src/components/autonomy-card.ts b/frontend/src/components/autonomy-card.ts new file mode 100644 index 0000000..4379849 --- /dev/null +++ b/frontend/src/components/autonomy-card.ts @@ -0,0 +1,210 @@ +/** + * Wave 70.5 Track 3: Autonomy trust card for settings page. + * Fetches from GET /api/v1/workspaces/{id}/autonomy-status and renders + * trust score, daily budget, and recent autonomous actions. + */ +import { LitElement, html, css, nothing } from 'lit'; +import { customElement, property, state } from 'lit/decorators.js'; +import { voidTokens } from '../styles/shared.js'; +import type { AutonomyStatusData } from '../types.js'; +import './atoms.js'; + +@customElement('fc-autonomy-card') +export class FcAutonomyCard extends LitElement { + static styles = [voidTokens, css` + :host { display: block; } + + .grade { + display: inline-flex; align-items: center; justify-content: center; + width: 28px; height: 28px; border-radius: 6px; + font-family: var(--f-display); font-weight: 700; font-size: 14px; + margin-right: 10px; + } + .grade-a { background: rgba(45,212,168,0.12); color: var(--v-success); } + .grade-b { background: rgba(91,156,245,0.12); color: var(--v-blue); } + .grade-c { background: rgba(245,183,49,0.12); color: var(--v-warn); } + .grade-d { background: rgba(245,113,49,0.12); color: var(--v-danger); } + .grade-f { background: rgba(232,72,72,0.12); color: var(--v-danger); } + + .header-row { + display: flex; align-items: center; margin-bottom: 10px; + } + .score-text { + font-family: var(--f-mono); font-size: 11px; color: var(--v-fg-muted); + } + .level-pill { + font-family: var(--f-mono); font-size: 9px; font-weight: 600; + padding: 2px 7px; border-radius: 5px; margin-left: 8px; + background: rgba(255,255,255,0.05); color: var(--v-fg-dim); + border: 1px solid var(--v-border); + } + + .budget-bar { + display: flex; align-items: center; gap: 10px; + 
font-family: var(--f-mono); font-size: 10px; color: var(--v-fg-muted); + margin-bottom: 10px; + } + .bar-track { + flex: 1; height: 4px; border-radius: 2px; + background: rgba(255,255,255,0.06); + } + .bar-fill { + height: 100%; border-radius: 2px; + transition: width 0.3s; + } + .bar-fill-ok { background: var(--v-success); } + .bar-fill-warn { background: var(--v-warn); } + .bar-fill-danger { background: var(--v-danger); } + + .components { + display: grid; grid-template-columns: 1fr 1fr; gap: 4px 16px; + font-family: var(--f-mono); font-size: 10px; color: var(--v-fg-muted); + margin-bottom: 10px; + } + .comp-label { color: var(--v-fg-dim); } + + .rec-text { + font-family: var(--f-mono); font-size: 10px; color: var(--v-fg-muted); + padding: 6px 8px; border-radius: 6px; + background: rgba(167,139,250,0.04); + border: 1px solid rgba(167,139,250,0.12); + margin-bottom: 10px; + } + + .actions-table { + width: 100%; border-collapse: collapse; + font-family: var(--f-mono); font-size: 10px; + } + .actions-table th { + text-align: left; font-weight: 600; color: var(--v-fg-dim); + border-bottom: 1px solid var(--v-border); padding: 4px 6px; + font-size: 9px; letter-spacing: 0.04em; + } + .actions-table td { + color: var(--v-fg-muted); padding: 3px 6px; + border-bottom: 1px solid rgba(255,255,255,0.02); + } + .outcome-ok { color: var(--v-success); } + .outcome-fail { color: var(--v-danger); } + + .empty-text { + font-family: var(--f-mono); font-size: 10.5px; color: var(--v-fg-dim); + } + + @media (prefers-reduced-motion: reduce) { + * { transition: none !important; } + } + `]; + + @property({ type: String }) workspaceId = ''; + @state() private _data: AutonomyStatusData | null = null; + @state() private _error = ''; + + connectedCallback() { + super.connectedCallback(); + void this._fetch(); + } + + updated(changed: Map) { + if (changed.has('workspaceId') && this.workspaceId) { + void this._fetch(); + } + } + + private async _fetch() { + if (!this.workspaceId) return; + try 
{ + const resp = await fetch( + `/api/v1/workspaces/${this.workspaceId}/autonomy-status`, + ); + if (!resp.ok) { + this._error = `HTTP ${resp.status}`; + return; + } + this._data = await resp.json() as AutonomyStatusData; + this._error = ''; + } catch { + this._error = 'Failed to fetch autonomy status'; + } + } + + private _gradeClass(grade: string): string { + const g = grade.toLowerCase(); + if (g === 'a') return 'grade-a'; + if (g === 'b') return 'grade-b'; + if (g === 'c') return 'grade-c'; + if (g === 'd') return 'grade-d'; + return 'grade-f'; + } + + render() { + if (this._error) { + return html`
${this._error}
`; + } + const d = this._data; + if (!d) return html`
Loading autonomy status\u2026
`; + + const budgetPct = d.daily_budget > 0 + ? Math.min(100, (d.daily_spend / d.daily_budget) * 100) + : 0; + const barClass = budgetPct >= 90 + ? 'bar-fill-danger' + : budgetPct >= 70 ? 'bar-fill-warn' : 'bar-fill-ok'; + + const recent = (d.recent_actions ?? []).slice(0, 5); + + return html` +
+ ${d.grade} + + ${d.score}/100 + + ${d.level} +
+ +
+ $${d.daily_spend.toFixed(2)} / $${d.daily_budget.toFixed(2)} +
+
+
+ $${d.remaining.toFixed(2)} left +
+ +
+ ${Object.entries(d.components).map(([k, v]) => html` + ${k.replace(/_/g, ' ')} + ${(v as number).toFixed(2)} + `)} +
+ + ${d.recommendation ? html` +
${d.recommendation}
+ ` : nothing} + + ${recent.length > 0 ? html` + + + + + ${recent.map(a => html` + + + + + + + `)} +
ColonyStrategyCostOutcome
${a.colony_id.slice(0, 8)}${a.strategy}$${a.cost.toFixed(3)} + ${a.outcome} +
+ ` : html` +
No recent autonomous actions.
+ `} + `; + } +} + +declare global { + interface HTMLElementTagNameMap { 'fc-autonomy-card': FcAutonomyCard; } +} diff --git a/frontend/src/components/caste-editor.ts b/frontend/src/components/caste-editor.ts index 5b2c0fc..356108e 100644 --- a/frontend/src/components/caste-editor.ts +++ b/frontend/src/components/caste-editor.ts @@ -325,8 +325,10 @@ export class FcCasteEditor extends LitElement { private _availableModels(): string[] { const registry = this.runtimeConfig?.models?.registry ?? []; + const _SELECTABLE = new Set(['available', 'loaded']); return [...new Set( registry + .filter(model => !model.hidden && _SELECTABLE.has(model.status ?? 'available')) .map(model => model.address) .filter((address): address is string => Boolean(address)), )]; diff --git a/frontend/src/components/colony-creator.ts b/frontend/src/components/colony-creator.ts index 751cda8..f8f2142 100644 --- a/frontend/src/components/colony-creator.ts +++ b/frontend/src/components/colony-creator.ts @@ -145,6 +145,7 @@ export class FcColonyCreator extends LitElement { @property({ type: String }) initialTemplateId = ''; /** Available service colonies for attachment. 
*/ @property({ type: Array }) availableServices: Colony[] = []; + @property({ type: Object }) governance: { defaultBudgetPerColony: number; maxRoundsPerColony: number } | null = null; @state() private step: 1 | 2 | 3 | 4 = 1; @state() private objective = ''; @@ -156,8 +157,8 @@ export class FcColonyCreator extends LitElement { { caste: 'reviewer', tier: 'standard', count: 1 }, ]; @state() private attachedServices: string[] = []; - @state() private budget = 2.0; - @state() private maxRounds = 10; + @state() private budget = 0; + @state() private maxRounds = 0; @state() private strategy: 'stigmergic' | 'sequential' = 'stigmergic'; @state() private loadingSuggestions = false; @state() private launching = false; @@ -165,11 +166,29 @@ export class FcColonyCreator extends LitElement { @state() private previewLoading = false; private _initialized = false; + connectedCallback() { + super.connectedCallback(); + this._applyGovernanceDefaults(); + } + + private _applyGovernanceDefaults() { + if (this.governance) { + if (!this.budget) this.budget = this.governance.defaultBudgetPerColony ?? 1.0; + if (!this.maxRounds) this.maxRounds = this.governance.maxRoundsPerColony ?? 10; + } else { + if (!this.budget) this.budget = 1.0; + if (!this.maxRounds) this.maxRounds = 10; + } + } + updated(changed: Map) { if (!this._initialized) { if (this.initialObjective) this.objective = this.initialObjective; this._initialized = true; } + if (changed.has('governance') && this.governance) { + this._applyGovernanceDefaults(); + } if (changed.has('templates' as never) && this.initialTemplateId && this.templates.length > 0 && !this.selectedTemplate) { const tmpl = this.templates.find(t => t.id === this.initialTemplateId); if (tmpl) this.applyTemplate(tmpl); @@ -339,13 +358,13 @@ export class FcColonyCreator extends LitElement {
Budget ($)
{ this.budget = parseFloat((e.target as HTMLInputElement).value) || 2.0; }}> + @input=${(e: Event) => { this.budget = parseFloat((e.target as HTMLInputElement).value) || this.governance?.defaultBudgetPerColony || 1.0; }}>
Max Rounds
{ this.maxRounds = parseInt((e.target as HTMLInputElement).value) || 10; }}> + @input=${(e: Event) => { this.maxRounds = parseInt((e.target as HTMLInputElement).value) || this.governance?.maxRoundsPerColony || 10; }}>
Strategy
@@ -368,10 +387,6 @@ export class FcColonyCreator extends LitElement { // -- Step 4: Launch summary (real preview truth) ------------------------- private _renderLaunch() { - const estCost = this.team.reduce((sum, t) => { - const rate = t.tier === 'light' ? 0 : t.tier === 'flash' ? 0.01 : t.tier === 'heavy' ? 0.08 : 0.02; - return sum + rate * t.count * this.maxRounds; - }, 0); const isFastPath = this.team.length === 1 && this.team[0].count === 1 && this.strategy === 'sequential'; const totalAgents = this.team.reduce((s, t) => s + t.count, 0); @@ -419,7 +434,9 @@ export class FcColonyCreator extends LitElement { $${this.budget.toFixed(2)} budget ${this.maxRounds} rounds max ${this.strategy} - est. ~$${estCost.toFixed(2)} + ${this.previewData?.estimatedCost + ? html`est. ~$${this.previewData.estimatedCost.toFixed(2)}` + : html`cost varies by model`} ${this.selectedTemplate ? html`tmpl: ${this.selectedTemplate.name}` : nothing}
@@ -639,8 +656,8 @@ export class FcColonyCreator extends LitElement { { caste: 'reviewer', tier: 'standard', count: 1 }, ]; this.attachedServices = []; - this.budget = 2.0; - this.maxRounds = 10; + this.budget = this.governance?.defaultBudgetPerColony ?? 1.0; + this.maxRounds = this.governance?.maxRoundsPerColony ?? 10; this.strategy = 'stigmergic'; this.launching = false; this.previewData = null; diff --git a/frontend/src/components/colony-progress-card.ts b/frontend/src/components/colony-progress-card.ts new file mode 100644 index 0000000..b5e57bc --- /dev/null +++ b/frontend/src/components/colony-progress-card.ts @@ -0,0 +1,204 @@ +/** + * Wave 69 Track 1: Inline colony progress card for Queen chat. + * + * Subscribes to store updates for a specific colony and renders live progress + * inline within the Queen chat. Transitions to a compact completed state + * when the colony finishes. + */ +import { LitElement, html, css, nothing } from 'lit'; +import { customElement, property } from 'lit/decorators.js'; +import { voidTokens } from '../styles/shared.js'; +import { store, findNode } from '../state/store.js'; +import type { Colony, TreeNode } from '../types.js'; + +const STATUS_COLOR: Record = { + running: 'var(--v-accent)', + completed: 'var(--v-success)', + failed: 'var(--v-danger)', + killed: 'var(--v-danger)', + pending: 'var(--v-fg-dim)', + queued: 'var(--v-fg-dim)', +}; + +@customElement('fc-colony-progress') +export class ColonyProgressCard extends LitElement { + static styles = [voidTokens, css` + :host { display: block; } + .card { + border: 1px solid var(--v-border); + border-radius: 10px; + background: rgba(255,255,255,0.015); + padding: 10px 12px; + font-family: var(--f-mono); + font-size: 10.5px; + color: var(--v-fg); + transition: border-color 0.15s, background 0.15s; + } + @media (prefers-reduced-motion: reduce) { + .card { transition: none; } + } + .card.running { border-color: rgba(232,88,26,0.18); } + .card.completed { border-color: 
rgba(45,212,168,0.18); background: rgba(45,212,168,0.02); } + .card.failed { border-color: rgba(248,113,113,0.18); background: rgba(248,113,113,0.02); } + .header { + display: flex; align-items: center; gap: 6px; margin-bottom: 6px; + } + .status-dot { + width: 6px; height: 6px; border-radius: 50%; flex-shrink: 0; + } + .task-label { + font-family: var(--f-body); font-size: 11px; color: var(--v-fg-muted); + flex: 1; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; + } + .caste-badge { + font-size: 8px; padding: 1px 5px; border-radius: 3px; + background: var(--v-accent-muted); color: var(--v-accent); + font-weight: 700; letter-spacing: 0.06em; text-transform: uppercase; + } + .progress-row { + display: flex; align-items: center; gap: 8px; margin-bottom: 4px; + } + .bar-track { + flex: 1; height: 4px; border-radius: 2px; + background: rgba(255,255,255,0.06); + overflow: hidden; + } + .bar-fill { + height: 100%; border-radius: 2px; + background: var(--v-accent); + transition: width 0.3s ease; + } + @media (prefers-reduced-motion: reduce) { + .bar-fill { transition: none; } + } + .bar-label { + font-size: 9px; color: var(--v-fg-dim); white-space: nowrap; + font-feature-settings: 'tnum'; + } + .meta-row { + display: flex; gap: 10px; font-size: 9px; color: var(--v-fg-dim); + } + .meta-item { display: flex; gap: 3px; align-items: center; } + .meta-val { color: var(--v-fg-muted); font-weight: 600; } + .sparkline { flex-shrink: 0; } + .compact { + display: flex; align-items: center; gap: 8px; + } + .compact-status { + font-size: 10px; font-weight: 700; + } + .compact-task { + font-family: var(--f-body); font-size: 10.5px; color: var(--v-fg-muted); + flex: 1; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; + } + .compact-meta { + font-size: 9px; color: var(--v-fg-dim); white-space: nowrap; + font-feature-settings: 'tnum'; + } + `]; + + @property() colonyId = ''; + @property() task = ''; + private _unsub?: () => void; + + connectedCallback() { + 
super.connectedCallback(); + this._unsub = store.subscribe(() => this.requestUpdate()); + } + + disconnectedCallback() { + super.disconnectedCallback(); + this._unsub?.(); + } + + render() { + const node = findNode(store.state.tree, this.colonyId) as Colony | null; + if (!node) return nothing; + + const status = node.status ?? 'pending'; + const isTerminal = status === 'completed' || status === 'failed' || status === 'killed'; + + if (isTerminal) return this._renderCompact(node, status); + return this._renderRunning(node, status); + } + + private _renderRunning(c: Colony, status: string) { + const round = c.round ?? 0; + const maxRounds = c.maxRounds ?? 10; + const pct = maxRounds > 0 ? Math.min(100, (round / maxRounds) * 100) : 0; + const cost = c.cost ?? 0; + const strategy = c.strategy ?? ''; + const caste = c.castes?.[0]?.caste ?? ''; + const history = c.convergenceHistory ?? []; + const color = STATUS_COLOR[status] ?? 'var(--v-fg-dim)'; + + return html` +
+
+ + ${this.task || c.task || this.colonyId} + ${caste ? html`${caste}` : nothing} +
+
+
+
+
+ ${round}/${maxRounds} + ${history.length >= 3 ? this._renderSparkline(history) : nothing} +
+
+ ${strategy ? html`
Strategy: ${strategy}
` : nothing} +
Cost: $${cost.toFixed(2)}
+
+
+ `; + } + + private _renderCompact(c: Colony, status: string) { + const color = STATUS_COLOR[status] ?? 'var(--v-fg-dim)'; + const icon = status === 'completed' ? '\u2713' : '\u2717'; + const cost = c.cost ?? 0; + const quality = c.qualityScore ?? 0; + + return html` +
+
+ ${icon} + ${this.task || c.task || this.colonyId} + + $${cost.toFixed(2)} + ${quality > 0 ? html` · ${(quality * 100).toFixed(0)}%` : nothing} + +
+
+ `; + } + + private _renderSparkline(history: number[]) { + const recent = history.slice(-8); + const w = 40; + const h = 16; + const max = Math.max(...recent, 0.01); + const step = w / Math.max(recent.length - 1, 1); + const points = recent.map((v, i) => + `${(i * step).toFixed(1)},${(h - (v / max) * h).toFixed(1)}` + ).join(' '); + + return html` + + + + `; + } +} + +declare global { + interface HTMLElementTagNameMap { 'fc-colony-progress': ColonyProgressCard; } +} diff --git a/frontend/src/components/consulted-sources.ts b/frontend/src/components/consulted-sources.ts new file mode 100644 index 0000000..4ad9fb9 --- /dev/null +++ b/frontend/src/components/consulted-sources.ts @@ -0,0 +1,93 @@ +/** + * Wave 69 Track 2: Consulted-sources chip strip. + * + * Renders a horizontal strip of clickable chips below Queen messages + * showing which knowledge entries were available during reasoning. + * Labeled "Consulted Knowledge" — not "Citations." + */ +import { LitElement, html, css, nothing } from 'lit'; +import { customElement, property } from 'lit/decorators.js'; +import { voidTokens } from '../styles/shared.js'; +import type { ConsultedEntry } from '../types.js'; +import './atoms.js'; + +@customElement('fc-consulted-sources') +export class ConsultedSources extends LitElement { + static styles = [voidTokens, css` + :host { display: block; } + .strip { + display: flex; flex-wrap: wrap; align-items: center; + gap: 5px; padding: 4px 0 2px; + } + .label { + font-size: 8px; font-family: var(--f-mono); font-weight: 600; + color: var(--v-fg-dim); letter-spacing: 0.08em; + text-transform: uppercase; margin-right: 2px; + } + .chip { + display: inline-flex; align-items: center; gap: 4px; + padding: 2px 8px; border-radius: 4px; + border: 1px solid var(--v-border); + background: rgba(255,255,255,0.02); + font-size: 10px; font-family: var(--f-body); + color: var(--v-fg-muted); cursor: pointer; + transition: border-color 0.15s, background 0.15s; + max-width: 220px; + } + @media 
(prefers-reduced-motion: reduce) { + .chip { transition: none; } + } + .chip:hover { + border-color: var(--v-border-hover); + background: rgba(255,255,255,0.04); + color: var(--v-fg); + } + .chip-title { + overflow: hidden; text-overflow: ellipsis; white-space: nowrap; + } + .conf-dot { + width: 5px; height: 5px; border-radius: 50%; flex-shrink: 0; + } + `]; + + @property({ type: Array }) entries: ConsultedEntry[] = []; + + render() { + if (!this.entries?.length) return nothing; + + return html` +
+ Consulted Knowledge + ${this.entries.map(e => this._renderChip(e))} +
+ `; + } + + private _renderChip(entry: ConsultedEntry) { + const conf = entry.confidence ?? 0.5; + const color = conf >= 0.7 ? 'var(--v-success)' + : conf >= 0.4 ? 'var(--v-warn)' + : 'var(--v-danger)'; + const title = (entry.title ?? '').slice(0, 40) || 'Untitled'; + + return html` + this._navigate(entry.id)}> + + ${title} + + `; + } + + private _navigate(entryId: string) { + if (!entryId) return; + this.dispatchEvent(new CustomEvent('navigate-knowledge', { + detail: { entryId }, + bubbles: true, composed: true, + })); + } +} + +declare global { + interface HTMLElementTagNameMap { 'fc-consulted-sources': ConsultedSources; } +} diff --git a/frontend/src/components/fc-result-card.ts b/frontend/src/components/fc-result-card.ts index 561c770..dcf3a79 100644 --- a/frontend/src/components/fc-result-card.ts +++ b/frontend/src/components/fc-result-card.ts @@ -89,6 +89,18 @@ export class FcResultCard extends LitElement { transition: all 0.15s; text-transform: uppercase; letter-spacing: 0.05em; } .retry-btn:hover { border-color: rgba(248,113,113,0.5); background: rgba(248,113,113,0.1); } + /* Wave 69: diff badge */ + .files-badge { + display: inline-flex; align-items: center; gap: 4px; + font-size: 9.5px; font-family: var(--f-mono); padding: 3px 8px; + border-radius: 5px; border: 1px solid var(--v-border); + background: rgba(255,255,255,0.02); color: var(--v-fg-dim); + cursor: pointer; transition: all 0.15s; + text-transform: uppercase; letter-spacing: 0.05em; + } + @media (prefers-reduced-motion: reduce) { .files-badge { transition: none; } } + .files-badge:hover { border-color: var(--v-border-hover); color: var(--v-fg); background: rgba(255,255,255,0.04); } + .files-icon { font-size: 10px; } `]; @property({ type: Object }) result: ResultCardMeta | null = null; @@ -145,6 +157,12 @@ export class FcResultCard extends LitElement { ${r.threadId ? html` this._nav('timeline')}>Timeline ` : nothing} + ${(r as Record).filesChanged ? 
html` + this._nav('colony')}> + \u2261 + Files: ${(r as Record).filesChanged} changed + + ` : nothing} ${isFailure ? html` this._retry()}>Retry ` : nothing} diff --git a/frontend/src/components/formicos-app.ts b/frontend/src/components/formicos-app.ts index 478e7d2..8b4700e 100644 --- a/frontend/src/components/formicos-app.ts +++ b/frontend/src/components/formicos-app.ts @@ -22,13 +22,20 @@ import './knowledge-browser.js'; import './colony-creator.js'; import './colony-chat.js'; import './workspace-browser.js'; +import './addons-view.js'; +import './operations-view.js'; -type ViewId = 'queen' | 'tree' | 'knowledge' | 'workspace' | 'playbook' | 'models' | 'settings'; +type ViewId = 'queen' | 'tree' | 'knowledge' | 'workspace' | 'operations' | 'addons' | 'playbook' | 'models' | 'settings'; -const NAV = [ +const NAV_PRIMARY = [ { id: 'queen' as const, label: 'Queen', icon: '\u265B' }, { id: 'knowledge' as const, label: 'Knowledge', icon: '\u25C8' }, { id: 'workspace' as const, label: 'Workspace', icon: '\u2302' }, + { id: 'operations' as const, label: 'Operations', icon: '\u2318' }, +]; + +const NAV_SECONDARY = [ + { id: 'addons' as const, label: 'Addons', icon: '\u2B9E' }, { id: 'playbook' as const, label: 'Playbook', icon: '\u29C9' }, { id: 'models' as const, label: 'Models', icon: '\u2B22' }, { id: 'settings' as const, label: 'Settings', icon: '\u2699' }, @@ -49,7 +56,7 @@ export class FormicOSApp extends LitElement { background: rgba(6,6,12,0.85); backdrop-filter: blur(14px); -webkit-backdrop-filter: blur(14px); } .topbar-left { display: flex; align-items: center; gap: 14px; min-width: 0; } - .topbar-center { display: flex; justify-content: center; } + .topbar-center { display: flex; justify-content: flex-start; } .topbar-right-wrap { display: flex; align-items: center; justify-content: flex-end; gap: 10px; min-width: 0; } .logo { display: flex; align-items: center; gap: 6px; cursor: pointer; } .logo-text { font-family: var(--f-display); font-weight: 800; 
font-size: 15px; color: var(--v-fg); letter-spacing: -0.04em; } @@ -57,10 +64,15 @@ export class FormicOSApp extends LitElement { .logo-ver { font-size: 10px; font-family: var(--f-mono); color: var(--v-fg-dim); letter-spacing: 0.05em; } .topbar-right { display: flex; align-items: center; gap: 14px; font-size: 12px; font-family: var(--f-mono); font-feature-settings: 'tnum'; } .top-nav { - display: inline-grid; grid-template-columns: repeat(6, minmax(72px, auto)); - gap: 4px; padding: 4px; border: 1px solid var(--v-border); border-radius: 11px; + display: flex; align-items: center; gap: 6px; + } + .nav-group { + display: inline-grid; gap: 4px; padding: 4px; + border: 1px solid var(--v-border); border-radius: 11px; background: rgba(13,14,22,0.78); box-shadow: inset 0 1px 0 rgba(255,255,255,0.02); } + .nav-group.primary { grid-template-columns: repeat(4, minmax(66px, auto)); } + .nav-group.secondary { grid-template-columns: repeat(4, minmax(52px, auto)); } .top-nav-tab { min-height: 34px; min-width: 0; display: flex; align-items: center; justify-content: center; gap: 6px; border-radius: 8px; cursor: pointer; font-size: 12px; color: var(--v-fg-dim); @@ -83,6 +95,8 @@ export class FormicOSApp extends LitElement { .mini-colonies { flex: 1; display: flex; flex-direction: column; align-items: center; padding-top: 10px; gap: 5px; } .mini-colony { width: 26px; height: 26px; border-radius: 6px; display: flex; align-items: center; justify-content: center; cursor: pointer; border: 1px solid var(--v-border); font-size: 10px; color: var(--v-fg-muted); } .mini-colony.active { background: rgba(232,88,26,0.05); border-color: rgba(232,88,26,0.15); } + .create-ws-btn { display: block; width: 100%; padding: 6px 12px; margin-top: 4px; background: transparent; border: 1px dashed var(--v-border); border-radius: 6px; color: var(--v-fg-dim); font-size: 10px; font-family: var(--f-mono); cursor: pointer; text-align: left; } + .create-ws-btn:hover { border-color: var(--v-accent); color: 
var(--v-accent); } .content { flex: 1; padding: 16px; overflow: hidden; display: flex; flex-direction: column; } .content-inner { flex: 1; min-height: 0; overflow: hidden; } .startup-shell { @@ -215,13 +229,32 @@ export class FormicOSApp extends LitElement { } .creator-overlay { position: fixed; inset: 0; z-index: 100; display: flex; align-items: center; justify-content: center; background: rgba(4,4,8,0.7); backdrop-filter: blur(6px); -webkit-backdrop-filter: blur(6px); } .creator-panel { width: 480px; max-height: 80vh; overflow: auto; padding: 20px; border-radius: 12px; background: var(--v-surface); border: 1px solid var(--v-border); box-shadow: 0 20px 60px rgba(0,0,0,0.5); } - .proto-bar { display: flex; gap: 10px; align-items: center; } - .proto-item { display: flex; align-items: center; gap: 4px; padding: 2px 8px; border-radius: 999px; border: 1px solid var(--v-border); background: var(--v-recessed); } - .proto-label { font-family: var(--f-mono); font-size: 9.5px; font-weight: 600; letter-spacing: 0.08em; color: var(--v-fg-dim); } - .proto-detail { font-family: var(--f-mono); font-size: 9.5px; color: var(--v-fg-dim); } - .conn-indicator { display: flex; align-items: center; gap: 4px; } - .conn-dot { width: 6px; height: 6px; border-radius: 50%; flex-shrink: 0; } - .conn-label { font-size: 9.5px; font-family: var(--f-mono); color: var(--v-fg-dim); } + .cost-btn { + cursor: pointer; padding: 4px 8px; border-radius: 6px; + transition: background 0.15s; color: var(--v-accent); + } + .cost-btn:hover { background: rgba(232,88,26,0.1); } + .budget-backdrop { position: fixed; inset: 0; z-index: 99; } + .budget-popover { + position: absolute; top: 100%; right: 0; margin-top: 6px; + background: var(--v-recessed); border: 1px solid var(--v-border); + border-radius: 10px; padding: 16px; min-width: 260px; z-index: 100; + box-shadow: 0 8px 32px rgba(0,0,0,0.4); + } + .budget-row { + display: flex; justify-content: space-between; align-items: center; + padding: 4px 0; 
font-family: var(--f-mono); + } + .budget-label { font-size: 10px; color: var(--v-fg-dim); } + .budget-value { font-size: 12px; color: var(--v-accent); } + .budget-value.neutral { color: var(--v-fg); } + .popover-link { + display: block; margin-top: 10px; padding-top: 8px; + border-top: 1px solid var(--v-border); font-size: 10px; + font-family: var(--f-mono); color: var(--v-fg-dim); cursor: pointer; + transition: color 0.15s; + } + .popover-link:hover { color: var(--v-accent); } .sidebar-toggle { padding: 4px 8px; cursor: pointer; text-align: center; font-size: 10px; color: var(--v-fg-dim); border-bottom: 1px solid var(--v-border); user-select: none; transition: color 0.15s; } .sidebar-toggle:hover { color: var(--v-fg-muted); } @media (max-width: 1380px) { @@ -234,7 +267,9 @@ export class FormicOSApp extends LitElement { } .topbar-center { order: 2; } .topbar-right-wrap { order: 3; } - .top-nav { width: 100%; grid-template-columns: repeat(5, minmax(0, 1fr)); } + .top-nav { width: 100%; flex-wrap: wrap; } + .nav-group { flex: 1; } + .nav-group.primary, .nav-group.secondary { grid-template-columns: repeat(4, minmax(0, 1fr)); } .view-shell { flex-direction: column; } .queen-rail.open { width: 100%; height: 280px; } .queen-rail.closed { width: 100%; height: 46px; } @@ -253,6 +288,11 @@ export class FormicOSApp extends LitElement { @state() private showCreator = false; @state() private creatorTemplateId = ''; @state() private knowledgeSourceColony = ''; + @state() private _showBudgetPopover = false; + @state() private _autonomyData: { grade: string; level: string; budget_spent: number; budget_total: number; daily_maintenance_budget?: number } | null = null; + @state() private _showCreateWorkspace = false; + @state() private _newWorkspaceName = ''; + @state() private _creatingWorkspace = false; private unsub?: () => void; private _subscribed = false; @@ -351,21 +391,40 @@ export class FormicOSApp extends LitElement {
- ${NAV.map(n => { - const active = this.view === n.id || (this.view === 'tree' && n.id === 'queen'); - return html`
this.navTab(n.id as ViewId)} title=${n.label}> - ${n.icon} - ${n.label} -
`; - })} + +
-
+
- ${this.renderProtocolBar(s.protocolStatus)} - ${formatCost(totalCost)} spent - ${this._renderConnectionIndicator(s.connection)} + { + e.stopPropagation(); + this._showBudgetPopover = !this._showBudgetPopover; + if (this._showBudgetPopover) void this._fetchBudgetData(); + }}>${formatCost(totalCost)} spent
+ ${this._showBudgetPopover ? html` +
{ this._showBudgetPopover = false; }}>
+
+ ${this._renderBudgetPopover(totalCost)} +
+ ` : nothing} ${s.approvals.length > 0 ? html`
this.navTab('queen')}> ${s.approvals.length} @@ -385,6 +444,27 @@ export class FormicOSApp extends LitElement { this.navTree(e.detail)}>
+
+ ${this._showCreateWorkspace ? html` +
+ { this._newWorkspaceName = (e.target as HTMLInputElement).value; }} + @keydown=${(e: KeyboardEvent) => { if (e.key === 'Enter') this._createWorkspace(); }} + style="width:100%;box-sizing:border-box;margin-bottom:6px"> +
+ { this._showCreateWorkspace = false; this._newWorkspaceName = ''; }}>Cancel + this._createWorkspace()}> + ${this._creatingWorkspace ? 'Creating...' : 'Create'} + +
+
+ ` : html` + + `} +
` : html`
@@ -407,6 +487,7 @@ export class FormicOSApp extends LitElement { { store.send('spawn_colony', this.activeWorkspaceId, e.detail); this._closeCreator(); @@ -514,6 +595,8 @@ export class FormicOSApp extends LitElement { 'tree': () => this._renderTree(), 'knowledge': () => this._renderKnowledge(), 'workspace': () => this._renderWorkspace(), + 'operations': () => html``, + 'addons': () => html``, 'playbook': () => this._renderPlaybook(), 'models': () => this._renderModels(), 'settings': () => this._renderSettings(), @@ -551,6 +634,7 @@ export class FormicOSApp extends LitElement { @send-colony-message=${(e: CustomEvent) => store.send('chat_colony', this.activeWorkspaceId, e.detail)} @confirm-preview=${(e: CustomEvent) => this._handleConfirmPreview(e)} @open-colony-editor=${() => this._openCreator()} + @update-config=${(e: CustomEvent) => store.send('update_config', this.activeWorkspaceId, e.detail)} >`; } @@ -595,17 +679,32 @@ export class FormicOSApp extends LitElement { .castes=${s.castes} .runtimeConfig=${s.runtimeConfig} @navigate=${(e: CustomEvent) => this.navTree(e.detail)} @update-config=${(e: CustomEvent) => store.send('update_config', sel.id, e.detail)} + @spawn-colony-request=${() => this._openCreator()} + @navigate-tab=${(e: CustomEvent) => this.navTab(e.detail)} >`; return nothing; } + private get _addonPanels() { + return store.state.addons.flatMap(a => + (a.panels ?? 
[]).map(p => ({ + target: p.target, + display_type: p.displayType, + path: p.path, + addon_name: p.addonName, + })) + ); + } + private _renderKnowledge() { return html``; + .sourceColonyId=${this.knowledgeSourceColony} + .addonPanels=${this._addonPanels}>`; } private _renderWorkspace() { - return html``; + return html``; } private _renderPlaybook() { @@ -627,7 +726,7 @@ export class FormicOSApp extends LitElement { private _renderSettings() { const s = store.state; - return html``; + return html``; } private _renderQueenChatRail() { @@ -693,38 +792,66 @@ export class FormicOSApp extends LitElement { this.creatorTemplateId = ''; } - private _renderConnectionIndicator(conn: string) { - const color = conn === 'connected' ? 'var(--v-success)' - : conn === 'connecting' ? 'var(--v-warn)' : 'var(--v-danger)'; - const label = conn === 'connecting' ? 'Connecting' - : conn === 'error' ? 'Retrying...' - : conn === 'connected' ? '' : 'Disconnected'; - return html` - - - ${label ? html`${label}` : nothing} - `; + private async _createWorkspace() { + if (!this._newWorkspaceName.trim()) return; + this._creatingWorkspace = true; + try { + const resp = await fetch('/api/v1/workspaces', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ name: this._newWorkspaceName.trim() }), + }); + if (resp.ok) { + this._showCreateWorkspace = false; + this._newWorkspaceName = ''; + } else { + const data = await resp.json().catch(() => ({})); + console.error('Failed to create workspace:', (data as Record).error ?? 
resp.statusText); + } + } catch (e) { + console.error('Failed to create workspace:', e); + } + this._creatingWorkspace = false; + } + + private async _fetchBudgetData() { + const wsId = store.state.tree?.[0]?.id; + if (!wsId) return; + try { + const resp = await fetch(`/api/v1/workspaces/${wsId}/autonomy-status`); + if (resp.ok) this._autonomyData = await resp.json() as typeof this._autonomyData; + } catch { /* popover shows what it has */ } } - private renderProtocolBar(ps: any) { - if (!ps) return nothing; - const aguiStatus = ps.agui?.status ?? 'inactive'; - const a2aStatus = ps.a2a?.status ?? 'inactive'; - const a2aDetail = a2aStatus === 'inactive' - ? (ps.a2a?.note ?? 'inactive') - : `${ps.a2a?.semantics ?? 'poll/result'} ${ps.a2a?.endpoint ?? '/a2a/tasks'}`; - const items = [ - { label: 'MCP', status: ps.mcp?.status, detail: `${ps.mcp?.tools ?? 0} tools` }, - { label: 'AG-UI', status: aguiStatus, detail: aguiStatus === 'inactive' ? 'inactive' : `${ps.agui?.events ?? 0} events` }, - { label: 'A2A', status: a2aStatus, detail: a2aDetail }, - ]; - return html`
${items.map(p => html` -
- - ${p.label} - ${p.detail} + private _renderBudgetPopover(totalCost: number) { + const perColonyCap = (store.state as any).runtimeConfig?.governance?.defaultBudgetPerColony ?? 1.0; + const a = this._autonomyData; + return html` +
+ Total spent + ${formatCost(totalCost)} +
+
+ Per-colony cap + ${formatCost(perColonyCap as number)}
- `)}
`; + ${a ? html` +
+ Daily maintenance + ${formatCost(a.budget_spent)} / ${formatCost(a.budget_total || a.daily_maintenance_budget || 0)} +
+
+ Autonomy + Grade ${a.grade} · ${a.level} +
+ ` : html` +
+ Daily maintenance + +
+ `} + { this._showBudgetPopover = false; this.navTab('settings'); }}>All budget settings \u2192 + `; } } diff --git a/frontend/src/components/knowledge-browser.ts b/frontend/src/components/knowledge-browser.ts index 9c820c8..2acb8cd 100644 --- a/frontend/src/components/knowledge-browser.ts +++ b/frontend/src/components/knowledge-browser.ts @@ -2,11 +2,24 @@ import { LitElement, html, css, nothing } from 'lit'; import { customElement, state, property } from 'lit/decorators.js'; import { voidTokens, sharedStyles } from '../styles/shared.js'; import { timeAgo } from '../helpers.js'; -import type { KnowledgeItemDetail, KnowledgeItemPreview, ContradictionPair, TrustRationale, KnowledgeProvenance, ForagerProvenance } from '../types.js'; +import type { KnowledgeItemDetail, KnowledgeItemPreview, ContradictionPair, TrustRationale, KnowledgeProvenance, ForagerProvenance, ProvenanceChainItem, UnifiedSearchResult, UnifiedSearchResponse } from '../types.js'; import './atoms.js'; +import './addon-panel.js'; import './knowledge-view.js'; +import './knowledge-search-results.js'; +import './knowledge-health-card.js'; -type SubView = 'catalog' | 'graph'; +interface AddonPanel { target: string; display_type: string; path: string; addon_name: string; } + +type SubView = 'catalog' | 'graph' | 'tree'; + +interface TreeBranch { + path: string; + label: string; + entryCount: number; + confidence: { alpha: number; beta: number; mean: number }; + children: TreeBranch[]; +} type FilterId = '' | 'skill' | 'experience'; type SortBy = 'newest' | 'confidence' | 'relevance'; type ThreadFilter = 'all' | 'thread' | 'workspace' | 'global'; @@ -258,6 +271,25 @@ export class FcKnowledgeBrowser extends LitElement { text-decoration-color: rgba(255,255,255,0.1); } .rel-link:hover { color: var(--v-fg); text-decoration-color: rgba(232,88,26,0.3); } + /* Wave 67.5: provenance timeline */ + .provenance-section { + margin-top: 8px; padding: 6px 8px; background: rgba(255,255,255,0.02); + border-radius: 4px; 
border: 1px solid var(--v-border); + } + .prov-header { + font-size: 8px; font-weight: 700; text-transform: uppercase; letter-spacing: 0.4px; + color: var(--v-fg-muted); margin-bottom: 4px; + } + .prov-item { + font-size: 9px; font-family: var(--f-mono); color: var(--v-fg-dim); line-height: 1.8; + display: flex; align-items: baseline; gap: 6px; flex-wrap: wrap; + } + .prov-time { color: var(--v-fg-muted); min-width: 50px; } + .prov-type { font-weight: 600; color: var(--v-accent); font-size: 8px; } + .prov-detail { flex: 1; } + .prov-delta { font-weight: 700; font-size: 8px; } + .prov-delta-pos { color: var(--v-success, #2DD4A8); } + .prov-delta-neg { color: var(--v-accent, #F06464); } /* Wave 63: inline edit + create forms */ .edit-overlay, .create-overlay { padding: 12px; margin-bottom: 10px; @@ -276,6 +308,87 @@ export class FcKnowledgeBrowser extends LitElement { .form-textarea { min-height: 80px; resize: vertical; } .form-row { display: flex; gap: 6px; margin-bottom: 6px; } .form-actions { display: flex; gap: 6px; margin-top: 8px; } + /* Wave 67: hierarchy tree view */ + .tree-branch { + padding: 6px 8px; margin-bottom: 2px; border-radius: 6px; cursor: pointer; + border: 1px solid transparent; transition: all 0.15s; + } + .tree-branch:hover { border-color: var(--v-border); background: rgba(255,255,255,0.02); } + .tree-branch-header { display: flex; align-items: center; gap: 8px; } + .tree-chevron { + font-size: 9px; color: var(--v-fg-dim); width: 12px; text-align: center; + transition: transform 0.15s; display: inline-block; + } + .tree-chevron.open { transform: rotate(90deg); } + .tree-label { font-family: var(--f-mono); font-size: 12px; color: var(--v-fg); font-weight: 600; } + .tree-count { + font-family: var(--f-mono); font-size: 9px; color: var(--v-fg-dim); + background: rgba(255,255,255,0.04); padding: 1px 5px; border-radius: 4px; + } + .tree-conf-bar { width: 60px; height: 3px; background: rgba(255,255,255,0.04); border-radius: 2px; overflow: hidden; } + 
.tree-conf-fill { height: 100%; border-radius: 2px; } + .tree-conf-pct { font-family: var(--f-mono); font-size: 9px; color: var(--v-fg-dim); min-width: 32px; text-align: right; } + .tree-children { padding-left: 20px; } + .tree-empty { font-size: 11px; color: var(--v-fg-dim); padding: 20px; text-align: center; } + /* Wave 69: Search-first layout */ + .search-hero { + margin-bottom: 16px; + } + .search-hero-input { + width: 100%; padding: 10px 14px; border-radius: 10px; + border: 1px solid var(--v-border); background: var(--v-surface); + color: var(--v-fg); font-size: 14px; font-family: var(--f-body); + outline: none; box-sizing: border-box; + transition: border-color 0.15s; + } + .search-hero-input::placeholder { color: var(--v-fg-dim); } + .search-hero-input:focus { border-color: var(--v-accent); } + .quick-stats { + font-family: var(--f-mono); font-size: 10.5px; color: var(--v-fg-muted); + margin-top: 8px; padding-left: 2px; + } + .quick-stats .sep { margin: 0 6px; color: var(--v-fg-dim); } + /* Wave 69 Track 9: Filter pills */ + .filter-strip { + display: flex; gap: 6px; align-items: center; flex-wrap: wrap; + margin-bottom: 12px; + } + .filter-group-label { + font-size: 8px; font-family: var(--f-mono); color: var(--v-fg-dim); + text-transform: uppercase; letter-spacing: 0.08em; margin-right: 2px; + } + .qf-pill { + padding: 4px 10px; border-radius: 12px; + font-family: var(--f-mono); font-size: 10px; font-weight: 600; + text-transform: uppercase; letter-spacing: 0.08em; + background: var(--v-surface); border: 1px solid var(--v-border); + cursor: pointer; transition: all 0.15s; user-select: none; color: var(--v-fg-dim); + } + .qf-pill.active { + background: rgba(232,88,26,0.08); border-color: rgba(232,88,26,0.3); + color: var(--v-accent); + } + .qf-pill:hover:not(.active) { border-color: rgba(232,88,26,0.15); } + /* Wave 69 Track 8: Detail mode toggle */ + .detail-mode-toggle { + font-size: 9px; font-family: var(--f-mono); padding: 3px 10px; border-radius: 8px; 
+ cursor: pointer; border: 1px solid var(--v-border); background: transparent; + color: var(--v-fg-dim); transition: all 0.15s; user-select: none; margin-left: auto; + } + .detail-mode-toggle.active { + background: rgba(232,88,26,0.08); border-color: rgba(232,88,26,0.2); + color: var(--v-accent); + } + .detail-mode-toggle:hover { border-color: rgba(232,88,26,0.25); color: var(--v-accent); } + /* Wave 69: Simple entry card (non-detail mode) */ + .simple-conf { + display: flex; align-items: center; gap: 4px; + font-size: 9px; font-family: var(--f-mono); font-weight: 600; + text-transform: uppercase; letter-spacing: 0.04em; + } + .simple-conf-high { color: var(--v-success, #2DD4A8); } + .simple-conf-medium { color: var(--v-warn, #F5B731); } + .simple-conf-low { color: var(--v-danger, #F06464); } `]; @property() workspaceId = ''; @@ -283,6 +396,8 @@ export class FcKnowledgeBrowser extends LitElement { @property() sourceColonyId = ''; /** Current thread ID for thread-scoped filtering (Wave 29). */ @property() threadId = ''; + /** Addon panels targeting this tab (Wave 66 T3). */ + @property({ type: Array }) addonPanels: AddonPanel[] = []; @state() private subView: SubView = 'catalog'; @state() private items: KnowledgeItemPreview[] = []; @@ -313,12 +428,34 @@ export class FcKnowledgeBrowser extends LitElement { @state() private _createContent = ''; @state() private _createCategory = 'experience'; @state() private _createDomain = ''; + /** Wave 67: hierarchy tree data. */ + @state() private _treeBranches: TreeBranch[] = []; + @state() private _treeLoading = false; + @state() private _treeExpanded: Set = new Set(); + /** Wave 67.5: provenance chain cache. */ + @state() private _provCache: Record = {}; + /** Wave 69 Track 8: Detail mode toggle (off = simple, on = power-user). */ + @state() private _detailMode = false; + /** Wave 72 Track 4: Ingest/reindex status. 
*/ + @state() private _ingestStatus = ''; + @state() private _reindexStatus = ''; + /** Wave 69 Track 7: Unified search results from /workspaces/{id}/search. */ + @state() private _unifiedResults: UnifiedSearchResult[] = []; + @state() private _unifiedSearchActive = false; + @state() private _unifiedLoading = false; + /** Wave 69 Track 9: Quick filter state. */ + @state() private _sourceFilter: 'all' | 'memory' | 'docs' | 'code' = 'all'; + @state() private _domainFilter = ''; + @state() private _statusFilter: 'all' | 'verified' | 'candidate' = 'all'; + /** Wave 69: Quick stats for search-first view. */ + @state() private _quickStats: { entryCount: number; domainCount: number } | null = null; private _debounceTimer = 0; connectedCallback() { super.connectedCallback(); void this._fetchItems(); + void this._fetchQuickStats(); } override updated(changed: Map) { @@ -407,7 +544,97 @@ export class FcKnowledgeBrowser extends LitElement { private _onSearchInput(e: Event) { this.searchQuery = (e.target as HTMLInputElement).value; clearTimeout(this._debounceTimer); - this._debounceTimer = window.setTimeout(() => void this._fetchItems(), 300); + if (!this._detailMode) { + // Wave 69: In simple mode, use unified search + this._debounceTimer = window.setTimeout(() => void this._unifiedSearch(), 300); + } else { + this._debounceTimer = window.setTimeout(() => void this._fetchItems(), 300); + } + } + + /** Wave 69 Track 7: Search across memory + addon indices via unified endpoint. 
*/ + private async _unifiedSearch() { + const q = this.searchQuery.trim(); + if (!q) { + this._unifiedResults = []; + this._unifiedSearchActive = false; + return; + } + if (!this.workspaceId) return; + this._unifiedLoading = true; + this._unifiedSearchActive = true; + try { + const params = new URLSearchParams({ q, limit: '10' }); + if (this._sourceFilter !== 'all') { + params.set('sources', this._sourceFilter); + } + const resp = await fetch( + `/api/v1/workspaces/${this.workspaceId}/search?${params}`, + ); + if (resp.ok) { + const data = await resp.json() as UnifiedSearchResponse; + let results = data.results; + // Client-side domain filter + if (this._domainFilter) { + results = results.filter(r => { + const domains = (r.metadata?.domains as string[]) ?? []; + return domains.includes(this._domainFilter); + }); + } + // Client-side status filter + if (this._statusFilter !== 'all') { + results = results.filter(r => { + const status = (r.metadata?.status as string) ?? ''; + return status === this._statusFilter; + }); + } + this._unifiedResults = results; + } else { + this._unifiedResults = []; + } + } catch { + this._unifiedResults = []; + } + this._unifiedLoading = false; + } + + /** Wave 69: Fetch quick stats for the search-first landing. 
*/ + private async _fetchQuickStats() { + if (!this.workspaceId) return; + try { + const resp = await fetch(`/api/v1/knowledge?workspace=${this.workspaceId}&limit=1`); + if (resp.ok) { + const data = await resp.json() as { items: KnowledgeItemPreview[]; total: number }; + const domainSet = new Set(); + // Use full items list to count domains (fetch a larger batch) + const resp2 = await fetch(`/api/v1/knowledge?workspace=${this.workspaceId}&limit=200`); + if (resp2.ok) { + const data2 = await resp2.json() as { items: KnowledgeItemPreview[]; total: number }; + for (const it of data2.items) { + for (const d of it.domains) domainSet.add(d); + } + } + this._quickStats = { entryCount: data.total, domainCount: domainSet.size }; + } + } catch { /* best-effort */ } + } + + /** Wave 69 Track 9: Set source filter and re-search. */ + private _setSourceFilter(f: 'all' | 'memory' | 'docs' | 'code') { + this._sourceFilter = f; + if (this._unifiedSearchActive) void this._unifiedSearch(); + } + + /** Wave 69 Track 9: Set domain filter and re-search. */ + private _setDomainFilter(d: string) { + this._domainFilter = this._domainFilter === d ? '' : d; + if (this._unifiedSearchActive) void this._unifiedSearch(); + } + + /** Wave 69 Track 9: Set status filter and re-search. 
*/ + private _setStatusFilter(s: 'all' | 'verified' | 'candidate') { + this._statusFilter = s; + if (this._unifiedSearchActive) void this._unifiedSearch(); } private _setFilter(f: FilterId) { @@ -428,6 +655,7 @@ export class FcKnowledgeBrowser extends LitElement { this.expandedId = id; // Wave 60: fetch relationships alongside detail void this._fetchRelationships(id); + void this._fetchProvenance(id); if (this.detailCache[id] || this.detailLoadingId === id) { return; } @@ -601,6 +829,40 @@ export class FcKnowledgeBrowser extends LitElement { } catch (err) { console.warn('fetchRelationships failed:', err); } } + // --- Wave 67.5: fetch provenance chain --- + private async _fetchProvenance(entryId: string) { + if (this._provCache[entryId]) return; + try { + const resp = await fetch(`/api/v1/knowledge/${encodeURIComponent(entryId)}/provenance`); + if (resp.ok) { + const data = await resp.json() as { chain: ProvenanceChainItem[] }; + this._provCache = { ...this._provCache, [entryId]: data.chain ?? [] }; + } + } catch { /* non-critical */ } + } + + private _renderProvenance(entryId: string) { + const chain = this._provCache[entryId]; + if (!chain || chain.length === 0) return nothing; + return html` +
+
Provenance
+ ${chain.map(item => html` +
+ ${timeAgo(item.timestamp)} + ${item.event_type.replace(/([A-Z])/g, ' $1').trim()} + ${item.detail} + ${item.confidence_delta != null ? html` + + ${item.confidence_delta >= 0 ? '+' : ''}${(item.confidence_delta * 100).toFixed(1)}% + + ` : nothing} +
+ `)} +
+ `; + } + // --- Wave 60 B2: submit operator feedback --- private async _submitFeedback(entryId: string, positive: boolean) { if (this._feedbackSent[entryId]) return; @@ -642,22 +904,169 @@ export class FcKnowledgeBrowser extends LitElement { ${this.sourceColonyId ? html`colony: ${this.sourceColonyId.slice(0, 12)}` : nothing} -
- { this.subView = 'catalog'; }}>Catalog - { this.subView = 'graph'; }}>Graph -
+ { this._detailMode = !this._detailMode; }} + title="${this._detailMode ? 'Switch to simple view' : 'Switch to detail view'}"> + ${this._detailMode ? '◆ Detail' : '◇ Simple'} + + ${this._detailMode ? html` +
+ { this.subView = 'catalog'; }}>Catalog + { this.subView = 'graph'; }}>Graph + { this.subView = 'tree'; void this._fetchTree(); }}>Tree +
+ ` : nothing}
- ${this.subView === 'graph' + + +
+ + + + + ${this._ingestStatus ? html` + ${this._ingestStatus} + ` : nothing} + ${this._reindexStatus ? html` + ${this._reindexStatus} + ` : nothing} +
+ + ${this.addonPanels.filter(p => p.target === 'knowledge').map(p => html` + + + `)} + + ${!this._detailMode + ? this._renderSearchFirst() + : this.subView === 'graph' ? html`
` + : this.subView === 'tree' + ? this._renderTreeView() : this._renderCatalog()} `; } + /** Wave 69 Track 7: Search-first default view. */ + private _renderSearchFirst() { + const stats = this._quickStats; + const topDomains = this.healthStats?.topDomains ?? []; + return html` +
+ + ${stats ? html` +
+ ${stats.entryCount} entries·${stats.domainCount} domains +
+ ` : nothing} +
+ + ${this._renderFilterStrip(topDomains)} + + ${this._unifiedLoading + ? html`
Searching\u2026
` + : this._unifiedSearchActive + ? html` { + this.expandedId = e.detail.id; + this._detailMode = true; + this.subView = 'catalog'; + void this._toggleDetail(e.detail.id); + }} + >` + : this._renderSimpleCatalog()} + `; + } + + /** Wave 69 Track 9: Source / domain / status filter pills. */ + private _renderFilterStrip(topDomains: [string, number][]) { + return html` +
+ Source + ${(['all', 'memory', 'docs', 'code'] as const).map(s => html` + this._setSourceFilter(s)}>${s} + `)} + + ${topDomains.length > 0 ? html` + Domain + ${topDomains.slice(0, 6).map(([d]) => html` + this._setDomainFilter(d)}>${d} + `)} + + ` : nothing} + Status + ${(['all', 'verified', 'candidate'] as const).map(s => html` + this._setStatusFilter(s)}>${s} + `)} +
+ `; + } + + /** Wave 69: Simplified catalog for non-detail mode (no score bars, no Beta, no provenance). */ + private _renderSimpleCatalog() { + if (this.loading) return html`
Loading knowledge entries\u2026
`; + const sorted = this.sorted; + if (sorted.length === 0) { + return this.total === 0 && !this.searchQuery.trim() + ? html`
+
No knowledge entries yet.
+
Knowledge is extracted automatically when colonies complete.
+
` + : html`
No entries match the current filter.
`; + } + return html`
${sorted.map(e => this._renderSimpleEntry(e))}
`; + } + + /** Wave 69 Track 8: Simple entry card (detail mode off). */ + private _renderSimpleEntry(e: KnowledgeItemPreview) { + const conf = this._betaConf(e); + const level = conf >= 0.7 ? 'high' : conf >= 0.4 ? 'medium' : 'low'; + const label = level === 'high' ? 'High' : level === 'medium' ? 'Medium' : 'Low'; + const snippet = e.summary || e.content_preview || ''; + return html` +
{ + this.expandedId = e.id; + this._detailMode = true; + this.subView = 'catalog'; + void this._toggleDetail(e.id); + }} style="cursor:pointer"> +
+ ${e.title || e.id.slice(0, 24)} + ${e.canonical_type} + ${e.status} +
+ ${snippet ? html` +
${snippet.slice(0, 160)}
+ ` : nothing} + +
+ `; + } + private _renderHealthWidget() { const h = this.healthStats; if (!h) return nothing; @@ -730,6 +1139,124 @@ export class FcKnowledgeBrowser extends LitElement {
`; } + /* ---- Wave 67: hierarchy tree view ---- */ + + private async _fetchTree() { + if (!this.workspaceId || this._treeLoading) return; + this._treeLoading = true; + try { + const res = await fetch(`/api/v1/workspaces/${this.workspaceId}/knowledge-tree`); + if (res.ok) { + const data = await res.json(); + this._treeBranches = data.branches ?? []; + } + } catch { /* ignore */ } + this._treeLoading = false; + } + + private _toggleBranch(path: string) { + const next = new Set(this._treeExpanded); + if (next.has(path)) next.delete(path); else next.add(path); + this._treeExpanded = next; + } + + private _filterByBranch(path: string) { + this.searchQuery = `path:${path}`; + this.subView = 'catalog'; + void this._fetchItems(); + } + + private _confColor(mean: number): string { + if (mean >= 0.7) return 'var(--v-tier-high, #2DD4A8)'; + if (mean >= 0.4) return 'var(--v-tier-moderate, #F5B731)'; + return 'var(--v-tier-exploratory, #F06464)'; + } + + // Wave 72 Track 4: Upload & Ingest (ported from knowledge-view.ts) + private _ingestUpload() { + if (!this.workspaceId) return; + const input = document.createElement('input'); + input.type = 'file'; + input.multiple = true; + input.accept = '.txt,.md,.py,.json,.yaml,.yml,.csv'; + input.onchange = async () => { + if (!input.files?.length) return; + const form = new FormData(); + for (const file of Array.from(input.files)) { + form.append(file.name, file); + } + this._ingestStatus = 'Ingesting...'; + try { + const resp = await fetch( + `/api/v1/workspaces/${this.workspaceId}/ingest`, + { method: 'POST', body: form }, + ); + if (resp.ok) { + const data = await resp.json() as { ingested?: Array<{ name: string; chunks: number }> }; + const items = (data.ingested ?? []) as Array<{ name: string; chunks: number }>; + const names = items.map((i: { name: string; chunks: number }) => `${i.name} (${i.chunks})`).join(', '); + this._ingestStatus = items.length > 0 ? 
`Ingested: ${names}` : 'No files ingested.'; + } else { + this._ingestStatus = 'Ingest failed.'; + } + } catch { + this._ingestStatus = 'Ingest error.'; + } + }; + input.click(); + } + + private async _triggerReindex(addonName: string) { + this._reindexStatus = `Reindexing ${addonName}...`; + try { + const resp = await fetch(`/api/v1/addons/${addonName}/trigger`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ handler: '' }), + }); + if (resp.ok) { + const data = await resp.json() as { result?: string }; + this._reindexStatus = `${addonName}: ${data.result ?? 'ok'}`; + } else { + this._reindexStatus = `${addonName}: failed`; + } + } catch { + this._reindexStatus = `${addonName}: error`; + } + } + + private _renderTreeView() { + if (this._treeLoading) return html`
Loading hierarchy...
`; + if (this._treeBranches.length === 0) return html`
No hierarchy branches yet. Entries will be organized as knowledge is created.
`; + return html`
+ ${this._treeBranches.map(b => this._renderBranch(b, 0))} +
`; + } + + private _renderBranch(branch: TreeBranch, depth: number) { + const expanded = this._treeExpanded.has(branch.path); + const hasChildren = branch.children.length > 0; + const pct = (branch.confidence.mean * 100).toFixed(0); + return html` +
+
+ { e.stopPropagation(); if (hasChildren) this._toggleBranch(branch.path); }} + style="visibility: ${hasChildren ? 'visible' : 'hidden'}">▶ + this._filterByBranch(branch.path)}>${branch.label} + ${branch.entryCount} +
+
+
+ ${pct}% +
+
+ ${expanded && hasChildren ? html`
+ ${branch.children.map(c => this._renderBranch(c, depth + 1))} +
` : nothing} + `; + } + private _renderCatalog() { const sorted = this.sorted; return html` @@ -870,7 +1397,7 @@ export class FcKnowledgeBrowser extends LitElement { /** Render horizontal stacked bar for score breakdown (Wave 35 A3). */ private _renderScoreBar(e: KnowledgeItemPreview) { - const sb = (e as Record).score_breakdown as Record | undefined; + const sb = ((e as Record).score_breakdown ?? (e as Record)._score_breakdown) as Record | undefined; if (!sb) return nothing; const signals = ['semantic', 'thompson', 'freshness', 'status', 'thread', 'cooccurrence', 'graph_proximity'] as const; const weights = (sb as Record).weights as Record | undefined ?? {}; @@ -1042,6 +1569,7 @@ export class FcKnowledgeBrowser extends LitElement { ${e.domains.map(d => html`${d}`)} ${e.tool_refs.map(t => html`${t}`)}
` : nothing} + ${this._renderScoreBar(e)}
${e.last_accessed ? html`
${timeAgo(e.last_accessed)}
@@ -1113,6 +1640,7 @@ export class FcKnowledgeBrowser extends LitElement { ${this._renderTrustPanel(detail)} ` : nothing} ${this._renderRelationships(e.id)} + ${this._renderProvenance(e.id)} ` : nothing} `; diff --git a/frontend/src/components/knowledge-health-card.ts b/frontend/src/components/knowledge-health-card.ts new file mode 100644 index 0000000..f6bcd6a --- /dev/null +++ b/frontend/src/components/knowledge-health-card.ts @@ -0,0 +1,162 @@ +/** + * Wave 72 Track 3: Knowledge health summary card. + * Compact widget showing knowledge quality metrics for a workspace. + */ +import { LitElement, html, css, nothing } from 'lit'; +import { customElement, property, state } from 'lit/decorators.js'; +import { voidTokens, sharedStyles } from '../styles/shared.js'; + +interface KnowledgeEntry { + entry_id: string; + title: string; + category: string; + domains: string[]; + confidence: number; + conf_alpha: number; + conf_beta: number; + status: string; +} + +interface ActionsResponse { + actions: { source_category: string }[]; + total: number; + counts_by_kind: Record; +} + +@customElement('fc-knowledge-health-card') +export class FcKnowledgeHealthCard extends LitElement { + static styles = [voidTokens, sharedStyles, css` + :host { display: block; } + .health-row { + display: flex; gap: 12px; flex-wrap: wrap; padding: 10px 14px; + background: rgba(255,255,255,0.02); border: 1px solid var(--v-border); + border-radius: 8px; font-family: var(--f-mono); font-size: 10px; + color: var(--v-fg-muted); align-items: center; + } + .stat { display: flex; flex-direction: column; gap: 1px; } + .stat-label { + font-size: 8px; text-transform: uppercase; letter-spacing: 0.5px; + color: var(--v-fg-dim); + } + .stat-value { + font-size: 13px; font-weight: 600; font-feature-settings: 'tnum'; + color: var(--v-fg); + } + .stat-value.warn { color: var(--v-warn, #F5B731); } + .stat-value.danger { color: var(--v-danger, #F06464); } + .divider { + width: 1px; height: 24px; background: 
var(--v-border); flex-shrink: 0; + } + .domain-tags { display: flex; gap: 3px; flex-wrap: wrap; } + .domain-tag { + font-size: 7.5px; padding: 1px 4px; border-radius: 3px; + background: rgba(255,255,255,0.04); color: var(--v-fg-dim); + border: 1px solid var(--v-border); + } + `]; + + @property() workspaceId = ''; + + @state() private _entries: KnowledgeEntry[] = []; + @state() private _reviewCount = 0; + @state() private _staleCount = 0; + @state() private _contradictionCount = 0; + @state() private _loading = true; + + connectedCallback(): void { + super.connectedCallback(); + this._fetchData(); + } + + updated(changed: Map): void { + if (changed.has('workspaceId') && this.workspaceId) { + this._fetchData(); + } + } + + private async _fetchData(): Promise { + if (!this.workspaceId) { this._loading = false; return; } + this._loading = true; + try { + const [kRes, aRes] = await Promise.all([ + fetch(`/api/v1/knowledge?workspace=${this.workspaceId}&limit=200`), + fetch(`/api/v1/workspaces/${this.workspaceId}/operations/actions?kind=knowledge_review`), + ]); + if (kRes.ok) { + const data = await kRes.json() as { entries?: KnowledgeEntry[] }; + this._entries = data.entries ?? []; + } + if (aRes.ok) { + const data = await aRes.json() as ActionsResponse; + this._reviewCount = data.total; + this._staleCount = data.actions.filter(a => a.source_category === 'stale_authority').length; + this._contradictionCount = data.actions.filter(a => a.source_category === 'contradiction').length; + } + } catch { /* endpoint unavailable */ } + this._loading = false; + } + + render() { + if (this._loading || this._entries.length === 0) return nothing; + + const total = this._entries.length; + const avgConf = total > 0 + ? this._entries.reduce((sum, e) => sum + (e.confidence ?? 0.5), 0) / total + : 0; + + // Top domains by entry count + const domainCounts = new Map(); + for (const e of this._entries) { + for (const d of (e.domains ?? [])) { + domainCounts.set(d, (domainCounts.get(d) ?? 
0) + 1); + } + } + const topDomains = [...domainCounts.entries()] + .sort((a, b) => b[1] - a[1]) + .slice(0, 4); + + return html` +
+
+ Entries + ${total} +
+
+
+ Pending Review + ${this._reviewCount} +
+
+
+ Avg Confidence + ${(avgConf * 100).toFixed(0)}% +
+ ${this._staleCount > 0 ? html` +
+
+ Stale + ${this._staleCount} +
+ ` : nothing} + ${this._contradictionCount > 0 ? html` +
+
+ Contradictions + ${this._contradictionCount} +
+ ` : nothing} + ${topDomains.length > 0 ? html` +
+
+ Top Domains +
+ ${topDomains.map(([d, c]) => html` + ${d} (${c}) + `)} +
+
+ ` : nothing} +
+ `; + } +} diff --git a/frontend/src/components/knowledge-search-results.ts b/frontend/src/components/knowledge-search-results.ts new file mode 100644 index 0000000..3e4d2d3 --- /dev/null +++ b/frontend/src/components/knowledge-search-results.ts @@ -0,0 +1,185 @@ +/** + * Wave 69 Track 7: Source-grouped search results from unified search endpoint. + * + * Renders results grouped by source with distinct card styling per source type. + */ +import { LitElement, html, css, nothing } from 'lit'; +import { customElement, property } from 'lit/decorators.js'; +import { voidTokens, sharedStyles } from '../styles/shared.js'; +import type { UnifiedSearchResult } from '../types.js'; +import './atoms.js'; + +@customElement('fc-knowledge-search-results') +export class FcKnowledgeSearchResults extends LitElement { + static styles = [voidTokens, sharedStyles, css` + :host { display: block; } + .source-group { margin-bottom: 16px; } + .source-header { + font-family: var(--f-display); font-size: 11px; font-weight: 600; + color: var(--v-fg-muted); text-transform: uppercase; letter-spacing: 0.08em; + margin-bottom: 8px; padding-left: 2px; + } + .result-list { display: flex; flex-direction: column; gap: 6px; } + .result-card { padding: 10px 12px; cursor: pointer; transition: border-color 0.15s; } + .result-card:hover { border-color: rgba(232,88,26,0.25); } + .result-title { + font-family: var(--f-display); font-size: 12px; font-weight: 600; + color: var(--v-fg); margin-bottom: 4px; word-break: break-word; + } + .result-snippet { + font-size: 11px; color: var(--v-fg-muted); line-height: 1.45; + max-height: 48px; overflow: hidden; word-break: break-word; + } + .result-snippet.code-snippet { + font-family: var(--f-mono); font-size: 10.5px; white-space: pre-wrap; + background: rgba(255,255,255,0.02); padding: 4px 6px; border-radius: 4px; + border: 1px solid var(--v-border); + } + .result-meta { display: flex; gap: 6px; align-items: center; flex-wrap: wrap; margin-top: 6px; } + 
.conf-indicator { display: flex; align-items: center; gap: 4px; } + .conf-label { + font-size: 9px; font-family: var(--f-mono); font-weight: 600; + text-transform: uppercase; letter-spacing: 0.04em; + } + .conf-high { color: var(--v-success, #2DD4A8); } + .conf-medium { color: var(--v-warn, #F5B731); } + .conf-low { color: var(--v-danger, #F06464); } + .domain-tag { + font-size: 8px; font-family: var(--f-mono); padding: 1px 5px; border-radius: 4px; + background: rgba(255,255,255,0.04); color: var(--v-fg-dim); border: 1px solid var(--v-border); + } + .status-badge { + font-size: 8px; font-family: var(--f-mono); padding: 1px 5px; border-radius: 4px; + font-weight: 600; letter-spacing: 0.04em; text-transform: uppercase; + } + .status-verified { background: rgba(45,212,168,0.1); color: var(--v-success); border: 1px solid rgba(45,212,168,0.2); } + .status-candidate { background: rgba(245,183,49,0.1); color: var(--v-warn); border: 1px solid rgba(245,183,49,0.2); } + .status-active { background: rgba(167,139,250,0.1); color: #A78BFA; border: 1px solid rgba(167,139,250,0.2); } + .file-path { + font-size: 10px; font-family: var(--f-mono); color: var(--v-fg-dim); + } + .line-range { + font-size: 9px; font-family: var(--f-mono); color: var(--v-accent); + } + .score-bar { + width: 40px; height: 3px; background: rgba(255,255,255,0.04); + border-radius: 2px; overflow: hidden; display: inline-block; vertical-align: middle; + } + .score-fill { height: 100%; border-radius: 2px; background: var(--v-accent); } + .empty-state { + padding: 24px; text-align: center; color: var(--v-fg-muted); + font-size: 12px; font-family: var(--f-body); + } + `]; + + @property({ type: Array }) results: UnifiedSearchResult[] = []; + @property() activeWorkspaceId = ''; + + private _groupResults(): Map { + const groups = new Map(); + for (const r of this.results) { + const existing = groups.get(r.source) ?? 
[]; + existing.push(r); + groups.set(r.source, existing); + } + return groups; + } + + private _confLevel(conf: number): 'high' | 'medium' | 'low' { + if (conf >= 0.7) return 'high'; + if (conf >= 0.4) return 'medium'; + return 'low'; + } + + private _confLabel(conf: number): string { + if (conf >= 0.7) return 'High'; + if (conf >= 0.4) return 'Medium'; + return 'Low'; + } + + private _isCodeSource(source: string): boolean { + return source === 'codebase-index' || source.includes('code'); + } + + private _onEntryClick(result: UnifiedSearchResult) { + if (result.source === 'memory') { + this.dispatchEvent(new CustomEvent('entry-selected', { + detail: { id: result.id }, + bubbles: true, composed: true, + })); + } else { + const filePath = (result.metadata?.file_path as string) || result.title; + this.dispatchEvent(new CustomEvent('file-selected', { + detail: { filePath, source: result.source }, + bubbles: true, composed: true, + })); + } + } + + private _renderMemoryResult(r: UnifiedSearchResult) { + const conf = (r.metadata?.confidence as number) ?? 0.5; + const level = this._confLevel(conf); + const domains = (r.metadata?.domains as string[]) ?? []; + const status = (r.metadata?.status as string) ?? ''; + return html` +
this._onEntryClick(r)}> +
${r.title || r.id.slice(0, 24)}
+ ${r.snippet ? html`
${r.snippet}
` : nothing} +
+
+ + ${this._confLabel(conf)} +
+ ${status ? html`${status}` : nothing} + ${domains.slice(0, 3).map(d => html`${d}`)} +
+
+ `; + } + + private _renderAddonResult(r: UnifiedSearchResult) { + const filePath = (r.metadata?.file_path as string) || r.title; + const lineRange = (r.metadata?.line_range as string) || ''; + const isCode = this._isCodeSource(r.source); + return html` +
this._onEntryClick(r)}> +
+ ${filePath} + ${lineRange ? html`:${lineRange}` : nothing} +
+ ${r.snippet ? html` +
${r.snippet}
+ ` : nothing} +
+ ${r.score > 0 ? html` + + ${r.score.toFixed(3)} + ` : nothing} +
+
+ `; + } + + render() { + if (this.results.length === 0) { + return html`
No results found. Try a different query.
`; + } + const groups = this._groupResults(); + return html` + ${Array.from(groups.entries()).map(([source, items]) => { + const label = items[0]?.source_label || source; + return html` +
+
From ${label}
+
+ ${items.map(r => source === 'memory' + ? this._renderMemoryResult(r) + : this._renderAddonResult(r) + )} +
+
+ `; + })} + `; + } +} diff --git a/frontend/src/components/mcp-servers-card.ts b/frontend/src/components/mcp-servers-card.ts new file mode 100644 index 0000000..d9f2568 --- /dev/null +++ b/frontend/src/components/mcp-servers-card.ts @@ -0,0 +1,472 @@ +/** + * Wave 70.5 Team A: MCP server management card. + * + * Self-contained Lit component that fetches bridge health and config + * from existing 70.0 contracts. No store dependency — Team C mounts it + * in settings-view without shared state changes. + */ + +import { LitElement, html, css, nothing } from 'lit'; +import { customElement, state } from 'lit/decorators.js'; +import { voidTokens, sharedStyles } from '../styles/shared.js'; + +interface McpServer { + name: string; + url: string; + status?: string; + toolCount?: number; + callCount?: number; + lastError?: string | null; +} + +interface BridgeHealth { + connectedServers: number; + unhealthyServers: number; + totalRemoteTools: number; + servers: McpServer[]; +} + +@customElement('fc-mcp-servers-card') +export class McpServersCard extends LitElement { + static styles = [voidTokens, sharedStyles, css` + :host { display: block; } + + .card { + background: var(--v-surface); + border: 1px solid var(--v-border); + border-radius: 10px; + padding: 16px 20px; + } + + .card-header { + display: flex; + align-items: center; + justify-content: space-between; + margin-bottom: 14px; + } + + .card-title { + font-family: var(--f-display); + font-size: 14px; + font-weight: 600; + color: var(--v-fg); + } + + .summary { + font-family: var(--f-mono); + font-size: 10px; + color: var(--v-fg-dim); + letter-spacing: 0.06em; + } + + .server-list { + display: flex; + flex-direction: column; + gap: 8px; + margin-bottom: 14px; + } + + .server-row { + display: flex; + align-items: center; + gap: 10px; + padding: 10px 12px; + background: var(--v-recessed); + border: 1px solid var(--v-border); + border-radius: 8px; + transition: border-color 0.15s; + } + .server-row:hover { border-color: 
var(--v-border-hover); } + + .health-dot { + width: 8px; + height: 8px; + border-radius: 50%; + flex-shrink: 0; + } + .health-dot.connected { background: var(--v-success); } + .health-dot.disconnected { background: var(--v-fg-dim); } + .health-dot.error { background: var(--v-danger); } + + .server-info { + flex: 1; + min-width: 0; + } + + .server-name { + font-family: var(--f-mono); + font-size: 12px; + font-weight: 600; + color: var(--v-fg); + } + + .server-url { + font-family: var(--f-mono); + font-size: 10px; + color: var(--v-fg-dim); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + } + + .server-meta { + font-family: var(--f-mono); + font-size: 9.5px; + color: var(--v-fg-dim); + margin-top: 2px; + } + + .server-error { + font-family: var(--f-mono); + font-size: 9px; + color: var(--v-danger); + margin-top: 2px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + } + + .server-actions { + display: flex; + gap: 6px; + flex-shrink: 0; + } + + .action-btn { + font-family: var(--f-mono); + font-size: 9px; + padding: 3px 8px; + border-radius: 5px; + cursor: pointer; + border: 1px solid var(--v-border); + background: rgba(255,255,255,0.02); + color: var(--v-fg-dim); + transition: all 0.15s; + text-transform: uppercase; + letter-spacing: 0.05em; + } + .action-btn:hover { + border-color: var(--v-border-hover); + color: var(--v-fg); + background: rgba(255,255,255,0.04); + } + .action-btn.danger:hover { + border-color: var(--v-danger); + color: var(--v-danger); + } + + .add-form { + display: flex; + gap: 8px; + align-items: flex-end; + } + + .field { + display: flex; + flex-direction: column; + gap: 3px; + flex: 1; + } + + .field label { + font-family: var(--f-mono); + font-size: 9.5px; + font-weight: 600; + color: var(--v-fg-dim); + letter-spacing: 0.1em; + text-transform: uppercase; + } + + .field input { + font-family: var(--f-mono); + font-size: 11px; + padding: 6px 10px; + border-radius: 6px; + border: 1px solid 
var(--v-border); + background: var(--v-recessed); + color: var(--v-fg); + outline: none; + transition: border-color 0.15s; + } + .field input:focus { border-color: var(--v-accent); } + .field input::placeholder { color: var(--v-fg-dim); } + + .add-btn { + font-family: var(--f-mono); + font-size: 10px; + padding: 6px 14px; + border-radius: 6px; + cursor: pointer; + border: 1px solid var(--v-accent); + background: rgba(232,88,26,0.1); + color: var(--v-accent); + font-weight: 600; + letter-spacing: 0.05em; + text-transform: uppercase; + transition: all 0.15s; + white-space: nowrap; + } + .add-btn:hover { + background: rgba(232,88,26,0.2); + } + .add-btn:disabled { + opacity: 0.4; + cursor: default; + } + + .empty { + text-align: center; + padding: 20px; + font-family: var(--f-body); + font-size: 12px; + color: var(--v-fg-dim); + } + + .status-msg { + font-family: var(--f-mono); + font-size: 10px; + margin-top: 8px; + padding: 6px 10px; + border-radius: 6px; + } + .status-msg.ok { + color: var(--v-success); + background: rgba(45,212,168,0.08); + } + .status-msg.err { + color: var(--v-danger); + background: rgba(240,100,100,0.08); + } + + @media (prefers-reduced-motion: reduce) { + .server-row, .action-btn, .add-btn, .field input { transition: none; } + } + `]; + + @state() private _health: BridgeHealth | null = null; + @state() private _bridgeInstalled = true; + @state() private _loading = true; + @state() private _newName = ''; + @state() private _newUrl = ''; + @state() private _saving = false; + @state() private _statusMsg = ''; + @state() private _statusOk = true; + + connectedCallback(): void { + super.connectedCallback(); + this._fetch(); + } + + private async _fetch(): Promise { + this._loading = true; + try { + const resp = await fetch('/api/v1/addons'); + if (!resp.ok) { + this._bridgeInstalled = false; + this._loading = false; + return; + } + const addons: Array> = await resp.json(); + const bridge = addons.find( + (a) => a.bridgeHealth !== undefined && 
a.bridgeHealth !== null, + ); + if (!bridge) { + // No addon exposes bridge health — check if mcp-bridge addon exists + const mcpAddon = addons.find((a) => a.name === 'mcp-bridge'); + this._bridgeInstalled = !!mcpAddon; + if (mcpAddon) { + // Installed but no health yet (no servers configured) + this._health = { + connectedServers: 0, + unhealthyServers: 0, + totalRemoteTools: 0, + servers: [], + }; + } + this._loading = false; + return; + } + this._bridgeInstalled = true; + this._health = bridge.bridgeHealth as BridgeHealth; + } catch { + this._bridgeInstalled = false; + } + this._loading = false; + } + + private async _addServer(): Promise { + const name = this._newName.trim(); + const url = this._newUrl.trim(); + if (!name || !url) return; + + this._saving = true; + this._statusMsg = ''; + + // Build updated server list + const existing = (this._health?.servers ?? []).map((s) => ({ + name: s.name, + url: s.url, + })); + existing.push({ name, url }); + + try { + const resp = await fetch('/api/v1/addons/mcp-bridge/config', { + method: 'PUT', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + workspace_id: '_global', + values: { mcp_servers: JSON.stringify(existing) }, + }), + }); + if (resp.ok) { + this._newName = ''; + this._newUrl = ''; + this._statusMsg = `Server "${name}" added`; + this._statusOk = true; + await this._fetch(); + } else { + const data = await resp.json().catch(() => ({})); + this._statusMsg = `Error: ${(data as Record).message ?? resp.statusText}`; + this._statusOk = false; + } + } catch (e) { + this._statusMsg = `Error: ${e}`; + this._statusOk = false; + } + this._saving = false; + } + + private async _removeServer(name: string): Promise { + const remaining = (this._health?.servers ?? 
[]) + .filter((s) => s.name !== name) + .map((s) => ({ name: s.name, url: s.url })); + + try { + const resp = await fetch('/api/v1/addons/mcp-bridge/config', { + method: 'PUT', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + workspace_id: '_global', + values: { mcp_servers: JSON.stringify(remaining) }, + }), + }); + if (resp.ok) { + this._statusMsg = `Server "${name}" removed`; + this._statusOk = true; + await this._fetch(); + } + } catch (e) { + this._statusMsg = `Error: ${e}`; + this._statusOk = false; + } + } + + render() { + if (this._loading) { + return html`
Loading MCP bridge status\u2026
`; + } + + if (!this._bridgeInstalled) { + return html` +
+
+ MCP Servers +
+
MCP bridge addon is not installed.
+
+ `; + } + + const servers = this._health?.servers ?? []; + const connected = this._health?.connectedServers ?? 0; + const tools = this._health?.totalRemoteTools ?? 0; + + return html` +
+
+ MCP Servers + + ${connected} connected \u00b7 ${tools} tools + +
+ + ${servers.length > 0 ? html` +
+ ${servers.map((s) => this._renderServer(s))} +
+ ` : html` +
No MCP servers configured. Add one below.
+ `} + + ${this._renderAddForm()} + + ${this._statusMsg ? html` +
+ ${this._statusMsg} +
+ ` : nothing} +
+ `; + } + + private _renderServer(s: McpServer) { + const status = s.status ?? 'disconnected'; + const dotClass = status === 'connected' ? 'connected' + : status === 'error' ? 'error' + : 'disconnected'; + + return html` +
+
+
+
${s.name}
+
${s.url}
+ ${(s.toolCount ?? 0) > 0 ? html` +
+ ${s.toolCount} tools \u00b7 ${s.callCount ?? 0} calls +
+ ` : nothing} + ${s.lastError ? html` +
${s.lastError}
+ ` : nothing} +
+
+ +
+
+ `; + } + + private _renderAddForm() { + return html` +
+
+ + { this._newName = (e.target as HTMLInputElement).value; }}> +
+
+ + { this._newUrl = (e.target as HTMLInputElement).value; }}> +
+ +
+ `; + } +} + +declare global { + interface HTMLElementTagNameMap { 'fc-mcp-servers-card': McpServersCard; } +} diff --git a/frontend/src/components/model-registry.ts b/frontend/src/components/model-registry.ts index a23161b..7547605 100644 --- a/frontend/src/components/model-registry.ts +++ b/frontend/src/components/model-registry.ts @@ -90,6 +90,11 @@ export class FcModelRegistry extends LitElement { @state() private recipes: Record = {}; @state() private saving = false; @state() private _lastRefreshed: number = 0; + @state() private _showAddForm = false; + @state() private _addAddress = ''; + @state() private _addProvider = ''; + @state() private _addCtx = 8192; + @state() private _showHidden = false; private _refreshTimer?: ReturnType; // Inline edit state for policy fields @@ -156,7 +161,45 @@ export class FcModelRegistry extends LitElement { `)} -
Model Policy
+
+ Model Policy + { this._showAddForm = !this._showAddForm; }}>+ Add Model + +
+ ${this._showAddForm ? html` +
+
+
+ + { this._addAddress = (e.target as HTMLInputElement).value; }}> +
+
+ + { this._addProvider = (e.target as HTMLInputElement).value; }}> +
+
+ + { this._addCtx = parseInt((e.target as HTMLInputElement).value, 10) || 8192; }}> +
+
+
+ { this._showAddForm = false; }}>Cancel + this._addModel()}> + ${this.saving ? 'Adding\u2026' : 'Add Model'} + +
+
` : nothing}
${this._renderGroupedRegistry(registry)}
@@ -279,8 +322,9 @@ export class FcModelRegistry extends LitElement { } private _renderGroupedRegistry(registry: ModelRegistryEntry[]) { + const filtered = this._showHidden ? registry : registry.filter(m => !m.hidden); const groups = new Map(); - for (const m of registry) { + for (const m of filtered) { const provider = providerOf(m.address); const list = groups.get(provider) ?? []; list.push(m); @@ -325,7 +369,7 @@ export class FcModelRegistry extends LitElement {
this._expandPolicy(m)}>
-
${primaryLabel}
+
${primaryLabel}${m.hidden ? html` (hidden)` : nothing}
${m.address} ${m.provider} @@ -375,6 +419,11 @@ export class FcModelRegistry extends LitElement {
+ this._toggleHidden(m.address, !m.hidden)}> + ${m.hidden ? '\u{1F441} Unhide' : '\u{1F6AB} Hide'} + + { this.policyExpanded = null; }}>Cancel
` : nothing} + ${this._renderBlastRadius()} +
Let me adjust
@@ -184,6 +237,26 @@ export class FcProposalCard extends LitElement { `; } + private _renderBlastRadius() { + const br = this.blastRadius; + if (!br) return nothing; + + return html` +
+
+ Blast Radius + ${br.score.toFixed(2)} + ${br.recommendation} +
+ ${br.factors.length > 0 ? html` +
    + ${br.factors.map(f => html`
  • ${f}
  • `)} +
+ ` : nothing} +
+ `; + } + private _renderOption(opt: ProposalOption) { const isFree = opt.estimated_cost?.toLowerCase().includes('free'); return html` diff --git a/frontend/src/components/queen-autonomy-card.ts b/frontend/src/components/queen-autonomy-card.ts new file mode 100644 index 0000000..4c88e8b --- /dev/null +++ b/frontend/src/components/queen-autonomy-card.ts @@ -0,0 +1,166 @@ +import { LitElement, html, css, nothing } from 'lit'; +import { customElement, property, state } from 'lit/decorators.js'; +import { voidTokens, sharedStyles } from '../styles/shared.js'; +import './atoms.js'; + +interface AutonomyComponents { + success_rate: number; + volume: number; + cost_efficiency: number; + operator_trust: number; +} + +interface AutonomyData { + score: number; + grade: string; + level: string; + components: AutonomyComponents; + recommendation: string; + daily_budget: number; + daily_spend: number; + auto_actions: string[]; +} + +const GRADE_COLORS: Record = { + a: 'var(--v-green, #22c55e)', + b: 'var(--v-green, #22c55e)', + c: 'var(--v-warning, #f59e0b)', + d: 'var(--v-danger, #ef4444)', + f: 'var(--v-danger, #ef4444)', +}; + +const COMPONENT_LABELS: [keyof AutonomyComponents, string, string][] = [ + ['success_rate', 'Success Rate', '40%'], + ['volume', 'Volume', '20%'], + ['cost_efficiency', 'Cost Eff.', '20%'], + ['operator_trust', 'Trust', '20%'], +]; + +@customElement('fc-queen-autonomy-card') +export class FcQueenAutonomyCard extends LitElement { + static styles = [voidTokens, sharedStyles, css` + :host { display: block; } + .grade-badge { + display: inline-flex; align-items: center; justify-content: center; + width: 28px; height: 28px; border-radius: 6px; font-family: var(--f-display); + font-size: 16px; font-weight: 700; color: var(--v-void); + } + .bar-track { + flex: 1; height: 6px; border-radius: 3px; background: rgba(255,255,255,0.06); + overflow: hidden; + } + .bar-fill { + height: 100%; border-radius: 3px; transition: width 0.3s; + } + .comp-row { + display: 
flex; align-items: center; gap: 6px; margin-bottom: 5px; + font-size: 10px; font-family: var(--f-mono); + } + .comp-label { width: 80px; color: var(--v-fg-dim); } + .comp-value { width: 32px; text-align: right; color: var(--v-fg); } + .comp-weight { width: 24px; text-align: right; color: var(--v-fg-dim); font-size: 8px; } + .recommendation { + font-size: 10px; font-family: var(--f-mono); color: var(--v-fg-dim); + line-height: 1.4; margin-top: 8px; padding: 6px 8px; + border-left: 2px solid var(--v-border); background: rgba(255,255,255,0.02); + } + .budget-bar { + height: 4px; border-radius: 2px; background: rgba(255,255,255,0.06); + margin-top: 6px; overflow: hidden; + } + .budget-fill { height: 100%; border-radius: 2px; background: var(--v-accent); } + `]; + + @property() workspaceId = ''; + @state() private _data: AutonomyData | null = null; + @state() private _expanded = false; + private _timer: ReturnType | null = null; + + connectedCallback() { + super.connectedCallback(); + void this._fetch(); + this._timer = setInterval(() => void this._fetch(), 60_000); + } + + disconnectedCallback() { + super.disconnectedCallback(); + if (this._timer) clearInterval(this._timer); + } + + updated(changed: Map) { + if (changed.has('workspaceId') && this.workspaceId) { + void this._fetch(); + } + } + + private async _fetch() { + if (!this.workspaceId) return; + try { + const resp = await fetch(`/api/v1/workspaces/${this.workspaceId}/autonomy-status`); + if (resp.ok) { + this._data = await resp.json() as AutonomyData; + } + } catch { /* silent */ } + } + + render() { + if (!this._data) return html`
Queen Health
Loading...
`; + + const d = this._data; + const gradeColor = GRADE_COLORS[d.grade.toLowerCase()] ?? 'var(--v-fg-dim)'; + const budgetPct = d.daily_budget > 0 ? Math.min(100, (d.daily_spend / d.daily_budget) * 100) : 0; + + return html` +
Queen Health
+
+
+ ${d.grade} + ${d.score}/100 + ${d.level} + $${d.daily_spend.toFixed(2)} / $${d.daily_budget.toFixed(2)} +
+ ${this._expanded ? this._renderBreakdown() : nothing} +
+ { this._expanded = !this._expanded; }}> + ${this._expanded ? 'Less' : 'Details'} + +
+
+ `; + } + + private _renderBreakdown() { + const d = this._data!; + const budgetPct = d.daily_budget > 0 ? Math.min(100, (d.daily_spend / d.daily_budget) * 100) : 0; + + return html` +
+ ${COMPONENT_LABELS.map(([key, label, weight]) => { + const val = d.components[key]; + return html` +
+ ${label} +
+
+
+ ${val.toFixed(2)} + (${weight}) +
`; + })} +
+
+
+
+
+ daily budget: $${d.daily_spend.toFixed(2)} / $${d.daily_budget.toFixed(2)} +
+ ${d.recommendation ? html` +
${d.recommendation}
+ ` : nothing} + `; + } +} + +declare global { + interface HTMLElementTagNameMap { 'fc-queen-autonomy-card': FcQueenAutonomyCard; } +} diff --git a/frontend/src/components/queen-budget-viz.ts b/frontend/src/components/queen-budget-viz.ts new file mode 100644 index 0000000..ebf39a1 --- /dev/null +++ b/frontend/src/components/queen-budget-viz.ts @@ -0,0 +1,101 @@ +import { LitElement, html, css, nothing } from 'lit'; +import { customElement, state } from 'lit/decorators.js'; +import { voidTokens, sharedStyles } from '../styles/shared.js'; +import './atoms.js'; + +interface BudgetSlot { + name: string; + fraction: number; + fallback_tokens: number; +} + +const SLOT_COLORS: Record = { + system_prompt: '#8b5cf6', + memory_retrieval: '#22c55e', + project_context: '#3b82f6', + project_plan: '#6366f1', + operating_procedures: '#f59e0b', + queen_journal: '#ef4444', + thread_context: '#06b6d4', + tool_memory: '#ec4899', + conversation_history: '#E8581A', +}; + +function displayName(name: string): string { + return name.replace(/_/g, ' ').replace(/\b\w/g, c => c.toUpperCase()); +} + +@customElement('fc-queen-budget-viz') +export class FcQueenBudgetViz extends LitElement { + static styles = [voidTokens, sharedStyles, css` + :host { display: block; } + .slot-row { + display: flex; align-items: center; gap: 6px; margin-bottom: 4px; + font-size: 10px; font-family: var(--f-mono); + } + .slot-label { width: 140px; color: var(--v-fg-dim); overflow: hidden; text-overflow: ellipsis; white-space: nowrap; } + .slot-bar-track { + flex: 1; height: 8px; border-radius: 4px; background: rgba(255,255,255,0.06); + overflow: hidden; + } + .slot-bar-fill { height: 100%; border-radius: 4px; transition: width 0.3s; } + .slot-pct { width: 30px; text-align: right; color: var(--v-fg); } + .slot-tokens { width: 55px; text-align: right; color: var(--v-fg-dim); font-size: 9px; } + .summary { + font-size: 10px; font-family: var(--f-mono); color: var(--v-fg-dim); cursor: pointer; + padding: 6px 8px; 
user-select: none; + } + .summary:hover { color: var(--v-fg-muted); } + `]; + + @state() private _slots: BudgetSlot[] = []; + @state() private _expanded = false; + @state() private _loaded = false; + + connectedCallback() { + super.connectedCallback(); + void this._fetch(); + } + + private async _fetch() { + try { + const resp = await fetch('/api/v1/queen-budget'); + if (resp.ok) { + const data = await resp.json() as { slots: BudgetSlot[] }; + this._slots = data.slots; + this._loaded = true; + } + } catch { /* silent */ } + } + + render() { + if (!this._loaded) return nothing; + + const largest = this._slots.reduce((a, b) => b.fraction > a.fraction ? b : a, this._slots[0]); + + return html` +
Context Budget
+
{ this._expanded = !this._expanded; }}> + ${this._expanded ? '\u25BE' : '\u25B8'} ${this._slots.length} slots, largest = ${displayName(largest?.name ?? '')} ${Math.round((largest?.fraction ?? 0) * 100)}% +
+ ${this._expanded ? html` +
+ ${this._slots.map(slot => html` +
+ ${displayName(slot.name)} +
+
+
+ ${Math.round(slot.fraction * 100)}% + ${slot.fallback_tokens} min +
+ `)} +
+ ` : nothing} + `; + } +} + +declare global { + interface HTMLElementTagNameMap { 'fc-queen-budget-viz': FcQueenBudgetViz; } +} diff --git a/frontend/src/components/queen-chat.ts b/frontend/src/components/queen-chat.ts index 8fa296a..ebc89f6 100644 --- a/frontend/src/components/queen-chat.ts +++ b/frontend/src/components/queen-chat.ts @@ -10,7 +10,7 @@ import { LitElement, html, css, nothing } from 'lit'; import { customElement, property, state, query } from 'lit/decorators.js'; import { voidTokens } from '../styles/shared.js'; import { timeAgo } from '../helpers.js'; -import type { QueenThread, QueenChatMessage, EventKind, PreviewCardMeta, ResultCardMeta, ProposalData, EditProposalMeta, ParallelResultMeta } from '../types.js'; +import type { QueenThread, QueenChatMessage, EventKind, PreviewCardMeta, ResultCardMeta, ProposalData, EditProposalMeta, ParallelResultMeta, ConsultedEntry, ThreadPlan } from '../types.js'; import './atoms.js'; import './directive-panel.js'; import './fc-preview-card.js'; @@ -18,6 +18,8 @@ import './fc-result-card.js'; import './proposal-card.js'; import './edit-proposal.js'; import './parallel-result.js'; +import './colony-progress-card.js'; +import './consulted-sources.js'; const kindColor: Record = { spawn: '#2DD4A8', merge: '#3DD6F5', metric: '#A78BFA', route: '#F5B731', pheromone: '#E8581A', @@ -134,6 +136,61 @@ export class FcQueenChat extends LitElement { border: 1px solid var(--v-border); background: var(--v-recessed); color: var(--v-fg); outline: none; margin-left: auto; } + + /* Wave 69: plan progress bar */ + .plan-bar { + display: flex; align-items: center; gap: 8px; + padding: 6px 12px; border-bottom: 1px solid var(--v-border); + background: rgba(255,255,255,0.01); cursor: pointer; + transition: background 0.15s; + } + @media (prefers-reduced-motion: reduce) { .plan-bar { transition: none; } } + .plan-bar:hover { background: rgba(255,255,255,0.025); } + .plan-title { + font-size: 10.5px; font-family: var(--f-body); color: 
var(--v-fg-muted); + flex: 1; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; + } + .plan-steps { display: flex; gap: 3px; align-items: center; } + .step-dot { + width: 8px; height: 8px; border-radius: 50%; + border: 1.5px solid var(--v-fg-dim); background: transparent; + transition: all 0.15s; + } + @media (prefers-reduced-motion: reduce) { .step-dot { transition: none; } } + .step-dot.completed { background: var(--v-success); border-color: var(--v-success); } + .step-dot.started { background: transparent; border-color: var(--v-accent); box-shadow: 0 0 4px rgba(232,88,26,0.3); } + .step-dot.blocked { background: var(--v-danger); border-color: var(--v-danger); } + .plan-count { + font-size: 9px; font-family: var(--f-mono); color: var(--v-fg-dim); + font-feature-settings: 'tnum'; white-space: nowrap; + } + .plan-expanded { + padding: 4px 12px 8px; border-bottom: 1px solid var(--v-border); + font-size: 10.5px; + } + .plan-step-row { + display: flex; align-items: center; gap: 6px; padding: 3px 0; + font-family: var(--f-body); color: var(--v-fg-muted); + } + .plan-step-idx { + font-family: var(--f-mono); font-size: 9px; color: var(--v-fg-dim); + width: 16px; text-align: right; font-feature-settings: 'tnum'; + } + .plan-step-desc { flex: 1; } + + /* Wave 69: inline progress card spacing */ + .progress-wrap { padding: 2px 12px 4px; } + + /* Wave 69: diff badge on result cards */ + .diff-badge { + display: inline-flex; align-items: center; gap: 4px; + font-size: 9px; font-family: var(--f-mono); padding: 2px 7px; + border-radius: 4px; border: 1px solid var(--v-border); + background: rgba(255,255,255,0.02); color: var(--v-fg-dim); + cursor: pointer; transition: all 0.15s; margin-top: 4px; + } + @media (prefers-reduced-motion: reduce) { .diff-badge { transition: none; } } + .diff-badge:hover { border-color: var(--v-border-hover); color: var(--v-fg); } `]; @property({ type: Array }) threads: QueenThread[] = []; @@ -146,12 +203,18 @@ export class FcQueenChat 
extends LitElement { /** Track which preview cards have been confirmed/cancelled (by message index). */ @state() private _confirmedPreviews = new Set(); @state() private _cancelledPreviews = new Set(); + /** Wave 69 Track 4: plan state for progress bar */ + @state() private _plan: ThreadPlan | null = null; + @state() private _planExpanded = false; @query('.messages') private messagesEl!: HTMLElement; private get activeThread(): QueenThread | undefined { return this.threads.find(t => t.id === this.activeThreadId) ?? this.threads[0]; } + private _lastFetchedThreadId = ''; + private _lastMsgCount = 0; + updated() { if (this.messagesEl) this.messagesEl.scrollTop = this.messagesEl.scrollHeight; if (this._queenPending) { @@ -160,6 +223,28 @@ export class FcQueenChat extends LitElement { this._queenPending = false; } } + // Wave 69: fetch plan on thread switch or new messages + const tid = this.activeThread?.id ?? ''; + const wsId = this.activeThread?.workspaceId ?? 'default'; + const msgCount = this.activeThread?.messages.length ?? 0; + if (tid && (tid !== this._lastFetchedThreadId || msgCount !== this._lastMsgCount)) { + this._lastFetchedThreadId = tid; + this._lastMsgCount = msgCount; + this._fetchPlan(wsId, tid); + } + } + + private async _fetchPlan(wsId: string, threadId: string) { + try { + const res = await fetch(`/api/v1/workspaces/${wsId}/threads/${threadId}/plan`); + if (res.ok) { + this._plan = await res.json(); + } else { + this._plan = null; + } + } catch { + this._plan = null; + } } render() { @@ -172,6 +257,7 @@ export class FcQueenChat extends LitElement { `)}
+
+ ${this._plan?.exists && this._plan.steps?.length ? this._renderPlanBar() : nothing}
${!active?.messages.length ? html`
Ask me anything \u2014 describe a task and I\u2019ll propose a plan
@@ -241,7 +327,8 @@ export class FcQueenChat extends LitElement { @preview-cancel=${() => this._handlePreviewCancel(idx)} @preview-open-editor=${this._handleOpenEditor} > -
`; + + ${this._renderProgressCards(m)}`; } if (renderType === 'result_card' && m.meta) { @@ -320,6 +407,7 @@ export class FcQueenChat extends LitElement {
).blast_radius ?? null} @proposal-action=${this._onProposalAction} >
`; @@ -341,6 +429,11 @@ export class FcQueenChat extends LitElement { ${m.role === 'queen' ? html` this._saveAsPreference(m.text)}>📌` : nothing}
${m.text}
+ ${m.role === 'queen' && (m.meta as Record | undefined)?.consulted_entries + ? html`
).consulted_entries as ConsultedEntry[]} + >
` + : nothing} `; } @@ -420,10 +513,72 @@ export class FcQueenChat extends LitElement { } } + /** Wave 69: render inline progress cards for spawned colonies. */ + private _renderProgressCards(m: QueenChatMessage) { + const meta = m.meta as Record | undefined; + if (!meta) return nothing; + + // Single colony spawn + const colonyId = meta.colony_id ?? meta.colonyId; + if (typeof colonyId === 'string' && colonyId) { + const task = (meta.task ?? '') as string; + return html`
+ +
`; + } + + // Parallel spawn — multiple colonies + const colonyIds = meta.colony_ids ?? meta.colonyIds; + if (Array.isArray(colonyIds) && colonyIds.length > 0) { + return html`${colonyIds.map((cid: string) => + html`
+ +
` + )}`; + } + + return nothing; + } + + /** Wave 69 Track 4: render plan progress bar. */ + private _renderPlanBar() { + const plan = this._plan; + if (!plan?.exists || !plan.steps?.length) return nothing; + + const completed = plan.steps.filter(s => s.status === 'completed').length; + const total = plan.steps.length; + + return html` +
{ this._planExpanded = !this._planExpanded; }}> + \u25B6 + ${plan.title ?? 'Plan'} +
+ ${plan.steps.map(s => html` + + `)} +
+ ${completed}/${total} +
+ ${this._planExpanded ? html` +
+ ${plan.approach ? html`
${plan.approach}
` : nothing} + ${plan.steps.map(s => html` +
+ ${s.index} + + ${s.description} +
+ `)} +
` : nothing} + `; + } + private switchThread(id: string) { // Reset card states when switching threads this._confirmedPreviews = new Set(); this._cancelledPreviews = new Set(); + this._plan = null; + this._planExpanded = false; this.dispatchEvent(new CustomEvent('switch-thread', { detail: id, bubbles: true, composed: true })); } diff --git a/frontend/src/components/queen-continuations.ts b/frontend/src/components/queen-continuations.ts new file mode 100644 index 0000000..64c74cb --- /dev/null +++ b/frontend/src/components/queen-continuations.ts @@ -0,0 +1,90 @@ +import { LitElement, html, css, nothing } from 'lit'; +import { customElement, property, state } from 'lit/decorators.js'; +import { voidTokens, sharedStyles } from '../styles/shared.js'; +import './atoms.js'; + +interface ContinuationCandidate { + thread_id: string; + description: string; + ready_for_autonomy: boolean; + blocked_reason: string | null; + priority: string; +} + +@customElement('fc-queen-continuations') +export class FcQueenContinuations extends LitElement { + static styles = [voidTokens, sharedStyles, css` + :host { display: block; } + .empty { font-size: 10px; color: var(--v-fg-dim); font-family: var(--f-mono); } + .candidate { + padding: 8px 10px; margin-bottom: 4px; display: flex; align-items: center; gap: 6px; + } + .desc { + font-size: 11px; font-family: var(--f-mono); color: var(--v-fg); + flex: 1; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; + } + .blocked-reason { + font-size: 8px; font-family: var(--f-mono); color: var(--v-fg-dim); + margin-top: 2px; + } + `]; + + @property() workspaceId = ''; + @state() private _candidates: ContinuationCandidate[] = []; + @state() private _loading = false; + private _timer: ReturnType | null = null; + + connectedCallback() { + super.connectedCallback(); + void this._fetch(); + this._timer = setInterval(() => void this._fetch(), 60_000); + } + + disconnectedCallback() { + super.disconnectedCallback(); + if (this._timer) 
clearInterval(this._timer); + } + + updated(changed: Map) { + if (changed.has('workspaceId') && this.workspaceId) { + void this._fetch(); + } + } + + private async _fetch() { + if (!this.workspaceId) return; + this._loading = true; + try { + const resp = await fetch(`/api/v1/workspaces/${this.workspaceId}/operations/summary`); + if (resp.ok) { + const data = await resp.json() as { continuation_candidates?: ContinuationCandidate[] }; + this._candidates = data.continuation_candidates ?? []; + } + } catch { /* silent */ } + this._loading = false; + } + + render() { + return html` +
Continuations
+ ${this._candidates.length === 0 ? html` +
${this._loading ? 'Loading...' : 'No pending continuations'}
+ ` : this._candidates.map(c => html` +
+ ${c.description.slice(0, 60)} + ${c.ready_for_autonomy + ? html`ready` + : html`blocked`} + ${c.priority} +
+ ${!c.ready_for_autonomy && c.blocked_reason ? html` +
${c.blocked_reason}
+ ` : nothing} + `)} + `; + } +} + +declare global { + interface HTMLElementTagNameMap { 'fc-queen-continuations': FcQueenContinuations; } +} diff --git a/frontend/src/components/queen-display-board.ts b/frontend/src/components/queen-display-board.ts new file mode 100644 index 0000000..698f746 --- /dev/null +++ b/frontend/src/components/queen-display-board.ts @@ -0,0 +1,214 @@ +/** + * Wave 74 Track 2: Queen display board. + * Filters journal entries with display_board metadata and renders them + * as a prioritized observation feed at the top of the Queen tab. + */ +import { LitElement, html, css, nothing } from 'lit'; +import { customElement, property, state } from 'lit/decorators.js'; +import { voidTokens } from '../styles/shared.js'; +import './atoms.js'; + +interface BoardEntry { + timestamp: string; + heading: string; + body: string; + source: string; + metadata: { display_board?: boolean; type?: string; priority?: string } | null; +} + +interface JournalData { + exists: boolean; + entries: BoardEntry[]; + totalEntries?: number; +} + +const PRIORITY_COLORS: Record = { + critical: 'var(--v-danger)', + attention: 'var(--v-warn)', + normal: 'var(--v-fg-dim)', +}; + +const TYPE_ICONS: Record = { + status: '\u25CF', + concern: '\u26A0', + observation: '\u25C6', + recommendation: '\u2192', +}; + +@customElement('fc-queen-display-board') +export class FcQueenDisplayBoard extends LitElement { + static styles = [voidTokens, css` + :host { display: block; } + + .board { + display: flex; flex-direction: column; gap: 3px; + margin-bottom: 8px; + } + + .board-entry { + display: flex; align-items: flex-start; gap: 8px; + padding: 6px 10px; border-radius: 5px; + background: rgba(255,255,255,0.015); + border-left: 2px solid var(--v-border); + transition: border-color 0.15s; + } + .board-entry:hover { border-left-color: var(--v-accent); } + + .board-entry.priority-critical { border-left-color: var(--v-danger); } + .board-entry.priority-attention { border-left-color: 
var(--v-warn); } + + .entry-icon { + font-size: 10px; flex-shrink: 0; margin-top: 1px; + } + + .entry-content { flex: 1; min-width: 0; } + + .entry-heading { + font-family: var(--f-mono); font-size: 10px; font-weight: 600; + color: var(--v-fg); overflow: hidden; text-overflow: ellipsis; + white-space: nowrap; + } + + .entry-body { + font-family: var(--f-mono); font-size: 9.5px; color: var(--v-fg-muted); + line-height: 1.4; overflow: hidden; text-overflow: ellipsis; + white-space: nowrap; + } + + .entry-ts { + font-family: var(--f-mono); font-size: 8px; color: var(--v-fg-dim); + white-space: nowrap; flex-shrink: 0; margin-top: 2px; + } + + .board-header { + display: flex; align-items: center; gap: 6px; margin-bottom: 6px; + } + .board-title { + font-size: 9px; font-family: var(--f-mono); font-weight: 700; + color: var(--v-fg-dim); letter-spacing: 0.12em; text-transform: uppercase; + } + .board-count { + font-size: 8px; font-family: var(--f-mono); color: var(--v-fg-dim); + padding: 1px 5px; border-radius: 4px; + background: rgba(255,255,255,0.04); + } + + .refresh-btn { + margin-left: auto; + font-family: var(--f-mono); font-size: 8px; font-weight: 600; + color: var(--v-fg-dim); background: none; border: none; + cursor: pointer; padding: 2px 6px; border-radius: 3px; + transition: color 0.15s; + } + .refresh-btn:hover { color: var(--v-fg-muted); } + + .empty-state { + font-family: var(--f-mono); font-size: 10px; color: var(--v-fg-dim); + padding: 8px 0; + } + + @media (prefers-reduced-motion: reduce) { + * { transition: none !important; } + } + `]; + + @property({ type: String }) workspaceId = ''; + @state() private _entries: BoardEntry[] = []; + @state() private _loaded = false; + @state() private _error = false; + + connectedCallback() { + super.connectedCallback(); + void this._fetch(); + } + + updated(changed: Map) { + if (changed.has('workspaceId') && this.workspaceId) { + void this._fetch(); + } + } + + private async _fetch() { + if (!this.workspaceId) return; + 
try { + const resp = await fetch( + `/api/v1/workspaces/${this.workspaceId}/queen-journal`, + ); + if (!resp.ok) { this._error = true; return; } + const data = await resp.json() as JournalData; + // Filter to display_board entries only + this._entries = (data.entries ?? []).filter( + e => e.metadata?.display_board === true, + ); + this._error = false; + } catch { + this._error = true; + } + this._loaded = true; + } + + private _formatTs(ts: string): string { + try { + const d = new Date(ts); + return d.toLocaleTimeString(undefined, { hour: '2-digit', minute: '2-digit' }); + } catch { + return ts; + } + } + + private _parseHeading(heading: string): { type: string; priority: string; label: string } { + // Format: "type:priority — label" e.g. "status:normal — Continuations ready" + const match = heading.match(/^(\w+):(\w+)\s*[—-]\s*(.*)$/); + if (match) { + return { type: match[1], priority: match[2], label: match[3] }; + } + return { type: 'observation', priority: 'normal', label: heading }; + } + + render() { + if (!this._loaded) return nothing; + if (this._error || this._entries.length === 0) return nothing; + + // Sort: critical first, then attention, then normal; within priority by recency (already sorted) + const priorityOrder: Record = { critical: 0, attention: 1, normal: 2 }; + const sorted = [...this._entries].sort((a, b) => { + const pa = priorityOrder[a.metadata?.priority ?? 'normal'] ?? 2; + const pb = priorityOrder[b.metadata?.priority ?? 'normal'] ?? 2; + return pa - pb; + }); + + // Show at most 8 entries + const visible = sorted.slice(0, 8); + + return html` +
+ \u25A3 Display Board + ${this._entries.length} + +
+
+ ${visible.map(e => { + const parsed = this._parseHeading(e.heading); + const icon = TYPE_ICONS[parsed.type] ?? '\u25CF'; + const color = PRIORITY_COLORS[parsed.priority] ?? 'var(--v-fg-dim)'; + return html` +
+ ${icon} +
+
${parsed.label}
+ ${e.body ? html`
${e.body}
` : nothing} +
+ ${this._formatTs(e.timestamp)} +
+ `; + })} +
+ `; + } +} + +declare global { + interface HTMLElementTagNameMap { + 'fc-queen-display-board': FcQueenDisplayBoard; + } +} diff --git a/frontend/src/components/queen-journal-panel.ts b/frontend/src/components/queen-journal-panel.ts new file mode 100644 index 0000000..6b262c2 --- /dev/null +++ b/frontend/src/components/queen-journal-panel.ts @@ -0,0 +1,193 @@ +/** + * Wave 71.5 Track 3A: Queen journal panel. + * Fetches from GET /api/v1/workspaces/{id}/queen-journal and renders + * the Queen's operational log as a scannable timeline. + */ +import { LitElement, html, css, nothing } from 'lit'; +import { customElement, property, state } from 'lit/decorators.js'; +import { voidTokens } from '../styles/shared.js'; +import './atoms.js'; + +interface JournalEntry { + timestamp: string; + heading: string; + body: string; +} + +interface JournalData { + exists: boolean; + entries: JournalEntry[]; +} + +@customElement('fc-queen-journal-panel') +export class FcQueenJournalPanel extends LitElement { + static styles = [voidTokens, css` + :host { display: block; } + + .entries { + display: flex; flex-direction: column; gap: 2px; + } + + .entry { + padding: 8px 10px; + border-left: 2px solid var(--v-border); + transition: border-color 0.15s; + } + .entry:hover { + border-left-color: var(--v-accent); + } + + .entry-header { + display: flex; align-items: baseline; gap: 8px; + margin-bottom: 3px; + } + .entry-ts { + font-family: var(--f-mono); font-size: 9px; color: var(--v-fg-dim); + white-space: nowrap; + } + .entry-heading { + font-family: var(--f-mono); font-size: 10.5px; font-weight: 600; + color: var(--v-fg); overflow: hidden; text-overflow: ellipsis; + } + .entry-body { + font-family: var(--f-mono); font-size: 10px; color: var(--v-fg-muted); + line-height: 1.5; white-space: pre-wrap; word-break: break-word; + } + + .controls { + display: flex; gap: 8px; margin-top: 8px; + } + .btn-sm { + font-family: var(--f-mono); font-size: 9px; font-weight: 600; + color: var(--v-fg-dim); 
background: rgba(255,255,255,0.03); + border: 1px solid var(--v-border); border-radius: 5px; + padding: 4px 10px; cursor: pointer; + transition: border-color 0.15s, color 0.15s; + } + .btn-sm:hover { + border-color: var(--v-border-hover); color: var(--v-fg-muted); + } + + .empty-state { + font-family: var(--f-mono); font-size: 10.5px; color: var(--v-fg-dim); + padding: 16px 0; text-align: center; line-height: 1.7; + } + .empty-hint { + font-size: 9.5px; color: var(--v-fg-dim); opacity: 0.7; + } + + .error-text { + font-family: var(--f-mono); font-size: 10px; color: var(--v-danger); + } + + @media (prefers-reduced-motion: reduce) { + * { transition: none !important; } + } + `]; + + @property({ type: String }) workspaceId = ''; + @state() private _data: JournalData | null = null; + @state() private _error = ''; + @state() private _limit = 10; + + connectedCallback() { + super.connectedCallback(); + void this._fetch(); + } + + updated(changed: Map) { + if (changed.has('workspaceId') && this.workspaceId) { + void this._fetch(); + } + } + + private async _fetch() { + if (!this.workspaceId) return; + try { + const resp = await fetch( + `/api/v1/workspaces/${this.workspaceId}/queen-journal`, + ); + if (!resp.ok) { + this._error = `HTTP ${resp.status}`; + return; + } + this._data = await resp.json() as JournalData; + this._error = ''; + } catch { + this._error = 'Failed to fetch journal'; + } + } + + private _formatTs(iso: string): string { + try { + const d = new Date(iso); + return d.toLocaleString(undefined, { + month: 'short', day: 'numeric', + hour: '2-digit', minute: '2-digit', + }); + } catch { + return iso; + } + } + + private _showMore() { + this._limit += 10; + } + + render() { + if (this._error) { + return html`
${this._error}
`; + } + if (!this._data) { + return html`
Loading journal\u2026
`; + } + + const entries = this._data.entries ?? []; + if (entries.length === 0) { + return html` +
+ No journal entries yet.
+ + The Queen records operational decisions and session boundaries here + as work progresses. + +
+ `; + } + + const visible = entries.slice(0, this._limit); + const hasMore = entries.length > this._limit; + + return html` +
+ ${visible.map(e => html` +
+
+ ${this._formatTs(e.timestamp)} + ${e.heading} +
+ ${e.body ? html` +
${e.body}
+ ` : nothing} +
+ `)} +
+
+ + ${hasMore ? html` + + ` : nothing} +
+ `; + } +} + +declare global { + interface HTMLElementTagNameMap { + 'fc-queen-journal-panel': FcQueenJournalPanel; + } +} diff --git a/frontend/src/components/queen-overrides.ts b/frontend/src/components/queen-overrides.ts new file mode 100644 index 0000000..6443496 --- /dev/null +++ b/frontend/src/components/queen-overrides.ts @@ -0,0 +1,293 @@ +/** + * Wave 74 Team C: Queen behavioral override forms. + * Workspace-scoped overrides injected into Queen context via WorkspaceConfigChanged. + * Mount contract: see Track 1e in wave_74/team_c_prompt.md. + */ +import { LitElement, html, css, nothing } from 'lit'; +import { customElement, property, state } from 'lit/decorators.js'; +import { voidTokens, sharedStyles } from '../styles/shared.js'; + +/** Known Queen tool names — derived from queen_tools.py tool_specs(). */ +const QUEEN_TOOLS = [ + 'spawn_colony', 'spawn_parallel', 'approve_config_change', 'get_status', + 'kill_colony', 'list_templates', 'inspect_template', 'inspect_colony', + 'read_workspace_files', 'suggest_config_change', 'redirect_colony', + 'escalate_colony', 'read_colony_output', 'memory_search', + 'write_workspace_file', 'queen_note', 'set_thread_goal', 'complete_thread', + 'archive_thread', 'query_service', 'define_workflow_steps', 'propose_plan', + 'mark_plan_step', 'query_outcomes', 'analyze_colony', 'query_briefing', + 'search_codebase', 'run_command', 'edit_file', 'run_tests', 'delete_file', + 'retry_colony', 'batch_command', 'summarize_thread', 'draft_document', + 'list_addons', 'trigger_addon', 'set_workspace_tags', + 'propose_project_milestone', 'complete_project_milestone', + 'check_autonomy_budget', +] as const; + +@customElement('fc-queen-overrides') +export class FcQueenOverrides extends LitElement { + + static styles = [voidTokens, sharedStyles, css` + :host { display: block; } + .section { + margin-bottom: 16px; padding: 12px; + background: var(--v-recessed); border: 1px solid var(--v-border); + border-radius: 8px; + } + .section-title 
{ + font-size: 10px; font-family: var(--f-mono); font-weight: 700; + color: var(--v-fg-dim); letter-spacing: 0.08em; text-transform: uppercase; + margin: 0 0 8px; + } + .section-desc { + font-size: 10px; color: var(--v-fg-muted); margin: 0 0 8px; line-height: 1.4; + } + .tool-grid { + display: grid; grid-template-columns: repeat(auto-fill, minmax(180px, 1fr)); + gap: 4px 8px; + } + .tool-check { + display: flex; align-items: center; gap: 4px; + font-size: 10px; font-family: var(--f-mono); color: var(--v-fg-dim); + cursor: pointer; + } + .tool-check input { accent-color: var(--v-accent, #e8581a); cursor: pointer; } + .tool-check.disabled-tool { color: var(--v-fg-muted); text-decoration: line-through; } + textarea, .json-editor { + width: 100%; box-sizing: border-box; padding: 8px 10px; + background: var(--v-bg); border: 1px solid var(--v-border); + border-radius: 6px; color: var(--v-fg); font-family: var(--f-mono); + font-size: 11px; outline: none; resize: vertical; min-height: 60px; + } + textarea:focus, .json-editor:focus { border-color: rgba(232,88,26,0.3); } + .json-error { + font-size: 9px; color: var(--v-danger, #ef4444); margin-top: 4px; + font-family: var(--f-mono); + } + .save-row { + display: flex; justify-content: flex-end; margin-top: 8px; gap: 6px; + } + .save-btn { + font-size: 10px; font-family: var(--f-mono); padding: 4px 12px; + border-radius: 4px; border: 1px solid var(--v-border); cursor: pointer; + background: var(--v-accent, #e8581a); color: #fff; font-weight: 600; + } + .save-btn:hover { opacity: 0.85; } + .save-btn[disabled] { opacity: 0.4; cursor: not-allowed; } + .disabled-count { + font-size: 9px; font-family: var(--f-mono); color: var(--v-fg-muted); + margin-bottom: 6px; + } + `]; + + @property() workspaceId = ''; + @property({ type: Object }) workspace: any = null; // eslint-disable-line @typescript-eslint/no-explicit-any + + @state() private _disabledTools: string[] = []; + @state() private _customRules = ''; + @state() private 
_teamCompJson = ''; + @state() private _teamCompError = ''; + @state() private _roundBudgetJson = ''; + @state() private _roundBudgetError = ''; + @state() private _initialized = false; + + override willUpdate() { + if (!this._initialized && this.workspace?.config) { + this._loadFromConfig(this.workspace.config); + this._initialized = true; + } + } + + private _loadFromConfig(cfg: Record) { + // Disabled tools + const dt = cfg['queen.disabled_tools']; + if (dt) { + try { + const parsed = typeof dt === 'string' ? JSON.parse(dt) : dt; + if (Array.isArray(parsed)) this._disabledTools = parsed; + } catch { /* ignore parse errors */ } + } + // Custom rules + const cr = cfg['queen.custom_rules']; + if (cr) { + try { + this._customRules = typeof cr === 'string' + ? (cr.startsWith('"') ? JSON.parse(cr) : cr) : String(cr); + } catch { this._customRules = String(cr); } + } + // Team composition + const tc = cfg['queen.team_composition']; + if (tc) { + try { + const obj = typeof tc === 'string' ? JSON.parse(tc) : tc; + this._teamCompJson = JSON.stringify(obj, null, 2); + } catch { this._teamCompJson = String(tc); } + } + // Round budget + const rb = cfg['queen.round_budget']; + if (rb) { + try { + const obj = typeof rb === 'string' ? 
JSON.parse(rb) : rb; + this._roundBudgetJson = JSON.stringify(obj, null, 2); + } catch { this._roundBudgetJson = String(rb); } + } + } + + private _emitConfig(field: string, value: string) { + this.dispatchEvent(new CustomEvent('update-config', { + detail: { field, value }, + bubbles: true, + composed: true, + })); + } + + // ── Disabled tools ── + + private _toggleTool(name: string) { + if (this._disabledTools.includes(name)) { + this._disabledTools = this._disabledTools.filter(t => t !== name); + } else { + this._disabledTools = [...this._disabledTools, name]; + } + } + + private _saveDisabledTools() { + this._emitConfig('queen.disabled_tools', JSON.stringify(this._disabledTools)); + } + + // ── Custom rules ── + + private _saveCustomRules() { + this._emitConfig('queen.custom_rules', JSON.stringify(this._customRules)); + } + + // ── Team composition ── + + private _onTeamCompInput(value: string) { + this._teamCompJson = value; + if (!value.trim()) { this._teamCompError = ''; return; } + try { JSON.parse(value); this._teamCompError = ''; } + catch { this._teamCompError = 'Invalid JSON'; } + } + + private _saveTeamComp() { + if (this._teamCompError) return; + this._emitConfig('queen.team_composition', this._teamCompJson.trim() || '{}'); + } + + // ── Round budget ── + + private _onRoundBudgetInput(value: string) { + this._roundBudgetJson = value; + if (!value.trim()) { this._roundBudgetError = ''; return; } + try { JSON.parse(value); this._roundBudgetError = ''; } + catch { this._roundBudgetError = 'Invalid JSON'; } + } + + private _saveRoundBudget() { + if (this._roundBudgetError) return; + this._emitConfig('queen.round_budget', this._roundBudgetJson.trim() || '{}'); + } + + // ── Render ── + + override render() { + return html` + ${this._renderDisabledTools()} + ${this._renderCustomRules()} + ${this._renderTeamComp()} + ${this._renderRoundBudget()} + `; + } + + private _renderDisabledTools() { + const disabled = this._disabledTools; + return html` +
+
Disabled Tools
+

+ Checked tools will require operator confirmation before the Queen can use them. +

+ ${disabled.length > 0 ? html` +
${disabled.length} tool${disabled.length !== 1 ? 's' : ''} disabled
+ ` : nothing} +
+ ${QUEEN_TOOLS.map(name => html` + + `)} +
+
+ +
+
+ `; + } + + private _renderCustomRules() { + return html` +
+
Custom Rules
+

+ Free-text behavioral guidance injected into the Queen's context. + Use for workspace-specific instructions, priorities, or constraints. +

+ +
+ +
+
+ `; + } + + private _renderTeamComp() { + return html` +
+
Team Composition Overrides
+

+ JSON mapping task types to team shapes. Overrides the Queen's default team suggestions. +

+ + ${this._teamCompError ? html`
${this._teamCompError}
` : nothing} +
+ +
+
+ `; + } + + private _renderRoundBudget() { + return html` +
+
Round / Budget Overrides
+

+ JSON mapping complexity tiers to round and budget limits. +

+ + ${this._roundBudgetError ? html`
${this._roundBudgetError}
` : nothing} +
+ +
+
+ `; + } +} + +declare global { + interface HTMLElementTagNameMap { 'fc-queen-overrides': FcQueenOverrides; } +} diff --git a/frontend/src/components/queen-overview.ts b/frontend/src/components/queen-overview.ts index 637615d..9e64f43 100644 --- a/frontend/src/components/queen-overview.ts +++ b/frontend/src/components/queen-overview.ts @@ -1,37 +1,23 @@ import { LitElement, html, css, nothing } from 'lit'; import { customElement, property, state } from 'lit/decorators.js'; import { voidTokens, sharedStyles } from '../styles/shared.js'; -import { allColonies, colonyName, providerOf, providerColor, formatCost } from '../helpers.js'; -import type { TreeNode, ApprovalRequest, QueenThread, LocalModel, CasteDefinition, Colony, SkillBankStats, CloudEndpoint, RuntimeConfig } from '../types.js'; +import { allColonies, formatCost } from '../helpers.js'; +import type { TreeNode, ApprovalRequest, QueenThread, LocalModel, CasteDefinition, SkillBankStats, CloudEndpoint, RuntimeConfig } from '../types.js'; import './atoms.js'; import './queen-chat.js'; import './approval-queue.js'; import './proactive-briefing.js'; -import './config-memory.js'; import './demo-guide.js'; -import './learning-card.js'; import './budget-panel.js'; - -interface OutcomeSummary { - total_colonies: number; - succeeded: number; - failed: number; - total_cost: number; - total_extracted: number; - total_accessed: number; - avg_quality: number; - maintenance_spend: number; - total_reasoning_tokens?: number; - total_cache_read_tokens?: number; -} - -interface ColonyOutcomeData { - colony_id: string; - quality_score: number; - total_cost: number; - entries_extracted: number; - maintenance_source: string | null; -} +import './project-plan-card.js'; +import './queen-display-board.js'; +import './queen-tool-stats.js'; +import './queen-continuations.js'; +import './queen-autonomy-card.js'; +import './queen-budget-viz.js'; +import './queen-overrides.js'; +import './operating-procedures-editor.js'; +import 
'./queen-journal-panel.js'; @customElement('fc-queen-overview') export class FcQueenOverview extends LitElement { @@ -42,49 +28,7 @@ export class FcQueenOverview extends LitElement { .title-row h1 { font-family: var(--f-display); font-size: 22px; font-weight: 700; color: var(--v-fg); letter-spacing: -0.04em; margin: 0; } .title-icon { font-size: 22px; filter: drop-shadow(0 0 8px var(--v-accent-glow)); } .subtitle { font-size: 11px; color: var(--v-fg-muted); margin: 0 0 14px; } - .section-header { - font-size: 9px; font-family: var(--f-mono); font-weight: 700; - color: var(--v-fg-dim); letter-spacing: 0.12em; text-transform: uppercase; - margin: 16px 0 8px; padding-bottom: 4px; border-bottom: 1px solid var(--v-border); - } - .resource-grid { display: grid; grid-template-columns: repeat(4, 1fr); gap: 8px; margin-bottom: 14px; } - .health-grid { display: grid; grid-template-columns: repeat(3, 1fr); gap: 8px; margin-bottom: 16px; } - .ws-section { margin-bottom: 20px; } - .ws-header { display: flex; align-items: center; gap: 6px; margin-bottom: 8px; } - .colony-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 8px; } - .colony-card { padding: 12px; } - .col-header { display: flex; align-items: center; gap: 5px; margin-bottom: 3px; flex-wrap: wrap; } - .col-name { font-family: var(--f-display); font-size: 12px; font-weight: 600; color: var(--v-fg); } - .col-id { font-size: 8px; color: var(--v-fg-dim); margin-bottom: 2px; } - .col-task { font-size: 10px; color: var(--v-fg-muted); margin-bottom: 5px; line-height: 1.35; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; } - .col-meta { display: flex; gap: 8px; font-size: 9.5px; font-family: var(--f-mono); color: var(--v-fg-muted); font-feature-settings: 'tnum'; align-items: center; flex-wrap: wrap; } - .progress { height: 2px; background: rgba(255,255,255,0.03); border-radius: 1px; margin-top: 7px; } - .progress-fill { height: 100%; border-radius: 1px; transition: width 0.4s; } - .knowledge-badge { 
font-size: 8px; font-family: var(--f-mono); padding: 1px 5px; border-radius: 6px; background: rgba(163,130,250,0.15); color: var(--v-purple); font-weight: 600; } - .outcome-badge { - font-size: 8px; font-family: var(--f-mono); padding: 1px 5px; border-radius: 6px; font-weight: 600; - } - .outcome-quality { background: rgba(45,212,168,0.12); color: var(--v-success); } - .outcome-spinning { background: rgba(245,183,49,0.12); color: var(--v-warn); } - .outcome-cost { background: rgba(232,88,26,0.08); color: var(--v-accent); } - .provider-dots { display: flex; gap: 2px; margin-left: auto; align-items: center; } - .provider-dot { width: 5px; height: 5px; border-radius: 50%; flex-shrink: 0; } - .posture-card { padding: 12px; display: flex; flex-direction: column; gap: 4px; } - .posture-label { font-size: 9px; font-family: var(--f-mono); color: var(--v-fg-dim); text-transform: uppercase; letter-spacing: 0.5px; } - .posture-value { font-size: 13px; font-family: var(--f-mono); color: var(--v-fg); font-weight: 600; } - .posture-detail { font-size: 10px; font-family: var(--f-mono); color: var(--v-fg-muted); } - .posture-autonomy { display: inline-block; padding: 1px 6px; border-radius: 4px; font-size: 9px; font-family: var(--f-mono); font-weight: 600; text-transform: uppercase; } - .autonomy-suggest { background: rgba(91,156,245,0.12); color: var(--v-blue); } - .autonomy-auto_notify { background: rgba(245,183,49,0.12); color: var(--v-warn); } - .autonomy-autonomous { background: rgba(45,212,168,0.12); color: var(--v-success); } - .service-section { margin-bottom: 20px; } - .service-card { padding: 12px; border-color: rgba(34,211,238,0.12); } - .service-card:hover { border-color: rgba(34,211,238,0.25); } - .service-icon { font-size: 10px; color: var(--v-service); } - .service-type-label { - font-size: 8px; font-family: var(--f-mono); font-weight: 700; color: var(--v-service); - letter-spacing: 0.1em; text-transform: uppercase; - } + .health-grid { display: grid; 
grid-template-columns: 1fr 1fr; gap: 8px; margin-bottom: 16px; } .chat-column { width: 320px; flex-shrink: 0; min-height: 0; overflow: hidden; display: flex; flex-direction: column; gap: 8px; } .chat-column.expanded { width: min(720px, 100%); } .chat-actions { display: flex; justify-content: flex-end; } @@ -132,7 +76,6 @@ export class FcQueenOverview extends LitElement { background: rgba(91,156,245,0.12); color: var(--v-blue); white-space: nowrap; } - .recent-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 8px; margin-bottom: 16px; } /* Wave 49: compact status header above chat in chat-first mode */ .compact-header { display: flex; align-items: center; gap: 10px; padding: 8px 12px; @@ -160,22 +103,12 @@ export class FcQueenOverview extends LitElement { @property({ type: Object }) runtimeConfig: RuntimeConfig | null = null; /** Wave 60.5: dashboard-first default. Chat available on demand. */ @state() private chatExpanded = false; - @state() private _fedPeers: { instanceId: string; trustScore: number; lastSync: string; eventsPending: number }[] = []; - @state() private _fedLoaded = false; - private _fedWorkspaceId = ''; - @state() private _outcomeSummary: OutcomeSummary | null = null; - @state() private _outcomeMap: Map = new Map(); - @state() private _outcomesFailed = false; - @state() private _fedFailed = false; - private _outcomesWorkspaceId = ''; render() { const cols = allColonies(this.tree); const running = cols.filter(c => c.status === 'running'); - const completed = cols.filter(c => c.status === 'completed'); const totalCost = cols.reduce((a, c) => a + ((c as any).cost ?? 0), 0); const totalTok = cols.reduce((a, c) => a + ((c as any).agents ?? []).reduce((b: number, ag: any) => b + (ag.tokens ?? 0), 0), 0); - const recentCompleted = completed.slice(-6).reverse(); return html`
@@ -195,7 +128,12 @@ export class FcQueenOverview extends LitElement { ` : nothing}

- + + + + @@ -207,9 +145,12 @@ export class FcQueenOverview extends LitElement { > ` : nothing} - + + + + ${this.approvals.length > 0 ? html` this.re('approve', e.detail)} @@ -218,56 +159,26 @@ export class FcQueenOverview extends LitElement { ${this._renderActivePlans()} - +
- ${this._renderKnowledgePulse()} - ${this._renderMaintenancePosture()} - ${this._renderFederationSummary()} + +
- - + + - - - - ${this._renderServiceColonies(cols)} + +
+ + +
- - ${running.length > 0 ? html` -
\u25B6 Running
- ` : nothing} - ${cols.length === 0 ? html` -
-
\u265B
-
Ready to orchestrate
-
Describe a task below, or pick a template to spawn your first colony.
- this.re('spawn-colony-request', null)}>+ Spawn Colony -
- ` : this.tree.map(ws => { - const wsCols = allColonies([ws]).filter(c => c.status === 'running'); - if (wsCols.length === 0) return nothing; - return html` -
-
-
\u25A3 ${ws.name}
- ${(ws as any).config?.strategy ?? 'stigmergic'} -
-
- ${wsCols.map(c => this.renderColonyCard(c as Colony))} -
-
`; - })} + + - - ${recentCompleted.length > 0 ? html` -
\u2713 Recent Completions
-
- ${recentCompleted.map(c => this.renderColonyCard(c as Colony))} -
- ` : nothing} + +
@@ -281,85 +192,6 @@ export class FcQueenOverview extends LitElement {
`; } - private renderColonyCard(c: Colony) { - const sk = c.skillsExtracted ?? 0; - const convHistory = (c as any).convergenceHistory as number[] | undefined; - const agents = c.agents ?? []; - const providers = [...new Set(agents.map(a => providerOf(a.model)))]; - const outcome = this._outcomeMap.get(c.id); - const prod = c.productiveCalls ?? 0; - const obs = c.observationCalls ?? 0; - const totalCalls = prod + obs; - const accessed = c.entriesAccessed ?? 0; - return html` -
this.re('navigate', c.id)}> -
- - ${colonyName(c)} - 0 ? c.qualityScore : null}> - ${this._completionPill(c)} - ${totalCalls > 0 ? html`${prod}/${totalCalls} prod` : nothing} - ${accessed > 0 ? html`\u2726 assisted` : nothing} - ${sk > 0 ? html`${sk} extracted` : nothing} - ${outcome && c.status === 'completed' ? html` - ${outcome.quality_score > 0 ? html`${(outcome.quality_score * 100).toFixed(0)}%` : nothing} - $${outcome.total_cost.toFixed(2)} - ${outcome.entries_extracted > 0 ? html`${outcome.entries_extracted} entries` : nothing} - ` : nothing} -
- ${c.displayName ? html`
${c.id}
` : nothing} - ${c.task ? html`
${c.task}
` : nothing} -
- R${c.round ?? 0}/${c.maxRounds ?? 0} - ${agents.length} agents - ${c.convergence > 0 ? html` - conv ${(c.convergence * 100).toFixed(0)}% - - ` : nothing} - $${((c as any).cost ?? 0).toFixed(2)} - - ${providers.map(p => html``)} - -
- ${c.maxRounds > 0 && c.round > 0 ? html` -
- ` : nothing} -
`; - } - - private _renderServiceColonies(cols: Colony[]) { - const services = cols.filter(c => (c as Colony & { serviceType?: string }).serviceType != null); - if (services.length === 0) return nothing; - return html` -
-
-
\u25C6 Service Colonies
- ${services.length} active -
-
- ${services.map(c => this._renderServiceCard(c))} -
-
`; - } - - private _renderServiceCard(c: Colony) { - const sType = (c as Colony & { serviceType?: string }).serviceType ?? 'service'; - return html` -
this.re('navigate', c.id)}> -
- \u25C6 - ${colonyName(c)} - ${sType} -
- ${c.task ? html`
${c.task}
` : nothing} -
- ${sType} - ${(c.agents ?? []).length} agents idle - \u25C6 ready -
-
`; - } - private get activeWorkspaceId(): string { return this.tree[0]?.id ?? ''; } @@ -372,139 +204,7 @@ export class FcQueenOverview extends LitElement { private get _runningColonies(): { id: string; name: string }[] { return allColonies(this.tree) .filter(c => c.status === 'running') - .map(c => ({ id: c.id, name: colonyName(c) })); - } - - override updated(changed: Map) { - if (changed.has('tree')) { - const wsId = this.activeWorkspaceId; - if (wsId && wsId !== this._fedWorkspaceId) { - this._fedWorkspaceId = wsId; - void this._fetchFederation(wsId); - } - if (wsId && wsId !== this._outcomesWorkspaceId) { - this._outcomesWorkspaceId = wsId; - void this._fetchOutcomes(wsId); - } - } - } - - private async _fetchFederation(wsId: string) { - this._fedFailed = false; - try { - const res = await fetch(`/api/v1/federation/status?workspace=${encodeURIComponent(wsId)}`); - if (res.ok) { - const data = await res.json(); - this._fedPeers = (data.peers ?? []).map((p: Record) => ({ - instanceId: p.instance_id as string ?? '', - trustScore: p.trust_score as number ?? 0, - lastSync: p.last_sync as string ?? '', - eventsPending: p.events_pending as number ?? 0, - })); - } else { - this._fedFailed = true; - } - } catch { this._fedFailed = true; } - this._fedLoaded = true; - } - - private async _fetchOutcomes(wsId: string) { - this._outcomesFailed = false; - try { - const res = await fetch(`/api/v1/workspaces/${encodeURIComponent(wsId)}/outcomes?period=24h`); - if (res.ok) { - const data = await res.json(); - this._outcomeSummary = data.summary as OutcomeSummary; - const map = new Map(); - for (const o of data.outcomes ?? []) { - map.set(o.colony_id, { - colony_id: o.colony_id, - quality_score: o.quality_score ?? 0, - total_cost: o.total_cost ?? 0, - entries_extracted: o.entries_extracted ?? 0, - maintenance_source: o.maintenance_source ?? 
null, - }); - } - this._outcomeMap = map; - } else { - this._outcomesFailed = true; - } - } catch { this._outcomesFailed = true; } - } - - private _getMaintenancePolicy(): { autonomyLevel: string; maxColonies: number; dailyBudget: number } { - const ws = this.tree[0] as any; - const raw = ws?.config?.maintenance_policy; - if (!raw) return { autonomyLevel: 'suggest', maxColonies: 2, dailyBudget: 1.0 }; - const parsed = typeof raw === 'string' ? JSON.parse(raw) : raw; - return { - autonomyLevel: parsed.autonomy_level ?? 'suggest', - maxColonies: parsed.max_maintenance_colonies ?? 2, - dailyBudget: parsed.daily_maintenance_budget ?? 1.0, - }; - } - - private _renderKnowledgePulse() { - const s = this._outcomeSummary; - const kb = this.skillBankStats; - return html` -
- Knowledge Pulse - ${kb.total} entries - - conf ${(kb.avgConfidence * 100).toFixed(0)}%${s ? html` - \u00B7 ${s.total_extracted} extracted - \u00B7 quality ${(s.avg_quality * 100).toFixed(0)}%` : this._outcomesFailed ? html` - \u00B7 outcomes unavailable` : nothing} - -
`; - } - - private _renderMaintenancePosture() { - const policy = this._getMaintenancePolicy(); - const cols = allColonies(this.tree); - const maintenanceCols = cols.filter(c => ((c as any).tags ?? []).includes?.('maintenance') || (c.task ?? '').toLowerCase().includes('maintenance')); - const activeCount = maintenanceCols.filter(c => c.status === 'running').length; - const levelClass = `autonomy-${policy.autonomyLevel}`; - const spent = this._outcomeSummary?.maintenance_spend ?? 0; - return html` -
- Maintenance -
- ${policy.autonomyLevel.replace('_', ' ')} -
- ${activeCount} active \u00B7 $${spent.toFixed(2)}/$${policy.dailyBudget.toFixed(2)} -
`; - } - - private _renderFederationSummary() { - if (!this._fedLoaded) { - return html`
- Federation - Loading\u2026 -
`; - } - if (this._fedFailed) { - return html`
- Federation - Unavailable -
`; - } - if (this._fedPeers.length === 0) { - return html`
- Federation - No peers configured -
`; - } - const avgTrust = this._fedPeers.reduce((s, p) => s + p.trustScore, 0) / this._fedPeers.length; - const totalPending = this._fedPeers.reduce((s, p) => s + p.eventsPending, 0); - const lastSync = this._fedPeers.reduce((latest, p) => p.lastSync > latest ? p.lastSync : latest, ''); - const syncLabel = lastSync ? new Date(lastSync).toLocaleTimeString() : '\u2014'; - return html`
- Federation - ${this._fedPeers.length} peer${this._fedPeers.length !== 1 ? 's' : ''} - trust ${(avgTrust * 100).toFixed(0)}% \u00B7 ${totalPending} pending \u00B7 sync ${syncLabel} -
`; + .map(c => ({ id: c.id, name: c.name })); } /** Render compact Active Plans summary from queenThreads with parallel plans. */ @@ -569,41 +269,6 @@ export class FcQueenOverview extends LitElement { `; } - /** Wave 39 1B: tri-state completion pill for colony cards */ - private _completionPill(c: Colony) { - if (c.status === 'completed') { - if ((c as any).validatorVerdict === 'pass') { - return html`\u2713 validated`; - } - return html`\u25CB unvalidated`; - } - if (c.status === 'failed' || c.status === 'killed') { - return html`\u25A0 stalled`; - } - return nothing; - } - - private _budgetColor(cost: number, limit: number): string { - if (limit <= 0) return 'var(--v-fg-dim)'; - const remaining = (limit - cost) / limit; - if (remaining >= 0.70) return 'var(--v-success)'; - if (remaining >= 0.30) return 'var(--v-warn)'; - if (remaining >= 0.10) return 'var(--v-accent)'; - return 'var(--v-danger)'; - } - - private _renderProviderCard(label: string, endpoint: CloudEndpoint | undefined, color: string) { - if (!endpoint || endpoint.status !== 'connected' || endpoint.limit <= 0) { - return html` -
${label}
-
- ${endpoint?.status === 'no_key' ? 'No key configured' : 'Not active'} -
- `; - } - return html``; - } - /** Wave 49: compact always-visible status header for chat-first mode. */ private _renderCompactHeader(_cols: TreeNode[], running: TreeNode[], totalCost: number) { const plansCount = this.queenThreads.filter(qt => diff --git a/frontend/src/components/queen-tool-stats.ts b/frontend/src/components/queen-tool-stats.ts new file mode 100644 index 0000000..900666d --- /dev/null +++ b/frontend/src/components/queen-tool-stats.ts @@ -0,0 +1,111 @@ +/** + * Wave 74 Track 4d: Queen tool usage stats. + * Compact table showing session tool call counts from + * GET /api/v1/queen-tool-stats. + */ +import { LitElement, html, css, nothing } from 'lit'; +import { customElement, state } from 'lit/decorators.js'; +import { voidTokens } from '../styles/shared.js'; + +interface ToolStat { + name: string; + count: number; + last_status: string; +} + +@customElement('fc-queen-tool-stats') +export class FcQueenToolStats extends LitElement { + static styles = [voidTokens, css` + :host { display: block; } + + .header { + display: flex; align-items: center; gap: 6px; margin-bottom: 6px; + } + .title { + font-size: 9px; font-family: var(--f-mono); font-weight: 700; + color: var(--v-fg-dim); letter-spacing: 0.12em; text-transform: uppercase; + } + .total { + font-size: 8px; font-family: var(--f-mono); color: var(--v-fg-dim); + padding: 1px 5px; border-radius: 4px; + background: rgba(255,255,255,0.04); + } + .refresh-btn { + margin-left: auto; + font-family: var(--f-mono); font-size: 8px; font-weight: 600; + color: var(--v-fg-dim); background: none; border: none; + cursor: pointer; padding: 2px 6px; border-radius: 3px; + transition: color 0.15s; + } + .refresh-btn:hover { color: var(--v-fg-muted); } + + .tool-grid { + display: grid; grid-template-columns: 1fr auto; + gap: 1px 12px; align-items: center; + } + + .tool-name { + font-family: var(--f-mono); font-size: 10px; color: var(--v-fg-muted); + overflow: hidden; text-overflow: ellipsis; white-space: nowrap; + } 
+ + .tool-count { + font-family: var(--f-mono); font-size: 10px; color: var(--v-fg); + font-weight: 600; font-feature-settings: 'tnum'; + text-align: right; + } + + .empty { + font-family: var(--f-mono); font-size: 10px; color: var(--v-fg-dim); + padding: 6px 0; + } + + @media (prefers-reduced-motion: reduce) { + * { transition: none !important; } + } + `]; + + @state() private _stats: ToolStat[] = []; + @state() private _loaded = false; + + connectedCallback() { + super.connectedCallback(); + void this._fetch(); + } + + private async _fetch() { + try { + const resp = await fetch('/api/v1/queen-tool-stats'); + if (!resp.ok) return; + const data = await resp.json(); + this._stats = (data.tools ?? []) as ToolStat[]; + } catch { /* silent */ } + this._loaded = true; + } + + render() { + if (!this._loaded || this._stats.length === 0) return nothing; + + const totalCalls = this._stats.reduce((s, t) => s + t.count, 0); + + return html` +
+ \u2699 Tool Usage + ${totalCalls} calls + +
+
+ ${this._stats.map(t => html` + ${t.name} + ${t.count} + `)} +
+ `; + } +} + +declare global { + interface HTMLElementTagNameMap { + 'fc-queen-tool-stats': FcQueenToolStats; + } +} diff --git a/frontend/src/components/settings-view.ts b/frontend/src/components/settings-view.ts index 8bd2c01..46bd8a3 100644 --- a/frontend/src/components/settings-view.ts +++ b/frontend/src/components/settings-view.ts @@ -1,25 +1,62 @@ +/** + * Wave 72 Track 10A: Settings — writable-first restructure. + * + * Sections: Workspace, Governance, Budgeting & Autonomy, Model Defaults, + * Integrations. Read-only diagnostics collapse into a toggled section. + */ import { LitElement, html, css, nothing } from 'lit'; import { customElement, property, state } from 'lit/decorators.js'; import { voidTokens, sharedStyles } from '../styles/shared.js'; -import type { ProtocolStatus, RuntimeConfig, SkillBankStats, TreeNode } from '../types.js'; +import type { + ProtocolStatus, RuntimeConfig, SkillBankStats, TreeNode, + AddonSummary, ModelRegistryEntry, +} from '../types.js'; import type { RetrievalTiming, RetrievalCounts } from './retrieval-diagnostics.js'; import './atoms.js'; import './retrieval-diagnostics.js'; +import './system-overview.js'; +import './autonomy-card.js'; +import './mcp-servers-card.js'; + +interface MaintenancePolicyData { + autonomy_level: string; + auto_actions: string[]; + max_maintenance_colonies: number; + daily_maintenance_budget: number; +} @customElement('fc-settings-view') export class FcSettingsView extends LitElement { static styles = [voidTokens, sharedStyles, css` - :host { display: block; max-width: 580px; overflow: auto; height: 100%; } - h2 { font-family: var(--f-display); font-size: 20px; font-weight: 700; color: var(--v-fg); margin-bottom: 18px; } - .section { margin-bottom: 16px; } - .desc { font-size: 10.5px; font-family: var(--f-mono); color: var(--v-fg-muted); margin-bottom: 8px; } - .strat-desc { font-size: 10.5px; color: var(--v-fg-muted); margin-bottom: 8px; line-height: 1.4; } - .pills { display: flex; gap: 6px; } - 
.proto-row { display: flex; align-items: center; gap: 8px; padding: 5px 0; border-bottom: 1px solid var(--v-border); } - .proto-name { font-family: var(--f-mono); font-size: 10.5px; font-weight: 600; color: var(--v-fg); width: 50px; } - .proto-desc { font-size: 10px; color: var(--v-fg-muted); } + :host { + display: block; max-width: 640px; overflow-y: auto; height: 100%; + padding: 0 4px 24px; + } + h2 { + font-family: var(--f-display); font-size: 20px; font-weight: 700; + color: var(--v-fg); margin: 0 0 6px; + } + .subtitle { + font-family: var(--f-mono); font-size: 10px; color: var(--v-fg-dim); + margin-bottom: 14px; + } + + /* --- Card sections --- */ + .settings-card { + background: var(--v-surface); + border: 1px solid var(--v-border); + border-radius: 10px; + padding: 16px 20px; + margin-bottom: 12px; + } + .settings-card h3 { + font-family: var(--f-display); font-size: 13px; font-weight: 600; + color: var(--v-fg); margin: 0 0 12px; + } + + /* --- Shared control styles --- */ .control-row { - display: flex; align-items: center; gap: 10px; padding: 8px 0; + display: flex; align-items: center; gap: 10px; padding: 7px 0; border-bottom: 1px solid var(--v-border); } .control-row:last-child { border-bottom: none; } @@ -27,47 +64,162 @@ export class FcSettingsView extends LitElement { font-family: var(--f-mono); font-size: 10.5px; font-weight: 600; color: var(--v-fg); flex: 1; } - .control-hint { font-size: 9px; color: var(--v-fg-dim); font-family: var(--f-mono); } + .control-hint { + font-size: 9px; color: var(--v-fg-dim); font-family: var(--f-mono); + } .control-row select, .control-row input { - padding: 4px 8px; background: var(--v-surface); border: 1px solid var(--v-border); - border-radius: 6px; color: var(--v-fg); font-family: var(--f-mono); font-size: 11px; + padding: 4px 8px; background: var(--v-surface); + border: 1px solid var(--v-border); border-radius: 6px; + color: var(--v-fg); font-family: var(--f-mono); font-size: 11px; outline: none; transition: 
border-color 0.15s; } - .control-row select:focus, .control-row input:focus { border-color: rgba(232,88,26,0.3); } + .control-row select:focus, .control-row input:focus { + border-color: rgba(232,88,26,0.3); + } .control-row input[type="number"] { width: 80px; text-align: right; } - .control-row input[type="range"] { width: 120px; accent-color: var(--v-accent); } - .save-row { display: flex; justify-content: flex-end; margin-top: 8px; gap: 6px; align-items: center; } - .save-msg { font-size: 9px; font-family: var(--f-mono); color: var(--v-success); } + .control-row input[type="range"] { + width: 120px; accent-color: var(--v-accent); + } + + /* --- Inline save indicator --- */ + .save-indicator { + color: var(--v-success); font-size: 12px; + opacity: 0; transition: opacity 0.15s; + margin-left: 6px; + } + .save-indicator[visible] { opacity: 1; } + + /* --- Validation error --- */ + .field-error { + font-size: 10px; font-family: var(--f-mono); + color: var(--v-danger); margin-top: 2px; + } + + /* --- Read-only label --- */ + .read-only-label { + font-size: 9px; font-family: var(--f-mono); color: var(--v-fg-dim); + font-style: italic; margin-top: 4px; + } + + /* --- Tag pills --- */ + .tag-pills { display: flex; flex-wrap: wrap; gap: 6px; margin-top: 4px; } + .tag-pill { + padding: 2px 8px; border-radius: 999px; font-size: 10px; + font-family: var(--f-mono); background: var(--v-accent-muted); + color: var(--v-fg-muted); border: 1px solid var(--v-border); + } + + /* --- Caste grid --- */ + .caste-grid { + display: grid; grid-template-columns: repeat(5, 1fr); gap: 8px; + margin-top: 8px; + } + .caste-cell { + text-align: center; padding: 8px 4px; border-radius: 8px; + border: 1px solid var(--v-border); background: var(--v-recessed); + } + .caste-name { + font-size: 9px; font-family: var(--f-mono); font-weight: 600; + color: var(--v-fg-dim); text-transform: uppercase; + letter-spacing: 0.06em; margin-bottom: 4px; + } + .caste-model { + font-size: 10px; font-family: 
var(--f-mono); color: var(--v-fg-muted); + word-break: break-all; + } + + /* --- Empty text --- */ + .empty-text { + font-size: 10.5px; font-family: var(--f-mono); + color: var(--v-fg-dim); font-style: italic; + } + + /* --- Diagnostics toggle --- */ + .diag-toggle { + font-family: var(--f-mono); font-size: 10px; color: var(--v-fg-dim); + cursor: pointer; padding: 8px 0; display: flex; align-items: center; gap: 6px; + transition: color 0.15s; + } + .diag-toggle:hover { color: var(--v-fg-muted); } + + /* --- Protocol row --- */ + .proto-row { + display: flex; align-items: center; gap: 8px; padding: 5px 0; + border-bottom: 1px solid var(--v-border); + } + .proto-row:last-child { border-bottom: none; } + .proto-name { + font-family: var(--f-mono); font-size: 10.5px; font-weight: 600; + color: var(--v-fg); width: 50px; + } + .proto-detail { + font-size: 10px; font-family: var(--f-mono); color: var(--v-fg-dim); + margin-left: 4px; + } + .proto-desc { font-size: 10px; color: var(--v-fg-muted); } + + /* --- Addon summary --- */ + .addon-row { + display: flex; align-items: center; gap: 8px; padding: 6px 0; + border-bottom: 1px solid var(--v-border); + } + .addon-row:last-child { border-bottom: none; } + .addon-name { + font-size: 11px; font-family: var(--f-mono); font-weight: 600; + color: var(--v-fg); + } + .addon-desc { + font-size: 10px; color: var(--v-fg-dim); flex: 1; + } + .addon-meta { + font-size: 9px; font-family: var(--f-mono); color: var(--v-fg-dim); + } + + @media (prefers-reduced-motion: reduce) { + *, *::before, *::after { transition: none !important; animation: none !important; } + } `]; @property({ type: Object }) protocolStatus: ProtocolStatus | null = null; @property({ type: Object }) runtimeConfig: RuntimeConfig | null = null; @property({ type: Object }) skillBankStats: SkillBankStats | null = null; @property({ type: Array }) tree: TreeNode[] = []; + @property({ type: Array }) addons: AddonSummary[] = []; - private _snapshotTime = Date.now(); @state() 
private _editStrategy: 'stigmergic' | 'sequential' = 'stigmergic'; @state() private _editMaxRounds = 25; @state() private _editBudget = 1.0; @state() private _editConvergence = 0.95; - @state() private _editAutonomy: 'suggest' | 'auto_notify' | 'autonomous' = 'suggest'; - @state() private _saving = false; - @state() private _saveMsg = ''; - @state() private _controlsDirty = false; + @state() private _savedFields = new Set(); + @state() private _fieldErrors = new Map(); @state() private _diagTiming: RetrievalTiming | null = null; @state() private _diagCounts: RetrievalCounts | null = null; @state() private _diagEmbedModel = ''; @state() private _diagEmbedDim = 0; @state() private _diagSearchMode = ''; + @state() private _knowledgeTotal = 0; + @state() private _showDiagnostics = false; + + // Maintenance policy state + @state() private _policyLoaded = false; + @state() private _policyLevel = 'suggest'; + @state() private _policyBudget = 1.0; + @state() private _policyMaxColonies = 2; + @state() private _policySaving = false; + @state() private _policySaved = false; + + private _saveTimeouts = new Map(); connectedCallback() { super.connectedCallback(); void this._fetchDiagnostics(); + void this._fetchKnowledgeSummary(); + void this._fetchMaintenancePolicy(); this._syncFromConfig(); } updated(changed: Map) { - if (changed.has('runtimeConfig') && !this._controlsDirty) { + if (changed.has('runtimeConfig')) { this._syncFromConfig(); } } @@ -89,34 +241,127 @@ export class FcSettingsView extends LitElement { return this.tree[0]?.id ?? 
''; } - private async _saveSettings() { + // --- Instant save per field --- + + private _onControlChange(field: string, value: unknown) { + const error = this._validate(field, value); + if (error) { + const next = new Map(this._fieldErrors); + next.set(field, error); + this._fieldErrors = next; + return; + } + if (this._fieldErrors.has(field)) { + const next = new Map(this._fieldErrors); + next.delete(field); + this._fieldErrors = next; + } + const existing = this._saveTimeouts.get(field); + if (existing) clearTimeout(existing); + this._saveTimeouts.set(field, window.setTimeout(() => { + void this._saveField(field, value); + }, 500)); + } + + private _validate(field: string, value: unknown): string | null { + const num = Number(value); + switch (field) { + case 'governance.default_budget_per_colony': + if (isNaN(num) || num <= 0) return 'Must be a positive number'; + break; + case 'governance.max_rounds_per_colony': + if (isNaN(num) || num < 1 || num > 50) return 'Must be 1\u201350'; + break; + case 'governance.convergence_threshold': + if (isNaN(num) || num < 0.80 || num > 1.00) return '0.80\u20131.00'; + break; + } + return null; + } + + private async _saveField(field: string, value: unknown) { const wsId = this._workspaceId; if (!wsId) return; - this._saving = true; - this._saveMsg = ''; try { - const changes = [ - { dimension: 'governance.max_rounds_per_colony', original: {}, overridden: { value: this._editMaxRounds }, reason: 'operator settings panel' }, - { dimension: 'governance.default_budget_per_colony', original: {}, overridden: { value: this._editBudget }, reason: 'operator settings panel' }, - { dimension: 'governance.convergence_threshold', original: {}, overridden: { value: this._editConvergence }, reason: 'operator settings panel' }, - { dimension: 'routing.default_strategy', original: {}, overridden: { value: this._editStrategy }, reason: 'operator settings panel' }, - ]; - for (const change of changes) { - await 
fetch(`/api/v1/workspaces/${encodeURIComponent(wsId)}/config-overrides`, { + await fetch( + `/api/v1/workspaces/${encodeURIComponent(wsId)}/config-overrides`, + { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(change), - }); + body: JSON.stringify({ + dimension: field, + original: {}, + overridden: { value }, + reason: 'operator settings panel', + }), + }, + ); + this._showSaveIndicator(field); + } catch { + // Save errors are silent + } + } + + private _showSaveIndicator(field: string) { + const next = new Set(this._savedFields); + next.add(field); + this._savedFields = next; + setTimeout(() => { + const rm = new Set(this._savedFields); + rm.delete(field); + this._savedFields = rm; + }, 1500); + } + + // --- Maintenance policy --- + + private async _fetchMaintenancePolicy() { + const wsId = this._workspaceId; + if (!wsId) return; + try { + const resp = await fetch( + `/api/v1/workspaces/${encodeURIComponent(wsId)}/maintenance-policy`, + ); + if (!resp.ok) return; + const data = await resp.json() as MaintenancePolicyData; + this._policyLevel = data.autonomy_level ?? 'suggest'; + this._policyBudget = data.daily_maintenance_budget ?? 1.0; + this._policyMaxColonies = data.max_maintenance_colonies ?? 
2; + this._policyLoaded = true; + } catch { + // best-effort + } + } + + private async _saveMaintenancePolicy() { + const wsId = this._workspaceId; + if (!wsId) return; + this._policySaving = true; + try { + const resp = await fetch( + `/api/v1/workspaces/${encodeURIComponent(wsId)}/maintenance-policy`, + { + method: 'PUT', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + autonomy_level: this._policyLevel, + daily_maintenance_budget: this._policyBudget, + max_maintenance_colonies: this._policyMaxColonies, + }), + }, + ); + if (resp.ok) { + this._policySaved = true; + setTimeout(() => { this._policySaved = false; }, 2000); } - this._saveMsg = 'Saved'; - this._controlsDirty = false; - setTimeout(() => { this._saveMsg = ''; }, 2000); } catch { - this._saveMsg = 'Error saving'; + // best-effort } - this._saving = false; + this._policySaving = false; } + // --- Data fetching --- + private async _fetchDiagnostics() { try { const resp = await fetch('/api/v1/retrieval-diagnostics'); @@ -124,12 +369,8 @@ export class FcSettingsView extends LitElement { const data = await resp.json(); const t = data.timing ?? {}; this._diagTiming = { - embedMs: 0, - denseMs: t.vectorMs ?? 0, - bm25Ms: 0, - graphMs: t.graphMs ?? 0, - fusionMs: 0, - totalMs: t.totalMs ?? 0, + embedMs: 0, denseMs: t.vectorMs ?? 0, bm25Ms: 0, + graphMs: t.graphMs ?? 0, fusionMs: 0, totalMs: t.totalMs ?? 0, }; this._diagCounts = data.counts ?? null; const emb = data.embedding ?? {}; @@ -137,124 +378,403 @@ export class FcSettingsView extends LitElement { this._diagEmbedDim = emb.dimensions ?? 0; this._diagSearchMode = data.searchMode ?? ''; } catch { - // Diagnostics are best-effort; fail silently + // best-effort } } - render() { - const strategy = this.runtimeConfig?.routing?.defaultStrategy ?? 'stigmergic'; - const aguiStatus = this.protocolStatus?.agui?.status ?? 'inactive'; - const a2aStatus = this.protocolStatus?.a2a?.status ?? 
'inactive'; - const mcpProto = this.protocolStatus?.mcp; - const aguiProto = this.protocolStatus?.agui; - const a2aProto = this.protocolStatus?.a2a; - const mcpDesc = mcpProto?.transport - ? `${mcpProto.transport} \u00B7 ${mcpProto.endpoint ?? '/mcp'} \u00B7 ${mcpProto.tools ?? 0} tools` - : `${mcpProto?.tools ?? 0} tools exposed`; - const aguiDesc = aguiStatus === 'active' - ? `SSE ${aguiProto?.semantics ?? ''} \u00B7 ${aguiProto?.endpoint ?? ''} \u00B7 ${aguiProto?.events ?? 0} event types` - : 'Inactive'; - const a2aDesc = a2aStatus === 'inactive' - ? (a2aProto?.note ?? 'Inactive') - : `REST ${a2aProto?.semantics ?? 'poll/result'} \u00B7 ${a2aProto?.endpoint ?? '/a2a/tasks'}`; - const protocols = [ - { n: 'MCP', d: mcpDesc, s: mcpProto?.status ?? 'inactive' }, - { n: 'AG-UI', d: aguiDesc, s: aguiStatus }, - { n: 'A2A', d: a2aDesc, s: a2aStatus }, - ]; + private async _fetchKnowledgeSummary() { + try { + const wsId = this._workspaceId; + if (!wsId) return; + const resp = await fetch( + `/api/v1/workspaces/${encodeURIComponent(wsId)}/knowledge?limit=1`, + ); + if (!resp.ok) return; + const data = await resp.json(); + this._knowledgeTotal = data.total ?? 0; + } catch { + // best-effort + } + } + + // --- Helpers --- + private get _taxonomyTags(): string[] { + const ws = this.tree[0]; + if (!ws) return []; + const cfg = (ws as unknown as { config?: Record }).config; + const raw = cfg?.taxonomy_tags; + if (!raw) return []; + try { return JSON.parse(raw); } catch { return []; } + } + + private get _casteDefaults(): Record { + const d = this.runtimeConfig?.models?.defaults; + if (!d) return {}; + return { + queen: d.queen, coder: d.coder, reviewer: d.reviewer, + researcher: d.researcher, archivist: d.archivist, + }; + } + + // --- Render --- + + render() { return html`

Settings

+
Operator controls and workspace configuration
-
- -
+ ${this._renderIdentityCard()} + ${this._renderGovernanceCard()} + ${this._renderBudgetingCard()} + ${this._renderModelDefaultsCard()} + ${this._renderIntegrationsCard()} + ${this._renderDiagnosticsToggle()} + `; + } -
-
Event Store
-
-
Single SQLite \u00B7 WAL mode \u00B7 append-only
+ // --- Card 1: Workspace Identity --- + + private _renderIdentityCard() { + const wsName = this.tree[0]?.name ?? 'No workspace'; + const tags = this._taxonomyTags; + + return html` +
+

Workspace

+
+
+
${wsName}
+
Workspace name
+
+
+
+
Tags
+ ${tags.length > 0 + ? html`
+ ${tags.map(t => html`${t}`)} +
` + : html`
No tags yet
` + } +
Set via Queen \u00B7 set_workspace_tags
+ `; + } + + // --- Card 2: Colony Governance (editable, instant save) --- + + private _renderGovernanceCard() { + return html` +
+

Governance

+
+
+
Default Strategy
+
Coordination mode for new colonies
+
+ + \u2713 +
-
-
Colony Governance
-
-
-
-
Default Strategy
-
Coordination mode for new colonies
-
- { + const v = parseInt( + (e.target as HTMLInputElement).value, 10, + ) || 25; + this._editMaxRounds = v; + this._onControlChange( + 'governance.max_rounds_per_colony', v, + ); }}> - - - + \u2713 +
+ +
+
+
Default Budget per Colony
+
USD spend cap per colony
+ ${this._fieldErrors.has('governance.default_budget_per_colony') + ? html`
+ ${this._fieldErrors.get('governance.default_budget_per_colony')} +
` : nothing}
-
-
-
Max Rounds per Colony
-
Hard cap on iteration rounds
-
- { - this._editMaxRounds = Math.max(1, Math.min(50, parseInt((e.target as HTMLInputElement).value, 10) || 25)); - this._controlsDirty = true; + { + const v = parseFloat( + (e.target as HTMLInputElement).value, + ) || 1.0; + this._editBudget = v; + this._onControlChange( + 'governance.default_budget_per_colony', v, + ); }}> + \u2713 +
+ +
+
+
Convergence Threshold
+
${this._editConvergence.toFixed(2)}
+ ${this._fieldErrors.has('governance.convergence_threshold') + ? html`
+ ${this._fieldErrors.get('governance.convergence_threshold')} +
` : nothing}
-
-
-
Default Budget per Colony
-
USD spend cap per colony
-
- { - this._editBudget = Math.max(0.01, parseFloat((e.target as HTMLInputElement).value) || 1.0); - this._controlsDirty = true; + { + const v = parseFloat( + (e.target as HTMLInputElement).value, + ) || 0.95; + this._editConvergence = v; + this._onControlChange( + 'governance.convergence_threshold', v, + ); }}> + \u2713 +
+
+ `; + } + + // --- Card 3: Budgeting & Autonomy --- + + private _renderBudgetingCard() { + const wsId = this._workspaceId; + if (!wsId) return nothing; + + return html` +
+

Budgeting & Autonomy

+ +
+
+
Autonomy Level
+
How much the system can do on its own
+
+ +
+ +
+
+
Daily Maintenance Budget
+
USD cap for autonomous work per day
-
-
-
Convergence Threshold
-
${this._editConvergence.toFixed(2)}
-
- { - this._editConvergence = parseFloat((e.target as HTMLInputElement).value) || 0.95; - this._controlsDirty = true; + { + this._policyBudget = parseFloat( + (e.target as HTMLInputElement).value, + ) || 1.0; }}> +
+ +
+
+
Max Maintenance Colonies
+
Concurrent maintenance colony limit
- ${this._controlsDirty ? html` -
- ${this._saveMsg ? html`${this._saveMsg}` : nothing} - void this._saveSettings()}> - ${this._saving ? 'Saving\u2026' : 'Save Changes'} - -
` : nothing} + { + this._policyMaxColonies = parseInt( + (e.target as HTMLInputElement).value, 10, + ) || 2; + }}> +
+ +
+ void this._saveMaintenancePolicy()}> + ${this._policySaving ? 'Saving\u2026' : 'Save Policy'} + + ${this._policySaved ? html` + + \u2713 Saved + + ` : nothing} +
+ +
+
+ `; + } + + // --- Card 4: Model Defaults --- -
-
+ private _renderModelDefaultsCard() { + const castes = this._casteDefaults; + + return html` +
+

Model Defaults

+ ${Object.keys(castes).length > 0 ? html` +
+ ${Object.entries(castes).map(([caste, model]) => html` +
+
${caste}
+
${model || '\u2014'}
+
+ `)} +
+
+ Managed by caste recipes. Full admin in the Models tab. +
+ ` : html` +
Model configuration not available
+ `} +
+ `; + } + + // --- Card 5: Integrations --- + + private _renderIntegrationsCard() { + return html` +
+

Integrations

+ + ${this._renderProtocolsSummary()} + ${this._renderAddonsSummary()} +
+ `; + } + + private _renderProtocolsSummary() { + const mcpProto = this.protocolStatus?.mcp; + const aguiProto = this.protocolStatus?.agui; + const a2aProto = this.protocolStatus?.a2a; + + const mcpStatus = mcpProto?.status ?? 'inactive'; + const aguiStatus = aguiProto?.status ?? 'inactive'; + const a2aStatus = a2aProto?.status ?? 'inactive'; + + return html` +
+
Protocols - Snapshot data \u2014 refreshes on reconnect
-
- ${protocols.map(p => html` -
- - ${p.n} - ${p.d} - ${p.s} -
- `)} +
+ + MCP + ${(mcpProto as any)?.tools ?? 0} tools + ${mcpStatus} +
+
+ + AG-UI + ${aguiStatus === 'active' + ? `${(aguiProto as any)?.events ?? 0} events` : ''} + ${aguiStatus} +
+
+ + A2A + ${a2aStatus === 'active' + ? `${(a2aProto as any)?.semantics ?? ''} ${(a2aProto as any)?.endpoint ?? ''}`.trim() + : ''} + ${a2aStatus} +
+
+ `; + } + + private _renderAddonsSummary() { + const addons = this.addons; + if (addons.length === 0) return nothing; + + return html` +
+
+ Addons +
+ ${addons.map(a => html` +
+ + ${a.name} + ${a.description} + + ${a.tools.length} tool${a.tools.length !== 1 ? 's' : ''} + +
+ `)} +
+ `; + } + + // --- Diagnostics (collapsed) --- + + private _renderDiagnosticsToggle() { + return html` +
{ this._showDiagnostics = !this._showDiagnostics; }}> + ${this._showDiagnostics ? '\u25BC' : '\u25B6'} + Diagnostics & System Info +
+ ${this._showDiagnostics ? html` + + +
+

Retrieval Diagnostics

+
-
`; + ` : nothing} + `; } } diff --git a/frontend/src/components/system-overview.ts b/frontend/src/components/system-overview.ts new file mode 100644 index 0000000..ebe096c --- /dev/null +++ b/frontend/src/components/system-overview.ts @@ -0,0 +1,65 @@ +/** + * Wave 69 Track 11: System capability summary header. + * Compact one-line overview rendered at top of settings page. + */ +import { LitElement, html, css } from 'lit'; +import { customElement, property } from 'lit/decorators.js'; +import { voidTokens } from '../styles/shared.js'; +import type { RuntimeConfig, AddonSummary } from '../types.js'; + +@customElement('fc-system-overview') +export class FcSystemOverview extends LitElement { + static styles = [voidTokens, css` + :host { display: block; margin-bottom: 14px; } + .summary { + font-family: var(--f-mono); + font-size: 10.5px; + color: var(--v-fg-muted); + line-height: 1.6; + } + .sep { margin: 0 4px; } + + @media (prefers-reduced-motion: reduce) { + * { transition: none !important; } + } + `]; + + @property({ type: Object }) runtimeConfig: RuntimeConfig | null = null; + @property({ type: Array }) addons: AddonSummary[] = []; + @property({ type: Number }) knowledgeTotal = 0; + @property({ type: Number }) domainCount = 0; + + private get _queenToolCount(): number { + // Hardcoded from caste_recipes.yaml — 43 tools as of Wave 70 + return 43; + } + + private get _addonCount(): number { + return this.addons.length; + } + + private get _providerCount(): number { + const reg = this.runtimeConfig?.models?.registry ?? []; + const providers = new Set(reg.map(m => m.provider)); + return providers.size; + } + + render() { + const parts: string[] = []; + parts.push(`${this._queenToolCount} Queen tools`); + parts.push(`${this._addonCount} addon${this._addonCount !== 1 ? 's' : ''}`); + parts.push(`${this._providerCount} provider${this._providerCount !== 1 ? 
's' : ''}`); + if (this.knowledgeTotal > 0) { + parts.push(`${this.knowledgeTotal} knowledge entries`); + } + if (this.domainCount > 0) { + parts.push(`across ${this.domainCount} domains`); + } + + return html`
${parts.join(' \u00B7 ')}
`; + } +} + +declare global { + interface HTMLElementTagNameMap { 'fc-system-overview': FcSystemOverview; } +} diff --git a/frontend/src/components/template-editor.ts b/frontend/src/components/template-editor.ts index 461e1fb..c137dcf 100644 --- a/frontend/src/components/template-editor.ts +++ b/frontend/src/components/template-editor.ts @@ -100,13 +100,14 @@ export class FcTemplateEditor extends LitElement { @property({ type: String }) mode: EditorMode = 'create'; @property({ type: Object }) template: TemplateInfo | null = null; + @property({ type: Object }) governance: { defaultBudgetPerColony: number; maxRoundsPerColony: number } | null = null; @state() private name = ''; @state() private description = ''; @state() private castes: CasteSlot[] = []; @state() private strategy: CoordinationStrategy = 'stigmergic'; - @state() private budgetLimit = 1.0; - @state() private maxRounds = 5; + @state() private budgetLimit = 0; + @state() private maxRounds = 0; @state() private tags: string[] = []; @state() private tagInput = ''; @state() private saving = false; @@ -121,10 +122,22 @@ export class FcTemplateEditor extends LitElement { this._populateFromTemplate(); } + private get _defaultBudget(): number { + return this.governance?.defaultBudgetPerColony ?? 1.0; + } + + private get _defaultMaxRounds(): number { + return this.governance?.maxRoundsPerColony ?? 
5; + } + updated(changed: Map) { if (changed.has('template') || changed.has('mode')) { this._populateFromTemplate(); } + if (changed.has('governance') && this.governance) { + if (!this.budgetLimit) this.budgetLimit = this._defaultBudget; + if (!this.maxRounds) this.maxRounds = this._defaultMaxRounds; + } } private _populateFromTemplate() { @@ -135,8 +148,8 @@ export class FcTemplateEditor extends LitElement { this.description = ''; this.castes = [{ caste: 'coder', tier: 'standard', count: 1 }]; this.strategy = 'stigmergic'; - this.budgetLimit = 1.0; - this.maxRounds = 5; + this.budgetLimit = this._defaultBudget; + this.maxRounds = this._defaultMaxRounds; this.tags = []; this._templateId = ''; this._version = 1; @@ -157,8 +170,8 @@ export class FcTemplateEditor extends LitElement { this.description = t.description; this.castes = t.castes.map(c => ({ ...c })); this.strategy = t.strategy; - this.budgetLimit = t.budgetLimit ?? 1.0; - this.maxRounds = t.maxRounds ?? 5; + this.budgetLimit = t.budgetLimit ?? this._defaultBudget; + this.maxRounds = t.maxRounds ?? this._defaultMaxRounds; this.tags = [...(t.tags ?? 
[])]; this._sourceColonyId = t.sourceColonyId; } diff --git a/frontend/src/components/workspace-browser.ts b/frontend/src/components/workspace-browser.ts index 4d6111c..b2e0c28 100644 --- a/frontend/src/components/workspace-browser.ts +++ b/frontend/src/components/workspace-browser.ts @@ -2,6 +2,9 @@ import { LitElement, html, css, nothing } from 'lit'; import { customElement, state, property } from 'lit/decorators.js'; import { voidTokens, sharedStyles } from '../styles/shared.js'; import './atoms.js'; +import './addon-panel.js'; + +interface AddonPanel { target: string; display_type: string; path: string; addon_name: string; } interface WsFile { name: string; bytes: number; } @@ -152,6 +155,7 @@ export class FcWorkspaceBrowser extends LitElement { `]; @property() workspaceId = ''; + @property({ type: Array }) addonPanels: AddonPanel[] = []; @state() private _files: WsFile[] = []; @state() private _loading = true; @@ -248,6 +252,14 @@ export class FcWorkspaceBrowser extends LitElement {
+ ${this.addonPanels.filter(p => p.target === 'workspace').map(p => html` + + + `)} + ${this._renderProjectContext()} ${this._files.length === 0 diff --git a/frontend/src/components/workspace-config.ts b/frontend/src/components/workspace-config.ts index 448b480..d74ac96 100644 --- a/frontend/src/components/workspace-config.ts +++ b/frontend/src/components/workspace-config.ts @@ -134,7 +134,12 @@ export class FcWorkspaceConfig extends LitElement {
-
Threads
+ ${this._renderColonyCards(cols)} + +
+
Threads
+ this.fire('spawn-colony-request', null)}>+ New Colony +
${(ws.children ?? []).map(th => html`
this.fire('navigate', th.id)}>
@@ -143,7 +148,59 @@ export class FcWorkspaceConfig extends LitElement { ${(th.children ?? []).length} colonies
- `)}`; + `)} + + ${(() => { + const completed = cols.filter(c => c.status === 'completed' || c.status === 'done'); + return completed.length > 0 ? html` +
Recent Completions
+ ${completed.slice(0, 4).map(c => html` +
this.fire('navigate', c.id)}> + ${c.name} \u2014 ${c.status} +
+ `)} + ` : nothing; + })()} + + ${cfg.description ? html` +
Description
+
${cfg.description}
+ ` : nothing} + +
+
this.fire('navigate-tab', 'knowledge')}> +
Knowledge
+
\u2139 Browse
+
+
this.fire('navigate-tab', 'playbook')}> +
Playbook
+
\u25B6 Templates
+
+
this.fire('navigate-tab', 'operations')}> +
Operations
+
\u2699 Manage
+
+
`; + } + + private _renderColonyCards(cols: ReturnType) { + const running = cols.filter(c => c.status === 'running'); + if (running.length === 0) return nothing; + return html` +
Active Colonies
+
+ ${running.map(c => html` +
this.fire('navigate', c.id)}> +
+ + ${c.name} +
+
${c.status} \u00B7 R${(c as any).rounds ?? 0}
+
+ `)} +
+ `; } private _modelOptions(current: string | null): string[] { diff --git a/frontend/src/state/store.ts b/frontend/src/state/store.ts index e3d898b..25bd79b 100644 --- a/frontend/src/state/store.ts +++ b/frontend/src/state/store.ts @@ -8,7 +8,7 @@ import type { TreeNode, MergeEdge, QueenThread, ApprovalRequest, CasteDefinition, LocalModel, CloudEndpoint, ProtocolStatus, RuntimeConfig, OperatorStateSnapshot, WSMessage, WSCommandAction, SkillBankStats, - QueenChatMessage, + QueenChatMessage, AddonSummary, } from '../types.js'; /** @@ -65,6 +65,7 @@ export interface StoreState { castes: CasteDefinition[]; runtimeConfig: RuntimeConfig | null; skillBankStats: SkillBankStats; + addons: AddonSummary[]; memoryStats: MemoryStats; templateStats: LearnedTemplateStats; connection: ConnectionState; @@ -76,7 +77,7 @@ function emptyState(): StoreState { return { tree: [], merges: [], queenThreads: [], approvals: [], protocolStatus: null, localModels: [], cloudEndpoints: [], - castes: [], runtimeConfig: null, + castes: [], runtimeConfig: null, addons: [], skillBankStats: { total: 0, avgConfidence: 0 }, memoryStats: { total: 0, extractedColonies: new Set(), globalPromotions: 0 }, templateStats: { total: 0, learned: 0, operator: 0 }, @@ -135,6 +136,7 @@ class FormicStore { castes: snap.castes, runtimeConfig: snap.runtimeConfig, skillBankStats: snap.skillBankStats ?? { total: 0, avgConfidence: 0 }, + addons: snap.addons ?? [], memoryStats: this._state.memoryStats, templateStats: this._state.templateStats, connection: this._state.connection, diff --git a/frontend/src/types.ts b/frontend/src/types.ts index f7693a9..fbb8908 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -240,6 +240,36 @@ export interface ProposalData { recommendation?: string; } +/** Wave 70.5: Blast radius metadata from proposal action dict. 
*/ +export interface BlastRadiusData { + score: number; + level: 'low' | 'medium' | 'high'; + factors: string[]; + recommendation: 'proceed' | 'notify' | 'escalate'; +} + +/** Wave 70.5: Autonomy status from GET /api/v1/workspaces/{id}/autonomy-status. */ +export interface AutonomyStatusData { + level: string; + score: number; + grade: string; + daily_budget: number; + daily_spend: number; + remaining: number; + active_maintenance_colonies: number; + max_maintenance_colonies: number; + auto_actions: string[]; + components: Record; + recommendation: string; + recent_actions: Array<{ + colony_id: string; + strategy: string; + outcome: string; + cost: number; + quality_score: number; + }>; +} + /** Wave 63: Edit proposal card from Queen's edit_file tool. */ export interface EditProposalMeta { filePath: string; @@ -372,6 +402,7 @@ export interface ModelRegistryEntry { maxOutputTokens: number; timeMultiplier: number; toolCallMultiplier: number; + hidden: boolean; } export interface ModelDefaults { @@ -647,6 +678,56 @@ export interface SkillBankStats { avgConfidence: number; } +export interface AddonToolSummary { + name: string; + description: string; + handler: string; + parameters: Record; + callCount: number; +} + +export interface AddonHandlerSummary { + event: string; + lastFired: string | null; + errorCount: number; +} + +export interface AddonTriggerSummary { + type: string; + schedule: string; + handler: string; + lastFired: string | null; +} + +export interface AddonPanelSummary { + target: string; + displayType: string; + path: string; + addonName: string; +} + +export interface AddonConfigParam { + key: string; + type: 'boolean' | 'string' | 'integer' | 'cron' | 'select'; + default: any; + label: string; + options: string[]; +} + +export interface AddonSummary { + name: string; + version: string; + description: string; + tools: AddonToolSummary[]; + handlers: AddonHandlerSummary[]; + triggers: AddonTriggerSummary[]; + panels: AddonPanelSummary[]; + config: 
AddonConfigParam[]; + status: 'healthy' | 'degraded' | 'error'; + lastError: string | null; + disabled: boolean; +} + export interface OperatorStateSnapshot { tree: TreeNode[]; merges: MergeEdge[]; @@ -658,6 +739,7 @@ export interface OperatorStateSnapshot { castes: CasteDefinition[]; runtimeConfig: RuntimeConfig; skillBankStats: SkillBankStats; + addons: AddonSummary[]; } export interface SkillEntry { @@ -915,6 +997,60 @@ export interface WorkflowStepCompletedEvent { artifactTypes: string[]; } +// Wave 67.5: provenance chain (append-only audit trail on knowledge entries) +export interface ProvenanceChainItem { + event_type: string; + timestamp: string; + actor_id: string; + detail: string; + confidence_delta: number | null; +} + +export interface ProvenanceResponse { + entry_id: string; + chain: ProvenanceChainItem[]; + total: number; +} + +// Wave 69: Consulted knowledge entry metadata on Queen messages +export interface ConsultedEntry { + id: string; + title: string; + confidence: number; +} + +// Wave 69: Parsed plan step from thread plan file +export interface PlanStep { + index: number; + status: string; + description: string; + colony_id?: string; + note?: string; +} + +export interface ThreadPlan { + exists: boolean; + title?: string; + approach?: string; + steps?: PlanStep[]; +} + +/** Wave 69: unified search result from /workspaces/{id}/search endpoint. 
*/ +export interface UnifiedSearchResult { + source: string; // 'memory' | 'codebase-index' | 'docs-index' + source_label: string; // human-readable source name + id: string; // entry ID or composite key + title: string; + snippet: string; + score: number; + metadata: Record; // source-specific metadata +} + +export interface UnifiedSearchResponse { + results: UnifiedSearchResult[]; + total: number; +} + // Wave 16 event types export interface ThreadRenamedEvent { type: 'ThreadRenamed'; diff --git a/scripts/bootstrap_hierarchy.py b/scripts/bootstrap_hierarchy.py new file mode 100644 index 0000000..d748132 --- /dev/null +++ b/scripts/bootstrap_hierarchy.py @@ -0,0 +1,208 @@ +"""Bootstrap knowledge hierarchy — offline LLM-only tool. + +Wave 67: Assigns hierarchy_path values to existing knowledge entries by +grouping them by domain tag and asking an LLM to identify topic sub-clusters +within each domain. For 300 entries across 15 domains, this is ~15 LLM calls. + +NOT imported by the runtime. Run manually: + python scripts/bootstrap_hierarchy.py --workspace-id [--base-url http://localhost:8080] + +See ADR-049 for design rationale. 
+""" + +from __future__ import annotations + +import argparse +import json +import re +import sys +from collections import defaultdict +from typing import Any + +import httpx + + +def _normalize_domain(raw: str) -> str: + return re.sub(r"[\s\-]+", "_", raw.strip()).lower() + + +def _group_by_domain(entries: list[dict[str, Any]]) -> dict[str, list[dict[str, Any]]]: + groups: dict[str, list[dict[str, Any]]] = defaultdict(list) + for e in entries: + domains = e.get("domains", []) + primary = domains[0] if domains else "uncategorized" + normalized = _normalize_domain(primary) + groups[normalized].append(e) + return dict(groups) + + +def _build_cluster_prompt(domain: str, entries: list[dict[str, Any]]) -> str: + entry_summaries = [] + for e in entries[:30]: # cap per-batch to avoid prompt overflow + title = e.get("title", "untitled") + summary = e.get("summary", "")[:100] + entry_summaries.append(f" - [{e.get('id', '?')[:12]}] {title}: {summary}") + + entries_text = "\n".join(entry_summaries) + return f"""You are organizing knowledge entries for the domain "{domain}". + +Below are {len(entry_summaries)} entries in this domain. Identify 2-5 topic +sub-clusters that naturally group these entries. Each entry should belong to +exactly one topic. 
+ +Entries: +{entries_text} + +Respond with a JSON array of objects, each with: +- "topic": short topic name (lowercase, underscores, no spaces) +- "entry_ids": list of entry ID prefixes that belong to this topic + +Example: +[ + {{"topic": "authentication", "entry_ids": ["abc123", "def456"]}}, + {{"topic": "testing", "entry_ids": ["ghi789"]}} +] + +Return ONLY the JSON array, no other text.""" + + +def _assign_hierarchy_paths( + domain: str, + entries: list[dict[str, Any]], + clusters: list[dict[str, Any]], +) -> dict[str, str]: + """Map entry IDs to hierarchy paths based on cluster assignments.""" + id_to_path: dict[str, str] = {} + + # Build reverse lookup: entry_id_prefix -> full entry_id + full_ids = {e.get("id", ""): e.get("id", "") for e in entries} + + for cluster in clusters: + topic = _normalize_domain(cluster.get("topic", "misc")) + path = f"/{domain}/{topic}/" + for prefix in cluster.get("entry_ids", []): + # Match prefix to full ID + for full_id in full_ids: + if full_id.startswith(prefix): + id_to_path[full_id] = path + break + + # Entries not assigned to any cluster get domain-only path + for e in entries: + eid = e.get("id", "") + if eid and eid not in id_to_path: + id_to_path[eid] = f"/{domain}/" + + return id_to_path + + +def main() -> None: + parser = argparse.ArgumentParser(description="Bootstrap knowledge hierarchy paths") + parser.add_argument("--workspace-id", required=True, help="Workspace ID") + parser.add_argument("--base-url", default="http://localhost:8080", help="FormicOS base URL") + parser.add_argument("--dry-run", action="store_true", help="Print assignments without applying") + args = parser.parse_args() + + client = httpx.Client(base_url=args.base_url, timeout=30.0) + + # Fetch entries + print(f"Fetching entries for workspace {args.workspace_id}...") + resp = client.get(f"/api/v1/workspaces/{args.workspace_id}/knowledge") + if resp.status_code != 200: + print(f"Failed to fetch entries: {resp.status_code}", file=sys.stderr) + 
sys.exit(1) + + data = resp.json() + entries = data.get("entries", data.get("items", [])) + print(f" Found {len(entries)} entries") + + if not entries: + print("No entries to process.") + return + + # Group by domain + groups = _group_by_domain(entries) + print(f" {len(groups)} domains: {', '.join(sorted(groups.keys()))}") + + # Process each domain + all_assignments: dict[str, str] = {} + for domain, domain_entries in sorted(groups.items()): + if len(domain_entries) < 3: + # Too few entries for sub-clustering — assign domain-level path + for e in domain_entries: + all_assignments[e.get("id", "")] = f"/{domain}/" + print(f" [{domain}] {len(domain_entries)} entries — too few, using domain-level path") + continue + + prompt = _build_cluster_prompt(domain, domain_entries) + print(f" [{domain}] {len(domain_entries)} entries — requesting LLM clustering...") + + # The bootstrap script uses the FormicOS LLM endpoint if available, + # or falls back to a simple domain-level assignment. + try: + # Try to call a simple LLM completion endpoint + llm_resp = client.post("/api/v1/llm/complete", json={ + "prompt": prompt, + "max_tokens": 1024, + }, timeout=60.0) + + if llm_resp.status_code == 200: + result_text = llm_resp.json().get("text", "[]") + # Parse JSON from response + try: + clusters = json.loads(result_text) + except json.JSONDecodeError: + # Try to extract JSON array from response + match = re.search(r"\[.*\]", result_text, re.DOTALL) + clusters = json.loads(match.group()) if match else [] + + assignments = _assign_hierarchy_paths(domain, domain_entries, clusters) + all_assignments.update(assignments) + topics = {v.split("/")[2] for v in assignments.values() if v.count("/") >= 3} + print(f" → {len(topics)} topics: {', '.join(sorted(topics))}") + else: + # Fallback: domain-level only + for e in domain_entries: + all_assignments[e.get("id", "")] = f"/{domain}/" + print(f" → LLM unavailable ({llm_resp.status_code}), using domain-level path") + except Exception as exc: + for 
e in domain_entries: + all_assignments[e.get("id", "")] = f"/{domain}/" + print(f" → LLM error ({exc}), using domain-level path") + + # Report + print(f"\nTotal assignments: {len(all_assignments)}") + unique_paths = sorted(set(all_assignments.values())) + print(f"Unique paths ({len(unique_paths)}):") + for p in unique_paths: + count = sum(1 for v in all_assignments.values() if v == p) + print(f" {p} ({count} entries)") + + if args.dry_run: + print("\n[DRY RUN] No changes applied.") + return + + # Apply: update entries via REST API + print("\nApplying hierarchy paths...") + updated = 0 + for entry_id, path in all_assignments.items(): + if not entry_id: + continue + try: + patch_resp = client.patch( + f"/api/v1/workspaces/{args.workspace_id}/knowledge/{entry_id}", + json={"hierarchy_path": path}, + timeout=10.0, + ) + if patch_resp.status_code in (200, 204): + updated += 1 + else: + print(f" Warning: failed to update {entry_id[:12]}: {patch_resp.status_code}") + except Exception as exc: + print(f" Warning: error updating {entry_id[:12]}: {exc}") + + print(f"Done. 
Updated {updated}/{len(all_assignments)} entries.") + + +if __name__ == "__main__": + main() diff --git a/src/formicos/__main__.py b/src/formicos/__main__.py index 3b5fafa..8fafbc7 100644 --- a/src/formicos/__main__.py +++ b/src/formicos/__main__.py @@ -34,6 +34,16 @@ def _build_parser() -> argparse.ArgumentParser: subs.add_parser("reset", help="Reset colony state") subs.add_parser("export-events", help="Export the event log") + init_mcp = subs.add_parser( + "init-mcp", + help="Generate MCP config for Claude Code integration", + ) + init_mcp.add_argument( + "--url", + default="http://localhost:8080/mcp", + help="FormicOS MCP server URL (default: http://localhost:8080/mcp)", + ) + return parser @@ -52,6 +62,85 @@ def main(argv: list[str] | None = None) -> None: print("Reset not yet implemented") elif args.command == "export-events": print("Event export not yet implemented") + elif args.command == "init-mcp": + _init_mcp(url=args.url) + + +_BRIDGE_TEMPLATE = """\ +# FormicOS Developer Bridge + +This project uses FormicOS for institutional memory, strategic delegation, +and autonomous background work. 
FormicOS MCP server: {url} + +## MCP Prompts (context injection — read-only) + +- **morning-status** — What happened, what's pending, project plan status +- **delegate-task** — Plan a colony to handle a task, get blast radius estimate +- **review-overnight-work** — Review autonomous actions, pending approvals, new knowledge +- **knowledge-for-context** — Search institutional memory for relevant entries + +## MCP Tools (actions — may mutate state) + +- `spawn_colony` — Create and start a colony directly +- `chat_queen` — Message the Queen for strategic guidance +- `get_status` — Workspace status with threads and colonies +- `approve` / `deny` — Review pending actions +- `log_finding` — Record a discovery as a knowledge entry +- `handoff_to_formicos` — Transfer work context to a new colony +- `addon_status` — Check installed addon health +- `toggle_addon` — Enable/disable addons +- `trigger_addon` — Run addon handlers (reindex, etc.) + +## MCP Resources + +- `formicos://plan` — Project plan (global) +- `formicos://procedures/{{workspace_id}}` — Operating procedures +- `formicos://journal/{{workspace_id}}` — Recent journal entries +- `formicos://knowledge/{{workspace}}` — Knowledge catalog +- `formicos://briefing/{{workspace_id}}` — Proactive intelligence briefing + +## Shared Files + +- `.formicos/project_plan.md` — Milestones (both you and FormicOS read/write) +- `.formicos/project_context.md` — Project instructions for colonies +- `.formicos/operations/*/operating_procedures.md` — Autonomy rules +- `.formicos/operations/*/queen_journal.md` — What FormicOS did (read-only) +""" + + +def _init_mcp(url: str = "http://localhost:8080/mcp") -> None: + """Generate .mcp.json and .formicos/DEVELOPER_QUICKSTART.md.""" + import json + from pathlib import Path + + cwd = Path.cwd() + + # Write .mcp.json + mcp_config = { + "mcpServers": { + "formicos": { + "type": "http", + "url": url, + } + } + } + mcp_path = cwd / ".mcp.json" + mcp_path.write_text(json.dumps(mcp_config, 
indent=2) + "\n") + print(f" Created {mcp_path}") + + # Write .formicos/DEVELOPER_QUICKSTART.md + bridge_dir = cwd / ".formicos" + bridge_dir.mkdir(exist_ok=True) + bridge_path = bridge_dir / "DEVELOPER_QUICKSTART.md" + bridge_path.write_text(_BRIDGE_TEMPLATE.format(url=url)) + print(f" Created {bridge_path}") + + print() + print("FormicOS MCP integration configured.") + print("Restart Claude Code to connect. Then try:") + print(" morning-status — get a complete briefing") + print(" delegate-task — hand off work to FormicOS") + print(" knowledge-for-context — search institutional memory") def _start_server(host: str | None = None, port: int | None = None) -> None: diff --git a/src/formicos/adapters/knowledge_graph.py b/src/formicos/adapters/knowledge_graph.py index 64680eb..5385b95 100644 --- a/src/formicos/adapters/knowledge_graph.py +++ b/src/formicos/adapters/knowledge_graph.py @@ -522,6 +522,165 @@ async def ingest_tuples( ) return created + # ------------------------------------------------------------------ + # Wave 67.5: Embedding-based entity matching for PPR seeding + # ------------------------------------------------------------------ + + async def match_entities_by_embedding( + self, + query: str, + workspace_id: str, + *, + limit: int = 5, + ) -> list[dict[str, Any]]: + """Find KG entities semantically similar to query. + + Falls back to normalized substring matching on entity names + if no embedding function is available. 
+ """ + db = await self._ensure_db() + cursor = await db.execute( + "SELECT id, name, entity_type, summary FROM kg_nodes WHERE workspace_id = ?", + [workspace_id], + ) + rows = list(await cursor.fetchall()) + if not rows: + return [] + + # Bound cost: skip embedding for large workspaces + if len(rows) > 500 or (self._async_embed_fn is None and self._embed_fn is None): + return self._substring_entity_match(query, rows, limit) + + candidate_texts = [ + f"{row['name']} {row['summary'] or ''}" # pyright: ignore[reportIndexIssue] + for row in rows + ] + all_texts = [query] + candidate_texts + embeddings = await self._embed_for_similarity(all_texts) + if embeddings is None: + return self._substring_entity_match(query, rows, limit) + + query_vec = embeddings[0] + scored: list[tuple[float, dict[str, Any]]] = [] + for i, cand_vec in enumerate(embeddings[1:]): + sim = _cosine_similarity(query_vec, cand_vec) + scored.append((sim, { + "id": rows[i]["id"], # pyright: ignore[reportIndexIssue] + "name": rows[i]["name"], # pyright: ignore[reportIndexIssue] + "entity_type": rows[i]["entity_type"], # pyright: ignore[reportIndexIssue] + "score": sim, + })) + scored.sort(key=lambda x: -x[0]) + return [item for _, item in scored[:limit]] + + @staticmethod + def _substring_entity_match( + query: str, + rows: list[Any], + limit: int, + ) -> list[dict[str, Any]]: + """Fallback: substring matching on entity names.""" + normalized_query = _normalize(query) + results: list[dict[str, Any]] = [] + for row in rows: + if _normalize(row["name"]) in normalized_query: # pyright: ignore[reportIndexIssue] + results.append({ + "id": row["id"], # pyright: ignore[reportIndexIssue] + "name": row["name"], # pyright: ignore[reportIndexIssue] + "entity_type": row["entity_type"], # pyright: ignore[reportIndexIssue] + "score": 1.0, + }) + if len(results) >= limit: + break + return results + + # ------------------------------------------------------------------ + # Wave 67.5: Personalized PageRank (ADR-050 D1) 
+ # ------------------------------------------------------------------ + + async def personalized_pagerank( + self, + seed_ids: list[str], + workspace_id: str, + *, + damping: float = 0.5, + iterations: int = 20, + ) -> dict[str, float]: + """Iterative PPR from seed entities. + + Builds a bounded local adjacency list by expanding outward from seeds + up to 3 hops, then runs power iteration with restart bias toward seeds. + Returns {entity_id: score} normalized so max = 1.0. + """ + if not seed_ids: + return {} + + # Build local adjacency via bounded expansion (3 hops) + adj_sets: dict[str, set[str]] = {} + frontier = set(seed_ids) + visited: set[str] = set() + + for _hop in range(3): + next_frontier: set[str] = set() + for node_id in frontier: + if node_id in visited: + continue + visited.add(node_id) + try: + neighbors = await self.get_neighbors( + node_id, workspace_id=workspace_id, + ) + except Exception: # noqa: BLE001 + continue + node_adj = adj_sets.setdefault(node_id, set()) + for nbr in neighbors: + other = ( + nbr["to_node"] if nbr["from_node"] == node_id + else nbr["from_node"] + ) + node_adj.add(other) + adj_sets.setdefault(other, set()).add(node_id) + next_frontier.add(other) + frontier = next_frontier - visited + + # Convert to lists for iteration; sets above prevent duplicate + # edges when the same relationship is discovered from both endpoints. 
+ adjacency: dict[str, list[str]] = {k: list(v) for k, v in adj_sets.items()} + all_nodes = set(adjacency.keys()) + if not all_nodes: + return {} + + # Initialize reset vector: uniform over seeds + reset: dict[str, float] = {} + valid_seeds = [s for s in seed_ids if s in all_nodes] + if not valid_seeds: + return {} + seed_weight = 1.0 / len(valid_seeds) + for s in valid_seeds: + reset[s] = seed_weight + + # Initialize PR scores + pr: dict[str, float] = {n: reset.get(n, 0.0) for n in all_nodes} + + # Power iteration + for _ in range(iterations): + new_pr: dict[str, float] = {} + for node in all_nodes: + incoming_mass = 0.0 + for neighbor in adjacency.get(node, []): + degree = len(adjacency.get(neighbor, [])) + if degree > 0: + incoming_mass += pr.get(neighbor, 0.0) / degree + new_pr[node] = (1 - damping) * reset.get(node, 0.0) + damping * incoming_mass + pr = new_pr + + # Normalize max score to 1.0 + max_score = max(pr.values()) if pr else 1.0 + if max_score > 0: + pr = {k: v / max_score for k, v in pr.items()} + + return pr + # ------------------------------------------------------------------ # Stats # ------------------------------------------------------------------ diff --git a/src/formicos/adapters/vector_qdrant.py b/src/formicos/adapters/vector_qdrant.py index ffbe1a4..66d7c1a 100644 --- a/src/formicos/adapters/vector_qdrant.py +++ b/src/formicos/adapters/vector_qdrant.py @@ -154,6 +154,7 @@ async def ensure_collection(self, name: str | None = None) -> None: ("extracted_at", models.PayloadSchemaType.DATETIME), ("source_colony", models.PayloadSchemaType.KEYWORD), ("source_colony_id", models.PayloadSchemaType.KEYWORD), + ("hierarchy_path", models.PayloadSchemaType.KEYWORD), ] for field, schema in index_fields: try: diff --git a/src/formicos/addons/codebase_index/status.py b/src/formicos/addons/codebase_index/status.py new file mode 100644 index 0000000..6671a15 --- /dev/null +++ b/src/formicos/addons/codebase_index/status.py @@ -0,0 +1,33 @@ +"""Codebase index 
status endpoint for addon panel rendering.""" + +from __future__ import annotations + +from typing import Any + + +async def get_status( + _inputs: dict[str, Any], + workspace_id: str, + _thread_id: str, + *, + runtime_context: dict[str, Any] | None = None, +) -> dict[str, Any]: + """Return index status as status_card data.""" + ctx = runtime_context or {} + vector_port = ctx.get("vector_port") + + items = [] + if vector_port is not None: + try: + info = await vector_port.collection_info("code_index") + items.append({"label": "Chunks indexed", "value": str(info.get("points_count", "?"))}) + items.append({"label": "Collection", "value": "code_index"}) + except Exception: # noqa: BLE001 + items.append({"label": "Status", "value": "unavailable"}) + else: + items.append({"label": "Status", "value": "no vector store"}) + + return { + "display_type": "status_card", + "items": items, + } diff --git a/src/formicos/addons/docs_index/__init__.py b/src/formicos/addons/docs_index/__init__.py new file mode 100644 index 0000000..09bc231 --- /dev/null +++ b/src/formicos/addons/docs_index/__init__.py @@ -0,0 +1 @@ +"""Documentation semantic index addon — embedding-based doc search.""" diff --git a/src/formicos/addons/docs_index/indexer.py b/src/formicos/addons/docs_index/indexer.py new file mode 100644 index 0000000..2ba651c --- /dev/null +++ b/src/formicos/addons/docs_index/indexer.py @@ -0,0 +1,294 @@ +"""Documentation indexer — walks workspace, chunks doc files, embeds, upserts to Qdrant. + +Chunking strategy: split on structural boundaries per format — +headings for Markdown/RST, ``
<h1>
``–``
<h3>
`` for HTML, blank lines for plain text. +""" + +from __future__ import annotations + +import hashlib +import re +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any + +import structlog + +from formicos.core.types import VectorDocument + +if TYPE_CHECKING: + from collections.abc import Sequence + from pathlib import Path + +log = structlog.get_logger() + +# File extensions worth indexing +_DOC_EXTENSIONS = frozenset({".md", ".rst", ".txt", ".html"}) + +# Directories to skip +_SKIP_DIRS = frozenset({ + "__pycache__", ".git", "node_modules", ".venv", "venv", + ".mypy_cache", ".pytest_cache", ".ruff_cache", "dist", "build", +}) + +COLLECTION_NAME = "docs_index" + +# Regex patterns for structural splitting +_MD_HEADING_RE = re.compile(r"^(#{1,3})\s+", re.MULTILINE) +_RST_UNDERLINE_RE = re.compile(r"^[=\-~^\"]{3,}\s*$", re.MULTILINE) +_HTML_HEADING_RE = re.compile(r"]*>", re.IGNORECASE) + + +@dataclass +class DocChunk: + """A chunk of documentation with location metadata.""" + + id: str + text: str + path: str + section: str + line_start: int + line_end: int + + +def _is_doc_file(path: Path) -> bool: + """Check if a file should be indexed.""" + return path.suffix.lower() in _DOC_EXTENSIONS and path.stat().st_size < 500_000 + + +def _make_chunk_id(file_path: str, line_start: int) -> str: + """Deterministic chunk ID from path and start line.""" + return hashlib.sha256(f"{file_path}:{line_start}".encode()).hexdigest()[:16] + + +def _flush_section( + lines: list[str], + file_path: str, + section: str, + line_start: int, + chunks: list[DocChunk], +) -> None: + """Flush accumulated lines into a DocChunk if non-empty.""" + text = "".join(lines).strip() + if text: + chunks.append(DocChunk( + id=_make_chunk_id(file_path, line_start), + text=text, + path=file_path, + section=section, + line_start=line_start, + line_end=line_start + len(lines) - 1, + )) + + +def _chunk_markdown(content: str, file_path: str) -> list[DocChunk]: + """Split Markdown on # / ## / 
### headings.""" + lines = content.splitlines(keepends=True) + chunks: list[DocChunk] = [] + current_section = "(intro)" + current_lines: list[str] = [] + section_start = 1 + + for i, line in enumerate(lines, start=1): + if _MD_HEADING_RE.match(line): + _flush_section(current_lines, file_path, current_section, section_start, chunks) + current_section = line.lstrip("#").strip() or "(heading)" + current_lines = [line] + section_start = i + else: + current_lines.append(line) + + _flush_section(current_lines, file_path, current_section, section_start, chunks) + return chunks + + +def _chunk_rst(content: str, file_path: str) -> list[DocChunk]: + """Split RST on heading underlines.""" + lines = content.splitlines(keepends=True) + chunks: list[DocChunk] = [] + current_section = "(intro)" + current_lines: list[str] = [] + section_start = 1 + + for i, line in enumerate(lines, start=1): + stripped = line.rstrip("\n\r") + if _RST_UNDERLINE_RE.match(stripped) and current_lines: + # The previous line is the heading title + title_line = current_lines.pop() + # Flush everything before the title as previous section + if current_lines: + _flush_section(current_lines, file_path, current_section, section_start, chunks) + current_section = title_line.strip() or "(heading)" + current_lines = [title_line, line] + section_start = i - 1 + else: + current_lines.append(line) + + _flush_section(current_lines, file_path, current_section, section_start, chunks) + return chunks + + +def _chunk_html(content: str, file_path: str) -> list[DocChunk]: + """Split HTML on
<h1>, <h2>, <h3>
tags.""" + lines = content.splitlines(keepends=True) + chunks: list[DocChunk] = [] + current_section = "(intro)" + current_lines: list[str] = [] + section_start = 1 + + for i, line in enumerate(lines, start=1): + if _HTML_HEADING_RE.search(line): + _flush_section(current_lines, file_path, current_section, section_start, chunks) + # Extract heading text (strip tags naively) + heading_text = re.sub(r"<[^>]+>", "", line).strip() + current_section = heading_text or "(heading)" + current_lines = [line] + section_start = i + else: + current_lines.append(line) + + _flush_section(current_lines, file_path, current_section, section_start, chunks) + return chunks + + +def _chunk_text(content: str, file_path: str) -> list[DocChunk]: + """Split plain text on blank-line-delimited sections.""" + lines = content.splitlines(keepends=True) + chunks: list[DocChunk] = [] + current_lines: list[str] = [] + section_start = 1 + section_idx = 0 + + for i, line in enumerate(lines, start=1): + if not line.strip() and current_lines: + section_idx += 1 + _flush_section( + current_lines, file_path, f"(section {section_idx})", + section_start, chunks, + ) + current_lines = [] + section_start = i + 1 + else: + current_lines.append(line) + + if current_lines: + section_idx += 1 + _flush_section( + current_lines, file_path, f"(section {section_idx})", + section_start, chunks, + ) + return chunks + + +def chunk_document(content: str, file_path: str) -> list[DocChunk]: + """Route to the appropriate chunker based on file extension.""" + ext = file_path.rsplit(".", 1)[-1].lower() if "." 
in file_path else "" + if ext == "md": + return _chunk_markdown(content, file_path) + if ext == "rst": + return _chunk_rst(content, file_path) + if ext in ("htm", "html"): + return _chunk_html(content, file_path) + return _chunk_text(content, file_path) + + +def _chunks_to_docs(chunks: Sequence[DocChunk]) -> list[VectorDocument]: + """Convert doc chunks to VectorDocuments for the VectorPort.""" + return [ + VectorDocument( + id=chunk.id, + content=chunk.text, + metadata={ + "path": chunk.path, + "section": chunk.section, + "line_start": chunk.line_start, + "line_end": chunk.line_end, + "content": chunk.text, + }, + ) + for chunk in chunks + ] + + +async def full_reindex( + workspace_path: Path, + vector_port: Any, +) -> dict[str, Any]: + """Walk workspace, chunk doc files, upsert to vector store. + + Returns summary dict with file_count, chunk_count, errors. + """ + file_count = 0 + chunk_count = 0 + errors = 0 + + for path in sorted(workspace_path.rglob("*")): + if any(part in _SKIP_DIRS for part in path.parts): + continue + if not path.is_file() or not _is_doc_file(path): + continue + + try: + content = path.read_text(encoding="utf-8", errors="ignore") + rel_path = str(path.relative_to(workspace_path)) + chunks = chunk_document(content, rel_path) + file_count += 1 + + docs = _chunks_to_docs(chunks) + if docs: + await vector_port.upsert(COLLECTION_NAME, docs) + chunk_count += len(docs) + except Exception: # noqa: BLE001 + errors += 1 + log.warning( + "docs_index.file_error", + path=str(path), exc_info=True, + ) + + log.info( + "docs_index.reindex_complete", + files=file_count, + chunks=chunk_count, + errors=errors, + ) + return { + "file_count": file_count, + "chunk_count": chunk_count, + "errors": errors, + } + + +async def incremental_reindex( + workspace_path: Path, + vector_port: Any, + *, + changed_files: list[str] | None = None, +) -> dict[str, Any]: + """Re-index only changed files. + + If ``changed_files`` is None, falls back to full reindex. 
+ """ + if changed_files is None: + return await full_reindex(workspace_path, vector_port) + + chunk_count = 0 + errors = 0 + + for rel_path in changed_files: + path = workspace_path / rel_path + if not path.is_file() or not _is_doc_file(path): + continue + try: + content = path.read_text(encoding="utf-8", errors="ignore") + chunks = chunk_document(content, rel_path) + docs = _chunks_to_docs(chunks) + if docs: + await vector_port.upsert(COLLECTION_NAME, docs) + chunk_count += len(docs) + except Exception: # noqa: BLE001 + errors += 1 + + return { + "file_count": len(changed_files), + "chunk_count": chunk_count, + "errors": errors, + } diff --git a/src/formicos/addons/docs_index/search.py b/src/formicos/addons/docs_index/search.py new file mode 100644 index 0000000..f4dd75e --- /dev/null +++ b/src/formicos/addons/docs_index/search.py @@ -0,0 +1,126 @@ +"""Semantic documentation search handler for the docs-index addon.""" + +from __future__ import annotations + +import fnmatch +from typing import Any + +import structlog + +from formicos.addons.docs_index.indexer import COLLECTION_NAME + +log = structlog.get_logger() + + +def _format_result(hit: Any) -> str: + """Format a single search result for display.""" + metadata = getattr(hit, "metadata", {}) or {} + payload = getattr(hit, "payload", metadata) or {} + path = payload.get("path", "?") + section = payload.get("section", "") + line_start = payload.get("line_start", "?") + line_end = payload.get("line_end", "?") + content = payload.get("content", "") + score = getattr(hit, "score", 0.0) + # Truncate content for display + if len(content) > 300: + content = content[:300] + "..." 
+ header = f"**{path}:{line_start}-{line_end}**" + if section: + header += f" [{section}]" + header += f" (score: {score:.3f})" + return f"{header}\n```\n{content}\n```" + + +async def handle_semantic_search( + inputs: dict[str, Any], + workspace_id: str, + thread_id: str, + *, + runtime_context: dict[str, Any] | None = None, +) -> str: + """Search documentation by semantic meaning using the vector index.""" + query = inputs.get("query", "") + top_k = inputs.get("top_k", 10) + file_pattern = inputs.get("file_pattern", "") + + if not query: + return "Error: query parameter is required." + + ctx = runtime_context or {} + vector_port = ctx.get("vector_port") + + if vector_port is None: + return ( + "Semantic search unavailable — vector store not configured. " + "Ensure the Qdrant sidecar is running." + ) + + try: + hits = await vector_port.search(COLLECTION_NAME, query, top_k) + except Exception: # noqa: BLE001 + log.warning("docs_index.search_error", exc_info=True) + return ( + "Documentation index not found. Build it first with the " + "'reindex_docs' tool." + ) + + if not hits: + return ( + f"No results found for '{query}' in documentation index. " + "The index may be empty — try 'reindex_docs' first." + ) + + # Filter by file pattern if provided + if file_pattern: + filtered: list[Any] = [] + for h in hits: + metadata = getattr(h, "metadata", {}) or {} + payload = getattr(h, "payload", metadata) or {} + path = payload.get("path", "") + if fnmatch.fnmatch(path, file_pattern): + filtered.append(h) + hits = filtered + + if not hits: + return f"No results matching pattern '{file_pattern}' for query '{query}'." 
+ + lines = [f"**Documentation search:** {len(hits)} results for '{query}'\n"] + for hit in hits: + lines.append(_format_result(hit)) + return "\n\n".join(lines) + + +async def handle_reindex( + inputs: dict[str, Any], + workspace_id: str, + thread_id: str, + *, + runtime_context: dict[str, Any] | None = None, +) -> str: + """Trigger incremental or full reindex of workspace documentation.""" + from formicos.addons.docs_index.indexer import incremental_reindex + + ctx = runtime_context or {} + vector_port = ctx.get("vector_port") + workspace_root_fn = ctx.get("workspace_root_fn") + + if not vector_port or not workspace_root_fn: + return ( + "Reindex unavailable — missing vector_port " + "or workspace_root_fn in runtime context." + ) + + workspace_path = workspace_root_fn(workspace_id) + if not workspace_path.is_dir(): + return f"Workspace path not found: {workspace_path}" + + changed_files = inputs.get("changed_files") + result = await incremental_reindex( + workspace_path, vector_port, + changed_files=changed_files, + ) + return ( + f"Reindex complete: {result['file_count']} files, " + f"{result['chunk_count']} chunks indexed, {result['errors']} errors." 
+ ) diff --git a/src/formicos/addons/docs_index/status.py b/src/formicos/addons/docs_index/status.py new file mode 100644 index 0000000..ce18e09 --- /dev/null +++ b/src/formicos/addons/docs_index/status.py @@ -0,0 +1,35 @@ +"""Documentation index status endpoint for addon panel rendering.""" + +from __future__ import annotations + +from typing import Any + + +async def get_status( + _inputs: dict[str, Any], + workspace_id: str, + _thread_id: str, + *, + runtime_context: dict[str, Any] | None = None, +) -> dict[str, Any]: + """Return index status as status_card data.""" + ctx = runtime_context or {} + vector_port = ctx.get("vector_port") + + items: list[dict[str, str]] = [] + if vector_port is not None: + try: + info = await vector_port.collection_info("docs_index") + count = str(info.get("points_count", "?")) + items.append({"label": "Documents indexed", "value": count}) + items.append({"label": "Collection", "value": "docs_index"}) + items.append({"label": "Extensions", "value": ".md, .rst, .txt, .html"}) + except Exception: # noqa: BLE001 + items.append({"label": "Status", "value": "unavailable"}) + else: + items.append({"label": "Status", "value": "no vector store"}) + + return { + "display_type": "status_card", + "items": items, + } diff --git a/src/formicos/addons/git_control/status.py b/src/formicos/addons/git_control/status.py new file mode 100644 index 0000000..bcd748d --- /dev/null +++ b/src/formicos/addons/git_control/status.py @@ -0,0 +1,52 @@ +"""Git control status endpoint for addon panel rendering.""" + +from __future__ import annotations + +import asyncio +from typing import Any + + +async def get_status( + _inputs: dict[str, Any], + workspace_id: str, + _thread_id: str, + *, + runtime_context: dict[str, Any] | None = None, +) -> dict[str, Any]: + """Return git workspace status as status_card data.""" + ctx = runtime_context or {} + workspace_root_fn = ctx.get("workspace_root_fn") + + items = [] + ws_path = workspace_root_fn(workspace_id) if 
workspace_root_fn else None + + if ws_path and ws_path.is_dir(): + try: + proc = await asyncio.create_subprocess_exec( + "git", "branch", "--show-current", + cwd=str(ws_path), + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, _ = await proc.communicate() + branch = stdout.decode().strip() if stdout else "unknown" + items.append({"label": "Branch", "value": branch}) + + proc2 = await asyncio.create_subprocess_exec( + "git", "status", "--porcelain", + cwd=str(ws_path), + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout2, _ = await proc2.communicate() + lines = [ln for ln in (stdout2.decode().splitlines() if stdout2 else []) if ln.strip()] + items.append({"label": "Modified files", "value": str(len(lines))}) + except Exception: # noqa: BLE001 + items.append({"label": "Status", "value": "git unavailable"}) + else: + items.append({"label": "Status", "value": "no workspace"}) + + return { + "display_type": "status_card", + "items": items, + } diff --git a/src/formicos/addons/mcp_bridge/__init__.py b/src/formicos/addons/mcp_bridge/__init__.py new file mode 100644 index 0000000..e7893cd --- /dev/null +++ b/src/formicos/addons/mcp_bridge/__init__.py @@ -0,0 +1 @@ +"""MCP bridge addon — connects remote MCP servers into the FormicOS tool ecosystem.""" diff --git a/src/formicos/addons/mcp_bridge/client.py b/src/formicos/addons/mcp_bridge/client.py new file mode 100644 index 0000000..3a922ab --- /dev/null +++ b/src/formicos/addons/mcp_bridge/client.py @@ -0,0 +1,238 @@ +"""MCP bridge client — manages connections to remote MCP servers. + +Provides connection caching, per-server health tracking, and a structured +health export for generic consumption by addon_loader / queen_tools. 
+""" +# pyright: reportUnknownVariableType=false, reportUnknownMemberType=false +# pyright: reportMissingTypeArgument=false, reportUnknownParameterType=false +# pyright: reportUnknownArgumentType=false, reportAttributeAccessIssue=false + +from __future__ import annotations + +import asyncio +from dataclasses import dataclass, field +from datetime import UTC, datetime +from typing import Any + +import structlog +from fastmcp.client import Client + +log = structlog.get_logger() + + +@dataclass +class ServerHealth: + """Health state for a single MCP server connection.""" + + name: str + url: str + connected: bool = False + last_connected: str | None = None + last_error: str | None = None + tool_count: int = 0 + call_count: int = 0 + error_count: int = 0 + + +@dataclass +class McpBridge: + """Manages connections to multiple remote MCP servers. + + Connections are lazily established and cached. Health is tracked + per-server and exported as a structured dict for generic consumption. + """ + + servers: list[dict[str, str]] = field(default_factory=list) + _clients: dict[str, Client] = field(default_factory=dict, repr=False) + _health: dict[str, ServerHealth] = field(default_factory=dict, repr=False) + _locks: dict[str, asyncio.Lock] = field(default_factory=dict, repr=False) + + def configure(self, servers: list[dict[str, str]]) -> None: + """Set or replace the server list. 
Does not eagerly connect.""" + self.servers = list(servers) + # Initialise health entries for newly-configured servers + for s in servers: + name = s.get("name", "") + if name and name not in self._health: + self._health[name] = ServerHealth( + name=name, url=s.get("url", ""), + ) + + async def _get_client(self, name: str) -> Client | None: + """Return a connected client for *name*, or None on failure.""" + if name not in self._locks: + self._locks[name] = asyncio.Lock() + + async with self._locks[name]: + # Return cached if still connected + existing = self._clients.get(name) + if existing is not None and existing.is_connected(): + return existing + + # Find server config + server_cfg = next( + (s for s in self.servers if s.get("name") == name), None, + ) + if server_cfg is None: + return None + + url = server_cfg.get("url", "") + if not url: + return None + + health = self._health.setdefault( + name, ServerHealth(name=name, url=url), + ) + + try: + client = Client(url, timeout=10) + await client.__aenter__() + health.connected = True + health.last_connected = datetime.now(UTC).isoformat() + health.last_error = None + self._clients[name] = client + log.info("mcp_bridge.connected", server=name, url=url) + return client + except Exception as exc: # noqa: BLE001 + health.connected = False + health.last_error = str(exc)[:200] + health.error_count += 1 + log.warning( + "mcp_bridge.connect_failed", + server=name, error=str(exc)[:200], + ) + return None + + async def list_tools(self, server_name: str | None = None) -> list[dict[str, Any]]: + """List tools from one or all configured servers.""" + targets = ( + [s for s in self.servers if s.get("name") == server_name] + if server_name + else self.servers + ) + results: list[dict[str, Any]] = [] + for srv in targets: + name = srv.get("name", "") + client = await self._get_client(name) + if client is None: + continue + try: + tools = await client.list_tools() + health = self._health.get(name) + if health: + health.tool_count 
= len(tools) + for t in tools: + results.append({ + "server": name, + "name": t.name, + "description": getattr(t, "description", "") or "", + "inputSchema": ( + t.inputSchema if hasattr(t, "inputSchema") else {} + ), + }) + except Exception as exc: # noqa: BLE001 + health = self._health.get(name) + if health: + health.error_count += 1 + health.last_error = str(exc)[:200] + log.warning( + "mcp_bridge.list_tools_failed", + server=name, error=str(exc)[:200], + ) + return results + + async def call_tool( + self, + server_name: str, + tool_name: str, + arguments: dict[str, Any] | None = None, + ) -> str: + """Call a tool on a specific server. Returns result text.""" + client = await self._get_client(server_name) + if client is None: + return f"Error: Cannot connect to MCP server '{server_name}'" + + health = self._health.get(server_name) + try: + result = await client.call_tool(tool_name, arguments) + if health: + health.call_count += 1 + # Extract text from CallToolResult + if hasattr(result, "content") and result.content: + parts = [] + for item in result.content: + if hasattr(item, "text"): + parts.append(item.text) + return "\n".join(parts) if parts else str(result) + return str(result) + except Exception as exc: # noqa: BLE001 + if health: + health.error_count += 1 + health.last_error = str(exc)[:200] + return f"Error calling {tool_name} on {server_name}: {exc!s}" + + async def close(self) -> None: + """Disconnect all cached clients.""" + for name, client in list(self._clients.items()): + try: + await client.__aexit__(None, None, None) + except Exception: # noqa: BLE001 + pass + finally: + self._clients.pop(name, None) + health = self._health.get(name) + if health: + health.connected = False + + # ------------------------------------------------------------------ + # Structured health export (generic capability protocol) + # ------------------------------------------------------------------ + + def get_bridge_health(self) -> dict[str, Any]: + """Return 
machine-readable bridge health for addon summary consumers. + + This is the capability-protocol export: if an addon registration's + ``runtime_context`` contains a ``get_bridge_health`` callable, + generic code (queen_tools, routes/api) can consume it without + hardcoding addon names. + """ + servers: list[dict[str, Any]] = [] + total_tools = 0 + connected_count = 0 + unhealthy_count = 0 + + for s in self.servers: + name = s.get("name", "") + health = self._health.get(name) + if health is None: + servers.append({ + "name": name, + "status": "unconfigured", + }) + unhealthy_count += 1 + continue + + status = "connected" if health.connected else "disconnected" + if health.error_count >= 3: + status = "error" + + if health.connected: + connected_count += 1 + else: + unhealthy_count += 1 + + total_tools += health.tool_count + servers.append({ + "name": name, + "status": status, + "toolCount": health.tool_count, + "callCount": health.call_count, + "lastError": health.last_error, + }) + + return { + "connectedServers": connected_count, + "unhealthyServers": unhealthy_count, + "totalRemoteTools": total_tools, + "servers": servers, + } diff --git a/src/formicos/addons/mcp_bridge/discovery.py b/src/formicos/addons/mcp_bridge/discovery.py new file mode 100644 index 0000000..a5c0d10 --- /dev/null +++ b/src/formicos/addons/mcp_bridge/discovery.py @@ -0,0 +1,69 @@ +"""MCP bridge tool handlers — registered by addon.yaml. 
+ +These handlers follow the standard addon tool signature: +``async def handler(inputs, workspace_id, thread_id, *, runtime_context)`` +""" + +from __future__ import annotations + +from typing import Any + +import structlog + +log = structlog.get_logger() + + +def _get_bridge(runtime_context: dict[str, Any]) -> Any: + """Extract the McpBridge instance from runtime_context.""" + return runtime_context.get("mcp_bridge") + + +async def handle_discover_tools( + inputs: dict[str, Any], + workspace_id: str, + thread_id: str, + *, + runtime_context: dict[str, Any] | None = None, +) -> str: + """Discover tools available on connected MCP servers.""" + ctx = runtime_context or {} + bridge = _get_bridge(ctx) + if bridge is None: + return "MCP bridge is not configured. No remote servers available." + + server = inputs.get("server") or None + tools = await bridge.list_tools(server) + + if not tools: + target = f"server '{server}'" if server else "any connected server" + return f"No tools discovered on {target}." + + parts = [f"Discovered {len(tools)} remote tool(s):"] + for t in tools: + parts.append( + f"- [{t['server']}] {t['name']}: {t.get('description', '')[:120]}" + ) + return "\n".join(parts) + + +async def handle_call_tool( + inputs: dict[str, Any], + workspace_id: str, + thread_id: str, + *, + runtime_context: dict[str, Any] | None = None, +) -> str: + """Call a tool on a remote MCP server.""" + ctx = runtime_context or {} + bridge = _get_bridge(ctx) + if bridge is None: + return "MCP bridge is not configured." + + server = inputs.get("server", "") + tool = inputs.get("tool", "") + arguments: dict[str, Any] = inputs.get("arguments") or {} + + if not server or not tool: + return "Error: 'server' and 'tool' are required." 
+ + return await bridge.call_tool(server, tool, arguments) diff --git a/src/formicos/addons/proactive_intelligence/rules.py b/src/formicos/addons/proactive_intelligence/rules.py index 481211a..51f6981 100644 --- a/src/formicos/addons/proactive_intelligence/rules.py +++ b/src/formicos/addons/proactive_intelligence/rules.py @@ -691,20 +691,32 @@ def _rule_cost_outlier( def _rule_knowledge_roi( outcomes: dict[str, Any], ) -> list[KnowledgeInsight]: - """Flag when colonies cost significant amounts but extract no knowledge.""" + """Flag when colonies cost significant amounts but extract no knowledge. + + Also flags when successful colonies access zero knowledge entries and + produce low-quality outcomes — a signal that retrieval is underused. + """ insights: list[KnowledgeInsight] = [] no_extraction_cost = 0.0 no_extraction_count = 0 total_cost = 0.0 + no_access_low_quality = 0 + no_access_total = 0 for o in outcomes.values(): cost = float(getattr(o, "total_cost", 0)) extracted = int(getattr(o, "entries_extracted", 0)) + accessed = int(getattr(o, "entries_accessed", 0)) succeeded = bool(getattr(o, "succeeded", False)) + quality = float(getattr(o, "quality_score", 0)) total_cost += cost if succeeded and cost > 0 and extracted == 0: no_extraction_cost += cost no_extraction_count += 1 + if succeeded and accessed == 0: + no_access_total += 1 + if quality < 0.7: + no_access_low_quality += 1 if no_extraction_count >= 3 and total_cost > 0: pct = no_extraction_cost / total_cost @@ -724,6 +736,23 @@ def _rule_knowledge_roi( "and task prompts for knowledge-producing castes." ), )) + + if no_access_low_quality >= 3: + insights.append(KnowledgeInsight( + severity="info", + category="performance", + title="Knowledge underutilization correlated with low quality", + detail=( + f"{no_access_low_quality} of {no_access_total} successful colonies " + f"accessed zero knowledge entries and scored below 0.7 quality. 
" + f"Colonies that leverage existing knowledge tend to produce better results." + ), + affected_entries=[], + suggested_action=( + "Verify that knowledge retrieval is enabled for relevant castes. " + "Consider seeding domain knowledge before dispatching colonies." + ), + )) return insights diff --git a/src/formicos/core/types.py b/src/formicos/core/types.py index b59aa1e..fecb168 100644 --- a/src/formicos/core/types.py +++ b/src/formicos/core/types.py @@ -225,6 +225,10 @@ class ModelRecord(BaseModel): default=0, description="Max concurrent requests. 0 = use LLM_SLOTS env var.", ) + hidden: bool = Field( + default=False, + description="Operator-set flag to hide this model from default selection.", + ) # --------------------------------------------------------------------------- diff --git a/src/formicos/engine/scoring_math.py b/src/formicos/engine/scoring_math.py index 3dcc776..b26a850 100644 --- a/src/formicos/engine/scoring_math.py +++ b/src/formicos/engine/scoring_math.py @@ -75,4 +75,25 @@ def exploration_score( return min(ts_draw + ucb_weight * ucb_bonus, 1.0) -__all__ = ["exploration_score"] +def rescale_preserving_mean( + alpha: float, beta: float, max_ess: float = 150.0, +) -> tuple[float, float]: + """Rescale Beta parameters to cap effective sample size. + + Mathematically equivalent to exponential decay with + gamma = 1 - 1/max_ess. Default cap of 150 (not 100) lets + high-evidence entries stabilize without becoming immovable. + 100 would be too aggressive per production Thompson Sampling + literature (Russo et al. recommend N_eff ~ 200 for + nonstationary environments). + + Preserves the posterior mean: alpha/(alpha+beta) is unchanged. 
+ """ + ess = alpha + beta + if ess <= max_ess: + return alpha, beta + scale = max_ess / ess + return alpha * scale, beta * scale + + +__all__ = ["exploration_score", "rescale_preserving_mean"] diff --git a/src/formicos/engine/tool_dispatch.py b/src/formicos/engine/tool_dispatch.py index b2eb74b..26ecac0 100644 --- a/src/formicos/engine/tool_dispatch.py +++ b/src/formicos/engine/tool_dispatch.py @@ -607,6 +607,14 @@ }), denied_tools=frozenset({"code_execute"}), ), + "forager": CasteToolPolicy( + caste="forager", + allowed_categories=frozenset({ + ToolCategory.vector_query, ToolCategory.search_web, + ToolCategory.network_out, + }), + denied_tools=frozenset({"code_execute"}), + ), } diff --git a/src/formicos/surface/action_queue.py b/src/formicos/surface/action_queue.py new file mode 100644 index 0000000..ff31fe3 --- /dev/null +++ b/src/formicos/surface/action_queue.py @@ -0,0 +1,322 @@ +"""Durable action queue ledger (Wave 71.0 Track 4). + +Canonical operational inbox for proposed, approved, rejected, and executed +actions. Backed by workspace-scoped JSONL files: +``.formicos/operations/{workspace_id}/actions.jsonl`` + +The queue is generic — ``kind`` is the semantic authority for routing and UI. +Future action kinds (continuation, knowledge-review, workflow-template) slot +in without schema changes. 
+""" + +from __future__ import annotations + +import gzip +import json +import uuid +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +import structlog + +log = structlog.get_logger() + +# --------------------------------------------------------------------------- +# Status constants +# --------------------------------------------------------------------------- + +STATUS_PENDING_REVIEW = "pending_review" +STATUS_APPROVED = "approved" +STATUS_REJECTED = "rejected" +STATUS_EXECUTED = "executed" +STATUS_SELF_REJECTED = "self_rejected" +STATUS_FAILED = "failed" + +_VALID_STATUSES = { + STATUS_PENDING_REVIEW, + STATUS_APPROVED, + STATUS_REJECTED, + STATUS_EXECUTED, + STATUS_SELF_REJECTED, + STATUS_FAILED, +} + +# --------------------------------------------------------------------------- +# Path helpers +# --------------------------------------------------------------------------- + +_COMPACT_THRESHOLD = 1000 +_COMPACT_KEEP = 500 + + +def _actions_dir(data_dir: str, workspace_id: str) -> Path: + return Path(data_dir) / ".formicos" / "operations" / workspace_id + + +def _actions_path(data_dir: str, workspace_id: str) -> Path: + return _actions_dir(data_dir, workspace_id) / "actions.jsonl" + + +# --------------------------------------------------------------------------- +# Action record creation +# --------------------------------------------------------------------------- + + +def new_action_id() -> str: + return f"act-{uuid.uuid4().hex[:12]}" + + +def create_action( # noqa: PLR0913 + *, + kind: str, + title: str, + detail: str = "", + source_category: str = "", + source_ref: str = "", + rationale: str = "", + payload: dict[str, Any] | None = None, + thread_id: str = "", + estimated_cost: float = 0.0, + blast_radius: float = 0.0, + confidence: float = 0.0, + requires_approval: bool = True, + created_by: str = "system", +) -> dict[str, Any]: + """Build a new action record dict (not yet persisted).""" + now = 
datetime.now(UTC).isoformat() + return { + "action_id": new_action_id(), + "created_at": now, + "updated_at": now, + "created_by": created_by, + "status": STATUS_PENDING_REVIEW, + "kind": kind, + "source_category": source_category, + "source_ref": source_ref, + "title": title, + "detail": detail, + "rationale": rationale, + "payload": payload or {}, + "thread_id": thread_id, + "estimated_cost": estimated_cost, + "blast_radius": blast_radius, + "confidence": confidence, + "requires_approval": requires_approval, + "approval_request_id": "", + "executed_at": "", + "operator_reason": "", + } + + +# --------------------------------------------------------------------------- +# Persistence +# --------------------------------------------------------------------------- + + +def append_action( + data_dir: str, + workspace_id: str, + action: dict[str, Any], +) -> None: + """Append a single action record to the JSONL ledger.""" + path = _actions_path(data_dir, workspace_id) + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("a", encoding="utf-8") as f: + f.write(json.dumps(action, default=str) + "\n") + + +def read_actions( + data_dir: str, + workspace_id: str, +) -> list[dict[str, Any]]: + """Read all action records from the JSONL ledger.""" + path = _actions_path(data_dir, workspace_id) + if not path.is_file(): + return [] + actions: list[dict[str, Any]] = [] + for line in path.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line: + continue + try: + actions.append(json.loads(line)) + except json.JSONDecodeError: + continue + return actions + + +def update_action( + data_dir: str, + workspace_id: str, + action_id: str, + updates: dict[str, Any], +) -> dict[str, Any] | None: + """Update an action record in-place. 
Returns the updated record or None.""" + actions = read_actions(data_dir, workspace_id) + target: dict[str, Any] | None = None + + for act in actions: + if act.get("action_id") == action_id: + act.update(updates) + act["updated_at"] = datetime.now(UTC).isoformat() + target = act + break + + if target is None: + return None + + _rewrite_actions(data_dir, workspace_id, actions) + return target + + +def _rewrite_actions( + data_dir: str, + workspace_id: str, + actions: list[dict[str, Any]], +) -> None: + """Rewrite the entire JSONL file atomically.""" + path = _actions_path(data_dir, workspace_id) + path.parent.mkdir(parents=True, exist_ok=True) + tmp = path.with_suffix(".jsonl.tmp") + with tmp.open("w", encoding="utf-8") as f: + for act in actions: + f.write(json.dumps(act, default=str) + "\n") + tmp.replace(path) + + +# --------------------------------------------------------------------------- +# Filtering + aggregation +# --------------------------------------------------------------------------- + + +def list_actions( + data_dir: str, + workspace_id: str, + *, + status: str = "", + kind: str = "", + limit: int = 100, +) -> dict[str, Any]: + """List actions with optional filtering and aggregation. 
+ + Returns:: + + { + "actions": [...], + "total": N, + "counts_by_status": {...}, + "counts_by_kind": {...}, + } + """ + all_actions = read_actions(data_dir, workspace_id) + + # Aggregate counts over the full set + counts_by_status: dict[str, int] = {} + counts_by_kind: dict[str, int] = {} + for act in all_actions: + s = act.get("status", "") + k = act.get("kind", "") + counts_by_status[s] = counts_by_status.get(s, 0) + 1 + counts_by_kind[k] = counts_by_kind.get(k, 0) + 1 + + # Filter + filtered = all_actions + if status: + filtered = [a for a in filtered if a.get("status") == status] + if kind: + filtered = [a for a in filtered if a.get("kind") == kind] + + # Most recent first + filtered.sort(key=lambda a: a.get("created_at", ""), reverse=True) + + return { + "actions": filtered[:limit], + "total": len(filtered), + "counts_by_status": counts_by_status, + "counts_by_kind": counts_by_kind, + } + + +# --------------------------------------------------------------------------- +# Compaction — prevents unbounded growth +# --------------------------------------------------------------------------- + + +def compact_action_log(data_dir: str, workspace_id: str) -> bool: + """Archive old entries when the JSONL exceeds the threshold. + + When the file has > ``_COMPACT_THRESHOLD`` lines, archive older entries + to ``actions.{date}.jsonl.gz`` and keep only the last + ``_COMPACT_KEEP`` entries in the active file. + + Returns True if compaction was performed. 
+ """ + path = _actions_path(data_dir, workspace_id) + if not path.is_file(): + return False + + actions = read_actions(data_dir, workspace_id) + if len(actions) <= _COMPACT_THRESHOLD: + return False + + # Archive the older entries + archive_entries = actions[:-_COMPACT_KEEP] + keep_entries = actions[-_COMPACT_KEEP:] + + date_str = datetime.now(UTC).strftime("%Y%m%d_%H%M%S") + archive_path = _actions_dir(data_dir, workspace_id) / f"actions.{date_str}.jsonl.gz" + + with gzip.open(archive_path, "wt", encoding="utf-8") as gz: + for act in archive_entries: + gz.write(json.dumps(act, default=str) + "\n") + + _rewrite_actions(data_dir, workspace_id, keep_entries) + + log.info( + "action_queue.compacted", + workspace_id=workspace_id, + archived=len(archive_entries), + kept=len(keep_entries), + ) + return True + + +# --------------------------------------------------------------------------- +# Convenience: queue from proactive insight +# --------------------------------------------------------------------------- + + +def queue_from_insight( + data_dir: str, + workspace_id: str, + *, + insight_category: str, + insight_title: str, + insight_detail: str = "", + suggested_colony: dict[str, Any] | None = None, + blast_radius: float = 0.0, + estimated_cost: float = 0.0, + confidence: float = 0.0, + reason: str = "", + self_rejected: bool = False, +) -> dict[str, Any]: + """Create and persist an action from a proactive intelligence insight.""" + action = create_action( + kind="maintenance", + title=insight_title, + detail=insight_detail, + source_category=insight_category, + rationale=reason, + payload={"suggested_colony": suggested_colony} if suggested_colony else {}, + estimated_cost=estimated_cost, + blast_radius=blast_radius, + confidence=confidence, + requires_approval=not self_rejected, + created_by="proactive_intelligence", + ) + if self_rejected: + action["status"] = STATUS_SELF_REJECTED + action["operator_reason"] = reason + append_action(data_dir, workspace_id, action) 
+ return action diff --git a/src/formicos/surface/addon_loader.py b/src/formicos/surface/addon_loader.py index af3f9df..7da4c90 100644 --- a/src/formicos/surface/addon_loader.py +++ b/src/formicos/surface/addon_loader.py @@ -49,6 +49,16 @@ class AddonTriggerSpec(BaseModel): handler: str = Field(..., description="module.py::function_name") +class AddonConfigParam(BaseModel): + """A configurable parameter declared by an addon.""" + + key: str + type: str = Field(default="string", description="boolean | string | integer | cron | select") + default: Any = None + label: str = "" + options: list[str] = Field(default_factory=list) + + class AddonManifest(BaseModel): """Parsed addon.yaml manifest.""" @@ -58,10 +68,16 @@ class AddonManifest(BaseModel): author: str = "" tools: list[AddonToolSpec] = Field(default_factory=list) handlers: list[AddonHandlerSpec] = Field(default_factory=list) + config: list[AddonConfigParam] = Field(default_factory=list) panels: list[dict[str, Any]] = Field(default_factory=list) templates: list[dict[str, Any]] = Field(default_factory=list) routes: list[dict[str, Any]] = Field(default_factory=list) triggers: list[AddonTriggerSpec] = Field(default_factory=list) + hidden: bool = False # Hide from operator UI (dev scaffolds) + # Wave 68 Track 5: capability metadata for Queen routing + content_kinds: list[str] = Field(default_factory=list) + path_globs: list[str] = Field(default_factory=list) + search_tool: str = Field(default="") # --------------------------------------------------------------------------- @@ -94,6 +110,19 @@ def _resolve_handler(addon_name: str, handler_ref: str) -> Callable[..., Any]: f"Addon '{addon_name}' handler function '{func_name}' " f"not found in module '{fq_module}'" ) + if not inspect.iscoroutinefunction(func): + log.warning( + "addon_loader.sync_handler", + addon=addon_name, + handler=handler_ref, + hint="Handler is not async — it will be wrapped automatically, " + "but addon handlers should be declared async.", + ) + + 
async def _sync_wrapper(*args: Any, **kwargs: Any) -> Any: + return func(*args, **kwargs) + + return _sync_wrapper # type: ignore[return-value] return func # type: ignore[return-value] @@ -146,7 +175,26 @@ def __init__(self, manifest: AddonManifest) -> None: self.manifest = manifest self.registered_tools: list[str] = [] self.registered_handlers: list[str] = [] + self.registered_routes: list[dict[str, Any]] = [] + self.registered_panels: list[dict[str, Any]] = [] self.runtime_context: dict[str, Any] = {} + # Wave 66 T1: health monitoring counters + self.tool_call_counts: dict[str, int] = {} + self.last_tool_call: str | None = None + self.handler_error_count: int = 0 + self.last_handler_fire: str | None = None + self.last_error: str | None = None + self.trigger_fire_times: dict[str, str | None] = {} + self.disabled: bool = False + + @property + def health_status(self) -> str: + """Derive health from error counts: 0=healthy, 1-2=degraded, 3+=error.""" + if self.handler_error_count >= 3: + return "error" + if self.handler_error_count >= 1: + return "degraded" + return "healthy" def register_addon( @@ -198,7 +246,16 @@ async def _tool_wrapper( _bound_fn: Callable[..., Any] = _fn, _pass_ctx: bool = _accepts_ctx, _bound_ctx: dict[str, Any] = _ctx, + _tool_name: str = tool_spec.name, + _reg: AddonRegistration = result, ) -> Any: + if _reg.disabled: + return f"Addon '{_reg.manifest.name}' is currently disabled." 
+ from datetime import UTC, datetime + _reg.tool_call_counts[_tool_name] = ( + _reg.tool_call_counts.get(_tool_name, 0) + 1 + ) + _reg.last_tool_call = datetime.now(UTC).isoformat() if _pass_ctx: return await _bound_fn( inputs, workspace_id, thread_id, @@ -239,15 +296,50 @@ async def _event_wrapper( *, _bound_efn: Callable[..., Any] = _efn, _bound_ctx: dict[str, Any] = _ctx, + _reg: AddonRegistration = result, + _evt_name: str = handler_spec.event, **kwargs: Any, ) -> Any: - return await _bound_efn(event, runtime_context=_bound_ctx, **kwargs) + if _reg.disabled: + log.debug( + "addon_loader.handler_skipped_disabled", + addon=_reg.manifest.name, + event=_evt_name, + ) + return None + from datetime import UTC, datetime + _reg.last_handler_fire = datetime.now(UTC).isoformat() + try: + return await _bound_efn( + event, runtime_context=_bound_ctx, **kwargs, + ) + except Exception: + _reg.handler_error_count += 1 + _reg.last_error = ( + f"{_evt_name}: " + f"{datetime.now(UTC).isoformat()}" + ) + raise svc_name = f"addon:{manifest.name}:{handler_spec.event}" service_router.register_handler(svc_name, _event_wrapper) else: + _plain_efn = handler_fn + _plain_reg = result + + async def _plain_event_wrapper( + event: Any, + *, + _bound_efn: Callable[..., Any] = _plain_efn, + _reg: AddonRegistration = _plain_reg, + **kwargs: Any, + ) -> Any: + if _reg.disabled: + return None + return await _bound_efn(event, **kwargs) + svc_name = f"addon:{manifest.name}:{handler_spec.event}" - service_router.register_handler(svc_name, handler_fn) + service_router.register_handler(svc_name, _plain_event_wrapper) result.registered_handlers.append(svc_name) log.info( @@ -264,19 +356,57 @@ async def _event_wrapper( exc_info=True, ) - # Warn about declared-but-unimplemented manifest fields - for field_name in ("panels", "templates", "routes"): - field_val = getattr(manifest, field_name, []) - if field_val: + # Register routes + for route_spec in manifest.routes: + handler_ref = 
route_spec.get("handler", "") + route_path = route_spec.get("path", "") + if not handler_ref or not route_path: + continue + try: + handler_fn = _resolve_handler(manifest.name, handler_ref) + result.registered_routes.append({ + "path": route_path, + "handler": handler_fn, + "addon_name": manifest.name, + }) + log.info( + "addon_loader.route_registered", + addon=manifest.name, + path=route_path, + ) + except Exception: # noqa: BLE001 log.warning( - "addon_loader.unimplemented_field", + "addon_loader.route_registration_failed", addon=manifest.name, - field=field_name, - count=len(field_val), - hint=f"Addon '{manifest.name}' declares {field_name} but " - f"{field_name} registration is not yet implemented.", + path=route_path, + exc_info=True, ) + # Register panels + for panel_spec in manifest.panels: + result.registered_panels.append({ + "target": panel_spec.get("target", ""), + "display_type": panel_spec.get("display_type", "status_card"), + "path": panel_spec.get("path", ""), + "addon_name": manifest.name, + }) + log.info( + "addon_loader.panel_registered", + addon=manifest.name, + target=panel_spec.get("target", ""), + ) + + # Warn about templates (still unimplemented) + if manifest.templates: + log.warning( + "addon_loader.unimplemented_field", + addon=manifest.name, + field="templates", + count=len(manifest.templates), + hint=f"Addon '{manifest.name}' declares templates but " + "templates registration is not yet implemented.", + ) + return result diff --git a/src/formicos/surface/app.py b/src/formicos/surface/app.py index cf74721..178b341 100644 --- a/src/formicos/surface/app.py +++ b/src/formicos/surface/app.py @@ -15,7 +15,7 @@ import os from collections.abc import AsyncGenerator from contextlib import AsyncExitStack, asynccontextmanager -from datetime import UTC, datetime +from datetime import UTC, datetime, timedelta from pathlib import Path from typing import TYPE_CHECKING, Any @@ -46,7 +46,10 @@ from formicos.surface.colony_manager import ColonyManager from 
formicos.surface.mcp_server import MCP_TOOL_NAMES, create_mcp_server from formicos.surface.projections import ProjectionStore -from formicos.surface.queen_runtime import QueenAgent +from formicos.surface.queen_runtime import ( + QueenAgent, + _parse_projection_timestamp, # pyright: ignore[reportPrivateUsage] +) from formicos.surface.registry import CapabilityRegistry, ProtocolEntry, ToolEntry from formicos.surface.routes import ( a2a_routes, @@ -736,6 +739,23 @@ async def lifespan(_app: Starlette) -> AsyncGenerator[None]: _addons_dir = Path("/app/addons") # Docker layout _addon_manifests = discover_addons(_addons_dir) _addon_registrations = [] + # Wave 70.0: initialise MCP bridge (lazy connections, no I/O here) + from formicos.addons.mcp_bridge.client import McpBridge # noqa: PLC0415 + + _mcp_bridge = McpBridge() + # Load server config from settings if available + import json as _json # noqa: PLC0415 + _mcp_servers_raw = getattr( + getattr(runtime, "settings", None), "mcp_servers", "[]", + ) + if isinstance(_mcp_servers_raw, str): + try: + _mcp_servers_raw = _json.loads(_mcp_servers_raw) + except (ValueError, TypeError): + _mcp_servers_raw = [] + if isinstance(_mcp_servers_raw, list): + _mcp_bridge.configure(_mcp_servers_raw) + _addon_runtime_context: dict[str, Any] = { "vector_port": getattr(runtime, "memory_store", None), "embed_fn": getattr(runtime, "embed_fn", None), @@ -746,6 +766,8 @@ async def lifespan(_app: Starlette) -> AsyncGenerator[None]: "settings": getattr(runtime, "settings", {}), "projections": getattr(runtime, "projections", None), "runtime": runtime, + "mcp_bridge": _mcp_bridge, + "get_bridge_health": _mcp_bridge.get_bridge_health, } for _manifest in _addon_manifests: _reg = register_addon( @@ -766,7 +788,7 @@ async def lifespan(_app: Starlette) -> AsyncGenerator[None]: version=_manifest.version, tools=_reg.registered_tools, handlers=_reg.registered_handlers, - panels=[p.get("id", "") for p in _manifest.panels], + panels=[p.get("target", "") for p 
in _manifest.panels], )) log.info( "app.addon_loaded", @@ -778,8 +800,12 @@ async def lifespan(_app: Starlette) -> AsyncGenerator[None]: # Expose addon tool specs for Queen tool list and wire into dispatcher app.state.addon_tool_specs = build_addon_tool_specs(_addon_manifests) # type: ignore[attr-defined] app.state.addon_manifests = _addon_manifests # type: ignore[attr-defined] + app.state.addon_registrations = _addon_registrations # type: ignore[attr-defined] + runtime.addon_registrations = _addon_registrations # type: ignore[attr-defined] + ws_manager._addon_registrations = _addon_registrations # noqa: SLF001 # pyright: ignore[reportPrivateUsage] if queen is not None: queen._tool_dispatcher._addon_tool_specs = app.state.addon_tool_specs # pyright: ignore[reportPrivateUsage] + queen._tool_dispatcher._addon_manifests = _addon_manifests # pyright: ignore[reportPrivateUsage] queen._tool_dispatcher._addon_runtime_context = _addon_runtime_context # pyright: ignore[reportPrivateUsage] # Wave 65 Track 4: wire TriggerDispatcher for addon cron/manual triggers @@ -822,6 +848,16 @@ async def _trigger_loop() -> None: params=list(sig.parameters.keys()), ) continue + # Track trigger fire time on registration + _match_reg = next( + (r for r in _addon_registrations + if r.manifest.name == addon_name), + None, + ) + if _match_reg is not None: + _match_reg.trigger_fire_times[handler_ref] = ( + datetime.now(UTC).isoformat() + ) await runtime.emit_and_broadcast(ServiceTriggerFired( seq=0, timestamp=datetime.now(UTC), @@ -848,6 +884,7 @@ async def _trigger_loop() -> None: ) _maint_dispatcher = MaintenanceDispatcher(runtime) + app.state.maintenance_dispatcher = _maint_dispatcher # type: ignore[attr-defined] # Wave 30 A7: scheduled maintenance timer _maint_interval = int(os.environ.get( @@ -858,7 +895,10 @@ async def _maintenance_loop( router: Any = service_router, interval_s: int = _maint_interval, ) -> None: - """Periodic dispatch of consolidation services + proactive briefings.""" + 
"""Periodic dispatch of consolidation services only. + + Proactive dispatch moved to _operational_sweep_loop (Wave 72). + """ while True: await asyncio.sleep(interval_s) for svc in [ @@ -878,13 +918,205 @@ async def _maintenance_loop( service=svc, ) - # Wave 45.5: evaluate proactive briefings and dispatch - # forage signals + maintenance colonies for each workspace - await _maint_dispatcher.run_proactive_dispatch() - _maint_task = asyncio.create_task(_maintenance_loop()) _maint_task.add_done_callback(_log_task_exception) + # Wave 71.0 Track 6: operational sweep loop (30-minute default) + _ops_sweep_interval = int(os.environ.get( + "FORMICOS_OPS_SWEEP_INTERVAL_S", "1800", + )) + + async def _operational_sweep_loop( + interval_s: int = _ops_sweep_interval, + ) -> None: + """Consolidated operational sweep (Wave 72 Track 6). + + Order: + 1. run_proactive_dispatch() + 2. Team A scan_knowledge_for_review() — stub until Team A lands + 3. queue_continuation_proposals() + 4. execute_idle_continuations() + 5. Team C extract_workflow_patterns() — stub until Team C lands + 6. Team C detect_operator_patterns() — stub until Team C lands + 7. 
Approved-action processing + compaction + """ + while True: + await asyncio.sleep(interval_s) + data_dir_str = str(data_dir) + workspace_ids = list( + runtime.projections.workspaces.keys(), + ) + + # --- Step 1: Proactive dispatch (moved from _maintenance_loop) --- + try: + await _maint_dispatcher.run_proactive_dispatch() + except Exception: # noqa: BLE001 + log.debug("ops_sweep.proactive_dispatch_failed") + + # --- Step 2: Team A knowledge review (stub) --- + for ws_id in workspace_ids: + try: + from formicos.surface.knowledge_review import ( # noqa: PLC0415 + scan_knowledge_for_review, + ) + + await scan_knowledge_for_review( + data_dir_str, ws_id, runtime.projections, + briefing_insights=getattr( + _maint_dispatcher, "last_briefing_insights", {}, + ).get(ws_id), + ) + except ImportError: + pass # Team A not landed yet + except Exception: # noqa: BLE001 + log.debug( + "ops_sweep.knowledge_review_failed", + workspace_id=ws_id, + ) + + # --- Steps 3-4: Continuation proposals + idle execution --- + for ws_id in workspace_ids: + try: + from formicos.surface.continuation import ( # noqa: PLC0415 + execute_idle_continuations, + queue_continuation_proposals, + ) + + await queue_continuation_proposals( + data_dir_str, ws_id, + runtime.projections, _maint_dispatcher, + ) + await execute_idle_continuations( + data_dir_str, ws_id, + runtime.projections, _maint_dispatcher, + ) + except Exception: # noqa: BLE001 + log.debug( + "ops_sweep.continuation_failed", + workspace_id=ws_id, + ) + + # --- Steps 5-6: Team C workflow/operator patterns (stubs) --- + for ws_id in workspace_ids: + try: + from formicos.surface.workflow_learning import ( # noqa: PLC0415 + detect_operator_patterns, + extract_workflow_patterns, + ) + + await extract_workflow_patterns( + data_dir_str, ws_id, runtime.projections, + ) + await detect_operator_patterns( + data_dir_str, ws_id, runtime.projections, + ) + except ImportError: + pass # Team C not landed yet + except Exception: # noqa: BLE001 + log.debug( 
+ "ops_sweep.workflow_learning_failed", + workspace_id=ws_id, + ) + + # --- Step 7: Approved-action processing + compaction --- + try: + from formicos.surface.action_queue import ( # noqa: PLC0415 + STATUS_APPROVED, + STATUS_EXECUTED, + STATUS_FAILED, + compact_action_log, + update_action, + ) + from formicos.surface.action_queue import ( + read_actions as _read_actions, + ) + + for ws_id in workspace_ids: + compact_action_log(data_dir_str, ws_id) + + actions = _read_actions(data_dir_str, ws_id) + for act in actions: + if act.get("status") != STATUS_APPROVED: + continue + sc = act.get("payload", {}).get( + "suggested_colony", + ) + if not sc: + continue + try: + from datetime import ( # noqa: PLC0415 + UTC as _UTC, + ) + from datetime import ( + datetime as _dt, + ) + + from formicos.core.types import ( # noqa: PLC0415 + CasteSlot as _CS, + ) + + await runtime.spawn_colony( + workspace_id=ws_id, + thread_id=act.get( + "thread_id", "maintenance", + ) or "maintenance", + task=sc.get( + "task", act.get("title", ""), + ), + castes=[ + _CS( + caste=sc.get( + "caste", "researcher", + ), + ), + ], + strategy=sc.get( + "strategy", "sequential", + ), + max_rounds=sc.get("max_rounds", 3), + ) + update_action( + data_dir_str, ws_id, + act["action_id"], + { + "status": STATUS_EXECUTED, + "executed_at": _dt.now( + _UTC, + ).isoformat(), + }, + ) + except Exception: # noqa: BLE001 + update_action( + data_dir_str, ws_id, + act["action_id"], + {"status": STATUS_FAILED}, + ) + except Exception: # noqa: BLE001 + log.debug("ops_sweep.action_processing_failed") + + # --- Step 8: Post sweep observations to display board --- + try: + from formicos.surface.operational_state import ( # noqa: PLC0415 + post_sweep_observations as _post_sweep, + ) + from formicos.surface.operations_coordinator import ( # noqa: PLC0415 + build_operations_summary as _build_ops, + ) + + for ws_id in workspace_ids: + _ops_summary = _build_ops( + data_dir_str, ws_id, runtime.projections, + ) + _post_sweep( + 
data_dir_str, ws_id, _ops_summary, + runtime.projections, + ) + except Exception: # noqa: BLE001 + log.debug("ops_sweep.display_board_posting_failed") + + _ops_sweep_task = asyncio.create_task(_operational_sweep_loop()) + _ops_sweep_task.add_done_callback(_log_task_exception) + # Telemetry bus (Wave 17 A2): start with JSONL sink + optional OTel from formicos.adapters.telemetry_jsonl import JSONLSink from formicos.engine.telemetry_bus import get_telemetry_bus @@ -921,6 +1153,24 @@ async def _maintenance_loop( addon_name=_reg.manifest.name, reason="shutdown", )) + # Wave 68: emit session summaries for recently active threads + _cutoff = datetime.now(UTC) - timedelta(minutes=30) + for _ws_id, _ws in projections.workspaces.items(): + for _tid, _thr in _ws.threads.items(): + if not _thr.queen_messages: + continue + _last_msg = _thr.queen_messages[-1] + _ts = _parse_projection_timestamp(_last_msg.timestamp) + if _ts and _ts > _cutoff: + try: + queen.emit_session_summary(_ws_id, _tid) + except Exception: + log.warning( + "shutdown.session_summary_failed", + workspace_id=_ws_id, + thread_id=_tid, + ) + await telemetry_bus.stop() await event_store.close() log.info("app.stopped") @@ -950,6 +1200,39 @@ async def websocket_handler(ws: WebSocket) -> None: } routes: list[Any] = [] + + # Wave 66 T3: catch-all addon route handler (resolves from app.state at request time) + async def _addon_route_handler(request: Any) -> Any: + import inspect as _ri # noqa: PLC0415 + + from starlette.responses import JSONResponse # noqa: PLC0415 + + addon_name = request.path_params.get("addon_name", "") + route_path = "/" + request.path_params.get("path", "") + regs: list[Any] = getattr(request.app.state, "addon_registrations", []) + for reg in regs: + for aroute in reg.registered_routes: + if aroute["addon_name"] == addon_name and aroute["path"] == route_path: + handler_fn = aroute["handler"] + try: + _accepts = "runtime_context" in _ri.signature(handler_fn).parameters + except (ValueError, 
TypeError): + _accepts = False + ctx = reg.runtime_context + if _accepts: + result = await handler_fn({}, "", "", runtime_context=ctx) + else: + result = await handler_fn({}, "", "") + return JSONResponse(result) + return JSONResponse({"error": "addon route not found"}, status_code=404) + + from starlette.routing import Route # noqa: PLC0415 + routes.append(Route( + "/addons/{addon_name:str}/{path:path}", + _addon_route_handler, + methods=["GET"], + )) + routes.extend(health_routes(**shared_deps)) routes.extend(api_routes(**shared_deps)) routes.extend(colony_io_routes(**shared_deps)) diff --git a/src/formicos/surface/colony_manager.py b/src/formicos/surface/colony_manager.py index ccb5dd8..445916a 100644 --- a/src/formicos/surface/colony_manager.py +++ b/src/formicos/surface/colony_manager.py @@ -33,6 +33,7 @@ RunnerCallbacks, ToolExecutionResult, ) +from formicos.engine.scoring_math import rescale_preserving_mean from formicos.engine.service_router import ServiceRouter from formicos.engine.strategies.sequential import SequentialStrategy from formicos.engine.strategies.stigmergic import StigmergicStrategy @@ -1499,7 +1500,7 @@ async def _hook_confidence_update( colony_proj, "knowledge_accesses", [], ) for trace in accesses: - for item in trace.get("items", []): + for rank, item in enumerate(trace.get("items", [])): item_id = item.get("id", "") if not item_id or item_id in seen_ids: continue @@ -1512,6 +1513,11 @@ async def _hook_confidence_update( old_alpha = float(entry.get("conf_alpha", PRIOR_ALPHA)) old_beta = float(entry.get("conf_beta", PRIOR_BETA)) + # Wave 67: geometric credit 0.7^rank (Position-Based Model) + # [1.0, 0.7, 0.49, 0.34, 0.24, ...] — models declining + # attention better than harmonic 1/(rank+1). 
+ credit = 0.7 ** rank + # Wave 32 A1: time-based gamma-decay (ADR-041 D1) event_ts = _now() last_updated = entry.get( @@ -1539,7 +1545,9 @@ async def _hook_confidence_update( if succeeded: # Wave 37 1B: quality-aware delta replaces flat +1 - delta_alpha = min(max(0.5 + quality_score, 0.5), 1.5) + # Wave 67: scaled by rank credit + base_delta = min(max(0.5 + quality_score, 0.5), 1.5) + delta_alpha = base_delta * credit new_alpha = max(decayed_alpha + delta_alpha, 1.0) new_beta = max(decayed_beta, 1.0) @@ -1558,10 +1566,17 @@ async def _hook_confidence_update( # Wave 37 1B: quality-aware failure penalty # Low quality (near 0) â†' higher penalty (1.5) # quality_score is 0 on failure path, so penalty is 1.0 + # Wave 67: scaled by rank credit failure_penalty = 1.0 - quality_score - delta_beta = min(max(0.5 + failure_penalty, 0.5), 1.5) + base_delta = min(max(0.5 + failure_penalty, 0.5), 1.5) + delta_beta = base_delta * credit new_alpha = max(decayed_alpha, 1.0) new_beta = max(decayed_beta + delta_beta, 1.0) + + # Wave 67: cap effective sample size at 150 (ADR-049) + new_alpha, new_beta = rescale_preserving_mean( + new_alpha, new_beta, + ) new_confidence = new_alpha / (new_alpha + new_beta) address = ( diff --git a/src/formicos/surface/continuation.py b/src/formicos/surface/continuation.py new file mode 100644 index 0000000..8943ace --- /dev/null +++ b/src/formicos/surface/continuation.py @@ -0,0 +1,356 @@ +"""Autonomous continuation engine (Wave 72 Track 5 + 7). + +Queues continuation proposals from stalled/idle threads and executes +low-risk continuations during operator idle time. All actions flow +through the existing action queue and ``approve_action()`` contract. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import structlog + +from formicos.surface.action_queue import ( + STATUS_APPROVED, + STATUS_EXECUTED, + STATUS_FAILED, + STATUS_PENDING_REVIEW, + append_action, + create_action, + read_actions, + update_action, +) +from formicos.surface.operational_state import append_journal_entry +from formicos.surface.operations_coordinator import build_operations_summary +from formicos.surface.self_maintenance import estimate_blast_radius + +if TYPE_CHECKING: + from formicos.surface.projections import ProjectionStore + from formicos.surface.self_maintenance import MaintenanceDispatcher + +log = structlog.get_logger() + +# Default idle threshold (minutes) before autonomous continuation kicks in. +_DEFAULT_IDLE_THRESHOLD_MINUTES = 60 + + +# --------------------------------------------------------------------------- +# Track 5: Continuation Proposals +# --------------------------------------------------------------------------- + + +async def queue_continuation_proposals( + data_dir: str, + workspace_id: str, + projections: ProjectionStore, + dispatcher: MaintenanceDispatcher, +) -> int: + """Queue continuation actions for work that is ready to resume. + + Returns the number of newly queued proposals. 
+ """ + if not data_dir: + return 0 + + summary = build_operations_summary(data_dir, workspace_id, projections) + + # Guard 1: operator recently active — do not queue + if summary.get("operator_active", False): + return 0 + + candidates = summary.get("continuation_candidates", []) + if not candidates: + return 0 + + # Dedupe: read existing pending continuation actions by thread_id + existing_actions = read_actions(data_dir, workspace_id) + pending_thread_ids: set[str] = set() + for act in existing_actions: + if ( + act.get("kind") == "continuation" + and act.get("status") == STATUS_PENDING_REVIEW + ): + tid = act.get("thread_id") or act.get("payload", {}).get("thread_id", "") + if tid: + pending_thread_ids.add(tid) + + queued = 0 + for candidate in candidates: + thread_id = candidate.get("thread_id", "") + if not thread_id: + continue + if thread_id in pending_thread_ids: + continue + + description = candidate.get("description", "Continue stalled work") + priority = candidate.get("priority", "medium") + + # Estimate blast radius + blast = estimate_blast_radius( + task=description, + caste="coder", + max_rounds=3, + strategy="sequential", + workspace_id=workspace_id, + projections=projections, + ) + + action = create_action( + kind="continuation", + title=f"Continue: {description[:80]}", + detail=description, + source_category="continuation", + source_ref=thread_id, + rationale=f"Thread has pending work (priority={priority})", + payload={ + "thread_id": thread_id, + "description": description, + "priority": priority, + "blast_radius_score": blast.score, + "blast_radius_level": blast.level, + "suggested_colony": { + "task": description, + "caste": "coder", + "strategy": "sequential", + "max_rounds": 3, + }, + }, + thread_id=thread_id, + estimated_cost=0.12 * 3, # coder cost * rounds + blast_radius=blast.score, + confidence=1.0 if candidate.get("ready_for_autonomy") else 0.5, + requires_approval=True, + created_by="continuation_engine", + ) + append_action(data_dir, 
workspace_id, action) + pending_thread_ids.add(thread_id) + queued += 1 + + log.info( + "continuation.proposal_queued", + workspace_id=workspace_id, + thread_id=thread_id, + blast_radius=blast.score, + priority=priority, + ) + + return queued + + +# --------------------------------------------------------------------------- +# Track 7: Idle-Time Execution +# --------------------------------------------------------------------------- + + +def _get_idle_threshold(dispatcher: MaintenanceDispatcher, workspace_id: str) -> int: + """Read idle threshold from maintenance policy, with fallback.""" + try: + policy = dispatcher._get_policy(workspace_id) # pyright: ignore[reportPrivateUsage] + raw = getattr(policy, "idle_threshold_minutes", None) + if isinstance(raw, (int, float)) and raw > 0: + return int(raw) + except Exception: # noqa: BLE001 + pass + return _DEFAULT_IDLE_THRESHOLD_MINUTES + + +def _check_daily_budget( + dispatcher: MaintenanceDispatcher, + workspace_id: str, + estimated_cost: float, +) -> bool: + """Return True if the daily budget has capacity for estimated_cost.""" + try: + policy = dispatcher._get_policy(workspace_id) # pyright: ignore[reportPrivateUsage] + spent = dispatcher._daily_spend.get(workspace_id, 0.0) # pyright: ignore[reportPrivateUsage] + return (policy.daily_maintenance_budget - spent) >= estimated_cost + except Exception: # noqa: BLE001 + return False + + +async def execute_idle_continuations( + data_dir: str, + workspace_id: str, + projections: ProjectionStore, + dispatcher: MaintenanceDispatcher, + *, + max_per_sweep: int = 1, +) -> int: + """Execute low-risk continuation actions during operator idle time. + + Returns the number of continuations executed. 
+ """ + if not data_dir: + return 0 + + # Guard 1: workspace autonomy level must be 'autonomous' + try: + policy = dispatcher._get_policy(workspace_id) # pyright: ignore[reportPrivateUsage] + except Exception: # noqa: BLE001 + return 0 + + from formicos.core.types import AutonomyLevel # noqa: PLC0415 + + if policy.autonomy_level != AutonomyLevel.autonomous: + return 0 + + # Guard 2: operator idle time exceeds threshold + summary = build_operations_summary(data_dir, workspace_id, projections) + idle_minutes = summary.get("idle_for_minutes") + threshold = _get_idle_threshold(dispatcher, workspace_id) + if idle_minutes is None or idle_minutes < threshold: + return 0 + + # Guard 3: no pending-review actions of any kind + actions = read_actions(data_dir, workspace_id) + if any(a.get("status") == STATUS_PENDING_REVIEW for a in actions): + return 0 + + # Find approved continuation actions ready for execution + approved_continuations = [ + a for a in actions + if a.get("kind") == "continuation" + and a.get("status") == STATUS_APPROVED + ] + if not approved_continuations: + return 0 + + executed = 0 + for act in approved_continuations[:max_per_sweep]: + # Guard 4: re-check blast radius at execution time + sc = act.get("payload", {}).get("suggested_colony", {}) + blast = estimate_blast_radius( + task=sc.get("task", act.get("title", "")), + caste=sc.get("caste", "coder"), + max_rounds=sc.get("max_rounds", 3), + strategy=sc.get("strategy", "sequential"), + workspace_id=workspace_id, + projections=projections, + ) + if blast.score >= 0.6: + log.info( + "continuation.idle_execution_skipped", + workspace_id=workspace_id, + action_id=act["action_id"], + reason="blast_radius_too_high", + score=blast.score, + ) + continue + + # Guard 5: daily budget check + estimated_cost = act.get("estimated_cost", 0.36) + if not _check_daily_budget(dispatcher, workspace_id, estimated_cost): + log.info( + "continuation.idle_execution_skipped", + workspace_id=workspace_id, + 
action_id=act["action_id"], + reason="budget_exhausted", + ) + break + + # Execute via runtime.spawn_colony + try: + from datetime import UTC, datetime # noqa: PLC0415 + + from formicos.core.types import CasteSlot # noqa: PLC0415 + + runtime = dispatcher._runtime # pyright: ignore[reportPrivateUsage] + colony_id: str = await runtime.spawn_colony( + workspace_id=workspace_id, + thread_id=act.get("thread_id") or "maintenance", + task=sc.get("task", act.get("title", "")), + castes=[CasteSlot(caste=sc.get("caste", "coder"))], + strategy=sc.get("strategy", "sequential"), + max_rounds=sc.get("max_rounds", 3), + ) + + update_action( + data_dir, workspace_id, act["action_id"], + {"status": STATUS_EXECUTED, "executed_at": datetime.now(UTC).isoformat()}, + ) + + # Increment daily spend + dispatcher._daily_spend[workspace_id] = ( # pyright: ignore[reportPrivateUsage] + dispatcher._daily_spend.get(workspace_id, 0.0) + estimated_cost # pyright: ignore[reportPrivateUsage] + ) + + # Journal the autonomous continuation + import contextlib # noqa: PLC0415 + + with contextlib.suppress(Exception): + append_journal_entry( + data_dir, workspace_id, + source="continuation", + message=( + f"Auto-executed continuation: {act.get('title', '?')[:60]} " + f"(colony={colony_id[:12]}, blast={blast.score:.2f})" + ), + ) + + executed += 1 + log.info( + "continuation.idle_execution_completed", + workspace_id=workspace_id, + action_id=act["action_id"], + colony_id=colony_id, + blast_radius=blast.score, + ) + + except Exception: # noqa: BLE001 + update_action( + data_dir, workspace_id, act["action_id"], + {"status": STATUS_FAILED}, + ) + log.debug( + "continuation.idle_execution_failed", + workspace_id=workspace_id, + action_id=act["action_id"], + ) + + return executed + + +# --------------------------------------------------------------------------- +# Track 7: Warm-start cue builder +# --------------------------------------------------------------------------- + + +def build_warm_start_cue( + 
data_dir: str, + workspace_id: str, + projections: ProjectionStore, + *, + max_candidates: int = 3, +) -> str: + """Build a continuation cue for the Queen's first returning-session turn. + + Returns empty string if there are no actionable candidates. + """ + if not data_dir: + return "" + + summary = build_operations_summary(data_dir, workspace_id, projections) + candidates = summary.get("continuation_candidates", []) + if not candidates: + return "" + + lines: list[str] = ["# Continuation Opportunities"] + lines.append( + "The following threads have pending work and no active colony. " + "Confirm which to resume, or redirect." + ) + lines.append("") + + for c in candidates[:max_candidates]: + ready = c.get("ready_for_autonomy", False) + blocked = c.get("blocked_reason", "") + priority = c.get("priority", "medium") + tag = "READY" if ready else f"BLOCKED: {blocked}" if blocked else "review" + lines.append(f"- [{tag}] (priority={priority}) {c.get('description', '?')}") + + remaining = len(candidates) - max_candidates + if remaining > 0: + lines.append(f" (+{remaining} more)") + + return "\n".join(lines) diff --git a/src/formicos/surface/hierarchy.py b/src/formicos/surface/hierarchy.py new file mode 100644 index 0000000..48461e8 --- /dev/null +++ b/src/formicos/surface/hierarchy.py @@ -0,0 +1,129 @@ +"""Knowledge hierarchy utilities — branch confidence aggregation. + +Wave 67: materialized-path hierarchy on knowledge entry projections. +See ADR-049 for design rationale. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from formicos.surface.projections import ProjectionStore + + +def compute_branch_confidence( + store: ProjectionStore, + path_prefix: str, + workspace_id: str = "", +) -> dict[str, Any]: + """Aggregate Beta confidence for entries under a hierarchy branch. + + Returns ``{"alpha": float, "beta": float, "count": int, "mean": float}``. 
+ Sums children's evidence (subtracting the Beta(5,5) prior from each), + re-adds a single prior, and caps effective sample size at 150. + + ESS 150 is mathematically equivalent to exponential decay with + gamma ≈ 0.993. Balances stability with responsiveness per production + Thompson Sampling literature. + """ + total_alpha = 0.0 + total_beta = 0.0 + count = 0 + for entry in store.memory_entries.values(): + if entry.get("entry_type") == "topic": + continue # don't count synthetic nodes + if workspace_id and entry.get("workspace_id") != workspace_id: + continue + hp = entry.get("hierarchy_path", "/") + if hp.startswith(path_prefix): + total_alpha += entry.get("conf_alpha", 5.0) - 5.0 + total_beta += entry.get("conf_beta", 5.0) - 5.0 + count += 1 + agg_alpha = max(5.0 + total_alpha, 1.0) + agg_beta = max(5.0 + total_beta, 1.0) + ess = agg_alpha + agg_beta + if ess > 150: + scale = 150.0 / ess + agg_alpha *= scale + agg_beta *= scale + mean = ( + agg_alpha / (agg_alpha + agg_beta) + if (agg_alpha + agg_beta) > 0 + else 0.5 + ) + return {"alpha": agg_alpha, "beta": agg_beta, "count": count, "mean": mean} + + +def build_knowledge_tree( + store: ProjectionStore, + workspace_id: str, +) -> list[dict[str, Any]]: + """Build a tree structure from memory_entries hierarchy paths. + + Returns a list of root branch dicts, each with nested ``children``. + Each branch includes path, label, entryCount, and confidence. 
+ """ + # Collect all hierarchy paths for workspace entries + path_counts: dict[str, int] = {} + for entry in store.memory_entries.values(): + if entry.get("workspace_id") != workspace_id: + continue + if entry.get("entry_type") == "topic": + continue + hp = entry.get("hierarchy_path", "/") + if hp and hp != "/": + # Extract the root segment: /foo/ from /foo/ or /foo/bar/ + segments = [s for s in hp.split("/") if s] + if segments: + root_path = f"/{segments[0]}/" + path_counts[root_path] = path_counts.get(root_path, 0) + 1 + # If deeper, also count the subtopic + if len(segments) >= 2: + sub_path = f"/{segments[0]}/{segments[1]}/" + # Don't double-count in root — root counts all descendants + path_counts[sub_path] = path_counts.get(sub_path, 0) + 1 + + # Build root branches sorted by label + root_paths = sorted( + {p for p in path_counts if p.count("/") == 2}, + ) # /foo/ has exactly 2 slashes + + branches: list[dict[str, Any]] = [] + for rp in root_paths: + label = rp.strip("/") + conf = compute_branch_confidence(store, rp, workspace_id) + # Find children (subtopic paths under this root) + children: list[dict[str, Any]] = [] + child_paths = sorted( + p for p in path_counts + if p.startswith(rp) and p != rp and p.count("/") == 3 + ) + for cp in child_paths: + child_label = cp[len(rp):].strip("/") + child_conf = compute_branch_confidence(store, cp, workspace_id) + children.append({ + "path": cp, + "label": child_label, + "entryCount": child_conf["count"], + "confidence": { + "alpha": round(child_conf["alpha"], 1), + "beta": round(child_conf["beta"], 1), + "mean": round(child_conf["mean"], 2), + }, + "children": [], + }) + + branches.append({ + "path": rp, + "label": label, + "entryCount": conf["count"], + "confidence": { + "alpha": round(conf["alpha"], 1), + "beta": round(conf["beta"], 1), + "mean": round(conf["mean"], 2), + }, + "children": children, + }) + + return branches diff --git a/src/formicos/surface/knowledge_catalog.py 
b/src/formicos/surface/knowledge_catalog.py index ebb86ad..3e13962 100644 --- a/src/formicos/surface/knowledge_catalog.py +++ b/src/formicos/surface/knowledge_catalog.py @@ -296,9 +296,8 @@ def _composite_key( + W["freshness"] * freshness + W["status"] * status + W["thread"] * thread_bonus - # Wave 59.5: graph_proximity only has real values in _search_thread_boosted; - # here it's always 0.0 to keep the weight dict consistent across both paths. - + W.get("graph_proximity", 0.0) * 0.0 + # Wave 67.5: graph_proximity now populated by _search_vector via PPR + + W.get("graph_proximity", 0.0) * float(item.get("_graph_proximity", 0.0)) + pin_boost ) return -(raw * fed_penalty) @@ -325,6 +324,67 @@ def __init__( _THREAD_BONUS = 1.0 # Wave 32 A3: normalized to [0,1], weight 0.08 scales contribution + # ------------------------------------------------------------------ + # Wave 67.5: shared graph scoring (ADR-050 D3) + # ------------------------------------------------------------------ + + async def _enrich_with_graph_scores( + self, + seed_entity_ids: list[str], + workspace_id: str, + ) -> dict[str, float]: + """PPR walk from seed entities, return {entry_id: proximity_score}.""" + if ( + self._kg_adapter is None + or self._projections is None + or not seed_entity_ids + ): + return {} + + try: + pr_scores = await self._kg_adapter.personalized_pagerank( + seed_entity_ids, workspace_id, + ) + except Exception: # noqa: BLE001 + log.warning("knowledge_catalog.ppr_failed") + return {} + + if not pr_scores: + return {} + + # Reverse-map entity ids -> entry ids via projections + # Build reverse lookup: node_id -> entry_id + node_to_entry: dict[str, str] = { + nid: eid + for eid, nid in self._projections.entry_kg_nodes.items() + } + result: dict[str, float] = {} + for entity_id, score in pr_scores.items(): + entry_id = node_to_entry.get(entity_id) + if entry_id: + result[entry_id] = max(result.get(entry_id, 0.0), score) + return result + + async def _compute_graph_scores( + self, + 
query: str, + workspace_id: str, + ) -> dict[str, float]: + """Standard-path graph scoring: embed query -> match entities -> PPR.""" + if self._kg_adapter is None: + return {} + + try: + matched = await self._kg_adapter.match_entities_by_embedding( + query, workspace_id, + ) + except Exception: # noqa: BLE001 + log.warning("knowledge_catalog.entity_match_failed") + return {} + + seed_ids = [m["id"] for m in matched if m.get("id")] + return await self._enrich_with_graph_scores(seed_ids, workspace_id) + async def search( self, query: str, @@ -410,7 +470,10 @@ async def _search_vector( else: tasks.append(_empty()) - institutional, legacy = await asyncio.gather(*tasks) + # Wave 67.5: graph scoring in parallel with vector search (ADR-050) + tasks.append(self._compute_graph_scores(query, workspace_id)) + + institutional, legacy, graph_scores = await asyncio.gather(*tasks) # Merge, deduplicate, sort seen: set[str] = set() @@ -424,12 +487,52 @@ async def _search_vector( # Wave 39: apply operator overlays (skip muted/invalidated, boost pinned) merged = self._apply_operator_overlays(merged) + # Wave 67.5: inject graph proximity scores onto items + for item in merged: + item["_graph_proximity"] = graph_scores.get(item.get("id", ""), 0.0) + # Wave 35 C2: per-workspace weights (ADR-044 D4) from formicos.surface.knowledge_constants import get_workspace_weights # noqa: PLC0415 ws_weights = get_workspace_weights(workspace_id, self._projections) merged.sort(key=lambda item: _composite_key(item, weights=ws_weights)) - return merged[:top_k] + top_results = merged[:top_k] + + # Wave 67.5: score breakdown parity with thread path + from formicos.engine.scoring_math import exploration_score # noqa: PLC0415 + from formicos.surface.trust import federated_retrieval_penalty # noqa: PLC0415 + + for item in top_results: + semantic = float(item.get("score", 0.0)) + alpha = float(item.get("conf_alpha", 5.0)) + beta_p = float(item.get("conf_beta", 5.0)) + thompson = exploration_score(alpha, 
beta_p) + freshness = _compute_freshness(item.get("created_at", "")) + status_bonus = _STATUS_BONUS.get(str(item.get("status", "")), 0.0) + pin_boost = float(item.get("_pin_boost", 0.0)) + fed_penalty = federated_retrieval_penalty(item) + gp = float(item.get("_graph_proximity", 0.0)) + raw = ( + ws_weights["semantic"] * semantic + + ws_weights["thompson"] * thompson + + ws_weights["freshness"] * freshness + + ws_weights["status"] * status_bonus + + ws_weights.get("graph_proximity", 0.0) * gp + + pin_boost + ) + item["_score_breakdown"] = { + "semantic": semantic, + "thompson": thompson, + "freshness": freshness, + "status": status_bonus, + "thread": 0.0, + "cooccurrence": 0.0, + "graph_proximity": gp, + "composite": raw * fed_penalty, + "weights": dict(ws_weights), + } + + return top_results def _projection_keyword_fallback( self, @@ -537,52 +640,29 @@ async def _search_thread_boosted( # Wave 39: apply operator overlays (skip muted/invalidated, boost pinned) merged = self._apply_operator_overlays(merged) - # Wave 59.5: graph-augmented retrieval — discover neighbors of top-3 + # Wave 67.5: graph-augmented retrieval via shared PPR helper (ADR-050 D3) graph_scores: dict[str, float] = {} if self._kg_adapter is not None and self._projections is not None: seed_items = sorted( merged, key=lambda x: -float(x.get("score", 0.0)), )[:3] - for seed in seed_items: - seed_entry_id = seed.get("id", "") - node_id = self._projections.entry_kg_nodes.get( - seed_entry_id, "", - ) - if not node_id: - continue - try: - neighbors = await self._kg_adapter.get_neighbors( - node_id, - workspace_id=workspace_id, - ) - for nbr in neighbors: - # Wave 60: fix node_id bug — get_neighbors() - # returns from_node/to_node, not node_id - other_node = ( - nbr["to_node"] if nbr["from_node"] == node_id - else nbr["from_node"] - ) - # Reverse lookup: find entry_id for this KG node - for eid, nid in ( - self._projections.entry_kg_nodes.items() - ): - if nid == other_node and eid not in seen: - 
entry_data = ( - self._projections.memory_entries.get(eid) - ) - if entry_data: - item = _normalize_institutional( - entry_data, score=0.0, - ) - merged.append(item) - seen.add(eid) - graph_scores[eid] = 1.0 - break - except Exception: # noqa: BLE001 - log.warning( - "knowledge_catalog.graph_neighbor_lookup_failed", - seed_id=seed_entry_id, - ) + seed_entity_ids = [ + self._projections.entry_kg_nodes[sid] + for sid in (s.get("id", "") for s in seed_items) + if sid and sid in self._projections.entry_kg_nodes + ] + graph_scores = await self._enrich_with_graph_scores( + seed_entity_ids, workspace_id, + ) + # Inject graph-discovered entries not yet in merged + if graph_scores: + for eid, _score in graph_scores.items(): + if eid not in seen: + entry_data = self._projections.memory_entries.get(eid) + if entry_data: + item = _normalize_institutional(entry_data, score=0.0) + merged.append(item) + seen.add(eid) # Wave 34 A3: composite sort with co-occurrence (ADR-044) cooc_scores: dict[str, float] = {} diff --git a/src/formicos/surface/knowledge_review.py b/src/formicos/surface/knowledge_review.py new file mode 100644 index 0000000..6a94071 --- /dev/null +++ b/src/formicos/surface/knowledge_review.py @@ -0,0 +1,328 @@ +"""Knowledge review scanner (Wave 72 Track 1). + +Pure scan function that queues ``kind="knowledge_review"`` actions for entries +that need human attention. Runs from the operational sweep loop; does not mutate +knowledge directly. + +Four review criteria: + 1. Outcome-correlated failures — entry accessed by 3+ colonies, >50% failed + 2. Contradictions — reuses already-generated briefing insights + 3. Stale authority — high-confidence old entries (not permanent) + 4. 
Unconfirmed machine-generated — influential entries with no operator confirmation +""" + +from __future__ import annotations + +from datetime import UTC, datetime, timedelta +from typing import TYPE_CHECKING, Any + +import structlog + +from formicos.surface.action_queue import ( + STATUS_PENDING_REVIEW, + append_action, + create_action, + read_actions, +) + +if TYPE_CHECKING: + from formicos.surface.projections import ProjectionStore + +log = structlog.get_logger() + +# Thresholds +_MIN_COLONY_ACCESS = 3 +_FAILURE_RATE_THRESHOLD = 0.5 +_HIGH_CONFIDENCE_THRESHOLD = 0.75 +_STALE_DAYS = 90 +_MIN_ACCESS_FOR_INFLUENTIAL = 5 + + +def _content_preview(entry: dict[str, Any], max_len: int = 120) -> str: + content = entry.get("content", "") + if len(content) > max_len: + return content[:max_len] + "..." + return content + + +def _entry_confidence(entry: dict[str, Any]) -> float: + alpha = float(entry.get("conf_alpha", 5.0)) + beta = float(entry.get("conf_beta", 5.0)) + return alpha / (alpha + beta) if (alpha + beta) > 0 else 0.5 + + +def _existing_pending_entry_ids( + data_dir: str, + workspace_id: str, +) -> set[str]: + """Return entry_ids that already have a pending knowledge_review action.""" + actions = read_actions(data_dir, workspace_id) + ids: set[str] = set() + for act in actions: + if ( + act.get("kind") == "knowledge_review" + and act.get("status") == STATUS_PENDING_REVIEW + ): + eid = act.get("payload", {}).get("entry_id", "") + if eid: + ids.add(eid) + return ids + + +async def scan_knowledge_for_review( + data_dir: str, + workspace_id: str, + projections: ProjectionStore, + *, + briefing_insights: list[dict[str, object]] | None = None, +) -> int: + """Queue review actions for entries that need human attention. + + Returns the number of actions queued. 
+ """ + existing = _existing_pending_entry_ids(data_dir, workspace_id) + queued = 0 + + # Build workspace entry set + entries = { + eid: e + for eid, e in projections.memory_entries.items() + if e.get("workspace_id") == workspace_id + } + + # Build per-entry outcome stats from colony outcomes + entry_colony_results: dict[str, dict[str, int]] = {} # entry_id -> {ok, fail} + for outcome in projections.colony_outcomes.values(): + if outcome.workspace_id != workspace_id: + continue + accessed = _get_accessed_entries(outcome.colony_id, projections) + for eid in accessed: + stats = entry_colony_results.setdefault(eid, {"ok": 0, "fail": 0}) + if outcome.succeeded: + stats["ok"] += 1 + else: + stats["fail"] += 1 + + now = datetime.now(UTC) + + # --- Criterion 1: Outcome-correlated failures --- + for eid, stats in entry_colony_results.items(): + if eid in existing: + continue + entry = entries.get(eid) + if entry is None: + continue + total = stats["ok"] + stats["fail"] + if total < _MIN_COLONY_ACCESS: + continue + fail_rate = stats["fail"] / total + if fail_rate <= _FAILURE_RATE_THRESHOLD: + continue + + action = create_action( + kind="knowledge_review", + title=f"Failure-correlated: {entry.get('title', eid[:12])}", + detail=f"{stats['fail']}/{total} colonies failed when using this entry", + source_category="outcome_correlation", + rationale=f"Entry accessed by {total} colonies with {fail_rate:.0%} failure rate", + payload={ + "entry_id": eid, + "title": entry.get("title", ""), + "content_preview": _content_preview(entry), + "review_reason": "outcome_correlated_failure", + "confidence": round(_entry_confidence(entry), 4), + "access_count": total, + "failure_count": stats["fail"], + "failure_rate": round(fail_rate, 4), + }, + confidence=_entry_confidence(entry), + created_by="knowledge_review_scanner", + ) + append_action(data_dir, workspace_id, action) + existing.add(eid) + queued += 1 + + # --- Criterion 2: Contradictions from briefing insights --- + if briefing_insights: 
+ for insight in briefing_insights: + cat = str(insight.get("category", "")) + if cat != "contradiction": + continue + detail_str = str(insight.get("detail", "")) + # Extract entry IDs from insight metadata + entry_ids = _extract_entry_ids_from_insight(insight, entries) + for eid in entry_ids: + if eid in existing: + continue + entry = entries.get(eid) + if entry is None: + continue + action = create_action( + kind="knowledge_review", + title=f"Contradiction: {entry.get('title', eid[:12])}", + detail=detail_str or "Entry involved in a contradiction", + source_category="contradiction", + rationale="Detected via briefing contradiction analysis", + payload={ + "entry_id": eid, + "title": entry.get("title", ""), + "content_preview": _content_preview(entry), + "review_reason": "contradiction", + "confidence": round(_entry_confidence(entry), 4), + "access_count": _get_access_count(eid, projections), + }, + confidence=_entry_confidence(entry), + created_by="knowledge_review_scanner", + ) + append_action(data_dir, workspace_id, action) + existing.add(eid) + queued += 1 + + # --- Criterion 3: Stale authority --- + stale_cutoff = now - timedelta(days=_STALE_DAYS) + for eid, entry in entries.items(): + if eid in existing: + continue + if entry.get("decay_class") == "permanent": + continue + conf = _entry_confidence(entry) + if conf < _HIGH_CONFIDENCE_THRESHOLD: + continue + # Check last_accessed from usage tracking + usage = projections.knowledge_entry_usage.get(eid, {}) + last_accessed_str = usage.get("last_accessed", "") + if not last_accessed_str: + # Never accessed — use created_at + last_accessed_str = entry.get("created_at", "") + if not last_accessed_str: + continue + try: + last_accessed = datetime.fromisoformat(last_accessed_str.replace("Z", "+00:00")) + except (ValueError, AttributeError): + continue + if last_accessed >= stale_cutoff: + continue + + access_count = usage.get("count", 0) + action = create_action( + kind="knowledge_review", + title=f"Stale authority: 
{entry.get('title', eid[:12])}", + detail=f"High-confidence entry not accessed in {_STALE_DAYS}+ days", + source_category="stale_authority", + rationale=f"Confidence {conf:.2f} but last accessed {last_accessed_str[:10]}", + payload={ + "entry_id": eid, + "title": entry.get("title", ""), + "content_preview": _content_preview(entry), + "review_reason": "stale_authority", + "confidence": round(conf, 4), + "access_count": access_count, + }, + confidence=conf, + created_by="knowledge_review_scanner", + ) + append_action(data_dir, workspace_id, action) + existing.add(eid) + queued += 1 + + # --- Criterion 4: Unconfirmed machine-generated --- + confirmed_entries = projections.operator_overlays.pinned_entries + for eid, entry in entries.items(): + if eid in existing: + continue + # Skip if operator has confirmed (pinned = confirmation signal) + if eid in confirmed_entries: + continue + # Must be machine-generated (not operator-created) + if entry.get("created_by") == "operator": + continue + # Must be influential (accessed often) + usage = projections.knowledge_entry_usage.get(eid, {}) + access_count = usage.get("count", 0) + if access_count < _MIN_ACCESS_FOR_INFLUENTIAL: + continue + + conf = _entry_confidence(entry) + action = create_action( + kind="knowledge_review", + title=f"Unconfirmed: {entry.get('title', eid[:12])}", + detail=f"Machine-generated entry accessed {access_count} times, never confirmed", + source_category="unconfirmed_machine", + rationale=f"Influential entry (accessed {access_count}x) with no operator confirmation", + payload={ + "entry_id": eid, + "title": entry.get("title", ""), + "content_preview": _content_preview(entry), + "review_reason": "unconfirmed_machine_generated", + "confidence": round(conf, 4), + "access_count": access_count, + }, + confidence=conf, + created_by="knowledge_review_scanner", + ) + append_action(data_dir, workspace_id, action) + existing.add(eid) + queued += 1 + + if queued > 0: + log.info( + 
"knowledge_review.scan_complete", + workspace_id=workspace_id, + actions_queued=queued, + ) + + return queued + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _get_accessed_entries( + colony_id: str, + projections: ProjectionStore, +) -> list[str]: + """Get entry IDs accessed by a colony from access records.""" + result: list[str] = [] + for eid, usage in projections.knowledge_entry_usage.items(): + # Usage tracks per-entry, not per-colony — use colony outcomes + # to infer which entries were accessed by checking access events + colonies = usage.get("colonies", []) + if colony_id in colonies: + result.append(eid) + # Fallback: if colonies not tracked in usage, use the outcome's entries_accessed + if not result: + outcome = projections.colony_outcomes.get(colony_id) + if outcome and outcome.entries_accessed > 0: + # Cannot map individual entries without colony-level tracking; + # return empty — this criterion requires per-colony access data + pass + return result + + +def _get_access_count(entry_id: str, projections: ProjectionStore) -> int: + usage = projections.knowledge_entry_usage.get(entry_id, {}) + return usage.get("count", 0) + + +def _extract_entry_ids_from_insight( + insight: dict[str, object], + entries: dict[str, dict[str, Any]], +) -> list[str]: + """Extract entry IDs from a contradiction insight.""" + result: list[str] = [] + # Insights may have entry_ids directly + raw_ids: object = insight.get("entry_ids", []) + if isinstance(raw_ids, list): + for raw_eid in raw_ids: # type: ignore[reportUnknownVariableType] + eid = str(raw_eid) # type: ignore[reportUnknownArgumentType] + if eid in entries: + result.append(eid) + # Or they may reference entries in detail text + if not result: + detail = str(insight.get("detail", "")) + for eid in entries: + if eid in detail: + result.append(eid) + return result diff --git 
a/src/formicos/surface/mcp_server.py b/src/formicos/surface/mcp_server.py index 84f36b2..7e36716 100644 --- a/src/formicos/surface/mcp_server.py +++ b/src/formicos/surface/mcp_server.py @@ -41,6 +41,16 @@ "query_service", "activate_service", "chat_colony", + # Wave 35 + "set_maintenance_policy", + "get_maintenance_policy", + "configure_scoring", + # Wave 73 + "addon_status", + "toggle_addon", + "trigger_addon", + "log_finding", + "handoff_to_formicos", ) _RO = ToolAnnotations(readOnlyHint=True, destructiveHint=False) @@ -543,6 +553,264 @@ async def configure_scoring( )) return {"status": "updated", "weights": new_weights} + # ----------------------------------------------------------------------- + # Wave 73 Track 3: Addon control MCP tools + # ----------------------------------------------------------------------- + + @mcp.tool(annotations=_RO) + async def addon_status(workspace_id: str = "") -> list[dict[str, Any]]: + """List installed addons with health status, tool counts, and errors.""" + regs: list[Any] = runtime.addon_registrations or [] + result: list[dict[str, Any]] = [] + for reg in regs: + manifest = reg.manifest + if getattr(manifest, "hidden", False): + continue + result.append({ + "name": manifest.name, + "version": getattr(manifest, "version", ""), + "description": getattr(manifest, "description", ""), + "status": getattr(reg, "health_status", "unknown"), + "disabled": getattr(reg, "disabled", False), + "tool_count": len(getattr(reg, "registered_tools", [])), + "handler_count": len(getattr(reg, "registered_handlers", [])), + "total_tool_calls": sum( + getattr(reg, "tool_call_counts", {}).values() + ), + "last_error": getattr(reg, "last_error", None), + }) + return result + + @mcp.tool(annotations=_MUT) + async def toggle_addon( + addon_name: str, + disabled: bool, + workspace_id: str = "", + ) -> dict[str, Any]: + """Enable or disable an addon. 
Disabled addons' tools return errors if called.""" + regs: list[Any] = runtime.addon_registrations or [] + reg = next( + (r for r in regs if r.manifest.name == addon_name), None, + ) + if reg is None: + return to_mcp_tool_error(KNOWN_ERRORS["ADDON_NOT_FOUND"]) + reg.disabled = disabled + return {"addon": addon_name, "disabled": reg.disabled} + + @mcp.tool(annotations=_MUT) + async def trigger_addon( + addon_name: str, + handler: str, + inputs: str = "", + workspace_id: str = "", + ) -> dict[str, Any]: + """Trigger an addon handler (e.g., reindex). Same as the REST trigger endpoint.""" + import inspect as _inspect # noqa: PLC0415 + + from formicos.surface.addon_loader import ( + _resolve_handler, # noqa: PLC0415 # pyright: ignore[reportPrivateUsage] + ) + + regs: list[Any] = runtime.addon_registrations or [] + reg = next( + (r for r in regs if r.manifest.name == addon_name), None, + ) + if reg is None: + return to_mcp_tool_error(KNOWN_ERRORS["ADDON_NOT_FOUND"]) + if getattr(reg, "disabled", False): + return to_mcp_tool_error(KNOWN_ERRORS["ADDON_NOT_FOUND"].model_copy( + update={"message": f"Addon '{addon_name}' is currently disabled"}, + )) + + try: + handler_fn = _resolve_handler(addon_name, handler) + except (ValueError, AttributeError) as exc: + return to_mcp_tool_error(KNOWN_ERRORS["INVALID_PARAMETER"].model_copy( + update={"message": str(exc)}, + )) + + import json as _json2 # noqa: PLC0415 + parsed_inputs: dict[str, Any] = {} + if inputs: + try: + parsed_inputs = _json2.loads(inputs) + except (ValueError, TypeError): + parsed_inputs = {} + + try: + sig = _inspect.signature(handler_fn) + accepts_ctx = "runtime_context" in sig.parameters + positional_params = [ + p for p in sig.parameters.values() + if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) + and p.name not in ("self", "cls", "runtime_context") + ] + has_tool_args = len(positional_params) >= 3 + except (ValueError, TypeError): + accepts_ctx = False + has_tool_args = False + + try: + if 
has_tool_args and accepts_ctx: + result = await handler_fn( + parsed_inputs, workspace_id, "", + runtime_context=reg.runtime_context, + ) + elif has_tool_args: + result = await handler_fn(parsed_inputs, workspace_id, "") + elif accepts_ctx: + result = await handler_fn( + runtime_context=reg.runtime_context, + ) + else: + result = await handler_fn() + except Exception as exc: # noqa: BLE001 + return to_mcp_tool_error(KNOWN_ERRORS["INVALID_STATE"].model_copy( + update={"message": f"Trigger failed: {exc}"}, + )) + + return {"addon": addon_name, "handler": handler, "result": str(result)} + + # ----------------------------------------------------------------------- + # Wave 73 Track 1e-f: Mutating MCP tools (log_finding, handoff_to_formicos) + # ----------------------------------------------------------------------- + + @mcp.tool(annotations=_MUT) + async def log_finding( + title: str, + content: str, + domains: str = "", + workspace_id: str = "", + ) -> dict[str, Any]: + """Record a developer discovery as a knowledge entry. + + Creates a knowledge entry at 'candidate' status for operator review. + Domains: comma-separated list (e.g., "auth,security"). 
+ """ + from datetime import UTC, datetime # noqa: PLC0415 + from uuid import uuid4 # noqa: PLC0415 + + from formicos.core.events import MemoryEntryCreated # noqa: PLC0415 + + # Resolve workspace + if not workspace_id: + ws_ids = list(runtime.projections.workspaces.keys()) + if not ws_ids: + return to_mcp_tool_error(KNOWN_ERRORS["WORKSPACE_NOT_FOUND"]) + workspace_id = ws_ids[0] + + domain_list = [d.strip() for d in domains.split(",") if d.strip()] if domains else [] + entry_id = f"entry-{uuid4().hex[:12]}" + now = datetime.now(UTC) + + entry_dict: dict[str, Any] = { + "entry_id": entry_id, + "title": title, + "content": content, + "entry_type": "experience", + "sub_type": "learning", + "category": "experience", + "domains": domain_list, + "status": "candidate", + "conf_alpha": 5.0, + "conf_beta": 5.0, + "decay_class": "stable", + "created_at": now.isoformat(), + "created_by": "developer_mcp", + "workspace_id": workspace_id, + "thread_id": "", + "tool_refs": [], + "confidence": 0.5, + } + + await runtime.emit_and_broadcast(MemoryEntryCreated( + seq=0, timestamp=now, address=workspace_id, + entry=entry_dict, workspace_id=workspace_id, + )) + + return { + "status": "recorded", + "entry_id": entry_id, + "title": title, + "domains": domain_list, + "review_status": "candidate", + "_next_actions": ["approve", "get_status"], + } + + @mcp.tool(annotations=_MUT) + async def handoff_to_formicos( + task: str, + context: str, + what_was_tried: str = "", + files: str = "", + workspace_id: str = "", + ) -> dict[str, Any]: + """Hand off work from the developer to FormicOS. + + Creates a thread and spawns a colony with the developer's full context + pre-loaded so the colony doesn't repeat failed approaches. 
+ """ + from formicos.surface.self_maintenance import estimate_blast_radius # noqa: PLC0415 + + # Resolve workspace + if not workspace_id: + ws_ids = list(runtime.projections.workspaces.keys()) + if not ws_ids: + return to_mcp_tool_error(KNOWN_ERRORS["WORKSPACE_NOT_FOUND"]) + workspace_id = ws_ids[0] + + # Build enriched task + sections = [f"## Task\n{task}"] + if context: + sections.append(f"## Developer Context\n{context}") + if what_was_tried: + sections.append(f"## What Was Already Tried\n{what_was_tried}") + if files: + sections.append(f"## Relevant Files\n{files}") + enriched_task = "\n\n".join(sections) + + # Suggest team and estimate blast radius + suggestions = await runtime.suggest_team(task) + br = estimate_blast_radius(task) + + # Pick castes from suggestion + from formicos.core.types import CasteSlot # noqa: PLC0415 + castes: list[CasteSlot] = [] + if suggestions: + for s in suggestions[:3]: + castes.append(CasteSlot( + caste=s.get("caste", "coder"), + count=s.get("count", 1), + )) + if not castes: + castes = [CasteSlot(caste="coder", count=1)] + + # Create thread + thread_name = f"handoff-{task[:40].replace(' ', '-').lower()}" + thread_id = await runtime.create_thread(workspace_id, thread_name) + + # Spawn colony + colony_id = await runtime.spawn_colony( + workspace_id, thread_id, enriched_task, castes, + ) + + # Start colony in background + if runtime.colony_manager is not None: + asyncio.create_task( + runtime.colony_manager.start_colony(colony_id), + ) + + return { + "status": "handed_off", + "colony_id": colony_id, + "thread_id": thread_id, + "workspace_id": workspace_id, + "task": task, + "blast_radius": {"level": br.level, "score": br.score}, + "_next_actions": ["get_status", "chat_colony"], + "_context": {"colony_id": colony_id, "workspace_id": workspace_id}, + } + # ----------------------------------------------------------------------- # Wave 33 B5: MCP resources (5) # ----------------------------------------------------------------------- @@ 
-714,6 +982,283 @@ async def plan_task_prompt(goal: str, workspace_id: str) -> str: f"Available templates:\n{template_lines or ' (none)'}" ) + # ----------------------------------------------------------------------- + # Wave 73 Track 1a-d: MCP prompts (4 read-only) + # ----------------------------------------------------------------------- + + @mcp.prompt("morning-status") + async def morning_status_prompt(workspace_id: str) -> str: + """Get a complete status briefing for a workspace. + + Composes: operational summary, project plan, autonomy score, + recent colony outcomes, pending actions. Returns natural-language + markdown suitable for starting a work session. + """ + from formicos.surface.action_queue import ( # noqa: PLC0415 + list_actions as _list_actions, + ) + from formicos.surface.operations_coordinator import ( # noqa: PLC0415 + build_operations_summary as _build_ops, + ) + from formicos.surface.project_plan import ( # noqa: PLC0415 + load_project_plan as _load_plan, + ) + from formicos.surface.project_plan import ( + render_for_queen as _render_plan, + ) + from formicos.surface.self_maintenance import ( # noqa: PLC0415 + compute_autonomy_score as _autonomy, + ) + + data_dir = runtime.settings.system.data_dir + + # 1. Operational summary + ops = _build_ops(data_dir, workspace_id, runtime.projections) + + # 2. Project plan + plan = _load_plan(data_dir) + plan_text = _render_plan(plan) or "No project plan set." + + # 3. Autonomy score + auto = _autonomy(workspace_id, runtime.projections) + + # 4. Pending actions + pending = _list_actions( + data_dir, workspace_id, status="pending_review", limit=10, + ) + pending_actions = pending.get("actions", []) + + # 5. 
Recent colony outcomes + ws = runtime.projections.workspaces.get(workspace_id) + recent_colonies: list[dict[str, Any]] = [] + if ws is not None: + for thread in ws.threads.values(): + for colony in thread.colonies.values(): + if colony.status in ("completed", "failed"): + recent_colonies.append({ + "id": colony.id, + "status": colony.status, + "cost": colony.cost, + "round": colony.round_number, + }) + recent_colonies = recent_colonies[-5:] + + # Compose + ws_name = ws.name if ws is not None else workspace_id + parts = [f"# Status Briefing — {ws_name}\n"] + + parts.append("## Operational Health") + parts.append( + f"{ops.get('pending_review_count', 0)} actions pending review | " + f"{len(ops.get('continuation_candidates', []))} continuations available" + ) + parts.append( + f"Autonomy: {auto.grade} ({auto.score}/100)" + f"{f' — {auto.recommendation}' if auto.recommendation else ''}" + ) + + parts.append(f"\n## Project Plan\n{plan_text}") + + if pending_actions: + parts.append("\n## Pending Actions") + for a in pending_actions: + parts.append(f"- [{a.get('kind', '?')}] {a.get('title', 'Untitled')}") + else: + parts.append("\n## Pending Actions\nNone.") + + if recent_colonies: + parts.append("\n## Recent Colony Outcomes") + for c in recent_colonies: + parts.append(f"- {c['id']}: {c['status']} (${c['cost']:.2f})") + else: + parts.append("\n## Recent Colony Outcomes\nNone.") + + candidates: list[Any] = ops.get("continuation_candidates", []) + if candidates: + parts.append("\n## Continuation Candidates") + for cand in candidates: + parts.append(f"- {cand}") + + return "\n".join(parts) + + @mcp.prompt("delegate-task") + async def delegate_task_prompt( + task: str, + context: str = "", + workspace_id: str = "", + ) -> str: + """Plan a colony delegation for a task. + + Resolves workspace, suggests a team, estimates blast radius. + Returns a delegation plan — the developer confirms before spawning. 
+ """ + from formicos.surface.self_maintenance import ( # noqa: PLC0415 + estimate_blast_radius as _blast, + ) + + if not workspace_id: + ws_ids = list(runtime.projections.workspaces.keys()) + workspace_id = ws_ids[0] if ws_ids else "default" + + suggestions = await runtime.suggest_team(task) + br = _blast(task) + + parts = ["# Delegation Plan\n"] + parts.append(f"**Task:** {task}") + parts.append(f"**Workspace:** {workspace_id}") + if context: + parts.append(f"**Context:** {context}") + + if suggestions: + parts.append("\n## Suggested Team") + for s in suggestions: + reason = s.get("reason", "") + line = f"- {s.get('caste', '?')} ×{s.get('count', 1)}" + if reason: + line += f": {reason}" + parts.append(line) + + parts.append(f"\n## Blast Radius: {br.level} ({br.score:.1f})") + for f in br.factors: + parts.append(f"- {f}") + + castes_json = ", ".join( + f'"{s.get("caste", "coder")}"' for s in (suggestions or [{"caste": "coder"}]) + ) + parts.append("\n## Next Steps") + parts.append("To spawn this colony, call the `spawn_colony` tool with:") + parts.append(f"- workspace_id: {workspace_id}") + parts.append("- thread_id: (create a new thread or use an existing one)") + parts.append(f"- task: {task}") + parts.append(f"- castes: [{castes_json}]") + + return "\n".join(parts) + + @mcp.prompt("review-overnight-work") + async def review_overnight_work_prompt(workspace_id: str) -> str: + """Review what happened while you were away. + + Shows: recently executed actions, pending review items, new knowledge + entries, colony outcomes from last 24h. 
+ """ + from formicos.surface.action_queue import ( # noqa: PLC0415 + list_actions as _list_actions, + ) + + data_dir = runtime.settings.system.data_dir + + executed = _list_actions( + data_dir, workspace_id, status="executed", limit=20, + ) + pending = _list_actions( + data_dir, workspace_id, status="pending_review", limit=20, + ) + + # Recent knowledge entries + recent_entries: list[dict[str, Any]] = [] + for eid, entry in runtime.projections.memory_entries.items(): + if entry.get("workspace_id") != workspace_id: + continue + if entry.get("status") == "rejected": + continue + recent_entries.append({"id": eid, **entry}) + recent_entries.sort( + key=lambda e: e.get("created_at", ""), reverse=True, + ) + recent_entries = recent_entries[:10] + + parts = [f"# Overnight Review — {workspace_id}\n"] + + # Executed actions + exec_actions = executed.get("actions", []) + if exec_actions: + parts.append("## Recently Executed Actions") + for a in exec_actions: + parts.append(f"- [{a.get('kind', '?')}] {a.get('title', 'Untitled')}") + else: + parts.append("## Recently Executed Actions\nNone.") + + # Pending review + pend_actions = pending.get("actions", []) + if pend_actions: + parts.append("\n## Pending Review") + for a in pend_actions: + parts.append(f"- [{a.get('kind', '?')}] {a.get('title', 'Untitled')}") + else: + parts.append("\n## Pending Review\nNone.") + + # New knowledge + if recent_entries: + parts.append("\n## New Knowledge Entries") + for e in recent_entries: + alpha = float(e.get("conf_alpha", 5.0)) + beta = float(e.get("conf_beta", 5.0)) + conf = alpha / (alpha + beta) if (alpha + beta) > 0 else 0.5 + parts.append( + f"- **{e.get('title', 'Untitled')}** " + f"({e.get('status', '?')}, conf: {conf:.0%})" + ) + else: + parts.append("\n## New Knowledge Entries\nNone.") + + return "\n".join(parts) + + @mcp.prompt("knowledge-for-context") + async def knowledge_for_context_prompt( + query: str, + workspace_id: str = "", + ) -> str: + """Search institutional memory and 
return relevant entries as prose. + + Returns top-5 knowledge entries formatted for context injection. + """ + query_lower = query.lower() + scored: list[tuple[int, str, dict[str, Any]]] = [] + for eid, entry in runtime.projections.memory_entries.items(): + if entry.get("status") == "rejected": + continue + if workspace_id and entry.get("workspace_id") != workspace_id: + continue + # Simple keyword scoring on title + content + domains + score = 0 + title = str(entry.get("title", "")).lower() + content_text = str(entry.get("content", "")).lower() + entry_domains = entry.get("domains", []) + for word in query_lower.split(): + if word in title: + score += 3 + if word in content_text: + score += 1 + if any(word in str(d).lower() for d in entry_domains): + score += 2 + if score > 0: + scored.append((score, eid, entry)) + + scored.sort(key=lambda x: x[0], reverse=True) + top = scored[:5] + + if not top: + return f"No knowledge entries found matching: {query}" + + parts = [f"# Knowledge Context: {query}\n"] + for _score, _eid, entry in top: + alpha = float(entry.get("conf_alpha", 5.0)) + beta = float(entry.get("conf_beta", 5.0)) + conf = alpha / (alpha + beta) if (alpha + beta) > 0 else 0.5 + content_text = str(entry.get("content", ""))[:500] + entry_domains = entry.get("domains", []) + parts.append(f"## {entry.get('title', 'Untitled')} (confidence: {conf:.0%})") + parts.append(content_text) + parts.append( + f"Source: {entry.get('created_by', '?')} ({entry.get('created_at', '?')[:10]}), " + f"status: {entry.get('status', '?')}" + ) + if entry_domains: + parts.append(f"Domains: {', '.join(str(d) for d in entry_domains)}") + parts.append("") + + return "\n".join(parts) + # ----------------------------------------------------------------------- # Wave 34 B2: Proactive intelligence briefing resource # ----------------------------------------------------------------------- @@ -727,6 +1272,44 @@ async def briefing_resource(workspace_id: str) -> dict[str, Any]: briefing = 
_gen(workspace_id, runtime.projections) return briefing.model_dump() + # ----------------------------------------------------------------------- + # Wave 73 Track 2: MCP resources (3 new) + # ----------------------------------------------------------------------- + + @mcp.resource("formicos://plan") + async def plan_resource() -> str: + """Project plan formatted as markdown. Global to the FormicOS instance.""" + from formicos.surface.project_plan import ( # noqa: PLC0415 + load_project_plan as _load_plan, + ) + from formicos.surface.project_plan import ( + render_for_queen as _render_plan, + ) + data_dir = runtime.settings.system.data_dir + plan = _load_plan(data_dir) + rendered = _render_plan(plan) + return rendered or "No project plan configured." + + @mcp.resource("formicos://procedures/{workspace_id}") + async def procedures_resource(workspace_id: str) -> str: + """Operating procedures for a workspace, formatted as markdown.""" + from formicos.surface.operational_state import ( # noqa: PLC0415 + render_procedures_for_queen as _render_procs, + ) + data_dir = runtime.settings.system.data_dir + text = _render_procs(data_dir, workspace_id) + return text or "No operating procedures configured." + + @mcp.resource("formicos://journal/{workspace_id}") + async def journal_resource(workspace_id: str) -> str: + """Recent journal entries for a workspace, formatted as markdown.""" + from formicos.surface.operational_state import ( # noqa: PLC0415 + render_journal_for_queen as _render_journal, + ) + data_dir = runtime.settings.system.data_dir + text = _render_journal(data_dir, workspace_id, max_lines=30) + return text or "No journal entries yet." + # Activate transforms so @mcp.resource() and @mcp.prompt() # definitions are automatically exposed as tools. 
try: diff --git a/src/formicos/surface/memory_extractor.py b/src/formicos/surface/memory_extractor.py index e15b961..0092ef7 100644 --- a/src/formicos/surface/memory_extractor.py +++ b/src/formicos/surface/memory_extractor.py @@ -210,10 +210,26 @@ def build_extraction_prompt( '- "permanent": verified definitions, mathematical facts, immutable truths\n' 'Default to "ephemeral" if uncertain.\n' ) + # Wave 67: domain normalization via existing entry suggestion + existing_domains: set[str] = set() + if existing_entries: + for e in existing_entries[:10]: + for d in e.get("domains", []): + existing_domains.add(d) + domain_hint = "" + if existing_domains: + sorted_domains = sorted(existing_domains)[:20] + domain_hint = ( + "\nUse one of these existing domain tags if applicable " + "(do not create synonyms): " + + ", ".join(sorted_domains) + ) parts.append( f'Tag each entry with "primary_domain": "{task_class}". ' "This classifies the task context the knowledge was extracted from.\n" ) + if domain_hint: + parts.append(domain_hint) if not (existing_entries and colony_status == "completed"): parts.append( 'Return JSON: {"skills": [...], "experiences": [...]}\n' diff --git a/src/formicos/surface/memory_store.py b/src/formicos/surface/memory_store.py index 7cb695d..16d44eb 100644 --- a/src/formicos/surface/memory_store.py +++ b/src/formicos/surface/memory_store.py @@ -75,6 +75,7 @@ async def upsert_entry(self, entry: dict[str, Any]) -> None: "created_at": entry.get("created_at", ""), "trajectory_data": entry.get("trajectory_data", []), "sub_type": str(entry.get("sub_type", "")), + "hierarchy_path": entry.get("hierarchy_path", "/"), }, ) await self._vector.upsert(collection=COLLECTION_NAME, docs=[doc]) diff --git a/src/formicos/surface/model_registry_view.py b/src/formicos/surface/model_registry_view.py index aa6c500..13d95c0 100644 --- a/src/formicos/surface/model_registry_view.py +++ b/src/formicos/surface/model_registry_view.py @@ -24,6 +24,7 @@ def 
model_registry_view(settings: SystemSettings) -> list[dict[str, Any]]: "status": _derive_status(m.api_key_env, m.status), "costPerInputToken": m.cost_per_input_token, "costPerOutputToken": m.cost_per_output_token, + "hidden": m.hidden, } for m in settings.models.registry ] diff --git a/src/formicos/surface/operational_state.py b/src/formicos/surface/operational_state.py new file mode 100644 index 0000000..a47b258 --- /dev/null +++ b/src/formicos/surface/operational_state.py @@ -0,0 +1,382 @@ +"""Shared operational-state helper (Wave 71.0 Track 1). + +Single source of truth for workspace-scoped operational files: +- ``.formicos/operations/{workspace_id}/queen_journal.md`` +- ``.formicos/operations/{workspace_id}/operating_procedures.md`` + +Operational state is file-backed working memory, NOT institutional memory. +Do not route through ``memory_entries``. +""" + +from __future__ import annotations + +import json as _json +import re +from datetime import UTC, datetime +from pathlib import Path +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from formicos.surface.projections import ProjectionStore + +import structlog + +log = structlog.get_logger() + +# --------------------------------------------------------------------------- +# Path resolution +# --------------------------------------------------------------------------- + +_OPS_DIR = "operations" + + +def _ops_dir(data_dir: str, workspace_id: str) -> Path: + """Return the workspace-scoped operations directory.""" + return Path(data_dir) / ".formicos" / _OPS_DIR / workspace_id + + +def journal_path(data_dir: str, workspace_id: str) -> Path: + """Return the canonical journal path for a workspace.""" + return _ops_dir(data_dir, workspace_id) / "queen_journal.md" + + +def procedures_path(data_dir: str, workspace_id: str) -> Path: + """Return the canonical operating procedures path for a workspace.""" + return _ops_dir(data_dir, workspace_id) / "operating_procedures.md" + + +# 
--------------------------------------------------------------------------- +# Operating procedures — editable, overwritable +# --------------------------------------------------------------------------- + + +def load_procedures(data_dir: str, workspace_id: str) -> str: + """Load operating procedures text. Returns empty string if absent.""" + path = procedures_path(data_dir, workspace_id) + if not path.is_file(): + return "" + try: + return path.read_text(encoding="utf-8") + except OSError: + return "" + + +def save_procedures(data_dir: str, workspace_id: str, content: str) -> None: + """Write operating procedures (full overwrite).""" + path = procedures_path(data_dir, workspace_id) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content, encoding="utf-8") + + +def append_procedure_rule( + data_dir: str, + workspace_id: str, + heading: str, + rule: str, +) -> str: + """Append a rule under a markdown heading, creating section if needed. + + Returns the updated full text. 
+ """ + text = load_procedures(data_dir, workspace_id) + lines = text.split("\n") if text else [] + + heading_line = f"## {heading}" + heading_idx = -1 + for i, line in enumerate(lines): + if line.strip() == heading_line: + heading_idx = i + break + + if heading_idx == -1: + # Add heading at end + if lines and lines[-1].strip(): + lines.append("") + lines.append(heading_line) + lines.append(f"- {rule}") + else: + # Insert after last bullet under this heading + insert_at = heading_idx + 1 + for i in range(heading_idx + 1, len(lines)): + stripped = lines[i].strip() + if stripped.startswith("#"): + break + if stripped.startswith("- ") or stripped == "": + insert_at = i + 1 + else: + break + lines.insert(insert_at, f"- {rule}") + + result = "\n".join(lines) + save_procedures(data_dir, workspace_id, result) + return result + + +# --------------------------------------------------------------------------- +# Queen journal — append-only working log +# --------------------------------------------------------------------------- + + +def append_journal_entry( + data_dir: str, + workspace_id: str, + source: str, + message: str, + *, + heading: str = "", + metadata: dict[str, Any] | None = None, +) -> None: + """Append a timestamped journal entry. + + Parameters + ---------- + source: + Short label: ``session``, ``queen``, ``maintenance``, ``operator``. + message: + Compact one-line summary (not chat transcript). + heading: + Optional heading for display-board entries. When provided, format + becomes ``- [ts] [source] [heading] message``. + metadata: + Optional JSON metadata written as an HTML comment on the next line. + Stripped from Queen prompt context by ``read_journal_tail()``. 
+ """ + path = journal_path(data_dir, workspace_id) + path.parent.mkdir(parents=True, exist_ok=True) + + ts = datetime.now(UTC).strftime("%Y-%m-%d %H:%M") + if heading: + line = f"- [{ts}] [{source}] [{heading}] {message}\n" + else: + line = f"- [{ts}] [{source}] {message}\n" + + with path.open("a", encoding="utf-8") as f: + f.write(line) + if metadata is not None: + f.write(f" \n") + + +def read_journal_tail( + data_dir: str, + workspace_id: str, + max_lines: int = 30, +) -> str: + """Read the most recent journal entries. Returns empty string if absent. + + Metadata comment lines (````) are stripped so they don't + leak into Queen prompt context. + """ + path = journal_path(data_dir, workspace_id) + if not path.is_file(): + return "" + try: + text = path.read_text(encoding="utf-8") + except OSError: + return "" + + lines = text.strip().splitlines() + # Strip metadata comment lines for clean Queen context + clean = [ln for ln in lines if not _METADATA_COMMENT_RE.match(ln)] + tail = clean[-max_lines:] if len(clean) > max_lines else clean + return "\n".join(tail) + + +def read_journal_full(data_dir: str, workspace_id: str) -> str: + """Read the full journal text. Returns empty string if absent.""" + path = journal_path(data_dir, workspace_id) + if not path.is_file(): + return "" + try: + return path.read_text(encoding="utf-8") + except OSError: + return "" + + +# --------------------------------------------------------------------------- +# Structured journal parse (for API endpoint) +# --------------------------------------------------------------------------- + +_JOURNAL_ENTRY_RE = re.compile( + r"^- \[([^\]]+)\] \[([^\]]+)\](?: \[([^\]]+)\])? (.*)$", +) +_METADATA_COMMENT_RE = re.compile(r"^\s*$") + + +def parse_journal_entries(text: str) -> list[dict[str, Any]]: + """Parse journal text into structured entries. + + Returns dicts with keys: ``timestamp``, ``source``, ``heading`` (or None), + ``message``, and ``metadata`` (parsed JSON dict or None). 
+ """ + entries: list[dict[str, Any]] = [] + lines = text.strip().splitlines() + for i, line in enumerate(lines): + m = _JOURNAL_ENTRY_RE.match(line.strip()) + if not m: + continue + entry: dict[str, Any] = { + "timestamp": m.group(1), + "source": m.group(2), + "heading": m.group(3), # None if legacy format + "message": m.group(4), + } + # Check next line for metadata comment + meta: dict[str, Any] | None = None + if i + 1 < len(lines): + mm = _METADATA_COMMENT_RE.match(lines[i + 1]) + if mm: + try: + meta = _json.loads(mm.group(1)) + except (ValueError, TypeError): + pass + entry["metadata"] = meta + entries.append(entry) + return entries + + +# --------------------------------------------------------------------------- +# Queen context rendering — compact injection text +# --------------------------------------------------------------------------- + + +def render_procedures_for_queen(data_dir: str, workspace_id: str) -> str: + """Render procedures as compact Queen context block. + + Returns empty string when no procedures file exists. + """ + text = load_procedures(data_dir, workspace_id) + if not text.strip(): + return "" + return f"# Operating Procedures\n{text}" + + +def render_journal_for_queen( + data_dir: str, + workspace_id: str, + max_lines: int = 20, +) -> str: + """Render recent journal entries as compact Queen context block. + + Returns empty string when no journal exists. 
+ """ + tail = read_journal_tail(data_dir, workspace_id, max_lines=max_lines) + if not tail.strip(): + return "" + return f"# Queen Journal (recent)\n{tail}" + + +# --------------------------------------------------------------------------- +# Public API for structured reads +# --------------------------------------------------------------------------- + + +def get_journal_summary( + data_dir: str, + workspace_id: str, + max_entries: int = 50, +) -> dict[str, Any]: + """Return structured journal data for the REST endpoint.""" + text = read_journal_full(data_dir, workspace_id) + if not text.strip(): + return {"exists": False, "entries": []} + + entries = parse_journal_entries(text) + tail = entries[-max_entries:] if len(entries) > max_entries else entries + # Map to frontend-expected shape: heading + body + mapped = [ + { + "timestamp": e["timestamp"], + "heading": e.get("heading") or e["source"], + "body": e["message"], + "source": e["source"], + "metadata": e.get("metadata"), + } + for e in tail + ] + return { + "exists": True, + "totalEntries": len(entries), + "entries": mapped, + } + + +def get_procedures_summary( + data_dir: str, + workspace_id: str, +) -> dict[str, Any]: + """Return structured procedures data for the REST endpoint.""" + text = load_procedures(data_dir, workspace_id) + if not text.strip(): + return {"exists": False, "content": ""} + return {"exists": True, "content": text} + + +# --------------------------------------------------------------------------- +# Sweep auto-posting — display board population +# --------------------------------------------------------------------------- + + +def post_sweep_observations( + data_dir: str, + workspace_id: str, + summary: dict[str, Any], + projections: ProjectionStore, +) -> int: + """Post notable findings from the operational sweep to the display board. + + Returns the number of observations posted. Keeps it conservative (2-5 items). + """ + posted = 0 + + # 1. 
Ready continuations + candidates = summary.get("continuation_candidates", []) + ready = [c for c in candidates if isinstance(c, dict) and c.get("ready_for_autonomy")] + if ready: + append_journal_entry( + data_dir, workspace_id, source="maintenance", + message=f"{len(ready)} continuation(s) ready for autonomous execution", + heading="status:normal — Continuations ready", + metadata={"display_board": True, "type": "status", "priority": "normal"}, + ) + posted += 1 + + # 2. Pending review count + pending = summary.get("pending_review_count", 0) + if pending > 0: + append_journal_entry( + data_dir, workspace_id, source="maintenance", + message=f"{pending} action(s) awaiting operator review", + heading="status:attention — Pending reviews", + metadata={"display_board": True, "type": "status", "priority": "attention"}, + ) + posted += 1 + + # 3. Stalled threads + stalled = summary.get("stalled_thread_count", 0) + if stalled > 0: + append_journal_entry( + data_dir, workspace_id, source="maintenance", + message=f"{stalled} thread(s) appear stalled — consider reviewing or archiving", + heading="concern:attention — Stalled threads", + metadata={"display_board": True, "type": "concern", "priority": "attention"}, + ) + posted += 1 + + # 4. 
Failed colonies (from recent outcomes) + ws = projections.workspaces.get(workspace_id) + if ws is not None: + failed_count = 0 + for thread in ws.threads.values(): + for colony in thread.colonies.values(): + if colony.status == "failed": + failed_count += 1 + if failed_count > 0: + append_journal_entry( + data_dir, workspace_id, source="maintenance", + message=f"{failed_count} colony/colonies in failed state", + heading="concern:attention — Failed colonies", + metadata={"display_board": True, "type": "concern", "priority": "attention"}, + ) + posted += 1 + + return posted diff --git a/src/formicos/surface/operations_coordinator.py b/src/formicos/surface/operations_coordinator.py new file mode 100644 index 0000000..6c57710 --- /dev/null +++ b/src/formicos/surface/operations_coordinator.py @@ -0,0 +1,461 @@ +"""Operations coordinator — cross-artifact synthesis (Wave 71.0 Track 8). + +Inspects project plan, thread plans, session summaries, recent colony +outcomes, and queued actions to derive: + +- continuation_candidates +- sync_issues +- recent_progress +- compact counts (pending_review, stalled, active milestones) +- operator-availability signals + +This is a synthesis layer, not a second source of truth. +""" + +from __future__ import annotations + +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +import structlog + +log = structlog.get_logger() + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + + +def build_operations_summary( + data_dir: str, + workspace_id: str, + projections: Any = None, +) -> dict[str, Any]: + """Build a compact operational summary from all available artifacts. + + Gracefully degrades when Team A/B helpers or projections are absent. 
+ """ + result: dict[str, Any] = { + "workspace_id": workspace_id, + "pending_review_count": 0, + "active_milestone_count": 0, + "stalled_thread_count": 0, + "last_operator_activity_at": None, + "idle_for_minutes": None, + "operator_active": False, + "continuation_candidates": [], + "sync_issues": [], + "recent_progress": [], + } + + if not data_dir: + return result + + # -- 1. Project plan -- + _pp = _load_project_plan_safe(data_dir) + milestones: list[dict[str, Any]] = ( + _pp.get("milestones", []) if _pp.get("exists") else [] + ) + result["active_milestone_count"] = sum( + 1 for m in milestones if m.get("status") != "completed" + ) + + # -- 2. Thread plans -- + thread_plans = _load_thread_plans_safe(data_dir) + + # -- 3. Session summaries -- + sessions = _load_session_summaries_safe(data_dir) + + # -- 4. Action queue -- + actions = _load_actions_safe(data_dir, workspace_id) + result["pending_review_count"] = sum( + 1 for a in actions if a.get("status") == "pending_review" + ) + + # -- 5. Operator activity -- + result.update(_compute_operator_activity(projections, workspace_id)) + + # -- 6. Derive continuation candidates -- + result["continuation_candidates"] = _find_continuation_candidates( + thread_plans, sessions, projections, workspace_id, + ) + + # -- 7. Derive sync issues -- + result["sync_issues"] = _find_sync_issues( + milestones, thread_plans, actions, + ) + + # -- 8. Recent progress -- + result["recent_progress"] = _collect_recent_progress( + thread_plans, milestones, + ) + + # -- 9. Stalled threads -- + result["stalled_thread_count"] = _count_stalled_threads( + thread_plans, projections, workspace_id, + ) + + return result + + +def render_continuity_block(summary: dict[str, Any]) -> str: + """Render the operational summary into compact text for Queen injection. + + Returns an empty string if there is nothing useful to report. 
+ """ + parts: list[str] = [] + + # Counts header + pending = summary.get("pending_review_count", 0) + active_ms = summary.get("active_milestone_count", 0) + stalled = summary.get("stalled_thread_count", 0) + idle_min = summary.get("idle_for_minutes") + + counts: list[str] = [] + if pending: + counts.append(f"{pending} pending review") + if active_ms: + counts.append(f"{active_ms} active milestones") + if stalled: + counts.append(f"{stalled} stalled threads") + if idle_min is not None: + counts.append(f"operator idle {idle_min}m") + + if counts: + parts.append("# Operational Loop Summary") + parts.append("Status: " + ", ".join(counts)) + + # Continuation candidates + candidates = summary.get("continuation_candidates", []) + if candidates: + parts.append("Continuations:") + for c in candidates[:3]: + ready = c.get("ready_for_autonomy", False) + reason = c.get("blocked_reason", "") + tag = "READY" if ready else f"BLOCKED: {reason}" if reason else "review" + parts.append(f" - [{tag}] {c.get('description', '?')}") + + # Sync issues + issues = summary.get("sync_issues", []) + if issues: + parts.append("Sync issues:") + for issue in issues[:3]: + parts.append(f" - {issue.get('description', '?')}") + + # Recent progress + progress = summary.get("recent_progress", []) + if progress: + parts.append("Recent:") + for p in progress[:3]: + parts.append(f" - {p.get('description', '?')}") + + if len(parts) <= 1: + return "" + + return "\n".join(parts) + + +# --------------------------------------------------------------------------- +# Internal helpers — each loads one artifact safely +# --------------------------------------------------------------------------- + + +def _load_project_plan_safe(data_dir: str) -> dict[str, Any]: + try: + from formicos.surface.project_plan import load_project_plan # noqa: PLC0415 + + return load_project_plan(data_dir) + except (ImportError, OSError, TypeError): + return {"exists": False} + + +def _load_thread_plans_safe(data_dir: str) -> 
list[dict[str, Any]]: + try: + from formicos.surface.thread_plan import load_all_thread_plans # noqa: PLC0415 + + return load_all_thread_plans(data_dir) + except (ImportError, OSError, TypeError): + return [] + + +def _load_session_summaries_safe(data_dir: str) -> dict[str, str]: + """Return {thread_id: summary_text} for all session files.""" + sessions: dict[str, str] = {} + sessions_dir = Path(data_dir) / ".formicos" / "sessions" + if not sessions_dir.is_dir(): + return sessions + + try: + for sf in sessions_dir.glob("*.md"): + text = sf.read_text(encoding="utf-8")[:2000] + sessions[sf.stem] = text + except OSError: + pass + + return sessions + + +def _load_actions_safe( + data_dir: str, workspace_id: str, +) -> list[dict[str, Any]]: + try: + from formicos.surface.action_queue import read_actions # noqa: PLC0415 + + return read_actions(data_dir, workspace_id) + except (ImportError, OSError, TypeError): + return [] + + +def _compute_operator_activity( + projections: Any, + workspace_id: str, +) -> dict[str, Any]: + """Derive operator idle/active signal from projections.""" + result: dict[str, Any] = { + "last_operator_activity_at": None, + "idle_for_minutes": None, + "operator_active": False, + } + + if projections is None: + return result + + ws = None + if hasattr(projections, "workspaces"): + ws = projections.workspaces.get(workspace_id) + if ws is None: + return result + + # Scan colonies for the most recent operator chat message + latest_ts: str = "" + colonies: list[Any] = [] + if hasattr(projections, "list_colonies"): + colonies = projections.list_colonies(workspace_id) + for colony_proj in colonies: + if not hasattr(colony_proj, "chat_messages"): + continue + for msg in colony_proj.chat_messages: + sender = getattr(msg, "sender", "") + ts = getattr(msg, "timestamp", "") + if sender == "operator" and ts > latest_ts: + latest_ts = ts + + if latest_ts: + result["last_operator_activity_at"] = latest_ts + try: + last_dt = 
datetime.fromisoformat(latest_ts.replace("Z", "+00:00")) + now = datetime.now(UTC) + delta = now - last_dt + idle_minutes = int(delta.total_seconds() / 60) + result["idle_for_minutes"] = max(0, idle_minutes) + result["operator_active"] = idle_minutes < 15 + except (ValueError, TypeError): + pass + + return result + + +# --------------------------------------------------------------------------- +# Synthesis — continuation candidates +# --------------------------------------------------------------------------- + + +def _find_continuation_candidates( + thread_plans: list[dict[str, Any]], + sessions: dict[str, str], + projections: Any, + workspace_id: str, +) -> list[dict[str, Any]]: + """Identify threads that could be continued.""" + candidates: list[dict[str, Any]] = [] + + for plan in thread_plans: + summary = plan.get("summary", {}) + total = summary.get("total", 0) + completed = summary.get("completed", 0) + failed = summary.get("failed", 0) + pending = summary.get("pending", 0) + tid = plan.get("thread_id", "") + + if pending == 0: + continue # Fully complete or empty + + # Check for active colonies on this thread + has_active_colony = _thread_has_active_colony( + projections, workspace_id, tid, + ) + + # Has a session summary? 
(indicates prior work) + has_session = tid in sessions + + # Check for failures + if failed > 0: + candidates.append({ + "thread_id": tid, + "description": f"Thread {tid[:12]} has {failed} failed step(s), " + f"{pending} pending", + "ready_for_autonomy": False, + "blocked_reason": "prior failures need review", + "priority": "medium", + }) + elif not has_active_colony and has_session and pending > 0: + candidates.append({ + "thread_id": tid, + "description": f"Thread {tid[:12]}: {completed}/{total} steps done, " + f"no active colony", + "ready_for_autonomy": True, + "blocked_reason": "", + "priority": "high", + }) + elif not has_active_colony and pending > 0: + candidates.append({ + "thread_id": tid, + "description": f"Thread {tid[:12]}: {pending} pending steps, " + f"no active colony, no prior session", + "ready_for_autonomy": False, + "blocked_reason": "no prior session context", + "priority": "low", + }) + + return sorted(candidates, key=lambda c: {"high": 0, "medium": 1, "low": 2}.get( + c.get("priority", "low"), 3, + )) + + +def _thread_has_active_colony( + projections: Any, + workspace_id: str, + thread_id: str, +) -> bool: + """Check if a thread currently has an active (running) colony.""" + if projections is None or not hasattr(projections, "list_colonies"): + return False + + try: + colonies = projections.list_colonies(workspace_id) + for colony in colonies: + cid = getattr(colony, "thread_id", "") + status = getattr(colony, "status", "") + if cid == thread_id and status in ("running", "spawning"): + return True + except (AttributeError, TypeError): + pass + + return False + + +# --------------------------------------------------------------------------- +# Synthesis — sync issues +# --------------------------------------------------------------------------- + + +def _find_sync_issues( + milestones: list[dict[str, Any]], + thread_plans: list[dict[str, Any]], + actions: list[dict[str, Any]], +) -> list[dict[str, Any]]: + """Identify cross-artifact sync 
problems.""" + issues: list[dict[str, Any]] = [] + + # Build thread_id → plan lookup + plan_by_thread: dict[str, dict[str, Any]] = {} + for plan in thread_plans: + tid = plan.get("thread_id", "") + if tid: + plan_by_thread[tid] = plan + + # Check: milestone still pending but thread plan fully complete + for ms in milestones: + if ms.get("status") == "completed": + continue + tid = ms.get("thread_id", "") + if tid and tid in plan_by_thread: + plan = plan_by_thread[tid] + summary = plan.get("summary", {}) + if summary.get("total", 0) > 0 and summary.get("pending", 0) == 0: + issues.append({ + "type": "milestone_plan_mismatch", + "description": ( + f"Milestone '{ms.get('description', '?')[:50]}' is pending " + f"but thread {tid[:12]} plan is fully complete" + ), + }) + + # Check: pending actions with no clear milestone owner + pending_actions = [a for a in actions if a.get("status") == "pending_review"] + if len(pending_actions) > 3: + active_tids = {ms.get("thread_id") for ms in milestones + if ms.get("status") != "completed" and ms.get("thread_id")} + orphan_actions = [ + a for a in pending_actions + if a.get("thread_id") and a["thread_id"] not in active_tids + ] + if orphan_actions: + issues.append({ + "type": "orphan_actions", + "description": ( + f"{len(orphan_actions)} pending action(s) reference " + f"threads with no active milestone" + ), + }) + + return issues + + +# --------------------------------------------------------------------------- +# Synthesis — recent progress +# --------------------------------------------------------------------------- + + +def _collect_recent_progress( + thread_plans: list[dict[str, Any]], + milestones: list[dict[str, Any]], +) -> list[dict[str, Any]]: + """Collect recently completed steps and milestones.""" + progress: list[dict[str, Any]] = [] + + # Recently completed milestones + for ms in milestones: + if ms.get("status") == "completed": + progress.append({ + "type": "milestone_completed", + "description": f"Milestone 
completed: {ms.get('description', '?')[:60]}", + }) + + # Completed steps from thread plans + for plan in thread_plans: + tid = plan.get("thread_id", "") + summary = plan.get("summary", {}) + completed = summary.get("completed", 0) + total = summary.get("total", 0) + if completed > 0: + progress.append({ + "type": "thread_progress", + "description": f"Thread {tid[:12]}: {completed}/{total} steps completed", + }) + + return progress[:5] + + +# --------------------------------------------------------------------------- +# Stalled thread count +# --------------------------------------------------------------------------- + + +def _count_stalled_threads( + thread_plans: list[dict[str, Any]], + projections: Any, + workspace_id: str, +) -> int: + """Count threads with pending work but no active colony.""" + stalled = 0 + for plan in thread_plans: + summary = plan.get("summary", {}) + if summary.get("pending", 0) == 0: + continue + tid = plan.get("thread_id", "") + if not _thread_has_active_colony(projections, workspace_id, tid): + stalled += 1 + return stalled diff --git a/src/formicos/surface/project_plan.py b/src/formicos/surface/project_plan.py new file mode 100644 index 0000000..d875cd5 --- /dev/null +++ b/src/formicos/surface/project_plan.py @@ -0,0 +1,321 @@ +"""Shared project-plan parser/helper (Wave 70.0 Track 4). 
+ +Single source of truth for: +- resolving the project plan path +- parsing markdown into structured milestones +- rendering parsed plan into compact Queen context text +- updating ``Updated:`` timestamps consistently +""" + +from __future__ import annotations + +import re +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +import structlog + +log = structlog.get_logger() + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +_PROJECT_PLAN_FILENAME = "project_plan.md" + +_MILESTONE_RE = re.compile( + r"^- \[(\d+)\] \[(\w+)\] (.*)$", +) + + +# --------------------------------------------------------------------------- +# Path resolution +# --------------------------------------------------------------------------- + + +def project_plan_path(data_dir: str) -> Path: + """Return the canonical project plan path for a data root.""" + return Path(data_dir) / ".formicos" / _PROJECT_PLAN_FILENAME + + +# --------------------------------------------------------------------------- +# Parsing +# --------------------------------------------------------------------------- + + +def parse_project_plan(text: str) -> dict[str, Any]: + """Parse project plan markdown into structured data. 
+ + Returns:: + + { + "exists": True, + "goal": "...", + "updated": "...", + "milestones": [ + { + "index": 0, + "status": "completed", + "description": "...", + "thread_id": "...", + "completed_at": "...", + "note": "...", + } + ], + } + """ + goal = "" + updated = "" + milestones: list[dict[str, Any]] = [] + + for line in text.splitlines(): + stripped = line.strip() + + # Goal line: "# Project Plan: " + if stripped.startswith("# Project Plan:"): + goal = stripped[len("# Project Plan:"):].strip() + continue + + # Updated timestamp: "Updated: " + if stripped.startswith("Updated:"): + updated = stripped[len("Updated:"):].strip() + continue + + # Milestone line: "- [0] [completed] description" + m = _MILESTONE_RE.match(stripped) + if m: + idx_str, status, desc = m.groups() + milestone: dict[str, Any] = { + "index": int(idx_str), + "status": status, + "description": desc, + } + + # Parse optional thread ID: (thread ) + thread_match = re.search(r"\(thread\s+(\S+)\)", desc) + if thread_match: + milestone["thread_id"] = thread_match.group(1) + + # Parse optional completed_at: [completed_at ] + completed_match = re.search( + r"\[completed_at\s+([^\]]+)\]", desc, + ) + if completed_match: + milestone["completed_at"] = completed_match.group(1) + + # Parse optional note after em-dash + if " \u2014 " in desc: + # Strip metadata suffixes before extracting note + note_part = desc.split(" \u2014 ", 1)[1] + # Remove inline metadata from note + note_part = re.sub( + r"\(thread\s+\S+\)", "", note_part, + ).strip() + note_part = re.sub( + r"\[completed_at\s+[^\]]+\]", "", note_part, + ).strip() + if note_part: + milestone["note"] = note_part + + milestones.append(milestone) + + return { + "exists": True, + "goal": goal, + "updated": updated, + "milestones": milestones, + } + + +def load_project_plan(data_dir: str) -> dict[str, Any]: + """Load and parse the project plan from disk. + + Returns ``{"exists": False}`` when the plan file does not exist or + cannot be read. 
+ """ + if not data_dir: + return {"exists": False} + + path = project_plan_path(data_dir) + if not path.is_file(): + return {"exists": False} + + try: + text = path.read_text(encoding="utf-8") + except OSError: + return {"exists": False} + + return parse_project_plan(text) + + +# --------------------------------------------------------------------------- +# Rendering — compact context for Queen injection +# --------------------------------------------------------------------------- + + +def render_for_queen(plan: dict[str, Any]) -> str: + """Render a parsed plan into compact text suitable for Queen context. + + Returns an empty string when the plan does not exist or has no + milestones. + """ + if not plan.get("exists"): + return "" + + milestones = plan.get("milestones", []) + goal = plan.get("goal", "") + + if not milestones and not goal: + return "" + + parts: list[str] = ["# Project Plan (cross-thread)"] + if goal: + parts.append(f"Goal: {goal}") + + for ms in milestones: + status = ms.get("status", "pending") + desc = ms.get("description", "") + # Strip inline metadata for compact rendering + desc = re.sub(r"\(thread\s+\S+\)", "", desc).strip() + desc = re.sub(r"\[completed_at\s+[^\]]+\]", "", desc).strip() + marker = "\u2713" if status == "completed" else "\u25cb" + parts.append(f" {marker} [{status}] {desc}") + + return "\n".join(parts) + + +# --------------------------------------------------------------------------- +# Mutation helpers +# --------------------------------------------------------------------------- + + +def _stamp_updated(lines: list[str]) -> list[str]: + """Insert or update the ``Updated:`` line after the title.""" + now_iso = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ") + updated_line = f"Updated: {now_iso}" + + for i, line in enumerate(lines): + if line.strip().startswith("Updated:"): + lines[i] = updated_line + return lines + + # Insert after title line (first non-empty line) + insert_at = 1 + for i, line in enumerate(lines): + if 
line.strip().startswith("# Project Plan:"): + insert_at = i + 1 + break + + lines.insert(insert_at, updated_line) + return lines + + +def add_milestone( + data_dir: str, + description: str, + *, + thread_id: str = "", + goal: str = "", +) -> dict[str, Any]: + """Add a milestone to the project plan, creating the file if needed. + + Returns the updated parsed plan. + """ + path = project_plan_path(data_dir) + path.parent.mkdir(parents=True, exist_ok=True) + + if path.is_file(): + text = path.read_text(encoding="utf-8") + lines = text.split("\n") + else: + title = goal or "Untitled project" + lines = [f"# Project Plan: {title}", "", "## Milestones"] + + # Update goal if provided and file is new or goal line is empty + if goal: + for i, line in enumerate(lines): + if line.strip().startswith("# Project Plan:"): + lines[i] = f"# Project Plan: {goal}" + break + + # Find or create ## Milestones section + milestones_idx = -1 + for i, line in enumerate(lines): + if line.strip() == "## Milestones": + milestones_idx = i + break + if milestones_idx == -1: + lines.append("") + lines.append("## Milestones") + milestones_idx = len(lines) - 1 + + # Count existing milestones to determine next index + next_idx = 0 + for line in lines[milestones_idx + 1:]: + m = _MILESTONE_RE.match(line.strip()) + if m: + next_idx = max(next_idx, int(m.group(1)) + 1) + + # Build milestone line + thread_suffix = f" (thread {thread_id})" if thread_id else "" + new_line = f"- [{next_idx}] [pending] {description}{thread_suffix}" + + # Insert after last milestone or after section header + insert_at = milestones_idx + 1 + for i in range(milestones_idx + 1, len(lines)): + if _MILESTONE_RE.match(lines[i].strip()): + insert_at = i + 1 + + lines.insert(insert_at, new_line) + lines = _stamp_updated(lines) + + path.write_text("\n".join(lines), encoding="utf-8") + return load_project_plan(data_dir) + + +def complete_milestone( + data_dir: str, + milestone_index: int, + *, + note: str = "", +) -> dict[str, Any]: + 
"""Mark a milestone as completed. + + Returns the updated parsed plan, or ``{"exists": False, "error": ...}`` + on failure. + """ + path = project_plan_path(data_dir) + if not path.is_file(): + return {"exists": False, "error": "No project plan file found."} + + text = path.read_text(encoding="utf-8") + lines = text.split("\n") + + found = False + now_iso = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ") + + for i, line in enumerate(lines): + m = _MILESTONE_RE.match(line.strip()) + if m and int(m.group(1)) == milestone_index: + desc = m.group(3) + # Strip old status metadata from description + desc = re.sub(r"\[completed_at\s+[^\]]+\]", "", desc).strip() + note_suffix = f" \u2014 {note}" if note else "" + lines[i] = ( + f"- [{milestone_index}] [completed] " + f"{desc}{note_suffix} " + f"[completed_at {now_iso}]" + ) + found = True + break + + if not found: + return { + "exists": True, + "error": f"Milestone {milestone_index} not found.", + } + + lines = _stamp_updated(lines) + path.write_text("\n".join(lines), encoding="utf-8") + return load_project_plan(data_dir) diff --git a/src/formicos/surface/projections.py b/src/formicos/surface/projections.py index 9611cd1..0a2da8d 100644 --- a/src/formicos/surface/projections.py +++ b/src/formicos/surface/projections.py @@ -7,6 +7,8 @@ from __future__ import annotations +import contextlib +import re as _re import time as _time from dataclasses import dataclass, field from typing import Any @@ -545,7 +547,7 @@ class WorkspaceProjection: threads: dict[str, ThreadProjection] = field(default_factory=dict) # Wave 43: workspace-level budget truth budget: BudgetSnapshot = field(default_factory=BudgetSnapshot) - budget_limit: float = 50.0 # workspace-level default; configurable + budget_limit: float = 5.0 # workspace-level default; configurable @dataclass @@ -919,9 +921,13 @@ def workspace_budget_utilization(self, workspace_id: str) -> float: def _on_workspace_created(store: ProjectionStore, event: FormicOSEvent) -> None: e: 
WorkspaceCreated = event # type: ignore[assignment] - store.workspaces[e.name] = WorkspaceProjection( - id=e.name, name=e.name, config=dict(e.config), - ) + cfg = dict(e.config) + ws_proj = WorkspaceProjection(id=e.name, name=e.name, config=cfg) + # Sync budget_limit from config if present + if "budget" in cfg: + with contextlib.suppress(ValueError, TypeError): + ws_proj.budget_limit = float(cfg["budget"]) + store.workspaces[e.name] = ws_proj def _on_thread_created(store: ProjectionStore, event: FormicOSEvent) -> None: @@ -1326,6 +1332,10 @@ def _on_workspace_config_changed(store: ProjectionStore, event: FormicOSEvent) - ws.config.pop(e.field, None) else: ws.config[e.field] = e.new_value + # Keep budget_limit in sync with config["budget"] + if e.field == "budget": + with contextlib.suppress(ValueError, TypeError): + ws.budget_limit = float(e.new_value) if e.new_value else 5.0 def _on_approval_requested(store: ProjectionStore, event: FormicOSEvent) -> None: @@ -1581,6 +1591,26 @@ def _on_knowledge_access_recorded(store: ProjectionStore, event: FormicOSEvent) usage["last_accessed"] = ts +def _append_provenance_item( + entry: dict[str, Any], + *, + event_type: str, + timestamp: str, + actor_id: str, + detail: str, + confidence_delta: float | None = None, +) -> None: + """Append a provenance chain item to a knowledge entry (Wave 67.5).""" + chain: list[dict[str, Any]] = entry.setdefault("provenance_chain", []) + chain.append({ + "event_type": event_type, + "timestamp": timestamp, + "actor_id": actor_id, + "detail": detail, + "confidence_delta": confidence_delta, + }) + + def _on_memory_entry_created(store: ProjectionStore, event: FormicOSEvent) -> None: e: MemoryEntryCreated = event # type: ignore[assignment] entry = e.entry @@ -1592,11 +1622,26 @@ def _on_memory_entry_created(store: ProjectionStore, event: FormicOSEvent) -> No # Wave 50: derive initial scope from thread_id presence if "scope" not in data: data["scope"] = "thread" if data.get("thread_id") else 
"workspace" + # Wave 67: hierarchy path from primary domain + domains = data.get("domains", []) + _primary = domains[0] if domains else "uncategorized" + _norm = _re.sub(r"[\s\-]+", "_", _primary.strip()).lower() + data["hierarchy_path"] = f"/{_norm}/" + data["parent_id"] = "" store.memory_entries[entry_id] = data + # Wave 67.5: seed provenance chain + source_colony = entry.get("source_colony_id", "") + ts = e.timestamp.isoformat() if hasattr(e.timestamp, "isoformat") else str(e.timestamp) + _append_provenance_item( + data, + event_type="MemoryEntryCreated", + timestamp=ts, + actor_id=source_colony, + detail=f"Created by colony {source_colony}" if source_colony else "Created", + ) # Wave 45.5: new entry may form a competing pair store._competing_pairs_dirty = True # Wave 35.5 C3: track entries extracted per colony for outcome projection - source_colony = entry.get("source_colony_id", "") if source_colony: colony = store.colonies.get(source_colony) if colony is not None: @@ -1669,6 +1714,18 @@ def _on_memory_confidence_updated( entry["confidence"] = e.new_confidence # Wave 32 A1: track update timestamp for gamma-decay entry["last_confidence_update"] = e.timestamp.isoformat() + # Wave 67.5: provenance chain + ess = e.old_alpha + e.old_beta + old_mean = e.old_alpha / ess if ess > 0 else 0.5 + conf_delta = round(e.new_confidence - old_mean, 4) + _append_provenance_item( + entry, + event_type="MemoryConfidenceUpdated", + timestamp=e.timestamp.isoformat(), + actor_id=e.colony_id or "", + detail=f"Confidence updated ({e.reason})", + confidence_delta=conf_delta, + ) # Wave 45.5: confidence change affects competing-pair resolution store._competing_pairs_dirty = True # Wave 35 C3: track peak alpha for mastery-restoration bonus @@ -1819,22 +1876,31 @@ def _on_crdt_register_assigned(store: ProjectionStore, event: FormicOSEvent) -> def _on_memory_entry_merged(store: ProjectionStore, event: FormicOSEvent) -> None: e: MemoryEntryMerged = event # type: ignore[assignment] + ts = 
e.timestamp.isoformat() if hasattr(e.timestamp, "isoformat") else str(e.timestamp) target = store.memory_entries.get(e.target_id) if target: target["content"] = e.merged_content target["domains"] = e.merged_domains target["merged_from"] = e.merged_from target["merge_count"] = target.get("merge_count", 0) + 1 + _append_provenance_item( + target, event_type="MemoryEntryMerged", timestamp=ts, + actor_id="", detail=f"Merged entry {e.source_id} into this entry", + ) source = store.memory_entries.get(e.source_id) if source: source["status"] = "rejected" source["rejection_reason"] = f"merged_into:{e.target_id}" + _append_provenance_item( + source, event_type="MemoryEntryMerged", timestamp=ts, + actor_id="", detail=f"Merged into entry {e.target_id}", + ) # Wave 45.5: merge changes entry state — may affect competing pairs store._competing_pairs_dirty = True def _on_memory_entry_refined(store: ProjectionStore, event: FormicOSEvent) -> None: - from formicos.core.events import MemoryEntryRefined + from formicos.core.events import MemoryEntryRefined # noqa: PLC0415 e: MemoryEntryRefined = event # type: ignore[assignment] entry = store.memory_entries.get(e.entry_id) if entry is None: @@ -1844,7 +1910,13 @@ def _on_memory_entry_refined(store: ProjectionStore, event: FormicOSEvent) -> No entry["title"] = e.new_title entry["refinement_count"] = entry.get("refinement_count", 0) + 1 ts = e.timestamp - entry["last_refined_at"] = ts.isoformat() if hasattr(ts, "isoformat") else str(ts) + ts_str = ts.isoformat() if hasattr(ts, "isoformat") else str(ts) + entry["last_refined_at"] = ts_str + _append_provenance_item( + entry, event_type="MemoryEntryRefined", timestamp=ts_str, + actor_id=e.source_colony_id or "", + detail=f"Refined via {e.refinement_source}", + ) def _on_parallel_plan_created(store: ProjectionStore, event: FormicOSEvent) -> None: @@ -1904,6 +1976,15 @@ def _on_knowledge_entry_operator_action( elif action == "reinstate": overlays.invalidated_entries.discard(e.entry_id) + # 
Wave 67.5: provenance chain + entry = store.memory_entries.get(e.entry_id) + if entry is not None: + ts = e.timestamp.isoformat() if hasattr(e.timestamp, "isoformat") else str(e.timestamp) + _append_provenance_item( + entry, event_type="KnowledgeEntryOperatorAction", timestamp=ts, + actor_id=e.actor, detail=f"Operator action: {e.action}", + ) + def _on_knowledge_entry_annotated( store: ProjectionStore, event: FormicOSEvent, @@ -1919,6 +2000,16 @@ def _on_knowledge_entry_annotated( store.operator_overlays.annotations[e.entry_id] = [] store.operator_overlays.annotations[e.entry_id].append(annotation) + # Wave 67.5: provenance chain + entry = store.memory_entries.get(e.entry_id) + if entry is not None: + tag_note = f" [{e.tag}]" if e.tag else "" + _append_provenance_item( + entry, event_type="KnowledgeEntryAnnotated", + timestamp=str(e.timestamp.isoformat()), + actor_id=e.actor, detail=f"Annotation added{tag_note}", + ) + def _on_config_suggestion_overridden( store: ProjectionStore, event: FormicOSEvent, diff --git a/src/formicos/surface/queen_budget.py b/src/formicos/surface/queen_budget.py new file mode 100644 index 0000000..3a2f8f3 --- /dev/null +++ b/src/formicos/surface/queen_budget.py @@ -0,0 +1,112 @@ +"""Dynamic Queen context budget (ADR-051). + +Proportional slot budgeting derived from the model's context window, with +current defaults as the floor. Every slot uses ``max(fallback, proportional)`` +so proportional scaling may grow budgets but never shrinks them below today's +effective defaults. + +Wave 71.0: expanded from 7 to 9 slots — added ``operating_procedures`` and +``queen_journal`` for durable operational memory. +""" + +from __future__ import annotations + +from dataclasses import dataclass + +import structlog + +log = structlog.get_logger() + +# Approximate chars-per-token ratio for budget estimation. 
+_CHARS_PER_TOKEN = 4 + +# --------------------------------------------------------------------------- +# Slot fractions (sum to 1.0) and fallback floors +# --------------------------------------------------------------------------- + +_FRACTIONS = { + "system_prompt": 0.15, + "memory_retrieval": 0.13, + "project_context": 0.08, + "project_plan": 0.05, + "operating_procedures": 0.05, + "queen_journal": 0.04, + "thread_context": 0.13, + "tool_memory": 0.09, + "conversation_history": 0.28, +} + +_FALLBACKS = { + "system_prompt": 2000, + "memory_retrieval": 1500, + "project_context": 500, + "project_plan": 400, + "operating_procedures": 400, + "queen_journal": 300, + "thread_context": 1500, + "tool_memory": 4000, + "conversation_history": 6000, +} + + +@dataclass(frozen=True) +class QueenContextBudget: + """Token budget for each Queen context slot.""" + + system_prompt: int + memory_retrieval: int + project_context: int + project_plan: int + operating_procedures: int + queen_journal: int + thread_context: int + tool_memory: int + conversation_history: int + + +# Singleton fallback budget matching current hardcoded defaults. +FALLBACK_BUDGET = QueenContextBudget(**_FALLBACKS) + + +def compute_queen_budget( + context_window: int | None, + output_reserve: int, +) -> QueenContextBudget: + """Compute proportional token budgets from the model's context window. + + Returns the fallback budget unchanged when *context_window* is missing, + invalid, or too small to produce any proportional gain. 
+ """ + if context_window is None or context_window <= 0: + return FALLBACK_BUDGET + + available = max(0, context_window - output_reserve) + if available <= 0: + return FALLBACK_BUDGET + + slots = { + name: max(_FALLBACKS[name], int(available * frac)) + for name, frac in _FRACTIONS.items() + } + + budget = QueenContextBudget(**slots) + + log.debug( + "queen_budget.computed", + context_window=context_window, + output_reserve=output_reserve, + available=available, + slots={ + "system_prompt": budget.system_prompt, + "memory_retrieval": budget.memory_retrieval, + "project_context": budget.project_context, + "project_plan": budget.project_plan, + "operating_procedures": budget.operating_procedures, + "queen_journal": budget.queen_journal, + "thread_context": budget.thread_context, + "tool_memory": budget.tool_memory, + "conversation_history": budget.conversation_history, + }, + ) + + return budget diff --git a/src/formicos/surface/queen_runtime.py b/src/formicos/surface/queen_runtime.py index a27451d..7c49ba1 100644 --- a/src/formicos/surface/queen_runtime.py +++ b/src/formicos/surface/queen_runtime.py @@ -8,6 +8,7 @@ from __future__ import annotations import asyncio +import json from dataclasses import dataclass, field from datetime import UTC, datetime, timedelta from pathlib import Path @@ -35,6 +36,11 @@ generate_config_recommendations, generate_evaporation_recommendations, ) +from formicos.surface.queen_budget import ( + FALLBACK_BUDGET, + QueenContextBudget, + compute_queen_budget, +) from formicos.surface.queen_shared import ( PendingConfigProposal, _is_experimentable, # pyright: ignore[reportPrivateUsage] @@ -167,13 +173,15 @@ def _is_pinned(msg: Any) -> bool: # noqa: ANN401 def _compact_thread_history( queen_messages: list[Any], + token_budget: int = _THREAD_TOKEN_BUDGET, + recent_window: int = _RECENT_WINDOW, ) -> list[dict[str, str]]: """Build LLM message entries from Queen thread history with compaction. 
- When total estimated tokens exceed ``_THREAD_TOKEN_BUDGET``, older - messages are collapsed into one deterministic ``Earlier conversation:`` - block. Recent messages and pinned items (asks, active previews) are - always kept raw. + When total estimated tokens exceed *token_budget*, older messages are + collapsed into one deterministic ``Earlier conversation:`` block. + Recent messages and pinned items (asks, active previews) are always + kept raw. Returns a list of ``{role, content}`` dicts ready for the LLM prompt. """ @@ -183,7 +191,7 @@ def _compact_thread_history( total_tokens = sum( _estimate_tokens(m.content) for m in queen_messages ) - if total_tokens <= _THREAD_TOKEN_BUDGET or total <= _RECENT_WINDOW: + if total_tokens <= token_budget or total <= recent_window: return [ { "role": "user" if m.role == "operator" else "assistant", @@ -193,7 +201,7 @@ def _compact_thread_history( ] # Split into older (compactable) and recent (kept raw). - split_idx = max(0, total - _RECENT_WINDOW) + split_idx = max(0, total - recent_window) older = queen_messages[:split_idx] recent = queen_messages[split_idx:] @@ -277,6 +285,11 @@ def __init__(self, runtime: Runtime) -> None: # Wave 63 Track 2: parallel plan aggregation tracker # Maps plan_id -> {colony_id: result_meta | None} self._pending_parallel: dict[str, dict[str, dict[str, Any] | None]] = {} + # Wave 74 Track 4: session-scoped tool call counters + self._tool_call_counts: dict[str, int] = {} + self._tool_last_status: dict[str, str] = {} + # Wave 74 Track 5c: per-workspace initial board population + self._board_populated_workspaces: set[str] = set() async def name_colony( self, @@ -481,6 +494,16 @@ async def follow_up_colony( # Wave 63 Track 2: include failure reason in meta if colony_failed and failure_reason: result_meta["failureReason"] = failure_reason + # Wave 69 Track 3: file change count for diff badge + _artifacts = getattr(colony, "artifacts", []) + if _artifacts: + _file_arts = [ + a for a in _artifacts + if 
a.get("artifact_type") in ("file", "code", "patch") + or a.get("mime_type", "").startswith("text/") + ] + if _file_arts: + result_meta["filesChanged"] = len(_file_arts) # Wave 63 Track 2: check parallel plan aggregation completed_plan_id = self._check_parallel_aggregation(colony_id, result_meta) @@ -744,6 +767,119 @@ async def _emit_queen_message( except Exception: log.exception("queen.emit_failed", workspace_id=workspace_id, thread_id=thread_id) + def emit_session_summary( + self, workspace_id: str, thread_id: str, + ) -> None: + """Write a session summary file for later startup injection. + + Content assembled deterministically from projections — no LLM call. + File written to .formicos/sessions/{thread_id}.md. + """ + thread = self._runtime.projections.get_thread( + workspace_id, thread_id, + ) + if thread is None: + return + + lines: list[str] = [ + f"# Session Summary: {thread.name}", + f"**Thread:** {thread_id}", + f"**Status:** {thread.status}", + "", + ] + + # Plan state (from plan file, if exists) + try: + _data_dir = self._runtime.settings.system.data_dir + if isinstance(_data_dir, str) and _data_dir: + _plan_path = ( + Path(_data_dir) / ".formicos" / "plans" + / f"{thread_id}.md" + ) + if _plan_path.is_file(): + _plan_text = _plan_path.read_text( + encoding="utf-8", + )[:1000] + lines.append("## Active Plan") + lines.append(_plan_text) + lines.append("") + except (OSError, TypeError, AttributeError): + pass + + # Colony outcomes this session + lines.append("## Colony Activity") + lines.append( + f"- {thread.completed_colony_count} completed, " + f"{thread.failed_colony_count} failed, " + f"{thread.colony_count} total" + ) + + # Workflow step status + if thread.workflow_steps: + completed = sum( + 1 for s in thread.workflow_steps + if s.get("status") == "completed" + ) + pending = sum( + 1 for s in thread.workflow_steps + if s.get("status") == "pending" + ) + lines.append( + f"- Workflow: {completed} steps completed," + f" {pending} pending" + ) + + # 
Last few Queen decisions (last 5 queen messages) + queen_msgs = [ + m for m in thread.queen_messages if m.role == "queen" + ] + if queen_msgs: + lines.append("") + lines.append("## Recent Queen Activity") + for msg in queen_msgs[-5:]: + content = msg.content[:200] if msg.content else "" + if content: + lines.append(f"- {content}") + + summary_text = "\n".join(lines) + + # Write to file + try: + _data_dir = self._runtime.settings.system.data_dir + if isinstance(_data_dir, str) and _data_dir: + _session_dir = ( + Path(_data_dir) / ".formicos" / "sessions" + ) + _session_dir.mkdir(parents=True, exist_ok=True) + _session_path = _session_dir / f"{thread_id}.md" + _session_path.write_text( + summary_text, encoding="utf-8", + ) + except (OSError, TypeError, AttributeError): + log.warning( + "session_summary.write_failed", + workspace_id=workspace_id, + thread_id=thread_id, + ) + + # Wave 71.0: append journal entry on session summary emission + try: + _data_dir = self._runtime.settings.system.data_dir + if isinstance(_data_dir, str) and _data_dir: + from formicos.surface.operational_state import ( # noqa: PLC0415 + append_journal_entry, + ) + _tc = thread.colony_count if thread else 0 + _dc = thread.completed_colony_count if thread else 0 + _name = thread.name if thread else thread_id + append_journal_entry( + _data_dir, workspace_id, "session", + f"Session saved for '{_name}': " + f"{_dc}/{_tc} colonies completed", + ) + except (OSError, TypeError, AttributeError): + pass + async def respond(self, workspace_id: str, thread_id: str) -> QueenResponse: """Generate a Queen response. May iterate tool calls up to _MAX_TOOL_ITERATIONS times. 
@@ -756,13 +892,42 @@ async def respond(self, workspace_id: str, thread_id: str) -> QueenResponse: await self._emit_queen_message(workspace_id, thread_id, msg) return QueenResponse(reply=msg) + # Wave 74 Track 5c: populate display board on first respond() per workspace + if workspace_id and workspace_id not in self._board_populated_workspaces: + try: + from formicos.surface.operational_state import post_sweep_observations # noqa: PLC0415 + from formicos.surface.operations_coordinator import build_operations_summary # noqa: PLC0415 + + _data_dir_str = self._runtime.settings.system.data_dir + _summary = build_operations_summary( + _data_dir_str, workspace_id, self._runtime.projections, + ) + post_sweep_observations( + _data_dir_str, workspace_id, _summary, self._runtime.projections, + ) + except Exception: # noqa: BLE001 + pass + self._board_populated_workspaces.add(workspace_id) + # Wave 63 Track 3: check for pending edit/delete confirmation pending_result = await self._apply_pending_action(thread, workspace_id, thread_id) if pending_result is not None: return pending_result queen_model = self._resolve_queen_model(workspace_id) - messages = self._build_messages(thread) + + # Wave 68 Track 3: compute dynamic context budget (ADR-051) + _output_reserve = self._queen_max_tokens(workspace_id) + _ctx_window: int | None = None + _model_addr = queen_model + if _model_addr: + for _rec in self._runtime.settings.models.registry: + if _rec.address == _model_addr: + _ctx_window = _rec.context_window + break + budget = compute_queen_budget(_ctx_window, _output_reserve) + + messages = self._build_messages(thread, budget=budget) tools = self._queen_tools() actions: list[dict[str, Any]] = [] response = None @@ -770,13 +935,16 @@ async def respond(self, workspace_id: str, thread_id: str) -> QueenResponse: # Wave 26 B3: deterministic pre-spawn memory retrieval last_operator_msg = "" + _consulted: list[dict[str, Any]] = [] for msg in reversed(thread.queen_messages): if msg.role == 
"operator": last_operator_msg = msg.content break if last_operator_msg: - memory_block = await self._runtime.retrieve_relevant_memory( - last_operator_msg, workspace_id, thread_id=thread_id, + memory_block, _memory_items = ( + await self._runtime.retrieve_relevant_memory( + last_operator_msg, workspace_id, thread_id=thread_id, + ) ) if memory_block: # Insert after system prompt(s) but before conversation history @@ -791,6 +959,15 @@ async def respond(self, workspace_id: str, thread_id: str) -> QueenResponse: "role": "system", "content": memory_block, }) + # Wave 69: capture consulted entries for message metadata + for _item in _memory_items[:5]: + _consulted.append({ + "id": _item.get("id", ""), + "title": str(_item.get("title", ""))[:80], + "confidence": round( + float(_item.get("confidence", 0.5)), 2, + ), + }) # Wave 63 Track 8: inject project context from workspace try: @@ -798,7 +975,9 @@ async def respond(self, workspace_id: str, thread_id: str) -> QueenResponse: if isinstance(_data_dir, str) and _data_dir: _pc_path = Path(_data_dir) / ".formicos" / "project_context.md" if _pc_path.is_file(): - _pc_text = _pc_path.read_text(encoding="utf-8")[:2000] + _pc_text = _pc_path.read_text( + encoding="utf-8", + )[:budget.project_context * 4] if _pc_text: _pc_insert = 0 for _pi, _pm in enumerate(messages): @@ -814,6 +993,118 @@ async def respond(self, workspace_id: str, thread_id: str) -> QueenResponse: except (AttributeError, TypeError, OSError): pass + # Wave 70.0 Track 6: inject project plan (cross-thread) + try: + _data_dir_pp = self._runtime.settings.system.data_dir + if isinstance(_data_dir_pp, str) and _data_dir_pp: + from formicos.surface.project_plan import ( # noqa: PLC0415 + load_project_plan, + render_for_queen, + ) + + _pp_data = load_project_plan(_data_dir_pp) + _pp_text = render_for_queen(_pp_data) + if _pp_text: + _pp_text = _pp_text[:budget.project_plan * 4] + _pp_insert = 0 + for _ppi, _ppm in enumerate(messages): + if _ppm.get("role") != "system": + 
_pp_insert = _ppi + break + else: + _pp_insert = len(messages) + messages.insert(_pp_insert, { + "role": "system", + "content": _pp_text, + }) + except (AttributeError, TypeError, OSError): + pass + + # Wave 71.0: inject operating procedures (budget-backed) + try: + _data_dir_op = self._runtime.settings.system.data_dir + if isinstance(_data_dir_op, str) and _data_dir_op: + from formicos.surface.operational_state import ( # noqa: PLC0415 + render_procedures_for_queen, + ) + _proc_text = render_procedures_for_queen( + _data_dir_op, workspace_id, + ) + if _proc_text: + _proc_text = _proc_text[ + :budget.operating_procedures * 4 + ] + _proc_insert = 0 + for _pri, _prm in enumerate(messages): + if _prm.get("role") != "system": + _proc_insert = _pri + break + else: + _proc_insert = len(messages) + messages.insert(_proc_insert, { + "role": "system", + "content": _proc_text, + }) + except (AttributeError, TypeError, OSError): + pass + + # Wave 71.0: inject recent journal tail (budget-backed) + try: + _data_dir_jn = self._runtime.settings.system.data_dir + if isinstance(_data_dir_jn, str) and _data_dir_jn: + from formicos.surface.operational_state import ( # noqa: PLC0415 + render_journal_for_queen, + ) + _jn_text = render_journal_for_queen( + _data_dir_jn, workspace_id, + ) + if _jn_text: + _jn_text = _jn_text[:budget.queen_journal * 4] + _jn_insert = 0 + for _jni, _jnm in enumerate(messages): + if _jnm.get("role") != "system": + _jn_insert = _jni + break + else: + _jn_insert = len(messages) + messages.insert(_jn_insert, { + "role": "system", + "content": _jn_text, + }) + except (AttributeError, TypeError, OSError): + pass + + # Wave 68: session continuity — inject prior session summary + # Wave 71.0: replaced hardcoded [:4000] with budget-backed cap + try: + _data_dir2 = self._runtime.settings.system.data_dir + if isinstance(_data_dir2, str) and _data_dir2: + _session_path = ( + Path(_data_dir2) / ".formicos" / "sessions" + / f"{thread_id}.md" + ) + if 
_session_path.is_file(): + _session_text = _session_path.read_text( + encoding="utf-8", + )[:budget.thread_context * 4] + if _session_text: + _ss_insert = 0 + for _si, _sm in enumerate(messages): + if _sm.get("role") != "system": + _ss_insert = _si + break + else: + _ss_insert = len(messages) + messages.insert(_ss_insert, { + "role": "system", + "content": ( + "# Prior Session Context\n" + f"{_session_text}" + ), + }) + except (OSError, TypeError, AttributeError): + pass + # Wave 29: inject thread workflow context thread_ctx = self._build_thread_context(thread_id, workspace_id) if thread_ctx: @@ -898,6 +1189,95 @@ async def respond(self, workspace_id: str, thread_id: str) -> QueenResponse: except Exception: log.debug("queen.briefing_injection_failed", workspace_id=workspace_id) + # Wave 71.0 Track 9: compact operational continuity cue + try: + _data_dir_ops = self._runtime.settings.system.data_dir + if isinstance(_data_dir_ops, str) and _data_dir_ops: + from formicos.surface.operations_coordinator import ( # noqa: PLC0415 + build_operations_summary, + render_continuity_block, + ) + + _ops_summary = build_operations_summary( + _data_dir_ops, workspace_id, + self._runtime.projections, + ) + _ops_text = render_continuity_block(_ops_summary) + if _ops_text: + # Cap at half the thread-context allocation + _ops_cap = budget.thread_context * 2 + _ops_text = _ops_text[:_ops_cap] + _ops_pos = 0 + for _oi, _om in enumerate(messages): + if _om.get("role") != "system": + _ops_pos = _oi + break + else: + _ops_pos = len(messages) + messages.insert(_ops_pos, { + "role": "system", + "content": _ops_text, + }) + except Exception: + log.debug("queen.ops_continuity_injection_failed", workspace_id=workspace_id) + + # Wave 72 Track 7: warm-start continuation cue + # On the first returning turn, surface pending continuation opportunities. + # Placed after session/ops context so the Queen sees it as a proposal. 
+ try: + _data_dir_cont = self._runtime.settings.system.data_dir + if isinstance(_data_dir_cont, str) and _data_dir_cont: + from formicos.surface.continuation import ( # noqa: PLC0415 + build_warm_start_cue, + ) + + _cont_cue = build_warm_start_cue( + _data_dir_cont, workspace_id, + self._runtime.projections, + max_candidates=3, + ) + if _cont_cue: + _cont_cap = budget.thread_context * 2 + _cont_cue = _cont_cue[:_cont_cap] + _cont_pos = 0 + for _ci, _cm in enumerate(messages): + if _cm.get("role") != "system": + _cont_pos = _ci + break + else: + _cont_pos = len(messages) + messages.insert(_cont_pos, { + "role": "system", + "content": _cont_cue, + }) + except Exception: + log.debug("queen.warm_start_cue_failed", workspace_id=workspace_id) + + # Wave 68 Track 4: deliberation frame injection (ADR-051) + if last_operator_msg and _DELIBERATION_RE.search( + last_operator_msg, + ): + _delib_frame = self._build_deliberation_frame( + workspace_id, thread_id, + ) + if _delib_frame: + _delib_cap = budget.thread_context * 4 + if len(_delib_frame) > _delib_cap: + _delib_frame = ( + _delib_frame[:_delib_cap] + "\n...(truncated)" + ) + _delib_pos = 0 + for _di, _dm in enumerate(messages): + if _dm.get("role") != "system": + _delib_pos = _di + break + else: + _delib_pos = len(messages) + messages.insert(_delib_pos, { + "role": "system", + "content": _delib_frame, + }) + # Wave 64 Track 4: heuristic cloud routing expansion # Extends Wave 62 propose_plan check with complexity heuristics, # @cloud tag, and auto-escalation on parse failure. 
@@ -948,7 +1328,7 @@ async def respond(self, workspace_id: str, thread_id: str) -> QueenResponse: for m in messages if m.get("role") == "system" ) - if _sys_tokens > 2000: + if _sys_tokens > budget.system_prompt: use_cloud = True if use_cloud: @@ -1152,6 +1532,12 @@ async def respond(self, workspace_id: str, thread_id: str) -> QueenResponse: msg_meta = {} msg_meta["tool_memory"] = tool_memory[:10] # cap entries + # Wave 69: consulted knowledge entries in meta + if _consulted: + if msg_meta is None: + msg_meta = {} + msg_meta["consulted_entries"] = _consulted + # Wave 64 Track 4 (D): routing indicator in meta if msg_meta is None: msg_meta = {} @@ -1195,8 +1581,232 @@ def _queen_max_tokens(self, workspace_id: str = "") -> int: return min(caste_max, rec.max_output_tokens) return caste_max - def _build_messages(self, thread: Any) -> list[dict[str, str]]: # noqa: ANN401 + def _build_deliberation_frame( + self, + workspace_id: str, + thread_id: str, + ) -> str: + """Assemble a deterministic deliberation frame from projections. + + No LLM calls. No network. Source-labeled sections so the Queen + can reason about exploratory operator messages with structured, + source-labeled evidence. 
+ """ + proj = self._runtime.projections + parts: list[str] = ["# Deliberation Context"] + + # -- Institutional Memory Coverage -- + ws_entries = [ + e for e in proj.memory_entries.values() + if e.get("workspace_id") == workspace_id + ] + if ws_entries: + domain_stats: dict[str, list[float]] = {} + for entry in ws_entries: + for dom in entry.get("domains", []): + alpha = entry.get("conf_alpha", 5.0) + beta = entry.get("conf_beta", 5.0) + denom = alpha + beta + mean = alpha / denom if denom > 0 else 0.5 + domain_stats.setdefault(dom, []).append(mean) + if domain_stats: + parts.append("\n## Institutional Memory Coverage") + for dom, confs in sorted( + domain_stats.items(), + key=lambda x: -len(x[1]), + )[:10]: + avg = sum(confs) / len(confs) + parts.append( + f"- {dom}: {len(confs)} entries, " + f"avg confidence {avg:.2f}" + ) + + # -- Recent Colony Outcomes -- + ws_outcomes = sorted( + ( + o for o in proj.colony_outcomes.values() + if o.workspace_id == workspace_id + ), + key=lambda o: o.colony_id, + reverse=True, + )[:5] + if ws_outcomes: + parts.append("\n## Recent Colony Outcomes") + for o in ws_outcomes: + marker = "ok" if o.succeeded else "FAIL" + parts.append( + f"- [{marker}] strategy={o.strategy} " + f"rounds={o.total_rounds} " + f"cost=${o.total_cost:.4f}" + ) + + # -- Addon Corpus Coverage -- + addon_parts: list[str] = [] + try: + from formicos.surface.addon_loader import ( # noqa: PLC0415 + AddonManifest, + ) + manifests: list[AddonManifest] = [] + _app_state = getattr( + getattr(self._runtime, "app", None), + "state", + None, + ) + if _app_state is not None: + manifests = ( + getattr(_app_state, "addon_manifests", []) + or [] + ) + except Exception: + manifests = [] + for m in manifests: + ck = getattr(m, "content_kinds", []) + pg = getattr(m, "path_globs", []) + st = getattr(m, "search_tool", "") + if ck or pg or st: + line = f"- {m.name}: content {', '.join(ck)}" + if pg: + line += f"; files {', '.join(pg)}" + if st: + line += f"; search via {st}" + 
addon_parts.append(line) + elif m.tools: + names = [t.name for t in m.tools] + addon_parts.append( + f"- {m.name}: " + f"{m.description or 'addon'}; " + f"tools: {', '.join(names)}" + ) + if addon_parts: + parts.append("\n## Addon Corpus Coverage") + parts.extend(addon_parts) + + # -- Wave 70.0: Bridge status (capability-based, no addon-name checks) -- + if _app_state is not None: + _regs: list[Any] = getattr(_app_state, "addon_registrations", []) or [] + for _reg in _regs: + _bhfn = (_reg.runtime_context or {}).get("get_bridge_health") + if callable(_bhfn): + try: + _bh = _bhfn() + parts.append( + f"\n## MCP Bridge: " + f"{_bh.get('connectedServers', 0)} connected, " + f"{_bh.get('unhealthyServers', 0)} unhealthy, " + f"{_bh.get('totalRemoteTools', 0)} remote tools" + ) + except Exception: # noqa: BLE001 + pass + break # Only one bridge expected + + # -- Thread Progress -- + thread = proj.get_thread(workspace_id, thread_id) + if thread is not None and thread.goal: + parts.append("\n## Thread Progress") + parts.append(f"Goal: {thread.goal}") + tc = thread.colony_count + dc = thread.completed_colony_count + fc = thread.failed_colony_count + if tc: + parts.append( + f"Colonies: {tc} total, " + f"{dc} completed, {fc} failed" + ) + + # -- Active Alerts -- + try: + b = generate_briefing(workspace_id, proj) + alerts = [ + i for i in b.insights + if i.severity in ("warning", "critical") + ][:3] + if alerts: + parts.append("\n## Active Alerts") + for a in alerts: + parts.append( + f"- [{a.severity.upper()}] " + f"{a.title}: {a.detail}" + ) + except Exception: + pass + + if len(parts) <= 1: + return "" + return "\n".join(parts) + + def _build_override_block(self, workspace_id: str) -> str: + """Build workspace behavioral override text for Queen context (Wave 74).""" + ws = self._runtime.projections.workspaces.get(workspace_id) + if not ws: + return "" + cfg = getattr(ws, "config", None) + if not isinstance(cfg, dict): + return "" + parts: list[str] = [] + + disabled = 
cfg.get("queen.disabled_tools", "") + if isinstance(disabled, str) and disabled: + try: + tools = json.loads(disabled) + except (json.JSONDecodeError, TypeError): + tools = [] + if isinstance(tools, list) and tools: + parts.append( + "DISABLED TOOLS (require operator confirmation): " + + ", ".join(str(t) for t in tools) + ) + + custom = cfg.get("queen.custom_rules", "") + if isinstance(custom, str) and custom: + try: + rules = json.loads(custom) + except (json.JSONDecodeError, TypeError): + rules = custom + if rules: + parts.append(f"OPERATOR RULES:\n{rules}") + + team_comp = cfg.get("queen.team_composition", "") + if isinstance(team_comp, str) and team_comp: + try: + overrides = json.loads(team_comp) + except (json.JSONDecodeError, TypeError): + overrides = None + if isinstance(overrides, dict) and overrides: + lines = ["TEAM COMPOSITION OVERRIDES:"] + for task_type, composition in overrides.items(): + lines.append(f" {task_type}: {composition}") + parts.append("\n".join(lines)) + + round_budget = cfg.get("queen.round_budget", "") + if isinstance(round_budget, str) and round_budget: + try: + rb_overrides = json.loads(round_budget) + except (json.JSONDecodeError, TypeError): + rb_overrides = None + if isinstance(rb_overrides, dict) and rb_overrides: + lines = ["ROUND / BUDGET OVERRIDES:"] + for complexity, limits in rb_overrides.items(): + if isinstance(limits, dict): + rounds = limits.get("rounds") + budget = limits.get("budget") + lines.append( + f" {complexity}: rounds={rounds}," + f" budget={budget}" + ) + parts.append("\n".join(lines)) + + if not parts: + return "" + return "# Workspace Behavioral Overrides\n\n" + "\n\n".join(parts) + + def _build_messages( + self, + thread: Any, # noqa: ANN401 + budget: QueenContextBudget | None = None, + ) -> list[dict[str, str]]: """Build LLM message list from thread's Queen conversation history.""" + if budget is None: + budget = FALLBACK_BUDGET messages: list[dict[str, str]] = [] # System prompt from Queen caste recipe @@ 
-1208,8 +1818,23 @@ def _build_messages(self, thread: Any) -> list[dict[str, str]]: # noqa: ANN401 recipe = self._runtime.castes.castes.get("queen") if recipe: system_prompt = recipe.system_prompt + # Wave 74 Track 6: self-assembling tool inventory from tool_specs() + if "{TOOL_INVENTORY}" in system_prompt: + all_specs = self._tool_dispatcher.tool_specs() + tool_names = [s["name"] for s in all_specs] + tool_section = f"## Tools ({len(tool_names)})\n{', '.join(sorted(tool_names))}" + system_prompt = system_prompt.replace("{TOOL_INVENTORY}", tool_section) messages.append({"role": "system", "content": system_prompt}) + # Wave 74: Inject workspace behavioral overrides after base system prompt + workspace_id_early = ( + thread.workspace_id if hasattr(thread, "workspace_id") else "" + ) + if workspace_id_early: + override_block = self._build_override_block(workspace_id_early) + if override_block: + messages.append({"role": "system", "content": override_block}) + # Inject latest Queen notes (Wave 21 Track A, thread-scoped Wave 22 Track B) workspace_id = thread.workspace_id if hasattr(thread, "workspace_id") else "" thread_id = ( @@ -1255,10 +1880,10 @@ def _build_messages(self, thread: Any) -> list[dict[str, str]]: # noqa: ANN401 f"[{entry.get('tool', '?')}] {entry.get('summary', '')[:300]}" ) if tool_mem_lines: - # Cap total to ~1500 tokens (~6000 chars) + _tool_mem_cap = budget.tool_memory * 4 joined = "\n".join(tool_mem_lines) - if len(joined) > 6000: - joined = joined[:6000] + "\n...(truncated)" + if len(joined) > _tool_mem_cap: + joined = joined[:_tool_mem_cap] + "\n...(truncated)" messages.append({ "role": "system", "content": ( @@ -1272,7 +1897,11 @@ def _build_messages(self, thread: Any) -> list[dict[str, str]]: # noqa: ANN401 # Wave 49: conversation history with deterministic compaction. # Token-aware — compacts older messages when thread exceeds budget # while preserving recent window, unresolved asks, and active previews. 
- compacted = _compact_thread_history(thread.queen_messages) + compacted = _compact_thread_history( + thread.queen_messages, + token_budget=budget.conversation_history, + recent_window=max(5, budget.conversation_history // 600), + ) for entry in compacted: messages.append(entry) @@ -1357,6 +1986,17 @@ def _build_thread_context(self, thread_id: str, workspace_id: str) -> str: lines.append(f"Goal: {thread.goal}") lines.append(f"Status: {thread.status}") + # Wave 68 Track 6: inject workspace taxonomy tags + _tag_raw = ws.config.get("taxonomy_tags") + if _tag_raw: + import json as _json # noqa: PLC0415 + try: + _tags = _json.loads(str(_tag_raw)) if isinstance(_tag_raw, str) else _tag_raw + if isinstance(_tags, list) and _tags: + lines.append(f"Tags: {', '.join(str(t) for t in _tags)}") + except (ValueError, TypeError): + pass + if thread.expected_outputs: parts: list[str] = [] for out_type in thread.expected_outputs: @@ -1410,6 +2050,30 @@ def _build_thread_context(self, thread_id: str, workspace_id: str) -> str: col_info = f" (colony {col[:8]})" if col else "" lines.append(f" [{idx}] [{status}] {desc}{col_info}") + # Wave 68: inject plan file for persistent attention + try: + _data_dir = self._runtime.settings.system.data_dir + if isinstance(_data_dir, str) and _data_dir: + _plan_path = ( + Path(_data_dir) / ".formicos" / "plans" + / f"{thread_id}.md" + ) + if _plan_path.is_file(): + _plan_text = _plan_path.read_text( + encoding="utf-8", + )[:2000] + if _plan_text: + lines.append(f"\n{_plan_text}") + except (OSError, TypeError, AttributeError): + pass + + # Wave 68 Track 6: gentle nudge for tagless new workspaces + if not _tag_raw and len(ws.threads) < 3: + lines.append( + "(Hint: use set_workspace_tags to add taxonomy hints " + "for better routing.)" + ) + return "\n".join(lines) def _queen_tools(self) -> list[dict[str, Any]]: @@ -1441,6 +2105,15 @@ async def _execute_tool( inputs, workspace_id, thread_id, ) + # Wave 74 Track 4: instrument tool call counter + 
tool_name = tc.get("name", "") + if tool_name: + self._tool_call_counts[tool_name] = self._tool_call_counts.get(tool_name, 0) + 1 + status = "ok" + if result[1] is None and "failed" in result[0].lower(): + status = "error" + self._tool_last_status[tool_name] = status + return result async def on_governance_alert( diff --git a/src/formicos/surface/queen_tools.py b/src/formicos/surface/queen_tools.py index 6e0dc25..33c9d4f 100644 --- a/src/formicos/surface/queen_tools.py +++ b/src/formicos/surface/queen_tools.py @@ -184,7 +184,7 @@ def __init__( "set_thread_goal": self._handle_set_thread_goal, "complete_thread": self._handle_complete_thread, "query_service": self._handle_query_service, - "propose_plan": lambda i, w, t: self._propose_plan(i, w), + "propose_plan": lambda i, w, t: self._propose_plan(i, w, t), "query_outcomes": lambda i, w, t: self._query_outcomes(i, w), "analyze_colony": lambda i, w, t: self._analyze_colony(i), "query_briefing": lambda i, w, t: self._query_briefing(i, w), @@ -194,6 +194,8 @@ def __init__( "edit_file": lambda i, w, t: self._edit_file(i, w), "run_tests": lambda i, w, t: self._run_tests(i, w), "delete_file": lambda i, w, t: self._delete_file(i, w), + # Wave 68: plan step tracking + "mark_plan_step": self._mark_plan_step, # Wave 64 Track 3: retry failed colony "retry_colony": self._retry_colony, # Wave 65 Track 5: autonomous agency tools @@ -203,6 +205,15 @@ def __init__( "list_addons": lambda i, w, t: self._list_addons(), # Wave 65 Track 4: manual addon trigger "trigger_addon": lambda i, w, t: self._trigger_addon(i), + # Wave 68 Track 6: workspace taxonomy + "set_workspace_tags": self._set_workspace_tags, + # Wave 70.0 Track 5: project-level milestone tools + "propose_project_milestone": self._propose_project_milestone, + "complete_project_milestone": self._complete_project_milestone, + # Wave 70.0 Track 7: autonomy budget visibility + "check_autonomy_budget": self._check_autonomy_budget, + # Wave 74 Track 3: display board posting + 
"post_observation": self._post_observation, } def _find_colony(self, colony_id: str) -> Any: @@ -897,6 +908,54 @@ def tool_specs(self) -> list[dict[str, Any]]: "required": ["summary"], }, }, + # Wave 68: plan step tracking + { + "name": "mark_plan_step", + "description": ( + "Update a plan step's status. Call after spawning " + "a colony for a plan step or when a step " + "completes/blocks." + ), + "parameters": { + "type": "object", + "properties": { + "step_index": { + "type": "integer", + "description": ( + "Zero-based step index in the plan" + ), + }, + "status": { + "type": "string", + "enum": [ + "pending", "started", + "completed", "blocked", + ], + "description": "New status for this step", + }, + "description": { + "type": "string", + "description": ( + "Step description (required when " + "adding a new step)" + ), + }, + "colony_id": { + "type": "string", + "description": ( + "Colony executing this step (optional)" + ), + }, + "note": { + "type": "string", + "description": ( + "Brief status note (optional)" + ), + }, + }, + "required": ["step_index", "status"], + }, + }, # Wave 61 Track 3: analytical tools { "name": "query_outcomes", @@ -1332,6 +1391,134 @@ def tool_specs(self) -> list[dict[str, Any]]: "required": ["addon_name", "handler"], }, }, + # Wave 68 Track 6: workspace taxonomy + { + "name": "set_workspace_tags", + "description": ( + "Set soft taxonomy tags on the current workspace. " + "Tags are free-form hints (e.g., 'python', 'auth', " + "'web-api') that help route queries to the right " + "sources." + ), + "parameters": { + "type": "object", + "properties": { + "tags": { + "type": "array", + "items": {"type": "string"}, + "description": ( + "List of tags (max 20, each max 50 chars)." + ), + }, + }, + "required": ["tags"], + }, + }, + # Wave 70.0 Track 5: project-level milestone tools + { + "name": "propose_project_milestone", + "description": ( + "Add a milestone to the project-wide plan. Creates the " + "plan file if it doesn't exist. 
Milestones span threads " + "and give the Queen cross-thread planning context." + ), + "parameters": { + "type": "object", + "properties": { + "description": { + "type": "string", + "description": ( + "What this milestone achieves." + ), + }, + "goal": { + "type": "string", + "description": ( + "Overall project goal (used only when " + "creating a new plan)." + ), + }, + }, + "required": ["description"], + }, + }, + { + "name": "complete_project_milestone", + "description": ( + "Mark a project milestone as completed. " + "Use the milestone index from the project plan." + ), + "parameters": { + "type": "object", + "properties": { + "milestone_index": { + "type": "integer", + "description": "Index of the milestone to complete.", + }, + "note": { + "type": "string", + "description": "Optional completion note.", + }, + }, + "required": ["milestone_index"], + }, + }, + # Wave 70.0 Track 7: autonomy budget visibility + { + "name": "check_autonomy_budget", + "description": ( + "Check daily autonomy budget status: remaining budget, " + "active maintenance colonies, autonomy level, and trust " + "score. Optionally estimate blast radius for a proposed task." + ), + "parameters": { + "type": "object", + "properties": { + "task": { + "type": "string", + "description": ( + "Optional task description to estimate blast radius." + ), + }, + }, + }, + }, + # Wave 74 Track 3: display board posting + { + "name": "post_observation", + "description": ( + "Post a structured observation to the display board. Use for " + "status updates, flagged concerns, notable findings. The operator " + "sees these when they open the Queen tab." + ), + "parameters": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["observation", "status", "concern", "metric"], + "description": "Kind of observation.", + }, + "priority": { + "type": "string", + "enum": ["normal", "attention", "urgent"], + "description": ( + "Display priority. 
Use 'urgent' sparingly — only " + "for items requiring immediate operator action." + ), + }, + "title": { + "type": "string", + "description": "Short heading (under 80 chars).", + }, + "content": { + "type": "string", + "description": "Body text with details. Keep under 200 chars.", + }, + }, + "required": ["type", "title", "content"], + }, + }, # Wave 64 Track 6a: addon-registered tools appended dynamically *self._addon_tool_specs, ] @@ -3057,6 +3244,7 @@ def _propose_plan( self, inputs: dict[str, Any], workspace_id: str, + thread_id: str = "", ) -> tuple[str, dict[str, Any] | None]: """Handle propose_plan tool — present a plan for operator review.""" summary = inputs.get("summary", "") @@ -3173,8 +3361,391 @@ def _propose_plan( "render": "proposal_card", "proposal": proposal, } + + # Wave 70.0 Track 8: attach blast-radius and autonomy truth + try: + from formicos.surface.self_maintenance import ( # noqa: PLC0415 + compute_autonomy_score, + estimate_blast_radius, + ) + + _br = estimate_blast_radius( + task=summary, + workspace_id=workspace_id, + projections=self._runtime.projections, + ) + action["blast_radius"] = { + "score": _br.score, + "level": _br.level, + "factors": _br.factors, + "recommendation": _br.recommendation, + } + _as = compute_autonomy_score(workspace_id, self._runtime.projections) + action["autonomy_score"] = { + "score": _as.score, + "grade": _as.grade, + "components": _as.components, + "recommendation": _as.recommendation, + } + except Exception: # noqa: BLE001 + pass # best-effort metadata, not critical + + # Wave 68: persist plan to file for attention injection + try: + _data_dir = self._runtime.settings.system.data_dir + if isinstance(_data_dir, str) and _data_dir and thread_id: + _plan_dir = Path(_data_dir) / ".formicos" / "plans" + _plan_dir.mkdir(parents=True, exist_ok=True) + _plan_path = _plan_dir / f"{thread_id}.md" + _plan_lines = [f"# Plan: {summary[:200]}", ""] + if recommendation: + _plan_lines.append(f"**Approach:** 
{recommendation}") + _plan_lines.append("") + if enriched_options: + _plan_lines.append("## Options") + for _i, _opt in enumerate(enriched_options, 1): + _label = _opt.get("label", f"Option {_i}") + _desc = _opt.get("description", "") + _plan_lines.append(f"{_i}. **{_label}:** {_desc}") + _plan_lines.append("") + _plan_lines.append("## Steps") + _plan_lines.append( + "*(No steps defined yet." + " Use mark_plan_step to add.)*" + ) + _plan_path.write_text( + "\n".join(_plan_lines), encoding="utf-8", + ) + except (OSError, TypeError): + pass # plan file is best-effort, not critical path + return (result_text, action) + # ------------------------------------------------------------------ # + # Wave 68: plan step tracking # + # ------------------------------------------------------------------ # + + _STEP_RE = _re.compile( + r"^- \[(\d+)\] \[(\w+)\] (.*)$", + ) + + def _mark_plan_step( + self, + inputs: dict[str, Any], + workspace_id: str, + thread_id: str, + ) -> tuple[str, None]: + """Update or append a step in the thread's plan file.""" + step_index: int = inputs.get("step_index", 0) + status: str = inputs.get("status", "pending") + description: str = inputs.get("description", "") + colony_id: str = inputs.get("colony_id", "") + note: str = inputs.get("note", "") + + try: + _data_dir = self._runtime.settings.system.data_dir + if not isinstance(_data_dir, str) or not _data_dir: + return ("No data directory configured.", None) + _plan_path = ( + Path(_data_dir) + / ".formicos" + / "plans" + / f"{thread_id}.md" + ) + if not _plan_path.is_file(): + return ( + f"No plan file for thread {thread_id}. 
" + "Use propose_plan first.", + None, + ) + + text = _plan_path.read_text(encoding="utf-8") + lines = text.split("\n") + + # Find the ## Steps section + steps_idx = -1 + for li, line in enumerate(lines): + if line.strip() == "## Steps": + steps_idx = li + break + if steps_idx == -1: + lines.append("## Steps") + steps_idx = len(lines) - 1 + + # Parse existing steps + steps: list[dict[str, Any]] = [] + step_line_indices: list[int] = [] + for li in range(steps_idx + 1, len(lines)): + m = self._STEP_RE.match(lines[li]) + if m: + steps.append({ + "index": int(m.group(1)), + "status": m.group(2), + "text": m.group(3), + }) + step_line_indices.append(li) + + # Build step line + col_suffix = ( + f" (colony {colony_id[:8]})" if colony_id else "" + ) + note_suffix = f" — {note}" if note else "" + desc_text = description or ( + steps[step_index]["text"].split(" — ")[0].split( + " (colony", + )[0] + if step_index < len(steps) + else f"Step {step_index}" + ) + new_line = ( + f"- [{step_index}] [{status}]" + f" {desc_text}{col_suffix}{note_suffix}" + ) + + if step_index < len(steps): + # Update existing step + lines[step_line_indices[step_index]] = new_line + else: + # Append new step — remove placeholder if present + insert_at = ( + step_line_indices[-1] + 1 + if step_line_indices + else steps_idx + 1 + ) + # Remove the "no steps" placeholder + for li in range(steps_idx + 1, len(lines)): + if "No steps defined yet" in lines[li]: + lines.pop(li) + insert_at = ( + min(insert_at, li) + if step_line_indices + else li + ) + break + lines.insert(insert_at, new_line) + + _plan_path.write_text( + "\n".join(lines), encoding="utf-8", + ) + return ( + f"Step [{step_index}] marked as [{status}].", + None, + ) + except (OSError, TypeError) as exc: + return (f"Failed to update plan: {exc}", None) + + # ------------------------------------------------------------------ # + # Wave 70.0 Track 5: project-level milestone tools # + # 
------------------------------------------------------------------ # + + def _propose_project_milestone( + self, + inputs: dict[str, Any], + workspace_id: str, + thread_id: str, + ) -> tuple[str, None]: + """Add a milestone to the project-wide plan.""" + description = inputs.get("description", "").strip() + if not description: + return ("Error: description is required.", None) + + goal = inputs.get("goal", "").strip() + + _data_dir = self._runtime.settings.system.data_dir + if not isinstance(_data_dir, str) or not _data_dir: + return ("No data directory configured.", None) + + try: + from formicos.surface.project_plan import add_milestone # noqa: PLC0415 + + plan = add_milestone( + _data_dir, + description, + thread_id=thread_id, + goal=goal, + ) + count = len(plan.get("milestones", [])) + return ( + f"Milestone added to project plan ({count} total). " + f"Goal: {plan.get('goal', 'N/A')}", + None, + ) + except (OSError, TypeError) as exc: + return (f"Failed to add milestone: {exc}", None) + + def _complete_project_milestone( + self, + inputs: dict[str, Any], + workspace_id: str, + thread_id: str, + ) -> tuple[str, None]: + """Mark a project milestone as completed.""" + milestone_index = inputs.get("milestone_index") + if milestone_index is None: + return ("Error: milestone_index is required.", None) + note = inputs.get("note", "").strip() + + _data_dir = self._runtime.settings.system.data_dir + if not isinstance(_data_dir, str) or not _data_dir: + return ("No data directory configured.", None) + + try: + from formicos.surface.project_plan import complete_milestone # noqa: PLC0415 + + plan = complete_milestone( + _data_dir, + int(milestone_index), + note=note, + ) + if plan.get("error"): + return (f"Error: {plan['error']}", None) + return ( + f"Milestone [{milestone_index}] marked as completed.", + None, + ) + except (OSError, TypeError, ValueError) as exc: + return (f"Failed to complete milestone: {exc}", None) + + # 
------------------------------------------------------------------ # + # Wave 70.0 Track 7: autonomy budget visibility # + # ------------------------------------------------------------------ # + + def _check_autonomy_budget( + self, + inputs: dict[str, Any], + workspace_id: str, + thread_id: str, + ) -> tuple[str, dict[str, Any] | None]: + """Show the Queen her remaining daily budget and autonomy status.""" + import json as _json # noqa: PLC0415 + + from formicos.core.types import MaintenancePolicy # noqa: PLC0415 + from formicos.surface.self_maintenance import ( # noqa: PLC0415 + compute_autonomy_score, + estimate_blast_radius, + ) + + ws = self._runtime.projections.workspaces.get(workspace_id) + if ws is None: + return ("Workspace not found.", None) + + raw_policy = ws.config.get("maintenance_policy") + policy = MaintenancePolicy() + if raw_policy is not None: + try: + data = _json.loads(raw_policy) if isinstance(raw_policy, str) else raw_policy + policy = MaintenancePolicy(**data) + except Exception: # noqa: BLE001 + pass + + dispatcher = getattr(self._runtime, "maintenance_dispatcher", None) + daily_spend = 0.0 + active_maintenance = 0 + if dispatcher is not None: + dispatcher._reset_daily_budget_if_needed() # pyright: ignore[reportPrivateUsage] + daily_spend = dispatcher._daily_spend.get(workspace_id, 0.0) # pyright: ignore[reportPrivateUsage] + active_maintenance = dispatcher._count_active_maintenance_colonies( # pyright: ignore[reportPrivateUsage] + workspace_id, + ) + + budget_limit = policy.daily_maintenance_budget + remaining = max(0.0, budget_limit - daily_spend) + + budget = getattr(ws, "budget", None) + total_cost = budget.total_cost if budget else 0.0 + + lines = [ + "## Autonomy Budget Status", + "", + f"**Autonomy level:** {policy.autonomy_level}", + f"**Daily budget:** ${budget_limit:.2f}", + f"**Spent today:** ${daily_spend:.2f}", + f"**Remaining:** ${remaining:.2f}", + f"**Active maintenance colonies:** {active_maintenance}" + f" / 
{policy.max_maintenance_colonies} max", + "", + f"**Workspace total cost:** ${total_cost:.2f}", + ] + + if policy.auto_actions: + lines.append( + f"**Auto-dispatch categories:** {', '.join(policy.auto_actions)}" + ) + else: + lines.append("**Auto-dispatch categories:** none") + + if remaining <= 0: + lines.extend([ + "", + "Warning: Daily budget exhausted. No autonomous dispatch until " + "midnight UTC reset.", + ]) + elif remaining < budget_limit * 0.2: + lines.extend([ + "", + f"Warning: Budget running low ({remaining / budget_limit:.0%} remaining).", + ]) + + # Optional blast radius estimate + task_text = inputs.get("task", "") + if task_text: + estimate = estimate_blast_radius( + task=task_text, + workspace_id=workspace_id, + projections=self._runtime.projections, + ) + lines.extend([ + "", + "## Blast Radius Estimate", + f"**Score:** {estimate.score} ({estimate.level})", + f"**Recommendation:** {estimate.recommendation}", + ]) + for factor in estimate.factors: + lines.append(f" - {factor}") + + # Autonomy score + auto_score = compute_autonomy_score( + workspace_id, self._runtime.projections, + ) + lines.extend([ + "", + "## Autonomy Score", + f"**Score:** {auto_score.score}/100 (Grade: {auto_score.grade})", + f"**Recommendation:** {auto_score.recommendation}", + ]) + for component, value in auto_score.components.items(): + lines.append(f" - {component}: {value}") + + return ("\n".join(lines), None) + + # ------------------------------------------------------------------ # + # Wave 74 Track 3: display board posting # + # ------------------------------------------------------------------ # + + async def _post_observation( + self, + inputs: dict[str, Any], + workspace_id: str, + thread_id: str, + ) -> tuple[str, dict[str, Any] | None]: + """Post a structured observation to the display board.""" + from formicos.surface.operational_state import append_journal_entry # noqa: PLC0415 + + obs_type = inputs.get("type", "observation") + priority = inputs.get("priority", 
"normal") + title = inputs.get("title", "") + content = inputs.get("content", "") + + data_dir = self._runtime.settings.system.data_dir + append_journal_entry( + data_dir, workspace_id, + source="queen", + message=content, + heading=f"{obs_type}:{priority} — {title}", + metadata={"display_board": True, "type": obs_type, "priority": priority}, + ) + return (f"Posted {obs_type}: {title}", None) + # ------------------------------------------------------------------ # # Wave 62 Track 2: Queen direct work tools # # ------------------------------------------------------------------ # @@ -3755,57 +4326,101 @@ def _draft_document( {"tool": "draft_document", "path": rel_path, "mode": mode}, ) - def _list_addons(self) -> tuple[str, dict[str, Any] | None]: - """List installed addons with their tools and handlers.""" - # Addon tool specs carry the addon metadata - addon_tools = self._addon_tool_specs - # Handler registry shows all registered handlers (built-in + addon) - all_handlers = list(self._handlers.keys()) - - # Identify addon handlers (not in the built-in set) - addon_handlers = [ - h for h in all_handlers - if h not in { - "spawn_colony", "spawn_parallel", "kill_colony", - "get_status", "list_templates", "inspect_template", - "inspect_colony", "read_workspace_files", - "suggest_config_change", "approve_config_change", - "redirect_colony", "escalate_colony", - "read_colony_output", "memory_search", - "write_workspace_file", "queen_note", - "set_thread_goal", "complete_thread", - "query_service", "propose_plan", - "query_outcomes", "analyze_colony", - "query_briefing", "search_codebase", - "run_command", "edit_file", "run_tests", - "delete_file", "retry_colony", - "batch_command", "summarize_thread", - "draft_document", "list_addons", - "trigger_addon", - "archive_thread", "define_workflow_steps", - } - ] + async def _set_workspace_tags( + self, + inputs: dict[str, Any], + workspace_id: str, + thread_id: str, + ) -> tuple[str, dict[str, Any] | None]: + """Set soft 
taxonomy tags on a workspace.""" + import json as _json # noqa: PLC0415 - parts: list[str] = ["# Installed Addons"] - if addon_tools: - parts.append(f"\n## Addon Tools ({len(addon_tools)})") - for spec in addon_tools: - name = spec.get("name", "unknown") - desc = spec.get("description", "")[:100] - parts.append(f"- **{name}**: {desc}") - else: - parts.append("\nNo addon tools registered.") + from formicos.core.events import WorkspaceConfigChanged # noqa: PLC0415 - if addon_handlers: - parts.append(f"\n## Addon Handlers ({len(addon_handlers)})") - for h in addon_handlers: - parts.append(f"- {h}") + raw_tags = inputs.get("tags", []) + if not isinstance(raw_tags, list): + return ("Error: tags must be a list of strings.", None) - parts.append(f"\n## Built-in Tools ({len(all_handlers) - len(addon_handlers)})") - parts.append( - "Use tool_specs for the full list of built-in tools." + # Normalize: lowercase, strip, dedup, cap + tags: list[str] = [] + seen: set[str] = set() + for t in raw_tags: + if not isinstance(t, str): + continue + normalized = t.strip().lower()[:50] + if normalized and normalized not in seen: + tags.append(normalized) + seen.add(normalized) + if len(tags) >= 20: + break + + ws = self._runtime.projections.workspaces.get(workspace_id) + if ws is None: + return ("Error: workspace not found.", None) + + old_raw = ws.config.get("taxonomy_tags") + old_str = str(old_raw) if old_raw is not None else None + + await self._runtime.emit_and_broadcast(WorkspaceConfigChanged( + seq=0, + timestamp=_now(), + address=workspace_id, + workspace_id=workspace_id, + field="taxonomy_tags", + old_value=old_str, + new_value=_json.dumps(tags), + )) + + return ( + f"Workspace tags set: {', '.join(tags)}", + {"tool": "set_workspace_tags", "tags": tags}, ) + def _list_addons(self) -> tuple[str, dict[str, Any] | None]: + """List installed addons with capability metadata for Queen routing.""" + manifests: list[Any] = getattr(self, "_addon_manifests", []) or [] + + parts: list[str] 
= ["# Installed Addons"] + if not manifests: + parts.append("\nNo addons installed.") + return ("\n".join(parts), None) + + # Wave 70.0: capability-based bridge health (no addon-name branching) + _addon_ctx: dict[str, Any] = getattr( + self, "_addon_runtime_context", {}, + ) or {} + _bridge_health_fn = _addon_ctx.get("get_bridge_health") + + for m in manifests: + parts.append(f"\n**{m.name}**: {m.description}") + if m.content_kinds: + parts.append(f" Content: {', '.join(m.content_kinds)}") + if m.path_globs: + parts.append(f" Files: {', '.join(m.path_globs)}") + if m.search_tool: + parts.append(f" Search via: {m.search_tool}") + # Surface refresh/index path from manual triggers + for trigger in m.triggers: + if trigger.type == "manual": + parts.append(f" Index via: {trigger.handler}") + tool_names = [t.name for t in m.tools] + if tool_names: + parts.append(f" Tools: {', '.join(tool_names)}") + + # Capability-based: surface bridge health if available + if callable(_bridge_health_fn): + try: + bh = _bridge_health_fn() + parts.append( + f"\n## Bridge Status: " + f"{bh.get('connectedServers', 0)} connected, " + f"{bh.get('unhealthyServers', 0)} unhealthy, " + f"{bh.get('totalRemoteTools', 0)} remote tools" + ) + except Exception: # noqa: BLE001 + pass + + parts.append(f"\nTotal: {len(manifests)} addons") return ("\n".join(parts), None) async def _trigger_addon( diff --git a/src/formicos/surface/routes/api.py b/src/formicos/surface/routes/api.py index 867e780..383c724 100644 --- a/src/formicos/surface/routes/api.py +++ b/src/formicos/surface/routes/api.py @@ -2,6 +2,7 @@ from __future__ import annotations +import contextlib import json from datetime import UTC, datetime, timedelta, timezone from pathlib import Path @@ -238,6 +239,7 @@ async def update_model_policy(request: Request) -> JSONResponse: _EDITABLE = { "max_output_tokens", "time_multiplier", "tool_call_multiplier", + "hidden", } updates: dict[str, Any] = { k: v for k, v in body.items() if k in _EDITABLE @@ 
-492,6 +494,16 @@ async def get_workspace_outcomes(request: Request) -> JSONResponse: "outcomes": [asdict(o) for o, _ in recent], }) + # -- Wave 67: knowledge hierarchy tree -- + + async def get_knowledge_tree(request: Request) -> JSONResponse: + """Return knowledge entry hierarchy as a tree grouped by domain.""" + workspace_id = request.path_params["workspace_id"] + from formicos.surface.hierarchy import build_knowledge_tree # noqa: PLC0415 + + branches = build_knowledge_tree(runtime.projections, workspace_id) + return JSONResponse({"branches": branches}) + # -- Wave 38 2B: escalation outcome matrix -- async def get_escalation_matrix(request: Request) -> JSONResponse: @@ -639,6 +651,36 @@ async def create_demo_workspace(_request: Request) -> JSONResponse: "suggested_task": raw.get("suggested_demo_task", ""), }, status_code=201) + # -- Wave 73 B3: workspace creation -- + + async def create_workspace_endpoint(request: Request) -> JSONResponse: + """Create a new workspace.""" + try: + body = await request.json() + except Exception: # noqa: BLE001 + return _err_response("INVALID_JSON") + + name = body.get("name", "").strip() + if not name: + return _err_response( + "INVALID_PARAMETER", + message="Workspace name is required", + status_code=400, + ) + + if name in runtime.projections.workspaces: + return _err_response( + "INVALID_PARAMETER", + message=f"Workspace '{name}' already exists", + status_code=409, + ) + + workspace_id = await runtime.create_workspace(name) + return JSONResponse( + {"workspace_id": workspace_id, "name": name}, + status_code=201, + ) + # -- Wave 39 1A: colony audit view -- # -- Wave 48 1A: thread-scoped timeline -- @@ -982,6 +1024,109 @@ async def get_workspace_budget(request: Request) -> JSONResponse: "colonies": colony_list, }) + # --- Wave 74 B3: Queen context budget endpoint --- + + async def get_queen_budget(_request: Request) -> JSONResponse: + """Return the Queen's 9-slot context budget allocation.""" + from formicos.surface.queen_budget 
import ( # noqa: PLC0415 + _FALLBACKS, + _FRACTIONS, + ) + + slots = [ + {"name": name, "fraction": frac, "fallback_tokens": _FALLBACKS.get(name, 0)} + for name, frac in _FRACTIONS.items() + ] + return JSONResponse({"slots": slots}) + + # --- Wave 74 Track 4: Queen tool stats endpoint --- + + async def get_queen_tool_stats(request: Request) -> JSONResponse: + """Return session-scoped Queen tool call counts.""" + runtime = request.app.state.runtime + queen = runtime.queen + counts: dict[str, int] = getattr(queen, "_tool_call_counts", {}) + statuses: dict[str, str] = getattr(queen, "_tool_last_status", {}) + tools = [ + {"name": name, "calls": count, "last_status": statuses.get(name, "unknown")} + for name, count in sorted(counts.items(), key=lambda x: -x[1]) + ] + return JSONResponse({"tools": tools, "total_calls": sum(counts.values())}) + + # --- Wave 70.0 Track 9: autonomy status endpoint --- + + async def get_autonomy_status(request: Request) -> JSONResponse: + """Return structured autonomy trust data for a workspace.""" + import json as _json # noqa: PLC0415 + + from formicos.core.types import MaintenancePolicy # noqa: PLC0415 + from formicos.surface.self_maintenance import ( # noqa: PLC0415 + compute_autonomy_score, + ) + + workspace_id = request.path_params["workspace_id"] + ws = runtime.projections.workspaces.get(workspace_id) + if ws is None: + return _err_response("WORKSPACE_NOT_FOUND") + + raw_policy = ws.config.get("maintenance_policy") + policy = MaintenancePolicy() + if raw_policy is not None: + try: + data = _json.loads(raw_policy) if isinstance(raw_policy, str) else raw_policy + policy = MaintenancePolicy(**data) + except Exception: # noqa: BLE001 + pass + + dispatcher = getattr(runtime, "maintenance_dispatcher", None) + daily_spend = 0.0 + active_maintenance = 0 + if dispatcher is not None: + dispatcher._reset_daily_budget_if_needed() # pyright: ignore[reportPrivateUsage] + daily_spend = dispatcher._daily_spend.get(workspace_id, 0.0) # pyright: 
ignore[reportPrivateUsage] + active_maintenance = dispatcher._count_active_maintenance_colonies( # pyright: ignore[reportPrivateUsage] + workspace_id, + ) + + budget_limit = policy.daily_maintenance_budget + remaining = max(0.0, budget_limit - daily_spend) + + auto_score = compute_autonomy_score(workspace_id, runtime.projections) + + # Build recent actions from colony outcomes + recent_actions: list[dict[str, Any]] = [] + outcomes = sorted( + ( + o for o in runtime.projections.colony_outcomes.values() + if o.workspace_id == workspace_id + ), + key=lambda o: getattr(o, "colony_id", ""), + reverse=True, + ) + for o in outcomes[:10]: + recent_actions.append({ + "colony_id": o.colony_id, + "strategy": o.strategy, + "outcome": "completed" if o.succeeded else "failed", + "cost": round(o.total_cost, 4), + "quality_score": round(o.quality_score, 2), + }) + + return JSONResponse({ + "level": str(policy.autonomy_level), + "score": auto_score.score, + "grade": auto_score.grade, + "daily_budget": budget_limit, + "daily_spend": round(daily_spend, 4), + "remaining": round(remaining, 4), + "active_maintenance_colonies": active_maintenance, + "max_maintenance_colonies": policy.max_maintenance_colonies, + "auto_actions": policy.auto_actions, + "components": auto_score.components, + "recommendation": auto_score.recommendation, + "recent_actions": recent_actions, + }) + # --- Wave 64 Track 5: provider health endpoint --- async def get_provider_health(_request: Request) -> JSONResponse: @@ -1280,6 +1425,874 @@ async def delete_workflow_step(request: Request) -> JSONResponse: )) return JSONResponse({"step_index": step_index, "skipped": True}) + # -- Wave 66 T1: Addon endpoints -- + + async def list_addons(request: Request) -> JSONResponse: + """List installed addons with health summaries.""" + regs: list[Any] = getattr(request.app.state, "addon_registrations", []) + addons = [] + for reg in regs: + m = reg.manifest + addons.append({ + "name": m.name, + "version": m.version, + 
"description": m.description, + "status": reg.health_status, + "lastError": reg.last_error, + "tools": [ + { + "name": t.name, + "description": t.description, + "callCount": reg.tool_call_counts.get(t.name, 0), + } + for t in m.tools + ], + "handlers": [ + { + "event": h.event, + "lastFired": reg.last_handler_fire, + "errorCount": reg.handler_error_count, + } + for h in m.handlers + ], + "triggers": [ + { + "type": t.type, + "schedule": t.schedule, + "handler": t.handler, + "lastFired": reg.trigger_fire_times.get(t.handler), + } + for t in m.triggers + ], + "panels": reg.registered_panels, + "config": [ + { + "key": c.key, + "type": c.type, + "default": c.default, + "label": c.label or c.key, + "options": c.options, + } + for c in m.config + ], + }) + # Wave 70.0: capability-based bridge health (no addon-name check) + _bhfn = (reg.runtime_context or {}).get("get_bridge_health") + if callable(_bhfn): + with contextlib.suppress(Exception): + addons[-1]["bridgeHealth"] = _bhfn() + return JSONResponse(addons) + + async def trigger_addon(request: Request) -> JSONResponse: + """Manually fire an addon trigger handler.""" + from formicos.surface.addon_loader import _resolve_handler # noqa: PLC0415 + + addon_name = request.path_params["addon_name"] + try: + body = await request.json() + except Exception: + return _err_response("INVALID_JSON") + + handler_ref = body.get("handler", "") + if not handler_ref: + return _err_response( + "MISSING_FIELD", message="handler is required", + ) + + regs: list[Any] = getattr( + request.app.state, "addon_registrations", [], + ) + reg = next( + (r for r in regs if r.manifest.name == addon_name), None, + ) + if reg is None: + return _err_response( + "ADDON_NOT_FOUND", + message=f"Addon '{addon_name}' not installed", + status_code=404, + ) + + if getattr(reg, "disabled", False): + return JSONResponse( + {"error": f"Addon '{addon_name}' is currently disabled"}, + status_code=409, + ) + + try: + handler_fn = _resolve_handler(addon_name, 
handler_ref) + except (ValueError, AttributeError) as exc: + return JSONResponse( + {"error": str(exc)}, status_code=400, + ) + + import inspect # noqa: PLC0415 + try: + sig = inspect.signature(handler_fn) + accepts_ctx = "runtime_context" in sig.parameters + # Detect tool-convention handlers: (inputs, workspace_id, thread_id, *, ...) + positional_params = [ + p for p in sig.parameters.values() + if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) + and p.name not in ("self", "cls", "runtime_context") + ] + has_tool_args = len(positional_params) >= 3 + except (ValueError, TypeError): + accepts_ctx = False + has_tool_args = False + + inputs = body.get("inputs", {}) + trigger_ws_id = body.get("workspace_id", "") + trigger_thread_id = body.get("thread_id", "") + + try: + if has_tool_args and accepts_ctx: + result = await handler_fn( + inputs, trigger_ws_id, trigger_thread_id, + runtime_context=reg.runtime_context, + ) + elif has_tool_args: + result = await handler_fn( + inputs, trigger_ws_id, trigger_thread_id, + ) + elif accepts_ctx: + result = await handler_fn( + runtime_context=reg.runtime_context, + ) + else: + result = await handler_fn() + except Exception as exc: # noqa: BLE001 + return JSONResponse( + {"error": f"Trigger failed: {exc}"}, status_code=500, + ) + + # Record trigger fire time + from datetime import UTC, datetime # noqa: PLC0415 + reg.trigger_fire_times[handler_ref] = datetime.now(tz=UTC).isoformat() + + return JSONResponse({ + "addon": addon_name, + "handler": handler_ref, + "result": str(result) if result is not None else "ok", + }) + + # -- Wave 72.5 Track 1e: Soft addon disable toggle -- + + async def toggle_addon(request: Request) -> JSONResponse: + """Enable or disable an addon at runtime.""" + addon_name = request.path_params["addon_name"] + try: + body = await request.json() + except Exception: + return _err_response("INVALID_JSON") + + disabled = bool(body.get("disabled", False)) + + regs: list[Any] = getattr( + request.app.state, 
"addon_registrations", [], + ) + reg = next( + (r for r in regs if r.manifest.name == addon_name), None, + ) + if reg is None: + return _err_response( + "ADDON_NOT_FOUND", + message=f"Addon '{addon_name}' not installed", + status_code=404, + ) + + reg.disabled = disabled + return JSONResponse({ + "addon": addon_name, + "disabled": reg.disabled, + }) + + # -- Wave 66 T2: Addon config surface -- + + async def get_addon_config(request: Request) -> JSONResponse: + """Return addon config schema + current values for a workspace.""" + addon_name = request.path_params["addon_name"] + workspace_id = request.query_params.get("workspace_id", "") + + regs: list[Any] = getattr( + request.app.state, "addon_registrations", [], + ) + reg = next( + (r for r in regs if r.manifest.name == addon_name), None, + ) + if reg is None: + return _err_response( + "ADDON_NOT_FOUND", + message=f"Addon '{addon_name}' not installed", + status_code=404, + ) + + import json as _json # noqa: PLC0415 + + manifest = reg.manifest + params = [] + for cp in manifest.config: + # Current value from workspace config (if workspace_id given) + current = cp.default + if workspace_id: + dim = f"addon.{addon_name}.{cp.key}" + ws_proj = runtime.projections.workspaces.get(workspace_id) + if ws_proj is not None: + ws_config = getattr(ws_proj, "config", {}) + if dim in ws_config: + # Config values stored as JSON strings in projection + try: + current = _json.loads(ws_config[dim]) + except (ValueError, TypeError): + current = ws_config[dim] + params.append({ + "key": cp.key, + "type": cp.type, + "default": cp.default, + "label": cp.label or cp.key, + "options": cp.options, + "value": current, + }) + + return JSONResponse({ + "addon": addon_name, + "config": params, + }) + + async def put_addon_config(request: Request) -> JSONResponse: + """Update addon config values for a workspace via WorkspaceConfigChanged.""" + import json as _json # noqa: PLC0415 + from datetime import UTC, datetime # noqa: PLC0415 + + from 
formicos.core.events import WorkspaceConfigChanged # noqa: PLC0415 + + addon_name = request.path_params["addon_name"] + + try: + body = await request.json() + except Exception: + return _err_response("INVALID_JSON") + + workspace_id = body.get("workspace_id", "") + if not workspace_id: + return _err_response( + "MISSING_FIELD", message="workspace_id is required", + ) + values: dict[str, Any] = body.get("values", {}) + if not values: + return _err_response( + "MISSING_FIELD", message="values dict is required", + ) + + regs: list[Any] = getattr( + request.app.state, "addon_registrations", [], + ) + reg = next( + (r for r in regs if r.manifest.name == addon_name), None, + ) + if reg is None: + return _err_response( + "ADDON_NOT_FOUND", + message=f"Addon '{addon_name}' not installed", + status_code=404, + ) + + # Validate keys against manifest schema + valid_keys = {cp.key for cp in reg.manifest.config} + unknown = set(values.keys()) - valid_keys + if unknown: + return _err_response( + "MISSING_FIELD", + message=f"Unknown config keys: {sorted(unknown)}", + ) + + # Wave 73 B4: coerce values to match declared types + for param in reg.manifest.config: + if param.key in values: + val = values[param.key] + if param.type == "boolean" and isinstance(val, str): + values[param.key] = val.lower() in ("true", "1", "yes") + elif param.type == "integer" and isinstance(val, str): + with contextlib.suppress(ValueError): + values[param.key] = int(val) + + # Emit WorkspaceConfigChanged for each key + updated = [] + for key, new_val in values.items(): + dim = f"addon.{addon_name}.{key}" + # Get old value + old_val = None + ws_proj = runtime.projections.workspaces.get(workspace_id) + if ws_proj is not None: + ws_config = getattr(ws_proj, "config", {}) + old_val = ws_config.get(dim) + + await runtime.emit_and_broadcast(WorkspaceConfigChanged( + seq=0, + timestamp=datetime.now(tz=UTC), + address=workspace_id, + workspace_id=workspace_id, + field=dim, + old_value=old_val if 
isinstance(old_val, str) else ( + _json.dumps(old_val) if old_val is not None else None + ), + new_value=_json.dumps(new_val), + )) + updated.append(key) + + return JSONResponse({ + "addon": addon_name, + "workspace_id": workspace_id, + "updated": updated, + }) + + # Wave 69 Track 4: thread plan read endpoint + import re as _re + _PLAN_STEP_RE = _re.compile( + r"^- \[(\d+)\] \[(\w+)\] (.*)$", + ) + + async def get_thread_plan(request: Request) -> JSONResponse: + thread_id = request.path_params["thread_id"] + data_dir = getattr(settings, "system", None) + data_dir_str = getattr(data_dir, "data_dir", "") if data_dir else "" + if not data_dir_str: + return JSONResponse({"exists": False}) + + plan_path = Path(data_dir_str) / ".formicos" / "plans" / f"{thread_id}.md" + if not plan_path.is_file(): + return JSONResponse({"exists": False}) + + try: + text = plan_path.read_text(encoding="utf-8") + except OSError: + return JSONResponse({"exists": False}) + + title = "" + approach = "" + steps: list[dict[str, Any]] = [] + + for line in text.splitlines(): + if line.startswith("# Plan: "): + title = line[8:].strip() + elif line.startswith("**Approach:**"): + approach = line[len("**Approach:**"):].strip() + else: + m = _PLAN_STEP_RE.match(line) + if m: + idx_str, status, desc = m.groups() + step: dict[str, Any] = { + "index": int(idx_str), + "status": status, + "description": desc, + } + # Parse optional colony ID: (colony abc123) + col_match = _re.search( + r"\(colony\s+(\S+)\)", desc, + ) + if col_match: + step["colony_id"] = col_match.group(1) + # Parse optional note after em-dash + if " \u2014 " in desc: + step["note"] = desc.split(" \u2014 ", 1)[1] + steps.append(step) + + return JSONResponse({ + "exists": True, + "title": title or "Plan", + "approach": approach, + "steps": steps, + }) + + # Wave 71.0 Track 5: action queue endpoints + async def list_operation_actions(request: Request) -> JSONResponse: + workspace_id = request.path_params["workspace_id"] + data_dir = 
getattr(settings, "system", None) + data_dir_str = getattr(data_dir, "data_dir", "") if data_dir else "" + if not data_dir_str: + return JSONResponse({ + "actions": [], "total": 0, + "counts_by_status": {}, "counts_by_kind": {}, + }) + + from formicos.surface.action_queue import list_actions as _list_actions # noqa: PLC0415 + + status_filter = request.query_params.get("status", "") + kind_filter = request.query_params.get("kind", "") + limit = min(int(request.query_params.get("limit", "100")), 500) + result = _list_actions( + data_dir_str, workspace_id, + status=status_filter, kind=kind_filter, limit=limit, + ) + return JSONResponse(result) + + async def approve_action(request: Request) -> JSONResponse: + workspace_id = request.path_params["workspace_id"] + action_id = request.path_params["action_id"] + data_dir = getattr(settings, "system", None) + data_dir_str = getattr(data_dir, "data_dir", "") if data_dir else "" + if not data_dir_str: + return JSONResponse({"error": "No data directory"}, status_code=500) + + from formicos.core.types import CasteSlot as _CasteSlot # noqa: PLC0415 + from formicos.surface.action_queue import ( # noqa: PLC0415 + STATUS_APPROVED, + STATUS_EXECUTED, + STATUS_FAILED, + ) + from formicos.surface.action_queue import ( + update_action as _update_action, + ) + + updated = _update_action( + data_dir_str, workspace_id, action_id, + {"status": STATUS_APPROVED}, + ) + if updated is None: + return JSONResponse({"error": "Action not found"}, status_code=404) + + # If there is a maintenance dispatcher, attempt dispatch + dispatcher = getattr(request.app.state, "maintenance_dispatcher", None) + if dispatcher is not None and updated.get("payload", {}).get("suggested_colony"): + try: + sc = updated["payload"]["suggested_colony"] + colony_id: str = await runtime.spawn_colony( + workspace_id=workspace_id, + thread_id=updated.get("thread_id") or "maintenance", + task=sc.get("task", updated.get("title", "")), + castes=[_CasteSlot(caste=sc.get("caste", 
"researcher"))], + strategy=sc.get("strategy", "sequential"), + max_rounds=sc.get("max_rounds", 3), + ) + _update_action( + data_dir_str, workspace_id, action_id, + {"status": STATUS_EXECUTED, "executed_at": datetime.now(UTC).isoformat()}, + ) + return JSONResponse({"ok": True, "action_id": action_id, "colony_id": colony_id}) + except Exception as exc: # noqa: BLE001 + _update_action( + data_dir_str, workspace_id, action_id, + {"status": STATUS_FAILED, "operator_reason": str(exc)}, + ) + return JSONResponse({"ok": True, "action_id": action_id, "error": str(exc)}) + + # Wave 72: handle workflow_template approval — save as learned template + if updated.get("kind") == "workflow_template": + try: + payload = updated.get("payload", {}) + from formicos.core.types import CasteSlot as _CasteSlot2 # noqa: PLC0415 + from formicos.surface.template_manager import ( # noqa: PLC0415 + ColonyTemplate, + new_template_id, + save_template, + ) + + castes_list = payload.get("castes", ["researcher"]) + tmpl = ColonyTemplate( + template_id=new_template_id(), + name=updated.get("title", "Learned template"), + description=updated.get("detail", ""), + castes=[_CasteSlot2(caste=c) for c in castes_list], + strategy=payload.get("strategy", "sequential"), + learned=True, + task_category="", + ) + await save_template(tmpl) + _update_action( + data_dir_str, workspace_id, action_id, + {"status": STATUS_EXECUTED, "executed_at": datetime.now(UTC).isoformat()}, + ) + return JSONResponse({ + "ok": True, "action_id": action_id, + "template_id": tmpl.template_id, + }) + except Exception as exc: # noqa: BLE001 + _update_action( + data_dir_str, workspace_id, action_id, + {"status": STATUS_FAILED, "operator_reason": str(exc)}, + ) + return JSONResponse({"ok": True, "action_id": action_id, "error": str(exc)}) + + # Wave 72: handle procedure_suggestion approval — append to procedures + if updated.get("kind") == "procedure_suggestion": + try: + from formicos.surface.operational_state import ( # noqa: 
PLC0415 + append_procedure_rule, + ) + + payload = updated.get("payload", {}) + heading = payload.get("heading", "General") + rule = payload.get("rule", updated.get("title", "")) + append_procedure_rule(data_dir_str, workspace_id, heading, rule) + _update_action( + data_dir_str, workspace_id, action_id, + {"status": STATUS_EXECUTED, "executed_at": datetime.now(UTC).isoformat()}, + ) + return JSONResponse({"ok": True, "action_id": action_id, "appended": True}) + except Exception as exc: # noqa: BLE001 + _update_action( + data_dir_str, workspace_id, action_id, + {"status": STATUS_FAILED, "operator_reason": str(exc)}, + ) + return JSONResponse({"ok": True, "action_id": action_id, "error": str(exc)}) + + return JSONResponse({"ok": True, "action_id": action_id}) + + async def reject_action(request: Request) -> JSONResponse: + workspace_id = request.path_params["workspace_id"] + action_id = request.path_params["action_id"] + data_dir = getattr(settings, "system", None) + data_dir_str = getattr(data_dir, "data_dir", "") if data_dir else "" + if not data_dir_str: + return JSONResponse({"error": "No data directory"}, status_code=500) + + import contextlib # noqa: PLC0415 + + from formicos.surface.action_queue import ( # noqa: PLC0415 + STATUS_REJECTED, + ) + from formicos.surface.action_queue import ( + update_action as _update_action, + ) + + body: dict[str, Any] = {} + with contextlib.suppress(Exception): + body = await request.json() + + reason = body.get("reason", "") + + updated = _update_action( + data_dir_str, workspace_id, action_id, + {"status": STATUS_REJECTED, "operator_reason": reason}, + ) + if updated is None: + return JSONResponse({"error": "Action not found"}, status_code=404) + + return JSONResponse({"ok": True, "action_id": action_id}) + + # Wave 72 Track 2: Knowledge review processing + async def review_action(request: Request) -> JSONResponse: + """Process a knowledge review decision (confirm or invalidate).""" + workspace_id = 
request.path_params["workspace_id"] + action_id = request.path_params["action_id"] + data_dir = getattr(settings, "system", None) + data_dir_str = getattr(data_dir, "data_dir", "") if data_dir else "" + if not data_dir_str: + return JSONResponse({"error": "No data directory"}, status_code=500) + + import contextlib # noqa: PLC0415 + + from formicos.surface.action_queue import ( # noqa: PLC0415 + STATUS_EXECUTED, + ) + from formicos.surface.action_queue import ( + read_actions as _read_actions, + ) + from formicos.surface.action_queue import ( + update_action as _update_action, + ) + + body: dict[str, Any] = {} + with contextlib.suppress(Exception): + body = await request.json() + + decision = body.get("decision", "") + reason = body.get("reason", "") + + if decision not in ("confirm", "invalidate"): + return JSONResponse( + {"error": "decision must be 'confirm' or 'invalidate'"}, + status_code=400, + ) + + # Find the action and extract entry_id from payload + actions = _read_actions(data_dir_str, workspace_id) + target = None + for act in actions: + if act.get("action_id") == action_id: + target = act + break + if target is None: + return JSONResponse({"error": "Action not found"}, status_code=404) + + entry_id = target.get("payload", {}).get("entry_id", "") + if not entry_id: + return JSONResponse({"error": "No entry_id in action payload"}, status_code=400) + + entry = runtime.projections.memory_entries.get(entry_id) + if entry is None: + return JSONResponse( + {"error": f"Entry {entry_id} not found"}, + status_code=404, + ) + + if decision == "confirm": + # Reuse the replay-safe feedback path: positive operator feedback + from formicos.core.events import MemoryConfidenceUpdated # noqa: PLC0415 + + old_alpha = float(entry.get("conf_alpha", 5.0)) + old_beta = float(entry.get("conf_beta", 5.0)) + new_alpha = old_alpha + 1.0 + new_beta = old_beta + await runtime.emit_and_broadcast(MemoryConfidenceUpdated( + seq=0, + timestamp=datetime.now(UTC), + 
address=f"{workspace_id}/review", + entry_id=entry_id, + old_alpha=old_alpha, + old_beta=old_beta, + new_alpha=new_alpha, + new_beta=new_beta, + new_confidence=new_alpha / (new_alpha + new_beta), + workspace_id=workspace_id, + reason=f"review_confirmed: {reason}" if reason else "review_confirmed", + )) + else: + # Reuse the operator overlay invalidation path + from formicos.core.events import KnowledgeEntryOperatorAction # noqa: PLC0415 + + await runtime.emit_and_broadcast(KnowledgeEntryOperatorAction( + seq=0, + timestamp=datetime.now(UTC), + address=f"{workspace_id}/{entry_id}", + entry_id=entry_id, + workspace_id=workspace_id, + action="invalidate", + actor="operator", + reason=f"review_invalidated: {reason}" if reason else "review_invalidated", + )) + + # Mark the action as executed + _update_action( + data_dir_str, workspace_id, action_id, + { + "status": STATUS_EXECUTED, + "operator_reason": reason, + "executed_at": datetime.now(UTC).isoformat(), + }, + ) + + return JSONResponse({ + "ok": True, + "action_id": action_id, + "entry_id": entry_id, + "decision": decision, + }) + + # --- Wave 72 Track 10B: maintenance-policy GET/PUT --- + + async def get_maintenance_policy(request: Request) -> JSONResponse: + """Return current maintenance policy for a workspace.""" + from formicos.core.types import MaintenancePolicy # noqa: PLC0415 + + workspace_id = request.path_params["workspace_id"] + ws = runtime.projections.workspaces.get(workspace_id) + if ws is None: + return _err_response("WORKSPACE_NOT_FOUND") + + raw = ws.config.get("maintenance_policy") + if raw is None: + return JSONResponse(MaintenancePolicy().model_dump()) + if isinstance(raw, str): + try: + return JSONResponse(json.loads(raw)) + except (ValueError, TypeError): + return JSONResponse(MaintenancePolicy().model_dump()) + if isinstance(raw, dict): + return JSONResponse(dict(raw)) # type: ignore[arg-type] + return JSONResponse(MaintenancePolicy().model_dump()) + + async def put_maintenance_policy(request: 
Request) -> JSONResponse: + """Update maintenance policy for a workspace.""" + from formicos.core.events import WorkspaceConfigChanged # noqa: PLC0415 + from formicos.core.types import AutonomyLevel, MaintenancePolicy # noqa: PLC0415 + + workspace_id = request.path_params["workspace_id"] + ws = runtime.projections.workspaces.get(workspace_id) + if ws is None: + return _err_response("WORKSPACE_NOT_FOUND") + + try: + body = await request.json() + except Exception: + return _err_response("INVALID_JSON") + + autonomy_level = body.get("autonomy_level", "suggest") + valid_levels = {e.value for e in AutonomyLevel} + if autonomy_level not in valid_levels: + return _err_response( + "INVALID_PARAMETER", + message=f"autonomy_level must be one of {sorted(valid_levels)}", + ) + + budget = float(body.get("daily_maintenance_budget", 1.0)) + if budget <= 0: + return _err_response( + "INVALID_PARAMETER", + message="daily_maintenance_budget must be > 0", + ) + + policy = MaintenancePolicy( + autonomy_level=AutonomyLevel(autonomy_level), + auto_actions=body.get("auto_actions", []), + max_maintenance_colonies=int(body.get("max_maintenance_colonies", 2)), + daily_maintenance_budget=budget, + ) + + old_raw = ws.config.get("maintenance_policy") + old_json = str(old_raw) if old_raw is not None else None + await runtime.emit_and_broadcast(WorkspaceConfigChanged( + seq=0, + timestamp=datetime.now(UTC), + address=workspace_id, + workspace_id=workspace_id, + field="maintenance_policy", + old_value=old_json, + new_value=policy.model_dump_json(), + )) + return JSONResponse({"status": "updated", "policy": policy.model_dump()}) + + # --- Wave 72 Track 10C: add model endpoint --- + + async def add_model(request: Request) -> JSONResponse: + """Add a new model to the registry.""" + try: + body: dict[str, Any] = await request.json() + except Exception: + return _err_response("INVALID_JSON") + + address = str(body.get("address", "")).strip() + if not address: + return _err_response("MISSING_FIELD", 
message="address is required") + + # Check for duplicates + for m in settings.models.registry: + if m.address == address: + return _err_response( + "VALIDATION_ERROR", message=f"model '{address}' already exists", + ) + + provider = str(body.get("provider", address.split("/")[0] if "/" in address else "custom")) + try: + new_model = ModelRecord( + address=address, + provider=provider, + endpoint=body.get("endpoint"), + api_key_env=body.get("api_key_env"), + context_window=int(body.get("context_window", 8192)), + supports_tools=bool(body.get("supports_tools", True)), + supports_vision=bool(body.get("supports_vision", False)), + cost_per_input_token=body.get("cost_per_input_token"), + cost_per_output_token=body.get("cost_per_output_token"), + max_output_tokens=int(body.get("max_output_tokens", 4096)), + hidden=bool(body.get("hidden", False)), + ) + except (ValueError, TypeError) as exc: + return _err_response("VALIDATION_FAILED", message=f"invalid fields: {exc}") + + settings.models.registry.append(new_model) + save_model_registry(config_path, settings) + log.info("model.added", address=address) + return JSONResponse(new_model.model_dump(), status_code=201) + + # Wave 70.0 Track 5: project plan read endpoint + async def get_project_plan(request: Request) -> JSONResponse: + data_dir = getattr(settings, "system", None) + data_dir_str = getattr(data_dir, "data_dir", "") if data_dir else "" + if not data_dir_str: + return JSONResponse({"exists": False}) + + from formicos.surface.project_plan import load_project_plan # noqa: PLC0415 + + plan = load_project_plan(data_dir_str) + return JSONResponse(plan) + + # -- Wave 71.0 Track 3: Journal / Procedures endpoints -- + + async def get_queen_journal(request: Request) -> JSONResponse: + """Return structured journal entries for a workspace.""" + from formicos.surface.operational_state import ( # noqa: PLC0415 + get_journal_summary, + ) + + workspace_id = request.path_params["workspace_id"] + data_dir = getattr(settings, "system", 
None) + data_dir_str = getattr(data_dir, "data_dir", "") if data_dir else "" + if not data_dir_str: + return JSONResponse({"exists": False, "entries": []}) + + return JSONResponse(get_journal_summary(data_dir_str, workspace_id)) + + async def get_operating_procedures(request: Request) -> JSONResponse: + """Return operating procedures for a workspace.""" + from formicos.surface.operational_state import ( # noqa: PLC0415 + get_procedures_summary, + ) + + workspace_id = request.path_params["workspace_id"] + data_dir = getattr(settings, "system", None) + data_dir_str = getattr(data_dir, "data_dir", "") if data_dir else "" + if not data_dir_str: + return JSONResponse({"exists": False, "content": ""}) + + return JSONResponse(get_procedures_summary(data_dir_str, workspace_id)) + + async def put_operating_procedures(request: Request) -> JSONResponse: + """Update operating procedures for a workspace.""" + from formicos.surface.operational_state import ( # noqa: PLC0415 + save_procedures, + ) + + workspace_id = request.path_params["workspace_id"] + data_dir = getattr(settings, "system", None) + data_dir_str = getattr(data_dir, "data_dir", "") if data_dir else "" + if not data_dir_str: + return _err_response( + "DATA_DIR_NOT_SET", message="data_dir not configured", + ) + + try: + body = await request.json() + except Exception: + return _err_response("INVALID_JSON") + + content = body.get("content", "") + if not isinstance(content, str): + return _err_response( + "VALIDATION_ERROR", message="content must be a string", + ) + + save_procedures(data_dir_str, workspace_id, content) + return JSONResponse({"updated": True}) + + # Wave 71.0 Track 9: Operations summary endpoint + async def get_operations_summary(request: Request) -> JSONResponse: + """Return synthesized operational summary for a workspace.""" + from formicos.surface.operations_coordinator import ( # noqa: PLC0415 + build_operations_summary, + ) + + workspace_id = request.path_params["workspace_id"] + data_dir = 
getattr(settings, "system", None) + data_dir_str = getattr(data_dir, "data_dir", "") if data_dir else "" + if not data_dir_str: + return JSONResponse({ + "workspace_id": workspace_id, + "pending_review_count": 0, + "active_milestone_count": 0, + "stalled_thread_count": 0, + "last_operator_activity_at": None, + "idle_for_minutes": None, + "operator_active": False, + "continuation_candidates": [], + "sync_issues": [], + "recent_progress": [], + }) + + proj = runtime.projections if runtime else None + return JSONResponse(build_operations_summary( + data_dir_str, workspace_id, proj, + )) + return [ Route("/api/v1/knowledge-graph", get_knowledge_graph), # Wave 60: entry relationships + operator feedback @@ -1308,6 +2321,7 @@ async def delete_workflow_step(request: Request) -> JSONResponse: Route("/api/v1/models/{address:path}", update_model_policy, methods=["PATCH"]), Route("/api/v1/workspaces/{workspace_id:str}/briefing", get_briefing), Route("/api/v1/workspaces/{workspace_id:str}/outcomes", get_workspace_outcomes), + Route("/api/v1/workspaces/{workspace_id:str}/knowledge-tree", get_knowledge_tree), Route("/api/v1/workspaces/{workspace_id:str}/escalation-matrix", get_escalation_matrix), Route( "/api/v1/workspaces/{workspace_id:str}/config-recommendations", @@ -1326,6 +2340,8 @@ async def delete_workflow_step(request: Request) -> JSONResponse: post_dismiss_autonomy, methods=["POST"], ), Route("/api/v1/workspaces/create-demo", create_demo_workspace, methods=["POST"]), + # Wave 73 B3: workspace creation + Route("/api/v1/workspaces", create_workspace_endpoint, methods=["POST"]), # Wave 55: learning summary Route( "/api/v1/workspaces/{workspace_id:str}/learning-summary", @@ -1359,6 +2375,41 @@ async def delete_workflow_step(request: Request) -> JSONResponse: "/api/v1/workspaces/{workspace_id:str}/forager/domains", get_domain_strategies, methods=["GET"], ), + # Wave 71.0 Track 3: Journal / Procedures + Route( + "/api/v1/workspaces/{workspace_id:str}/queen-journal", + 
get_queen_journal, methods=["GET"], + ), + Route( + "/api/v1/workspaces/{workspace_id:str}/operating-procedures", + get_operating_procedures, methods=["GET"], + ), + Route( + "/api/v1/workspaces/{workspace_id:str}/operating-procedures", + put_operating_procedures, methods=["PUT"], + ), + # Wave 71.0 Track 5: Action queue endpoints + Route( + "/api/v1/workspaces/{workspace_id:str}/operations/actions", + list_operation_actions, methods=["GET"], + ), + Route( + "/api/v1/workspaces/{workspace_id:str}/operations/actions/{action_id:str}/approve", + approve_action, methods=["POST"], + ), + Route( + "/api/v1/workspaces/{workspace_id:str}/operations/actions/{action_id:str}/reject", + reject_action, methods=["POST"], + ), + Route( + "/api/v1/workspaces/{workspace_id:str}/operations/actions/{action_id:str}/review", + review_action, methods=["POST"], + ), + # Wave 71.0 Track 9: Operations summary + Route( + "/api/v1/workspaces/{workspace_id:str}/operations/summary", + get_operations_summary, methods=["GET"], + ), # Wave 63 Track 6: Knowledge CRUD Route( "/api/v1/knowledge/{entry_id:str}", @@ -1389,4 +2440,50 @@ async def delete_workflow_step(request: Request) -> JSONResponse: "/api/v1/workspaces/{workspace_id:str}/threads/{thread_id:str}/steps/{step_index:int}", delete_workflow_step, methods=["DELETE"], ), + # Wave 66 T1: addon endpoints + Route("/api/v1/addons", list_addons, methods=["GET"]), + Route( + "/api/v1/addons/{addon_name:str}/trigger", + trigger_addon, methods=["POST"], + ), + Route( + "/api/v1/addons/{addon_name:str}/toggle", + toggle_addon, methods=["POST"], + ), + # Wave 69: thread plan read endpoint + Route( + "/api/v1/workspaces/{workspace_id:str}/threads/{thread_id:str}/plan", + get_thread_plan, methods=["GET"], + ), + # Wave 70.0 Track 5: project plan read endpoint + Route("/api/v1/project-plan", get_project_plan, methods=["GET"]), + # Wave 66 T2: addon config surface + Route( + "/api/v1/addons/{addon_name:str}/config", + get_addon_config, methods=["GET"], + 
), + Route( + "/api/v1/addons/{addon_name:str}/config", + put_addon_config, methods=["PUT"], + ), + # Wave 74 B3: Queen context budget + Route("/api/v1/queen-budget", get_queen_budget), + # Wave 70.0 Track 9: autonomy status + Route( + "/api/v1/workspaces/{workspace_id:str}/autonomy-status", + get_autonomy_status, methods=["GET"], + ), + # Wave 72 Track 10B: maintenance policy + Route( + "/api/v1/workspaces/{workspace_id:str}/maintenance-policy", + get_maintenance_policy, methods=["GET"], + ), + Route( + "/api/v1/workspaces/{workspace_id:str}/maintenance-policy", + put_maintenance_policy, methods=["PUT"], + ), + # Wave 72 Track 10C: add model + Route("/api/v1/models", add_model, methods=["POST"]), + # Wave 74 Track 4: Queen tool stats + Route("/api/v1/queen-tool-stats", get_queen_tool_stats, methods=["GET"]), ] diff --git a/src/formicos/surface/routes/knowledge_api.py b/src/formicos/surface/routes/knowledge_api.py index dbca96c..9498196 100644 --- a/src/formicos/surface/routes/knowledge_api.py +++ b/src/formicos/surface/routes/knowledge_api.py @@ -2,9 +2,13 @@ from __future__ import annotations +import asyncio +import inspect +import re from datetime import UTC, datetime from typing import TYPE_CHECKING, Any +import structlog from starlette.responses import JSONResponse from starlette.routing import Route @@ -17,6 +21,85 @@ from formicos.surface.projections import ProjectionStore from formicos.surface.runtime import Runtime +log = structlog.get_logger() + +# --------------------------------------------------------------------------- +# Wave 69 Track 6: Addon result parsing (module-level for testability) +# --------------------------------------------------------------------------- + +# Regex to extract structured results from addon markdown output. +_ADDON_RESULT_RE = re.compile( + r"\*\*(.+?)\*\*\s*(?:\(score:\s*([\d.]+)\))?" +) + +# Map UI source tokens to addon content_kinds. 
+_SOURCE_TO_KINDS: dict[str, list[str]] = { + "docs": ["documentation"], + "code": ["source_code"], +} + + +def _parse_addon_results( + raw: str, + addon_name: str, + source_label: str, + content_kinds: list[str], + limit: int, +) -> list[dict[str, Any]]: + """Parse addon markdown output into structured result dicts.""" + results: list[dict[str, Any]] = [] + blocks = raw.split("\n\n") + for block in blocks: + if len(results) >= limit: + break + m = _ADDON_RESULT_RE.search(block) + if not m: + continue + path_part = m.group(1).strip() + score_str = m.group(2) + score = float(score_str) if score_str else 0.0 + + # Extract snippet from code block or remaining text + snippet = "" + code_start = block.find("```") + if code_start >= 0: + code_end = block.find("```", code_start + 3) + if code_end >= 0: + # Skip language marker on first line + inner = block[code_start + 3 : code_end] + first_nl = inner.find("\n") + snippet = ( + inner[first_nl + 1 :] if first_nl >= 0 else inner + )[:200] + else: + snippet = block[code_start + 3 :][:200] + else: + snippet = block[m.end() :][:200].strip() + + # Parse file path and line range + file_path = path_part + line_range = "" + if ":" in path_part: + parts = path_part.rsplit(":", 1) + if parts[1] and parts[1][0].isdigit(): + file_path = parts[0] + line_range = parts[1] + + results.append({ + "source": addon_name, + "source_label": source_label, + "id": f"{addon_name}:{path_part}", + "title": path_part, + "snippet": snippet, + "score": round(score, 4), + "metadata": { + "file_path": file_path, + "line_range": line_range, + "content_kinds": content_kinds, + }, + }) + return results + def routes( *, @@ -343,6 +426,22 @@ async def get_entry_annotations(request: Request) -> JSONResponse: "total": len(annotations), }) + # Wave 67.5: provenance chain endpoint + async def get_entry_provenance(request: Request) -> JSONResponse: + """Return the append-only provenance chain for a knowledge entry.""" + if projections is None: + return 
_err_response("KNOWLEDGE_CATALOG_UNAVAILABLE") + item_id = request.path_params["item_id"] + entry = projections.memory_entries.get(item_id) + if entry is None: + return _err_response("KNOWLEDGE_ITEM_NOT_FOUND") + chain = entry.get("provenance_chain", []) + return JSONResponse({ + "entry_id": item_id, + "chain": chain, + "total": len(chain), + }) + async def get_entry_overlay_status(request: Request) -> JSONResponse: """Get operator overlay status for a knowledge entry.""" if projections is None: @@ -394,6 +493,209 @@ async def override_config_suggestion(request: Request) -> JSONResponse: )) return JSONResponse({"ok": True, "workspace_id": workspace_id, "overridden": True}) + # ------------------------------------------------------------------ + # Wave 69 Track 6: Unified search across memory + addon indices + # ------------------------------------------------------------------ + + async def _search_memory_source( + query: str, workspace_id: str, limit: int, + ) -> list[dict[str, Any]]: + """Search institutional memory and return source-labeled results.""" + if knowledge_catalog is None: + return [] + items = await knowledge_catalog.search( + query=query, workspace_id=workspace_id, top_k=limit, + ) + return [ + { + "source": "memory", + "source_label": "Institutional Memory", + "id": it.get("id", ""), + "title": it.get("title", ""), + "snippet": ( + it.get("summary") or it.get("content_preview") or "" + )[:200], + "score": round(it.get("score", 0), 4), + "metadata": { + "confidence": round(it.get("confidence", 0.5), 2), + "status": it.get("status", ""), + "domains": it.get("domains", []), + "sub_type": it.get("sub_type", ""), + }, + } + for it in items + ] + + async def _search_addon_source( + addon_name: str, + manifest: Any, + reg: Any, + query: str, + workspace_id: str, + limit: int, + ) -> list[dict[str, Any]]: + """Search a single addon index and return source-labeled results.""" + search_tool_name = getattr(manifest, "search_tool", "") + content_kinds = 
getattr(manifest, "content_kinds", []) + tool_spec = None + for t in manifest.tools: + if t.name == search_tool_name: + tool_spec = t + break + if tool_spec is None: + # Fallback: try first tool with 'search' in name + for t in manifest.tools: + if "search" in t.name.lower(): + tool_spec = t + break + if tool_spec is None: + return [] + + try: + from formicos.surface.addon_loader import ( # noqa: PLC0415 + _resolve_handler, # pyright: ignore[reportPrivateUsage] + ) + + handler = _resolve_handler(addon_name, tool_spec.handler) + except Exception: # noqa: BLE001 + log.debug( + "unified_search.handler_resolve_failed", + addon=addon_name, + ) + return [] + + rc = getattr(reg, "runtime_context", {}) or {} + _accepts_ctx = "runtime_context" in inspect.signature(handler).parameters + + try: + if _accepts_ctx: + raw = await handler( + {"query": query, "top_k": limit}, + workspace_id, + "", + runtime_context=rc, + ) + else: + raw = await handler( + {"query": query, "top_k": limit}, + workspace_id, + "", + ) + except Exception: # noqa: BLE001 + log.debug( + "unified_search.addon_search_failed", + addon=addon_name, + ) + return [] + + if not isinstance(raw, str): + return [] + + return _parse_addon_results( + raw, addon_name, manifest.description or addon_name, + content_kinds, limit, + ) + + async def unified_search(request: Request) -> JSONResponse: + """Search across institutional memory and addon indices. 
+ + GET /api/v1/workspaces/{workspace_id}/search?q=...&sources=memory,docs,code&limit=10 + """ + workspace_id = request.path_params["workspace_id"] + query = request.query_params.get("q", "").strip() + if not query: + return _err_response("QUERY_REQUIRED") + + sources_raw = request.query_params.get("sources", "") + requested_sources = ( + [s.strip() for s in sources_raw.split(",") if s.strip()] + if sources_raw + else [] + ) + try: + limit = max(1, min( + int(request.query_params.get("limit", "10")), 20, + )) + except ValueError: + return _err_response("LIMIT_INVALID") + + include_memory = not requested_sources or "memory" in requested_sources + + # Determine which addon sources to query + regs: list[Any] = getattr( + request.app.state, "addon_registrations", [], + ) + manifests: list[Any] = getattr( + request.app.state, "addon_manifests", [], + ) + + # Build mapping: addon_name -> (manifest, registration) + reg_by_name: dict[str, Any] = {} + for r in regs: + reg_by_name[r.manifest.name] = r + + addon_tasks: list[tuple[str, Any, Any]] = [] + for m in manifests: + search_tool = getattr(m, "search_tool", "") + if not search_tool: + # Check if any tool has 'search' in name + has_search = any( + "search" in t.name.lower() for t in m.tools + ) + if not has_search: + continue + + content_kinds = getattr(m, "content_kinds", []) + if requested_sources: + # Check if this addon matches any requested source + matches = False + if m.name in requested_sources: + matches = True + else: + for src_token in requested_sources: + expected_kinds = _SOURCE_TO_KINDS.get(src_token, []) + if expected_kinds and any( + k in content_kinds for k in expected_kinds + ): + matches = True + break + if not matches: + continue + + reg = reg_by_name.get(m.name) + if reg is not None: + addon_tasks.append((m.name, m, reg)) + + # Fan out searches in parallel + tasks: list[Any] = [] + if include_memory: + tasks.append(_search_memory_source(query, workspace_id, limit)) + for addon_name, manifest, reg in 
addon_tasks: + tasks.append( + _search_addon_source( + addon_name, manifest, reg, query, workspace_id, limit, + ), + ) + + raw_results = await asyncio.gather(*tasks, return_exceptions=True) + + # Collect results: memory first, then addon groups + all_results: list[dict[str, Any]] = [] + for result in raw_results: + if isinstance(result, BaseException): + log.debug( + "unified_search.source_error", + error=str(result), + ) + continue + if isinstance(result, list): + all_results.extend(result) # pyright: ignore[reportUnknownArgumentType] + + return JSONResponse({ + "results": all_results, + "total": len(all_results), + }) + return [ Route("/api/v1/knowledge", list_knowledge, methods=["GET"]), Route( @@ -429,6 +731,10 @@ async def override_config_suggestion(request: Request) -> JSONResponse: "/api/v1/knowledge/{item_id:str}/annotations", get_entry_annotations, methods=["GET"], ), + Route( + "/api/v1/knowledge/{item_id:str}/provenance", + get_entry_provenance, methods=["GET"], + ), Route( "/api/v1/knowledge/{item_id:str}/overlay", get_entry_overlay_status, methods=["GET"], @@ -437,4 +743,9 @@ async def override_config_suggestion(request: Request) -> JSONResponse: "/api/v1/knowledge/config-override", override_config_suggestion, methods=["POST"], ), + # Wave 69 Track 6: Unified search + Route( + "/api/v1/workspaces/{workspace_id:str}/search", + unified_search, methods=["GET"], + ), ] diff --git a/src/formicos/surface/runtime.py b/src/formicos/surface/runtime.py index e459e3f..549dd5e 100644 --- a/src/formicos/surface/runtime.py +++ b/src/formicos/surface/runtime.py @@ -491,6 +491,8 @@ def __init__( self.queen: Any = None # noqa: ANN401 # Set by app.py after memory store is created (Wave 26) self.memory_store: Any = None # noqa: ANN401 + # Set by app.py after addons are registered (Wave 64) + self.addon_registrations: list[Any] | None = None def _wire_kg_events(self) -> None: """Inject KG event callback into the adapter (Wave 14 Stream D).""" @@ -533,7 +535,13 @@ async 
def emit_and_broadcast(self, event: FormicOSEvent) -> int: elif etype == "MemoryEntryStatusChanged": sync_id = str(getattr(event_with_seq, "entry_id", "")) elif etype == "MemoryEntryMerged": - sync_id = str(getattr(event_with_seq, "target_id", "")) + # Wave 66 S3: sync both target (updated) and source (rejected) + for _merge_attr in ("target_id", "source_id"): + _merge_id = str(getattr(event_with_seq, _merge_attr, "")) + if _merge_id: + await self.memory_store.sync_entry( + _merge_id, self.projections.memory_entries, + ) elif etype in ("MemoryConfidenceUpdated", "MemoryEntryRefined"): sync_id = str(getattr(event_with_seq, "entry_id", "")) if sync_id: @@ -1159,19 +1167,19 @@ async def retrieve_relevant_memory( task: str, workspace_id: str, thread_id: str = "", - ) -> str: + ) -> tuple[str, list[dict[str, Any]]]: """Deterministic pre-spawn knowledge retrieval. Searches the unified knowledge catalog for skills and experiences - relevant to *task* and returns a formatted block for Queen context - injection. + relevant to *task* and returns a ``(formatted_block, raw_items)`` + tuple for Queen context injection and consulted-entry tracking. Called by ``QueenAgent.respond()`` before the first LLM call. This is a deterministic runtime action, not a model-facing nudge. 
""" catalog = getattr(self, "knowledge_catalog", None) if catalog is None: - return "" + return "", [] try: results: list[dict[str, Any]] = await catalog.search( @@ -1182,10 +1190,10 @@ async def retrieve_relevant_memory( ) except Exception: log.debug("runtime.memory_retrieval_failed", task=task[:80]) - return "" + return "", [] if not results: - return "" + return "", [] lines = [f"[System Knowledge -- {len(results)} entries found]"] @@ -1205,7 +1213,7 @@ async def retrieve_relevant_memory( ) lines.append(f" source: colony {colony}, confidence: {conf:.1f}") - return "\n".join(lines) + return "\n".join(lines), results # -- Unified knowledge fetch for agent context (Wave 28 A1) -- diff --git a/src/formicos/surface/self_maintenance.py b/src/formicos/surface/self_maintenance.py index 91921b5..6717f33 100644 --- a/src/formicos/surface/self_maintenance.py +++ b/src/formicos/surface/self_maintenance.py @@ -11,6 +11,8 @@ from __future__ import annotations +import math +from dataclasses import dataclass, field from datetime import UTC, date, datetime from typing import TYPE_CHECKING, Any @@ -20,6 +22,7 @@ if TYPE_CHECKING: from formicos.surface.proactive_intelligence import KnowledgeInsight, ProactiveBriefing + from formicos.surface.projections import ProjectionStore from formicos.surface.runtime import Runtime log = structlog.get_logger() @@ -42,6 +45,227 @@ def _log_forage_dispatch_task(task: Any) -> None: } +# --------------------------------------------------------------------------- +# Wave 70 Track 8: Blast Radius Estimator +# --------------------------------------------------------------------------- + + +@dataclass +class BlastRadiusEstimate: + """Estimated scope and impact of a proposed autonomous action.""" + + score: float # 0.0 (trivial) to 1.0 (high impact) + level: str # "low", "medium", "high" + factors: list[str] + recommendation: str # "proceed", "notify", "escalate" + + +def estimate_blast_radius( + task: str, + caste: str = "coder", + max_rounds: int = 
3, + strategy: str = "sequential", + workspace_id: str = "", + projections: ProjectionStore | None = None, +) -> BlastRadiusEstimate: + """Estimate the blast radius of a proposed autonomous dispatch. + + Uses deterministic heuristics only. No LLM calls. + """ + score = 0.0 + factors: list[str] = [] + + # Factor 1: task length as proxy for complexity + task_len = len(task) + if task_len > 500: + score += 0.2 + factors.append("Long task description (complex scope)") + elif task_len > 200: + score += 0.1 + factors.append("Medium-length task description") + + # Factor 2: caste risk profile + caste_risk: dict[str, float] = { + "coder": 0.3, + "reviewer": 0.1, + "researcher": 0.1, + "archivist": 0.05, + } + risk = caste_risk.get(caste, 0.2) + score += risk + if risk >= 0.3: + factors.append(f"Caste '{caste}' can modify files") + + # Factor 3: round count as proxy for complexity + if max_rounds > 5: + score += 0.15 + factors.append(f"High round budget ({max_rounds} rounds)") + elif max_rounds > 3: + score += 0.05 + + # Factor 4: strategy + if strategy == "stigmergic": + score += 0.1 + factors.append("Stigmergic strategy (multi-agent, harder to predict)") + + # Factor 5: keyword signals in task text + # Keywords only carry full weight for castes that modify files (coder). + # Read-only castes (researcher, reviewer, archivist) get reduced weight + # because investigating a topic is not the same as changing it. 
+ high_risk_keywords = [ + "delete", "remove", "drop", "migrate", "refactor", + "rename", "replace all", "database", "schema", "deploy", + "production", "auth", "security", "permission", + ] + task_lower = task.lower() + matched = [kw for kw in high_risk_keywords if kw in task_lower] + if matched: + # Only coders can act on these keywords; read-only castes just investigate + kw_weight = 0.15 if caste in ("coder",) else 0.0 + score += kw_weight * min(len(matched), 3) + factors.append(f"High-risk keywords: {', '.join(matched[:3])}") + + # Factor 6: prior outcome history for this caste/strategy + if projections and workspace_id and hasattr(projections, "outcome_stats"): + stats = projections.outcome_stats(workspace_id) + for stat in stats: + if stat["strategy"] == strategy and caste in stat.get("caste_mix", ""): + if stat["success_rate"] < 0.5 and stat["total"] >= 3: + score += 0.2 + factors.append( + f"Low historical success rate for {strategy}/{caste}: " + f"{stat['success_rate']:.0%}" + ) + break + + score = min(1.0, max(0.0, score)) + + if score >= 0.6: + level = "high" + recommendation = "escalate" + elif score >= 0.3: + level = "medium" + recommendation = "notify" + else: + level = "low" + recommendation = "proceed" + + return BlastRadiusEstimate( + score=round(score, 2), + level=level, + factors=factors, + recommendation=recommendation, + ) + + +# --------------------------------------------------------------------------- +# Wave 70 Track 9: Graduated Autonomy Scoring +# --------------------------------------------------------------------------- + + +@dataclass +class AutonomyScore: + """Graduated autonomy trust score from outcome history.""" + + score: int # 0-100 + grade: str # "A", "B", "C", "D", "F" + components: dict[str, float] = field(default_factory=lambda: {}) + recommendation: str = "" + + +def compute_autonomy_score( + workspace_id: str, + projections: ProjectionStore, +) -> AutonomyScore: + """Compute graduated autonomy trust score from outcome 
history. + + Components: + - success_rate (40%): fraction of successful colonies + - volume (20%): log-scaled colony count (caps at 50 colonies) + - cost_efficiency (20%): avg cost vs budget (lower is better) + - operator_trust (20%): follow-through rate minus kill rate + """ + outcomes = [ + o for o in projections.colony_outcomes.values() + if o.workspace_id == workspace_id + ] + if not outcomes: + return AutonomyScore( + score=0, + grade="F", + components={ + "success_rate": 0.0, "volume": 0.0, + "cost_efficiency": 0.0, "operator_trust": 0.0, + }, + recommendation="No outcome history. Start with supervised dispatch.", + ) + + successes = sum(1 for o in outcomes if o.succeeded) + success_rate = successes / len(outcomes) + + # Volume (log-scaled, caps at 50) + volume = min(1.0, math.log(1 + len(outcomes)) / math.log(51)) + + # Cost efficiency: avg cost relative to estimated budget + avg_cost = sum(o.total_cost for o in outcomes) / len(outcomes) + cost_efficiency = 1.0 / (1.0 + avg_cost * 2) + + # Operator trust: follow-through vs kills + behavior = getattr(projections, "operator_behavior", None) + operator_trust = 0.5 # neutral baseline + if behavior is not None: + total_acted = sum(behavior.suggestion_categories_acted_on.values()) + total_kills = len(behavior.kill_records) + total_signals = total_acted + total_kills + if total_signals > 0: + operator_trust = total_acted / total_signals + + components = { + "success_rate": round(success_rate, 2), + "volume": round(volume, 2), + "cost_efficiency": round(cost_efficiency, 2), + "operator_trust": round(operator_trust, 2), + } + + raw = ( + success_rate * 0.40 + + volume * 0.20 + + cost_efficiency * 0.20 + + operator_trust * 0.20 + ) + score_val = max(0, min(100, int(round(raw * 100)))) + + if score_val >= 80: + grade, recommendation = "A", ( + "Strong track record. Consider promoting to autonomous level." + ) + elif score_val >= 65: + grade, recommendation = "B", ( + "Good track record. 
Auto-notify with expanded categories " + "is appropriate." + ) + elif score_val >= 50: + grade, recommendation = "C", ( + "Mixed results. Auto-notify with limited categories recommended." + ) + elif score_val >= 35: + grade, recommendation = "D", ( + "Below average. Suggest-only mode recommended until outcomes improve." + ) + else: + grade, recommendation = "F", ( + "Poor track record. Suggest-only mode recommended. Review " + "recent colony failures." + ) + + return AutonomyScore( + score=score_val, + grade=grade, + components=components, + recommendation=recommendation, + ) + + class MaintenanceDispatcher: """Connects proactive insights to automatic colony dispatch. @@ -67,6 +291,8 @@ async def evaluate_and_dispatch( policy = self._get_policy(workspace_id) if policy.autonomy_level == AutonomyLevel.suggest: + # Wave 71.0: queue suggest-only insights instead of dropping + self._queue_suggest_only(workspace_id, briefing) return [] dispatched: list[str] = [] @@ -96,6 +322,55 @@ async def evaluate_and_dispatch( policy.autonomy_level == AutonomyLevel.auto_notify and insight.category not in policy.auto_actions ): + # Wave 71.0: queue as self-rejected instead of dropping + self._queue_insight( + workspace_id, insight, + reason=f"Category '{insight.category}' not in auto_actions", + self_rejected=True, + ) + continue + + # Wave 70 Track 8: blast radius gate + sc = insight.suggested_colony + estimate = estimate_blast_radius( + task=sc.task, + caste=sc.caste, + max_rounds=sc.max_rounds, + strategy=sc.strategy, + workspace_id=workspace_id, + projections=self._runtime.projections, + ) + if estimate.recommendation == "escalate": + log.info( + "maintenance.blast_radius_escalation", + workspace_id=workspace_id, + category=insight.category, + score=estimate.score, + factors=estimate.factors, + ) + # Wave 71.0: queue instead of silently dropping + self._queue_insight( + workspace_id, insight, + blast_radius=estimate.score, + reason=f"Blast radius escalation 
(score={estimate.score:.2f})", + ) + continue + if ( + policy.autonomy_level == AutonomyLevel.auto_notify + and estimate.recommendation == "notify" + ): + log.info( + "maintenance.blast_radius_notify_skip", + workspace_id=workspace_id, + category=insight.category, + score=estimate.score, + ) + # Wave 71.0: queue instead of silently dropping + self._queue_insight( + workspace_id, insight, + blast_radius=estimate.score, + reason=f"Blast radius notify (score={estimate.score:.2f})", + ) continue colony_id = await self._spawn_maintenance_colony( @@ -112,6 +387,79 @@ async def evaluate_and_dispatch( return dispatched + # ------------------------------------------------------------------ # + # Wave 71.0: action queue integration # + # ------------------------------------------------------------------ # + + def _get_data_dir(self) -> str: + """Return the data directory string, or empty.""" + try: + dd = self._runtime.settings.system.data_dir + return str(dd) if isinstance(dd, str) and dd else "" + except AttributeError: + return "" + + def _queue_insight( + self, + workspace_id: str, + insight: KnowledgeInsight, + *, + blast_radius: float = 0.0, + reason: str = "", + self_rejected: bool = False, + ) -> None: + """Queue a proactive insight as a durable action record.""" + data_dir = self._get_data_dir() + if not data_dir: + return + try: + from formicos.surface.action_queue import ( # noqa: PLC0415 + queue_from_insight, + ) + + sc = insight.suggested_colony + sc_dict: dict[str, Any] | None = None + if sc is not None: + sc_dict = { + "caste": sc.caste, + "strategy": sc.strategy, + "max_rounds": sc.max_rounds, + "task": sc.task[:500], + "estimated_cost": sc.estimated_cost, + } + + queue_from_insight( + data_dir, + workspace_id, + insight_category=insight.category, + insight_title=insight.title, + insight_detail=str(insight.detail)[:500] if insight.detail else "", + suggested_colony=sc_dict, + blast_radius=blast_radius, + estimated_cost=sc.estimated_cost if sc else 0.0, + 
confidence=0.0, + reason=reason, + self_rejected=self_rejected, + ) + except Exception: # noqa: BLE001 + log.debug( + "maintenance.queue_insight_failed", + workspace_id=workspace_id, + ) + + def _queue_suggest_only( + self, + workspace_id: str, + briefing: ProactiveBriefing, + ) -> None: + """Queue all suggest-only insights as pending_review.""" + for insight in briefing.insights: + if insight.suggested_colony: + self._queue_insight( + workspace_id, insight, + reason="Suggest-only autonomy level", + ) + async def evaluate_distillation( self, workspace_id: str, ) -> list[str]: @@ -313,10 +661,15 @@ async def run_proactive_dispatch(self) -> dict[str, list[str]]: ) results: dict[str, list[str]] = {} + self.last_briefing_insights: dict[str, list[dict[str, object]]] = {} workspace_ids = list(self._runtime.projections.workspaces.keys()) for ws_id in workspace_ids: try: briefing = generate_briefing(ws_id, self._runtime.projections) + self.last_briefing_insights[ws_id] = [ + i.model_dump() if hasattr(i, "model_dump") else dict(i) + for i in briefing.insights + ] dispatched = await self.evaluate_and_dispatch(ws_id, briefing) if dispatched: results[ws_id] = dispatched @@ -461,4 +814,10 @@ def _safe_str_list(val: Any) -> list[str]: return [str(item) for item in val] -__all__ = ["MaintenanceDispatcher"] +__all__ = [ + "AutonomyScore", + "BlastRadiusEstimate", + "MaintenanceDispatcher", + "compute_autonomy_score", + "estimate_blast_radius", +] diff --git a/src/formicos/surface/structured_error.py b/src/formicos/surface/structured_error.py index c2c2f0d..2418850 100644 --- a/src/formicos/surface/structured_error.py +++ b/src/formicos/surface/structured_error.py @@ -426,6 +426,13 @@ def to_ws_error(err: StructuredError) -> dict[str, Any]: category=ErrorCategory.validation, recovery_hint="Content contains embedded secrets; remove before retrying", ), + "ADDON_NOT_FOUND": StructuredError( + error_code="ADDON_NOT_FOUND", + message="Addon not installed", + 
severity=ErrorSeverity.permanent, + category=ErrorCategory.not_found, + recovery_hint="Check installed addons with GET /api/v1/addons", + ), "INVALID_JSON": StructuredError( error_code="INVALID_JSON", message="Invalid JSON in request body", diff --git a/src/formicos/surface/thread_plan.py b/src/formicos/surface/thread_plan.py new file mode 100644 index 0000000..7b40602 --- /dev/null +++ b/src/formicos/surface/thread_plan.py @@ -0,0 +1,204 @@ +"""Shared thread-plan parser/helper (Wave 71.0 Track 7). + +Canonical helper for reading ``.formicos/plans/{thread_id}.md``. +Follows the same pattern as ``project_plan.py``. + +The step-line format matches ``_STEP_RE`` in ``queen_tools.py``:: + + - [0] [pending] Description text + - [1] [completed] Another step +""" + +from __future__ import annotations + +import re +from pathlib import Path +from typing import Any + +import structlog + +log = structlog.get_logger() + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +_STEP_RE = re.compile( + r"^- \[(\d+)\] \[(\w+)\] (.*)$", +) + + +# --------------------------------------------------------------------------- +# Path resolution +# --------------------------------------------------------------------------- + + +def thread_plan_path(data_dir: str, thread_id: str) -> Path: + """Return the canonical thread plan path.""" + return Path(data_dir) / ".formicos" / "plans" / f"{thread_id}.md" + + +# --------------------------------------------------------------------------- +# Parsing +# --------------------------------------------------------------------------- + + +def parse_thread_plan(text: str) -> dict[str, Any]: + """Parse a thread plan markdown file into structured data. + + Returns:: + + { + "exists": True, + "goal": "...", + "thread_id": "...", + "steps": [ + {"index": 0, "status": "pending", "description": "..."}, + ... 
+ ], + "summary": { + "total": 5, + "completed": 2, + "pending": 3, + "failed": 0, + }, + } + """ + goal = "" + thread_id = "" + steps: list[dict[str, Any]] = [] + + for line in text.splitlines(): + stripped = line.strip() + + # Goal line: "# Thread Plan: " or "# Plan: " + if stripped.startswith("# Thread Plan:") or stripped.startswith("# Plan:"): + prefix = "# Thread Plan:" if stripped.startswith("# Thread Plan:") else "# Plan:" + goal = stripped[len(prefix):].strip() + continue + + # Thread ID line: "Thread: " + if stripped.startswith("Thread:"): + thread_id = stripped[len("Thread:"):].strip() + continue + + # Step line: "- [0] [pending] description" + m = _STEP_RE.match(stripped) + if m: + idx_str, status, desc = m.groups() + steps.append({ + "index": int(idx_str), + "status": status, + "description": desc, + }) + + # Compute summary counts + status_counts: dict[str, int] = {} + for step in steps: + s = step["status"] + status_counts[s] = status_counts.get(s, 0) + 1 + + return { + "exists": True, + "goal": goal, + "thread_id": thread_id, + "steps": steps, + "summary": { + "total": len(steps), + "completed": status_counts.get("completed", 0), + "pending": status_counts.get("pending", 0), + "failed": status_counts.get("failed", 0), + }, + } + + +def load_thread_plan(data_dir: str, thread_id: str) -> dict[str, Any]: + """Load and parse a thread plan from disk. + + Returns ``{"exists": False}`` when the plan file does not exist. + """ + if not data_dir or not thread_id: + return {"exists": False} + + path = thread_plan_path(data_dir, thread_id) + if not path.is_file(): + return {"exists": False} + + try: + text = path.read_text(encoding="utf-8") + except OSError: + return {"exists": False} + + plan = parse_thread_plan(text) + if not plan.get("thread_id"): + plan["thread_id"] = thread_id + return plan + + +def load_all_thread_plans(data_dir: str) -> list[dict[str, Any]]: + """Load all thread plans from the plans directory. 
+ + Returns a list of parsed plan dicts (each with ``exists: True``). + """ + if not data_dir: + return [] + + plans_dir = Path(data_dir) / ".formicos" / "plans" + if not plans_dir.is_dir(): + return [] + + plans: list[dict[str, Any]] = [] + try: + for plan_file in sorted(plans_dir.glob("*.md")): + thread_id = plan_file.stem + plan = load_thread_plan(data_dir, thread_id) + if plan.get("exists"): + plans.append(plan) + except OSError: + pass + + return plans + + +# --------------------------------------------------------------------------- +# Rendering — compact context for coordinator / Queen injection +# --------------------------------------------------------------------------- + + +def render_for_queen(plan: dict[str, Any]) -> str: + """Render a parsed thread plan into compact text. + + Returns an empty string when the plan does not exist or has no steps. + """ + if not plan.get("exists"): + return "" + + steps = plan.get("steps", []) + if not steps: + return "" + + goal = plan.get("goal", "") + summary = plan.get("summary", {}) + tid = plan.get("thread_id", "?") + + parts: list[str] = [] + header = f"[Plan:{tid[:12]}]" + if goal: + header += f" {goal}" + parts.append(header) + + completed = summary.get("completed", 0) + total = summary.get("total", 0) + if total: + parts.append(f" Progress: {completed}/{total}") + + # Show pending/failed steps only (completed are noise for the Queen) + for step in steps: + if step["status"] != "completed": + marker = "\u2717" if step["status"] == "failed" else "\u25cb" + parts.append( + f" {marker} [{step['index']}] [{step['status']}] " + f"{step['description']}" + ) + + return "\n".join(parts) diff --git a/src/formicos/surface/view_state.py b/src/formicos/surface/view_state.py index 952940f..0b5bf26 100644 --- a/src/formicos/surface/view_state.py +++ b/src/formicos/surface/view_state.py @@ -26,6 +26,7 @@ def build_snapshot( probed_local: dict[str, dict[str, Any]] | None = None, provider_health: dict[str, str] | None = None, 
registry: CapabilityRegistry | None = None, + addon_registrations: list[Any] | None = None, ) -> dict[str, Any]: """Build the full operator state snapshot matching types.ts OperatorStateSnapshot.""" return { @@ -39,9 +40,78 @@ def build_snapshot( "castes": _build_castes(castes), "runtimeConfig": _build_runtime_config(settings, probed=probed_local), "skillBankStats": skill_bank_stats or {"total": 0, "avgConfidence": 0.0}, + "addons": _build_addons(addon_registrations), } +def _build_addons( + registrations: list[Any] | None, +) -> list[dict[str, Any]]: + """Build addon summaries from AddonRegistration objects.""" + if not registrations: + return [] + result: list[dict[str, Any]] = [] + for reg in registrations: + manifest = reg.manifest + if getattr(manifest, "hidden", False): + continue + result.append({ + "name": manifest.name, + "version": manifest.version, + "description": manifest.description, + "tools": [ + { + "name": t.name, + "description": t.description, + "handler": t.handler, + "parameters": t.parameters, + "callCount": reg.tool_call_counts.get(t.name, 0), + } + for t in manifest.tools + ], + "handlers": [ + { + "event": h.event, + "lastFired": reg.last_handler_fire, + "errorCount": reg.handler_error_count, + } + for h in manifest.handlers + ], + "triggers": [ + { + "type": t.type, + "schedule": t.schedule, + "handler": t.handler, + "lastFired": reg.trigger_fire_times.get(t.handler), + } + for t in manifest.triggers + ], + "panels": [ + { + "target": p.get("target", ""), + "displayType": p.get("display_type", "status_card"), + "path": p.get("path", ""), + "addonName": p.get("addon_name", manifest.name), + } + for p in reg.registered_panels + ], + "config": [ + { + "key": c.key, + "type": c.type, + "default": c.default, + "label": c.label, + "options": c.options, + } + for c in manifest.config + ], + "status": reg.health_status, + "lastError": reg.last_error, + "disabled": getattr(reg, "disabled", False), + }) + return result + + def _build_tree(store: 
ProjectionStore) -> list[dict[str, Any]]: """Build tree nodes from workspaces → threads → colonies.""" nodes: list[dict[str, Any]] = [] @@ -323,7 +393,7 @@ def _build_protocol_status( mcp_tools = len(MCP_TOOL_NAMES) except Exception: # noqa: BLE001 - mcp_tools = 19 # known tool count from mcp_server.py + mcp_tools = 27 # known tool count from mcp_server.py try: from formicos.surface.agui_endpoint import AGUI_EVENT_TYPES diff --git a/src/formicos/surface/workflow_learning.py b/src/formicos/surface/workflow_learning.py new file mode 100644 index 0000000..ad8b052 --- /dev/null +++ b/src/formicos/surface/workflow_learning.py @@ -0,0 +1,376 @@ +"""Workflow learning — pattern recognition and procedure suggestions (Wave 72 Track 8-9). + +Deterministic extractors that propose ``workflow_template`` and +``procedure_suggestion`` actions through the existing action queue. +Called by the operational sweep in ``app.py`` (Team B wires the order). + +No LLM calls. No new events. All proposals flow through ``action_queue``. +""" + +from __future__ import annotations + +from collections import Counter +from typing import Any, cast + +import structlog + +from formicos.surface.action_queue import ( + STATUS_PENDING_REVIEW, + append_action, + create_action, + read_actions, +) + +log = structlog.get_logger() + +# --------------------------------------------------------------------------- +# Track 8: Workflow pattern recognition +# --------------------------------------------------------------------------- + +# Minimum successful occurrences before proposing a template +_MIN_SUCCESS_COUNT = 3 +# Minimum distinct threads to count as a real pattern +_MIN_DISTINCT_THREADS = 2 + + +def extract_workflow_patterns( + data_dir: str, + workspace_id: str, + outcomes: list[Any], + existing_templates: list[Any] | None = None, +) -> list[dict[str, Any]]: + """Scan colony outcomes for repeating successful patterns. + + Returns list of proposed actions (already persisted to the queue). 
+ """ + if not data_dir or not workspace_id: + return [] + + # Group successful outcomes by (strategy, caste_set) fingerprint + fingerprints: dict[str, list[Any]] = {} + for outcome in outcomes: + if not _is_successful(outcome): + continue + fp = _fingerprint(outcome) + if fp: + fingerprints.setdefault(fp, []).append(outcome) + + # Check for pending workflow_template actions to avoid duplicates + existing_actions = read_actions(data_dir, workspace_id) + pending_fps = _pending_workflow_fps(existing_actions) + + # Check existing learned templates to avoid re-proposing + template_fps = _template_fingerprints(existing_templates or []) + + proposals: list[dict[str, Any]] = [] + for fp, group in fingerprints.items(): + if len(group) < _MIN_SUCCESS_COUNT: + continue + if fp in pending_fps or fp in template_fps: + continue + + # Require multiple distinct threads + thread_ids = {_get_thread_id(o) for o in group if _get_thread_id(o)} + if len(thread_ids) < _MIN_DISTINCT_THREADS: + continue + + # Build proposal + representative = group[0] + strategy = _get_strategy(representative) + castes = _get_castes(representative) + avg_cost = sum(_get_cost(o) for o in group) / len(group) + + action = create_action( + kind="workflow_template", + title=f"Learned pattern: {strategy} with {', '.join(sorted(castes))}", + detail=( + f"Observed {len(group)} successful colonies across " + f"{len(thread_ids)} threads using strategy={strategy}, " + f"castes={sorted(castes)}. Avg cost ${avg_cost:.3f}." + ), + source_category="workflow_learning", + rationale=( + f"Repeated success pattern ({len(group)} occurrences, " + f"{len(thread_ids)} threads) suggests a reusable template." 
+ ), + payload={ + "fingerprint": fp, + "strategy": strategy, + "castes": sorted(castes), + "occurrence_count": len(group), + "avg_cost": round(avg_cost, 4), + "thread_ids": sorted(thread_ids)[:5], + }, + estimated_cost=round(avg_cost, 4), + confidence=min(0.9, 0.5 + 0.1 * len(group)), + created_by="workflow_learning", + ) + append_action(data_dir, workspace_id, action) + proposals.append(action) + + if proposals: + log.info( + "workflow_learning.patterns_proposed", + workspace_id=workspace_id, + count=len(proposals), + ) + + return proposals + + +# --------------------------------------------------------------------------- +# Track 9: Procedure suggestions +# --------------------------------------------------------------------------- + +# Minimum repeated behavior count to propose a procedure +_MIN_BEHAVIOR_COUNT = 3 + + +def detect_operator_patterns( + data_dir: str, + workspace_id: str, + actions: list[dict[str, Any]] | None = None, +) -> list[dict[str, Any]]: + """Scan operator behavior for recurring patterns that suggest procedures. + + Conservative heuristics only: + - Repeated rejection of autonomous work on shared keywords + - Repeated review-after-coding patterns + - Repeated testing-after-change behavior + + Returns list of proposed actions (already persisted to the queue). 
+ """ + if not data_dir or not workspace_id: + return [] + + all_actions = actions if actions is not None else read_actions(data_dir, workspace_id) + + # Avoid duplicate proposals + pending_suggestions = { + a.get("payload", {}).get("pattern_key", "") + for a in all_actions + if a.get("kind") == "procedure_suggestion" + and a.get("status") == STATUS_PENDING_REVIEW + } + + proposals: list[dict[str, Any]] = [] + + # Pattern 1: Repeated rejection of specific kinds/categories + rejection_patterns = _find_rejection_patterns(all_actions) + for pattern_key, detail in rejection_patterns: + if pattern_key in pending_suggestions: + continue + action = _create_procedure_suggestion( + data_dir, workspace_id, pattern_key, detail, + ) + proposals.append(action) + + # Pattern 2: Repeated approval of review-type work + review_patterns = _find_review_patterns(all_actions) + for pattern_key, detail in review_patterns: + if pattern_key in pending_suggestions: + continue + action = _create_procedure_suggestion( + data_dir, workspace_id, pattern_key, detail, + ) + proposals.append(action) + + if proposals: + log.info( + "workflow_learning.procedure_suggestions", + workspace_id=workspace_id, + count=len(proposals), + ) + + return proposals + + +# --------------------------------------------------------------------------- +# Internal helpers — pattern fingerprinting +# --------------------------------------------------------------------------- + + +def _fingerprint(outcome: Any) -> str: + """Create a stable fingerprint from (strategy, sorted castes).""" + strategy = _get_strategy(outcome) + castes = _get_castes(outcome) + if not strategy or not castes: + return "" + return f"{strategy}:{','.join(sorted(castes))}" + + +def _is_successful(outcome: Any) -> bool: + """Check if an outcome represents a successful colony.""" + if hasattr(outcome, "succeeded"): + return bool(outcome.succeeded) + if isinstance(outcome, dict): + return bool(cast("dict[str, Any]", outcome).get("succeeded", False)) 
+ return False + + +def _get_strategy(outcome: Any) -> str: + if hasattr(outcome, "strategy"): + return str(outcome.strategy) + if isinstance(outcome, dict): + return str(cast("dict[str, Any]", outcome).get("strategy", "")) + return "" + + +def _get_castes(outcome: Any) -> set[str]: + cc: Any + if hasattr(outcome, "caste_composition"): + cc = outcome.caste_composition + elif isinstance(outcome, dict): + cc = cast("dict[str, Any]", outcome).get("caste_composition", {}) + else: + return set() + if isinstance(cc, dict): + return set(cast("dict[str, Any]", cc).keys()) + return set() + + +def _get_cost(outcome: Any) -> float: + if hasattr(outcome, "total_cost"): + return float(outcome.total_cost) + if isinstance(outcome, dict): + return float(cast("dict[str, Any]", outcome).get("total_cost", 0.0)) + return 0.0 + + +def _get_thread_id(outcome: Any) -> str: + if hasattr(outcome, "colony_id"): + return str(getattr(outcome, "thread_id", "")) or str(outcome.colony_id)[:12] + if isinstance(outcome, dict): + d = cast("dict[str, Any]", outcome) + return str(d.get("thread_id", "")) or str(d.get("colony_id", ""))[:12] + return "" + + +def _pending_workflow_fps(actions: list[dict[str, Any]]) -> set[str]: + """Get fingerprints of pending workflow_template actions.""" + return { + a.get("payload", {}).get("fingerprint", "") + for a in actions + if a.get("kind") == "workflow_template" + and a.get("status") == STATUS_PENDING_REVIEW + } + + +def _template_fingerprints(templates: list[Any]) -> set[str]: + """Get fingerprints of existing learned templates.""" + fps: set[str] = set() + for t in templates: + strategy = getattr(t, "strategy", "") or "" + castes_raw = getattr(t, "castes", []) or [] + castes = sorted( + c.caste if hasattr(c, "caste") else str(c) for c in castes_raw + ) + if strategy and castes: + fps.add(f"{strategy}:{','.join(castes)}") + return fps + + +# --------------------------------------------------------------------------- +# Internal helpers — procedure suggestion 
patterns +# --------------------------------------------------------------------------- + + +def _find_rejection_patterns( + actions: list[dict[str, Any]], +) -> list[tuple[str, dict[str, Any]]]: + """Find repeated rejection of specific action categories.""" + results: list[tuple[str, dict[str, Any]]] = [] + + # Count rejections by source_category + rejection_counts: Counter[str] = Counter() + for a in actions: + if a.get("status") != "rejected": + continue + cat = a.get("source_category", "") + if cat: + rejection_counts[cat] += 1 + + for category, count in rejection_counts.items(): + if count >= _MIN_BEHAVIOR_COUNT: + pattern_key = f"reject:{category}" + results.append((pattern_key, { + "heading": "Autonomy", + "rule": ( + f"Require my approval before running {category} actions " + f"(rejected {count} times)." + ), + "reason": ( + f"You rejected {count} actions from category '{category}'. " + f"This suggests a standing rule may be appropriate." + ), + "pattern_type": "rejection", + "category": category, + "count": count, + })) + + return results + + +def _find_review_patterns( + actions: list[dict[str, Any]], +) -> list[tuple[str, dict[str, Any]]]: + """Find patterns suggesting review-after-work procedures.""" + results: list[tuple[str, dict[str, Any]]] = [] + + # Count approved maintenance actions — repeated approval of a category + # suggests the operator wants to keep reviewing rather than auto-approving + approval_counts: Counter[str] = Counter() + for a in actions: + if a.get("status") != "approved" or a.get("kind") != "maintenance": + continue + cat = a.get("source_category", "") + if cat: + approval_counts[cat] += 1 + + for category, count in approval_counts.items(): + if count >= _MIN_BEHAVIOR_COUNT: + pattern_key = f"review:{category}" + results.append((pattern_key, { + "heading": "Autonomy", + "rule": ( + f"Always review {category} actions before execution " + f"(approved {count} manually)." 
+ ), + "reason": ( + f"You manually approved {count} '{category}' actions. " + f"Formalizing this as a standing rule ensures consistent review." + ), + "pattern_type": "review", + "category": category, + "count": count, + })) + + return results + + +def _create_procedure_suggestion( + data_dir: str, + workspace_id: str, + pattern_key: str, + detail: dict[str, Any], +) -> dict[str, Any]: + """Create and persist a procedure_suggestion action.""" + action = create_action( + kind="procedure_suggestion", + title=f"Suggested rule: {detail['rule'][:80]}", + detail=detail.get("reason", ""), + source_category="workflow_learning", + rationale=detail.get("reason", ""), + payload={ + "pattern_key": pattern_key, + "heading": detail.get("heading", "General"), + "rule": detail["rule"], + "pattern_type": detail.get("pattern_type", ""), + "category": detail.get("category", ""), + "count": detail.get("count", 0), + }, + confidence=0.6, + created_by="workflow_learning", + ) + append_action(data_dir, workspace_id, action) + return action diff --git a/src/formicos/surface/ws_handler.py b/src/formicos/surface/ws_handler.py index 3142535..40eee66 100644 --- a/src/formicos/surface/ws_handler.py +++ b/src/formicos/surface/ws_handler.py @@ -157,6 +157,7 @@ def __init__( self._castes = castes self._runtime = runtime self._registry: CapabilityRegistry | None = None + self._addon_registrations: list[Any] | None = None # workspace_id -> set of connected websockets self._subscribers: dict[str, set[WebSocket]] = {} # colony_id -> set of queues for colony-scoped subscriptions (AG-UI, A2A attach) @@ -345,6 +346,7 @@ async def send_state(self, ws: WebSocket) -> None: probed_local=probed, provider_health=p_health, registry=self._registry, + addon_registrations=self._addon_registrations, ) await ws.send_text(json.dumps({"type": "state", "state": snapshot})) diff --git a/tests/unit/adapters/test_vector_qdrant.py b/tests/unit/adapters/test_vector_qdrant.py index 9ab775b..8cf2a50 100644 --- 
a/tests/unit/adapters/test_vector_qdrant.py +++ b/tests/unit/adapters/test_vector_qdrant.py @@ -127,8 +127,8 @@ async def test_ensure_collection_creates_payload_indexes(self) -> None: await port.ensure_collection("test_col") - # Should create 6 payload indexes - assert port._client.create_payload_index.await_count == 6 + # Should create 7 payload indexes (6 original + hierarchy_path from Wave 67) + assert port._client.create_payload_index.await_count == 7 # --------------------------------------------------------------------------- diff --git a/tests/unit/adapters/test_vector_qdrant_filters.py b/tests/unit/adapters/test_vector_qdrant_filters.py index c3aafa9..e595503 100644 --- a/tests/unit/adapters/test_vector_qdrant_filters.py +++ b/tests/unit/adapters/test_vector_qdrant_filters.py @@ -86,7 +86,7 @@ async def test_confidence_index_is_float(self) -> None: assert conf_calls[0].args[1] == "confidence" @pytest.mark.anyio() - async def test_all_six_indexes_created(self) -> None: + async def test_all_seven_indexes_created(self) -> None: port = QdrantVectorPort(embed_fn=_embed) port._client = AsyncMock() port._client.collection_exists = AsyncMock(return_value=False) @@ -102,6 +102,7 @@ async def test_all_six_indexes_created(self) -> None: assert indexed_fields == { "namespace", "confidence", "algorithm_version", "extracted_at", "source_colony", "source_colony_id", + "hierarchy_path", } diff --git a/tests/unit/addons/test_addon_capability.py b/tests/unit/addons/test_addon_capability.py new file mode 100644 index 0000000..1431431 --- /dev/null +++ b/tests/unit/addons/test_addon_capability.py @@ -0,0 +1,119 @@ +"""Tests for Wave 68 Track 5: addon capability metadata.""" + +from __future__ import annotations + +from typing import Any + +from formicos.surface.addon_loader import AddonManifest + + +class TestManifestCapabilityFields: + def test_parses_with_capability_fields(self) -> None: + manifest = AddonManifest( + name="test-addon", + version="1.0.0", + description="Test 
addon", + content_kinds=["documentation"], + path_globs=["**/*.md", "**/*.rst"], + search_tool="semantic_search_docs", + ) + assert manifest.content_kinds == ["documentation"] + assert manifest.path_globs == ["**/*.md", "**/*.rst"] + assert manifest.search_tool == "semantic_search_docs" + + def test_parses_without_capability_fields(self) -> None: + manifest = AddonManifest( + name="legacy-addon", + version="0.1.0", + ) + assert manifest.content_kinds == [] + assert manifest.path_globs == [] + assert manifest.search_tool == "" + + def test_existing_fields_preserved(self) -> None: + manifest = AddonManifest( + name="full-addon", + version="2.0.0", + description="Full addon", + content_kinds=["source_code"], + path_globs=["**/*.py"], + search_tool="semantic_search_code", + tools=[], + handlers=[], + config=[], + ) + assert manifest.name == "full-addon" + assert manifest.tools == [] + assert manifest.content_kinds == ["source_code"] + + +class TestListAddonsCapabilityText: + def _make_dispatcher( + self, manifests: list[AddonManifest], + ) -> Any: + """Create a minimal mock dispatcher with manifests.""" + from unittest.mock import MagicMock + + dispatcher = MagicMock() + dispatcher._addon_manifests = manifests + dispatcher._addon_tool_specs = [] + dispatcher._handlers = {} + # Bind the real method + from formicos.surface.queen_tools import QueenToolDispatcher + dispatcher._list_addons = ( + QueenToolDispatcher._list_addons.__get__(dispatcher) + ) + return dispatcher + + def test_includes_capability_text(self) -> None: + from formicos.surface.addon_loader import ( + AddonToolSpec, + ) + + manifest = AddonManifest( + name="docs-index", + description="Semantic documentation search", + content_kinds=["documentation"], + path_globs=["**/*.md", "**/*.rst"], + search_tool="semantic_search_docs", + tools=[ + AddonToolSpec( + name="semantic_search_docs", + description="Search docs", + handler="search.py::handle_semantic_search", + ), + ], + ) + dispatcher = 
self._make_dispatcher([manifest]) + text, meta = dispatcher._list_addons() + + assert "Content: documentation" in text + assert "Files: **/*.md, **/*.rst" in text + assert "Search via: semantic_search_docs" in text + + def test_includes_refresh_path_when_present(self) -> None: + from formicos.surface.addon_loader import ( + AddonTriggerSpec, + ) + + manifest = AddonManifest( + name="docs-index", + description="Docs search", + content_kinds=["documentation"], + search_tool="semantic_search_docs", + triggers=[ + AddonTriggerSpec( + type="manual", + handler="indexer.py::incremental_reindex", + ), + ], + ) + dispatcher = self._make_dispatcher([manifest]) + text, _ = dispatcher._list_addons() + + assert "Index via: indexer.py::incremental_reindex" in text + + def test_no_manifests_shows_empty(self) -> None: + dispatcher = self._make_dispatcher([]) + text, _ = dispatcher._list_addons() + assert "No addons installed" in text diff --git a/tests/unit/addons/test_addon_config.py b/tests/unit/addons/test_addon_config.py new file mode 100644 index 0000000..6835dc9 --- /dev/null +++ b/tests/unit/addons/test_addon_config.py @@ -0,0 +1,85 @@ +"""Tests for Wave 66 T2: Addon config surface.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +import pytest +import yaml + +from formicos.surface.addon_loader import AddonConfigParam, AddonManifest + + +class TestAddonConfigManifest: + """Test that addon manifests parse config declarations.""" + + def test_manifest_parses_config_params(self) -> None: + raw = { + "name": "test-addon", + "version": "1.0.0", + "config": [ + { + "key": "enabled", + "type": "boolean", + "default": True, + "label": "Enable feature", + }, + { + "key": "mode", + "type": "select", + "default": "fast", + "label": "Processing mode", + "options": ["fast", "thorough"], + }, + ], + } + manifest = AddonManifest(**raw) + assert len(manifest.config) == 2 + assert manifest.config[0].key == "enabled" + assert 
manifest.config[0].type == "boolean" + assert manifest.config[0].default is True + assert manifest.config[1].options == ["fast", "thorough"] + + def test_manifest_config_defaults_empty(self) -> None: + manifest = AddonManifest(name="bare-addon") + assert manifest.config == [] + + def test_config_param_defaults(self) -> None: + param = AddonConfigParam(key="foo") + assert param.type == "string" + assert param.default is None + assert param.label == "" + assert param.options == [] + + def test_real_manifests_parse_config(self) -> None: + """All shipped addon manifests should parse with config field.""" + addons_dir = Path(__file__).resolve().parents[3] / "addons" + for child in sorted(addons_dir.iterdir()): + manifest_path = child / "addon.yaml" + if child.is_dir() and manifest_path.exists(): + raw = yaml.safe_load(manifest_path.read_text(encoding="utf-8")) + manifest = AddonManifest(**raw) + # All three shipped addons have config blocks + assert isinstance(manifest.config, list), f"{child.name} config not a list" + + def test_git_control_has_auto_stage_config(self) -> None: + manifest_path = ( + Path(__file__).resolve().parents[3] + / "addons" / "git-control" / "addon.yaml" + ) + raw = yaml.safe_load(manifest_path.read_text(encoding="utf-8")) + manifest = AddonManifest(**raw) + keys = [c.key for c in manifest.config] + assert "git_auto_stage" in keys + + def test_codebase_index_has_chunk_config(self) -> None: + manifest_path = ( + Path(__file__).resolve().parents[3] + / "addons" / "codebase-index" / "addon.yaml" + ) + raw = yaml.safe_load(manifest_path.read_text(encoding="utf-8")) + manifest = AddonManifest(**raw) + keys = [c.key for c in manifest.config] + assert "chunk_size" in keys + assert "skip_dirs" in keys diff --git a/tests/unit/addons/test_addon_panels_routes.py b/tests/unit/addons/test_addon_panels_routes.py new file mode 100644 index 0000000..b2f2da8 --- /dev/null +++ b/tests/unit/addons/test_addon_panels_routes.py @@ -0,0 +1,120 @@ +"""Tests for Wave 66 
T3: Addon panels + routes wiring.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any +from unittest.mock import AsyncMock + +import pytest +import yaml + +from formicos.surface.addon_loader import ( + AddonManifest, + AddonRegistration, + register_addon, +) + + +class TestRouteRegistration: + """Test that addon routes are resolved and stored on registration.""" + + def test_route_registered_on_addon(self) -> None: + raw = { + "name": "test-addon", + "routes": [ + {"path": "/status", "handler": "status.py::get_status"}, + ], + } + manifest = AddonManifest(**raw) + # Route resolution will fail (no module), but we can test manifest parsing + reg = register_addon(manifest) + # Handler resolution fails gracefully — route not registered + assert isinstance(reg.registered_routes, list) + + def test_panel_registered_on_addon(self) -> None: + raw = { + "name": "test-addon", + "panels": [ + { + "target": "knowledge", + "display_type": "status_card", + "path": "/status", + "handler": "status.py::get_status", + }, + ], + } + manifest = AddonManifest(**raw) + reg = register_addon(manifest) + assert len(reg.registered_panels) == 1 + assert reg.registered_panels[0]["target"] == "knowledge" + assert reg.registered_panels[0]["addon_name"] == "test-addon" + + def test_real_manifests_have_routes_and_panels(self) -> None: + """Shipped addon manifests should declare routes and panels.""" + addons_dir = Path(__file__).resolve().parents[3] / "addons" + found_routes = 0 + found_panels = 0 + for child in sorted(addons_dir.iterdir()): + manifest_path = child / "addon.yaml" + if child.is_dir() and manifest_path.exists(): + raw = yaml.safe_load(manifest_path.read_text(encoding="utf-8")) + manifest = AddonManifest(**raw) + found_routes += len(manifest.routes) + found_panels += len(manifest.panels) + # git-control and codebase-index each declare 1 route + 1 panel + assert found_routes >= 2 + assert found_panels >= 2 + + +class TestAddonStatusEndpoints: 
+ """Test addon status endpoint functions.""" + + @pytest.mark.asyncio + async def test_codebase_index_status_no_vector(self) -> None: + from formicos.addons.codebase_index.status import get_status + + result = await get_status({}, "ws-1", "t-1", runtime_context={}) + assert result["display_type"] == "status_card" + assert any(i["label"] == "Status" for i in result["items"]) + + @pytest.mark.asyncio + async def test_codebase_index_status_with_vector(self) -> None: + from formicos.addons.codebase_index.status import get_status + + mock_vp = AsyncMock() + mock_vp.collection_info.return_value = {"points_count": 1247} + result = await get_status( + {}, "ws-1", "t-1", + runtime_context={"vector_port": mock_vp}, + ) + assert result["display_type"] == "status_card" + assert any(i["value"] == "1247" for i in result["items"]) + + @pytest.mark.asyncio + async def test_git_status_no_workspace(self) -> None: + from formicos.addons.git_control.status import get_status + + result = await get_status({}, "ws-1", "t-1", runtime_context={}) + assert result["display_type"] == "status_card" + assert any(i["label"] == "Status" for i in result["items"]) + + +class TestAddonRegistrationFields: + """Test that AddonRegistration has route and panel fields.""" + + def test_registration_has_route_and_panel_lists(self) -> None: + manifest = AddonManifest(name="test") + reg = AddonRegistration(manifest) + assert reg.registered_routes == [] + assert reg.registered_panels == [] + + def test_templates_still_warn(self) -> None: + """Templates field should still log a warning (unimplemented).""" + manifest = AddonManifest( + name="test", + templates=[{"name": "foo"}], + ) + # Should not raise + reg = register_addon(manifest) + assert reg.registered_panels == [] diff --git a/tests/unit/addons/test_docs_index.py b/tests/unit/addons/test_docs_index.py new file mode 100644 index 0000000..93641b6 --- /dev/null +++ b/tests/unit/addons/test_docs_index.py @@ -0,0 +1,185 @@ +"""Tests for the docs-index addon 
— chunking, search, reindex, status.""" + +from __future__ import annotations + +from typing import Any +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from formicos.addons.docs_index.indexer import DocChunk, chunk_document + +# --------------------------------------------------------------------------- +# Chunking tests +# --------------------------------------------------------------------------- + + +class TestMarkdownChunking: + def test_splits_on_headings(self) -> None: + content = "# Intro\nHello world\n## Details\nSome details here\n### Sub\nDeep content\n" + chunks = chunk_document(content, "README.md") + assert len(chunks) == 3 + assert chunks[0].section == "Intro" + assert chunks[1].section == "Details" + assert chunks[2].section == "Sub" + + def test_single_section(self) -> None: + content = "# Only heading\nJust one section.\n" + chunks = chunk_document(content, "doc.md") + assert len(chunks) == 1 + assert chunks[0].section == "Only heading" + + def test_intro_before_first_heading(self) -> None: + content = "Some preamble text\n# First heading\nBody\n" + chunks = chunk_document(content, "doc.md") + assert len(chunks) == 2 + assert chunks[0].section == "(intro)" + assert chunks[1].section == "First heading" + + +class TestChunkMetadata: + def test_includes_section_and_lines(self) -> None: + content = "# Title\nLine 1\nLine 2\n## Next\nLine 3\n" + chunks = chunk_document(content, "test.md") + for chunk in chunks: + assert isinstance(chunk, DocChunk) + assert chunk.section + assert chunk.line_start >= 1 + assert chunk.line_end >= chunk.line_start + assert chunk.path == "test.md" + + def test_chunk_id_is_deterministic(self) -> None: + content = "# Heading\nBody text\n" + chunks_a = chunk_document(content, "a.md") + chunks_b = chunk_document(content, "a.md") + assert chunks_a[0].id == chunks_b[0].id + + +class TestRstChunking: + def test_splits_on_underlines(self) -> None: + content = "Title\n=====\nIntro text\n\nSubtitle\n--------\nMore 
text\n" + chunks = chunk_document(content, "doc.rst") + assert len(chunks) == 2 + assert chunks[0].section == "Title" + assert chunks[1].section == "Subtitle" + + +class TestHtmlChunking: + def test_splits_on_heading_tags(self) -> None: + content = "
<h1>Welcome</h1>\n<p>Hello</p>\n<h2>Details</h2>\n<p>Info</p>
\n" + chunks = chunk_document(content, "page.html") + assert len(chunks) == 2 + assert chunks[0].section == "Welcome" + assert chunks[1].section == "Details" + + +class TestTextChunking: + def test_splits_on_blank_lines(self) -> None: + content = "Paragraph one line one\nLine two\n\nParagraph two\n" + chunks = chunk_document(content, "notes.txt") + assert len(chunks) == 2 + + +# --------------------------------------------------------------------------- +# Search handler tests +# --------------------------------------------------------------------------- + + +class TestHandleSemanticSearch: + @pytest.mark.asyncio + async def test_queries_docs_index(self) -> None: + from formicos.addons.docs_index.search import handle_semantic_search + + mock_hit = MagicMock() + mock_hit.metadata = {} + mock_hit.payload = { + "path": "docs/guide.md", + "section": "Setup", + "line_start": 1, + "line_end": 5, + "content": "Install with pip", + } + mock_hit.score = 0.95 + + vector_port = AsyncMock() + vector_port.search = AsyncMock(return_value=[mock_hit]) + + result = await handle_semantic_search( + {"query": "installation"}, + workspace_id="ws-1", + thread_id="th-1", + runtime_context={"vector_port": vector_port}, + ) + + vector_port.search.assert_called_once_with("docs_index", "installation", 10) + assert "docs/guide.md" in result + assert "Setup" in result + + @pytest.mark.asyncio + async def test_missing_query_returns_error(self) -> None: + from formicos.addons.docs_index.search import handle_semantic_search + + result = await handle_semantic_search( + {}, + workspace_id="ws-1", + thread_id="th-1", + ) + assert "Error" in result + + +# --------------------------------------------------------------------------- +# Reindex handler tests +# --------------------------------------------------------------------------- + + +class TestHandleReindex: + @pytest.mark.asyncio + async def test_indexes_docs_from_workspace(self, tmp_path: Any) -> None: + from formicos.addons.docs_index.search 
import handle_reindex + + # Create a doc file in the temp workspace + doc = tmp_path / "guide.md" + doc.write_text("# Guide\nSome guide content\n") + + vector_port = AsyncMock() + vector_port.upsert = AsyncMock() + + result = await handle_reindex( + {}, + workspace_id="ws-1", + thread_id="th-1", + runtime_context={ + "vector_port": vector_port, + "workspace_root_fn": lambda ws_id: tmp_path, + }, + ) + + assert "1 files" in result + vector_port.upsert.assert_called_once() + + +# --------------------------------------------------------------------------- +# Status endpoint test +# --------------------------------------------------------------------------- + + +class TestStatusEndpoint: + @pytest.mark.asyncio + async def test_returns_status_card(self) -> None: + from formicos.addons.docs_index.status import get_status + + vector_port = AsyncMock() + vector_port.collection_info = AsyncMock(return_value={"points_count": 42}) + + result = await get_status( + {}, + workspace_id="ws-1", + _thread_id="th-1", + runtime_context={"vector_port": vector_port}, + ) + + assert result["display_type"] == "status_card" + items = result["items"] + labels = [i["label"] for i in items] + assert "Documents indexed" in labels + assert "Collection" in labels diff --git a/tests/unit/addons/test_mcp_bridge.py b/tests/unit/addons/test_mcp_bridge.py new file mode 100644 index 0000000..71c13c8 --- /dev/null +++ b/tests/unit/addons/test_mcp_bridge.py @@ -0,0 +1,266 @@ +"""Wave 70.0 Team A: MCP bridge addon tests. + +Tests cover: +1. Bridge connects to configured server (mocked) +2. Bridge health reports disconnected/error states +3. Discovery handles unavailable server gracefully +4. _list_addons includes generic bridge health without name-based branching +5. 
Addon summary payload exposes bridge health additively +""" + +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from formicos.addons.mcp_bridge.client import McpBridge, ServerHealth + + +# --------------------------------------------------------------------------- +# 1. Bridge connects to configured server +# --------------------------------------------------------------------------- + +@pytest.mark.anyio() +async def test_bridge_connect_and_list_tools() -> None: + """Bridge establishes connection and lists remote tools.""" + bridge = McpBridge() + bridge.configure([{"name": "test-srv", "url": "http://localhost:9999"}]) + + mock_tool = MagicMock() + mock_tool.name = "remote_add" + mock_tool.description = "Add two numbers" + mock_tool.inputSchema = {"type": "object"} + + mock_client = MagicMock() + mock_client.is_connected = MagicMock(return_value=True) + mock_client.list_tools = AsyncMock(return_value=[mock_tool]) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + + with patch( + "formicos.addons.mcp_bridge.client.Client", + return_value=mock_client, + ): + tools = await bridge.list_tools("test-srv") + + assert len(tools) == 1 + assert tools[0]["name"] == "remote_add" + assert tools[0]["server"] == "test-srv" + + health = bridge.get_bridge_health() + assert health["connectedServers"] == 1 + assert health["totalRemoteTools"] == 1 + + +# --------------------------------------------------------------------------- +# 2. 
Bridge health reports disconnected/error states +# --------------------------------------------------------------------------- + +def test_bridge_health_disconnected() -> None: + """Health reports disconnected servers correctly.""" + bridge = McpBridge() + bridge.configure([ + {"name": "srv-a", "url": "http://a:9999"}, + {"name": "srv-b", "url": "http://b:9999"}, + ]) + # Simulate srv-a connected, srv-b disconnected with errors + bridge._health["srv-a"] = ServerHealth( + name="srv-a", url="http://a:9999", + connected=True, tool_count=3, + ) + bridge._health["srv-b"] = ServerHealth( + name="srv-b", url="http://b:9999", + connected=False, error_count=5, + last_error="Connection refused", + ) + + health = bridge.get_bridge_health() + assert health["connectedServers"] == 1 + assert health["unhealthyServers"] == 1 + assert health["totalRemoteTools"] == 3 + # srv-b should show error status (error_count >= 3) + srv_b = next(s for s in health["servers"] if s["name"] == "srv-b") + assert srv_b["status"] == "error" + + +# --------------------------------------------------------------------------- +# 3. 
Discovery handles unavailable server gracefully +# --------------------------------------------------------------------------- + +@pytest.mark.anyio() +async def test_discovery_unavailable_server() -> None: + """discover_mcp_tools returns friendly message when server is down.""" + from formicos.addons.mcp_bridge.discovery import handle_discover_tools + + bridge = McpBridge() + bridge.configure([{"name": "down-srv", "url": "http://localhost:1"}]) + + mock_client = MagicMock() + mock_client.__aenter__ = AsyncMock(side_effect=ConnectionError("refused")) + mock_client.__aexit__ = AsyncMock(return_value=None) + + with patch( + "formicos.addons.mcp_bridge.client.Client", + return_value=mock_client, + ): + result = await handle_discover_tools( + {"server": "down-srv"}, "ws1", "th1", + runtime_context={"mcp_bridge": bridge}, + ) + + assert "No tools discovered" in result + + +@pytest.mark.anyio() +async def test_discovery_no_bridge() -> None: + """discover_mcp_tools returns message when bridge not configured.""" + from formicos.addons.mcp_bridge.discovery import handle_discover_tools + + result = await handle_discover_tools({}, "ws1", "th1", runtime_context={}) + assert "not configured" in result + + +# --------------------------------------------------------------------------- +# 4. 
_list_addons includes generic bridge health (no name-based branching) +# --------------------------------------------------------------------------- + +def test_list_addons_bridge_health_generic() -> None: + """_list_addons surfaces bridge health via capability protocol.""" + from formicos.surface.queen_tools import QueenToolDispatcher + + runtime = MagicMock() + dispatcher = QueenToolDispatcher(runtime) + + # Simulate addon manifests + manifest = SimpleNamespace( + name="mcp-bridge", + description="Bridge remote MCP servers", + content_kinds=[], + path_globs=[], + search_tool="", + tools=[SimpleNamespace(name="discover_mcp_tools")], + triggers=[], + ) + dispatcher._addon_manifests = [manifest] + + # Simulate bridge health callable in runtime context + def fake_bridge_health() -> dict[str, Any]: + return { + "connectedServers": 2, + "unhealthyServers": 0, + "totalRemoteTools": 7, + "servers": [], + } + + dispatcher._addon_runtime_context = { + "get_bridge_health": fake_bridge_health, + } + + text, _ = dispatcher._list_addons() + + assert "Bridge Status" in text + assert "2 connected" in text + assert "7 remote tools" in text + # Verify no addon-name branching — the text doesn't come from + # checking addon name, it comes from the capability protocol + assert "mcp-bridge" in text # Addon listed normally by name + + +# --------------------------------------------------------------------------- +# 5. 
Addon summary payload exposes bridge health additively +# --------------------------------------------------------------------------- + +def test_addon_summary_bridge_health() -> None: + """GET /api/v1/addons includes bridgeHealth when capability present.""" + from starlette.testclient import TestClient + + from formicos.surface.addon_loader import AddonManifest, AddonRegistration + + manifest = AddonManifest( + name="mcp-bridge", + version="1.0.0", + description="Bridge remote MCP servers", + ) + reg = AddonRegistration(manifest) + + def fake_health() -> dict[str, Any]: + return { + "connectedServers": 1, + "unhealthyServers": 0, + "totalRemoteTools": 5, + "servers": [{"name": "srv", "status": "connected"}], + } + + reg.runtime_context = {"get_bridge_health": fake_health} + + # Build a minimal Starlette app with just the addons endpoint + from starlette.applications import Starlette + from starlette.routing import Route + + from formicos.surface.routes.api import routes + + settings_mock = MagicMock() + settings_mock.system = SimpleNamespace(data_dir="/tmp/test") + + route_list = routes( + runtime=MagicMock(), + settings=settings_mock, + castes=None, + castes_path="", + config_path="", + vector_store=None, + kg_adapter=None, + embed_client=None, + skill_collection="", + ws_manager=MagicMock(), + ) + app = Starlette(routes=route_list) + app.state.addon_registrations = [reg] # type: ignore[attr-defined] + + client = TestClient(app) + resp = client.get("/api/v1/addons") + assert resp.status_code == 200 + data = resp.json() + assert len(data) == 1 + assert "bridgeHealth" in data[0] + bh = data[0]["bridgeHealth"] + assert bh["connectedServers"] == 1 + assert bh["totalRemoteTools"] == 5 + + +# --------------------------------------------------------------------------- +# 6. 
call_mcp_tool handler +# --------------------------------------------------------------------------- + +@pytest.mark.anyio() +async def test_call_tool_handler() -> None: + """call_mcp_tool handler delegates to bridge.""" + from formicos.addons.mcp_bridge.discovery import handle_call_tool + + bridge = MagicMock() + bridge.call_tool = AsyncMock(return_value="result: 42") + + result = await handle_call_tool( + {"server": "srv", "tool": "add", "arguments": {"a": 1}}, + "ws1", "th1", + runtime_context={"mcp_bridge": bridge}, + ) + + assert result == "result: 42" + bridge.call_tool.assert_awaited_once_with("srv", "add", {"a": 1}) + + +@pytest.mark.anyio() +async def test_call_tool_missing_params() -> None: + """call_mcp_tool returns error when required params missing.""" + from formicos.addons.mcp_bridge.discovery import handle_call_tool + + result = await handle_call_tool( + {"server": "", "tool": ""}, + "ws1", "th1", + runtime_context={"mcp_bridge": MagicMock()}, + ) + assert "required" in result.lower() diff --git a/tests/unit/engine/test_scoring_math_ess.py b/tests/unit/engine/test_scoring_math_ess.py new file mode 100644 index 0000000..000f61e --- /dev/null +++ b/tests/unit/engine/test_scoring_math_ess.py @@ -0,0 +1,85 @@ +"""Tests for Wave 67 ESS cap and rank-based credit assignment.""" + +from __future__ import annotations + +import math + +from formicos.engine.scoring_math import rescale_preserving_mean + + +class TestRescalePreservingMean: + def test_under_cap_returns_unchanged(self) -> None: + alpha, beta = rescale_preserving_mean(10.0, 5.0) + assert alpha == 10.0 + assert beta == 5.0 + + def test_over_cap_rescales_to_150(self) -> None: + alpha, beta = rescale_preserving_mean(100.0, 80.0) + ess = alpha + beta + assert abs(ess - 150.0) < 1e-9 + + def test_over_cap_preserves_mean(self) -> None: + orig_mean = 100.0 / (100.0 + 80.0) + alpha, beta = rescale_preserving_mean(100.0, 80.0) + new_mean = alpha / (alpha + beta) + assert abs(new_mean - orig_mean) < 1e-9 + + 
def test_exact_cap_returns_unchanged(self) -> None: + alpha, beta = rescale_preserving_mean(75.0, 75.0) + assert alpha == 75.0 + assert beta == 75.0 + + def test_large_asymmetric_preserves_mean(self) -> None: + # alpha >> beta: mean should stay near 1.0 + orig_alpha, orig_beta = 290.0, 10.0 + orig_mean = orig_alpha / (orig_alpha + orig_beta) + alpha, beta = rescale_preserving_mean(orig_alpha, orig_beta) + ess = alpha + beta + assert abs(ess - 150.0) < 1e-9 + new_mean = alpha / (alpha + beta) + assert abs(new_mean - orig_mean) < 1e-9 + + def test_custom_max_ess(self) -> None: + alpha, beta = rescale_preserving_mean(60.0, 60.0, max_ess=100.0) + ess = alpha + beta + assert abs(ess - 100.0) < 1e-9 + + def test_small_values_under_cap(self) -> None: + alpha, beta = rescale_preserving_mean(1.0, 1.0) + assert alpha == 1.0 + assert beta == 1.0 + + +class TestRankCredit: + """Verify geometric credit 0.7^rank produces expected decay.""" + + def test_credit_values(self) -> None: + credits = [0.7 ** r for r in range(5)] + assert abs(credits[0] - 1.0) < 1e-9 + assert abs(credits[1] - 0.7) < 1e-9 + assert abs(credits[2] - 0.49) < 1e-9 + assert abs(credits[3] - 0.343) < 1e-9 + assert abs(credits[4] - 0.2401) < 1e-9 + + def test_rank_0_gets_full_delta(self) -> None: + quality_score = 0.8 + base_delta = min(max(0.5 + quality_score, 0.5), 1.5) + credit = 0.7 ** 0 + assert abs(base_delta * credit - base_delta) < 1e-9 + + def test_rank_5_gets_diminished_delta(self) -> None: + quality_score = 0.8 + base_delta = min(max(0.5 + quality_score, 0.5), 1.5) + credit_0 = 0.7 ** 0 + credit_5 = 0.7 ** 5 + delta_0 = base_delta * credit_0 + delta_5 = base_delta * credit_5 + assert delta_0 > delta_5 + # rank-5 should be ~16.8% of rank-0 + assert abs(delta_5 / delta_0 - 0.7 ** 5) < 1e-9 + + def test_delta_always_positive(self) -> None: + """Even at high ranks, credit is positive (never zero).""" + for rank in range(20): + credit = 0.7 ** rank + assert credit > 0 diff --git 
a/tests/unit/surface/test_action_queue.py b/tests/unit/surface/test_action_queue.py new file mode 100644 index 0000000..016b135 --- /dev/null +++ b/tests/unit/surface/test_action_queue.py @@ -0,0 +1,394 @@ +"""Tests for Wave 71.0 Team B: Durable action queue.""" + +from __future__ import annotations + +import gzip +import json +from pathlib import Path +from typing import Any +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from starlette.testclient import TestClient + +from formicos.surface.action_queue import ( + STATUS_APPROVED, + STATUS_EXECUTED, + STATUS_FAILED, + STATUS_PENDING_REVIEW, + STATUS_REJECTED, + STATUS_SELF_REJECTED, + append_action, + compact_action_log, + create_action, + list_actions, + queue_from_insight, + read_actions, + update_action, +) + + +WS_ID = "ws-test-1" + + +# --------------------------------------------------------------------------- +# Test 1: Queue appends and reads action records correctly +# --------------------------------------------------------------------------- + + +class TestAppendAndRead: + def test_append_and_read(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + action = create_action(kind="maintenance", title="Test action") + append_action(data_dir, WS_ID, action) + + actions = read_actions(data_dir, WS_ID) + assert len(actions) == 1 + assert actions[0]["kind"] == "maintenance" + assert actions[0]["title"] == "Test action" + assert actions[0]["status"] == STATUS_PENDING_REVIEW + assert actions[0]["action_id"].startswith("act-") + + def test_multiple_appends(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + for i in range(5): + action = create_action(kind="maintenance", title=f"Action {i}") + append_action(data_dir, WS_ID, action) + + actions = read_actions(data_dir, WS_ID) + assert len(actions) == 5 + + def test_read_empty(self, tmp_path: Path) -> None: + actions = read_actions(str(tmp_path), WS_ID) + assert actions == [] + + def test_create_action_fields(self) -> None: + 
action = create_action( + kind="continuation", + title="Resume work", + detail="Thread stalled", + source_category="stalled_thread", + blast_radius=0.3, + estimated_cost=0.05, + confidence=0.8, + requires_approval=True, + created_by="coordinator", + ) + assert action["kind"] == "continuation" + assert action["blast_radius"] == 0.3 + assert action["requires_approval"] is True + assert action["status"] == STATUS_PENDING_REVIEW + + +# --------------------------------------------------------------------------- +# Test 2: Status transitions +# --------------------------------------------------------------------------- + + +class TestStatusTransitions: + def test_pending_to_approved_to_executed(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + action = create_action(kind="maintenance", title="Fix stale") + append_action(data_dir, WS_ID, action) + aid = action["action_id"] + + # Approve + updated = update_action(data_dir, WS_ID, aid, {"status": STATUS_APPROVED}) + assert updated is not None + assert updated["status"] == STATUS_APPROVED + + # Execute + updated = update_action(data_dir, WS_ID, aid, {"status": STATUS_EXECUTED}) + assert updated is not None + assert updated["status"] == STATUS_EXECUTED + + def test_pending_to_rejected(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + action = create_action(kind="maintenance", title="Risky") + append_action(data_dir, WS_ID, action) + + updated = update_action( + data_dir, WS_ID, action["action_id"], + {"status": STATUS_REJECTED, "operator_reason": "Too risky"}, + ) + assert updated is not None + assert updated["status"] == STATUS_REJECTED + assert updated["operator_reason"] == "Too risky" + + def test_update_nonexistent(self, tmp_path: Path) -> None: + result = update_action(str(tmp_path), WS_ID, "fake-id", {"status": "approved"}) + assert result is None + + +# --------------------------------------------------------------------------- +# Test 3: Approve/reject endpoints return structured JSON +# 
--------------------------------------------------------------------------- + + +class TestEndpoints: + def _make_app(self, data_dir: str) -> Any: + from starlette.applications import Starlette + from starlette.requests import Request + from starlette.responses import JSONResponse + from starlette.routing import Route + + async def list_op_actions(request: Request) -> JSONResponse: + status_filter = request.query_params.get("status", "") + kind_filter = request.query_params.get("kind", "") + limit = int(request.query_params.get("limit", "100")) + result = list_actions( + data_dir, request.path_params["workspace_id"], + status=status_filter, kind=kind_filter, limit=limit, + ) + return JSONResponse(result) + + async def approve_ep(request: Request) -> JSONResponse: + ws = request.path_params["workspace_id"] + aid = request.path_params["action_id"] + updated = update_action(data_dir, ws, aid, {"status": STATUS_APPROVED}) + if updated is None: + return JSONResponse({"error": "not found"}, status_code=404) + return JSONResponse({"ok": True, "action_id": aid}) + + async def reject_ep(request: Request) -> JSONResponse: + ws = request.path_params["workspace_id"] + aid = request.path_params["action_id"] + body: dict[str, Any] = {} + try: + body = await request.json() + except Exception: + pass + reason = body.get("reason", "") + updated = update_action( + data_dir, ws, aid, + {"status": STATUS_REJECTED, "operator_reason": reason}, + ) + if updated is None: + return JSONResponse({"error": "not found"}, status_code=404) + return JSONResponse({"ok": True, "action_id": aid}) + + return Starlette(routes=[ + Route( + "/api/v1/workspaces/{workspace_id}/operations/actions", + list_op_actions, methods=["GET"], + ), + Route( + "/api/v1/workspaces/{workspace_id}/operations/actions/{action_id}/approve", + approve_ep, methods=["POST"], + ), + Route( + "/api/v1/workspaces/{workspace_id}/operations/actions/{action_id}/reject", + reject_ep, methods=["POST"], + ), + ]) + + def 
test_list_endpoint(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + for i in range(3): + append_action( + data_dir, WS_ID, + create_action(kind="maintenance", title=f"A{i}"), + ) + app = self._make_app(data_dir) + client = TestClient(app) + resp = client.get(f"/api/v1/workspaces/{WS_ID}/operations/actions") + assert resp.status_code == 200 + data = resp.json() + assert data["total"] == 3 + assert "counts_by_status" in data + assert "counts_by_kind" in data + + def test_list_filter_by_status(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + a1 = create_action(kind="maintenance", title="A1") + a2 = create_action(kind="maintenance", title="A2") + a2["status"] = STATUS_APPROVED + append_action(data_dir, WS_ID, a1) + append_action(data_dir, WS_ID, a2) + + app = self._make_app(data_dir) + client = TestClient(app) + resp = client.get( + f"/api/v1/workspaces/{WS_ID}/operations/actions?status=approved", + ) + data = resp.json() + assert data["total"] == 1 + assert data["actions"][0]["title"] == "A2" + + def test_approve_endpoint(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + action = create_action(kind="maintenance", title="Approvable") + append_action(data_dir, WS_ID, action) + aid = action["action_id"] + + app = self._make_app(data_dir) + client = TestClient(app) + resp = client.post( + f"/api/v1/workspaces/{WS_ID}/operations/actions/{aid}/approve", + ) + assert resp.status_code == 200 + assert resp.json()["ok"] is True + + # Verify persisted + actions = read_actions(data_dir, WS_ID) + assert actions[0]["status"] == STATUS_APPROVED + + def test_reject_with_reason(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + action = create_action(kind="maintenance", title="Rejectable") + append_action(data_dir, WS_ID, action) + aid = action["action_id"] + + app = self._make_app(data_dir) + client = TestClient(app) + resp = client.post( + f"/api/v1/workspaces/{WS_ID}/operations/actions/{aid}/reject", + json={"reason": "Not needed"}, + ) + 
assert resp.status_code == 200 + + actions = read_actions(data_dir, WS_ID) + assert actions[0]["status"] == STATUS_REJECTED + assert actions[0]["operator_reason"] == "Not needed" + + def test_approve_nonexistent(self, tmp_path: Path) -> None: + app = self._make_app(str(tmp_path)) + client = TestClient(app) + resp = client.post( + f"/api/v1/workspaces/{WS_ID}/operations/actions/fake-id/approve", + ) + assert resp.status_code == 404 + + +# --------------------------------------------------------------------------- +# Test 4: Compact action log archives old entries +# --------------------------------------------------------------------------- + + +class TestCompaction: + def test_compact_under_threshold(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + for i in range(50): + append_action( + data_dir, WS_ID, + create_action(kind="maintenance", title=f"A{i}"), + ) + result = compact_action_log(data_dir, WS_ID) + assert result is False # below threshold + + def test_compact_above_threshold(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + for i in range(1100): + append_action( + data_dir, WS_ID, + create_action(kind="maintenance", title=f"A{i}"), + ) + + result = compact_action_log(data_dir, WS_ID) + assert result is True + + # Verify active file has only 500 entries + remaining = read_actions(data_dir, WS_ID) + assert len(remaining) == 500 + + # Verify archive exists + ops_dir = tmp_path / ".formicos" / "operations" / WS_ID + archives = list(ops_dir.glob("actions.*.jsonl.gz")) + assert len(archives) == 1 + + # Verify archive content + with gzip.open(archives[0], "rt", encoding="utf-8") as gz: + archived_lines = gz.readlines() + assert len(archived_lines) == 600 + + +# --------------------------------------------------------------------------- +# Test 5: End-to-end operational loop +# --------------------------------------------------------------------------- + + +class TestOperationalLoop: + def test_insight_queued_on_blast_radius_escalation( + self, 
tmp_path: Path, + ) -> None: + """Proactive insight with escalate blast radius -> queued as pending_review.""" + data_dir = str(tmp_path) + + action = queue_from_insight( + data_dir, WS_ID, + insight_category="contradiction", + insight_title="Contradicting entries found", + insight_detail="Entries X and Y disagree", + suggested_colony={ + "caste": "researcher", + "strategy": "sequential", + "max_rounds": 3, + "task": "Resolve contradiction", + "estimated_cost": 0.10, + }, + blast_radius=0.7, + estimated_cost=0.10, + reason="Blast radius escalation (score=0.70)", + ) + + assert action["status"] == STATUS_PENDING_REVIEW + assert action["kind"] == "maintenance" + assert action["blast_radius"] == 0.7 + + # Approve it + updated = update_action( + data_dir, WS_ID, action["action_id"], + {"status": STATUS_APPROVED}, + ) + assert updated is not None + assert updated["status"] == STATUS_APPROVED + + # Mark executed (simulating sweep) + updated = update_action( + data_dir, WS_ID, action["action_id"], + {"status": STATUS_EXECUTED, "executed_at": "2026-03-26T12:00:00Z"}, + ) + assert updated is not None + assert updated["status"] == STATUS_EXECUTED + + # Verify full history is durable + actions = read_actions(data_dir, WS_ID) + assert len(actions) == 1 + assert actions[0]["status"] == STATUS_EXECUTED + + +# --------------------------------------------------------------------------- +# Test 6: Self-rejected actions recorded with reason +# --------------------------------------------------------------------------- + + +class TestSelfRejected: + def test_self_rejected_with_reason(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + action = queue_from_insight( + data_dir, WS_ID, + insight_category="stale_cluster", + insight_title="Stale cluster detected", + reason="Suggest-only autonomy level", + self_rejected=True, + ) + + assert action["status"] == STATUS_SELF_REJECTED + assert action["operator_reason"] == "Suggest-only autonomy level" + + # Verify persisted + actions = 
read_actions(data_dir, WS_ID) + assert len(actions) == 1 + assert actions[0]["status"] == STATUS_SELF_REJECTED + + def test_category_not_in_auto_actions_queued(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + action = queue_from_insight( + data_dir, WS_ID, + insight_category="coverage_gap", + insight_title="Coverage gap in domain X", + reason="Category 'coverage_gap' not in auto_actions", + self_rejected=True, + ) + assert action["status"] == STATUS_SELF_REJECTED + assert "auto_actions" in action["operator_reason"] diff --git a/tests/unit/surface/test_autonomous_continuation.py b/tests/unit/surface/test_autonomous_continuation.py new file mode 100644 index 0000000..5a9403d --- /dev/null +++ b/tests/unit/surface/test_autonomous_continuation.py @@ -0,0 +1,612 @@ +"""Tests for Wave 72 Team B: Autonomous continuation engine.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any +from unittest.mock import AsyncMock, MagicMock, patch + +if TYPE_CHECKING: + from pathlib import Path + +import pytest + +from formicos.surface.action_queue import ( + STATUS_APPROVED, + STATUS_EXECUTED, + STATUS_PENDING_REVIEW, + append_action, + create_action, + read_actions, +) +from formicos.surface.continuation import ( + build_warm_start_cue, + execute_idle_continuations, + queue_continuation_proposals, +) + +WS_ID = "ws-cont-test" + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_dispatcher( + *, + autonomy_level: str = "suggest", + daily_budget: float = 5.0, + daily_spend: float = 0.0, +) -> MagicMock: + """Build a mock MaintenanceDispatcher with configurable policy.""" + dispatcher = MagicMock() + dispatcher._daily_spend = {WS_ID: daily_spend} + dispatcher._runtime = MagicMock() + dispatcher._runtime.spawn_colony = AsyncMock(return_value="colony-test-1") + + from formicos.core.types import AutonomyLevel, 
MaintenancePolicy + + policy = MaintenancePolicy( + autonomy_level=AutonomyLevel(autonomy_level), + daily_maintenance_budget=daily_budget, + ) + dispatcher._get_policy = MagicMock(return_value=policy) + return dispatcher + + +def _make_projections() -> MagicMock: + return MagicMock() + + +def _make_summary( + *, + operator_active: bool = False, + idle_for_minutes: int | None = 120, + candidates: list[dict[str, Any]] | None = None, +) -> dict[str, Any]: + return { + "workspace_id": WS_ID, + "pending_review_count": 0, + "active_milestone_count": 0, + "stalled_thread_count": 0, + "last_operator_activity_at": None, + "idle_for_minutes": idle_for_minutes, + "operator_active": operator_active, + "continuation_candidates": candidates or [], + "sync_issues": [], + "recent_progress": [], + } + + +def _make_blast_radius(score: float = 0.2) -> Any: + from formicos.surface.self_maintenance import BlastRadiusEstimate + + level = "low" if score < 0.3 else ("medium" if score < 0.6 else "high") + rec = "proceed" if score < 0.3 else ("notify" if score < 0.6 else "escalate") + return BlastRadiusEstimate( + score=score, level=level, factors=[], recommendation=rec, + ) + + +# --------------------------------------------------------------------------- +# Test 1: queue_continuation_proposals +# --------------------------------------------------------------------------- + + +class TestQueueContinuationProposals: + @pytest.mark.asyncio + async def test_queues_proposals_for_candidates(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + projections = _make_projections() + dispatcher = _make_dispatcher() + candidates = [ + { + "thread_id": "thread-abc", + "description": "Thread abc: 2/5 steps done", + "ready_for_autonomy": True, + "blocked_reason": "", + "priority": "high", + }, + ] + summary = _make_summary(candidates=candidates) + + with ( + patch( + "formicos.surface.continuation.build_operations_summary", + return_value=summary, + ), + patch( + 
"formicos.surface.continuation.estimate_blast_radius", + return_value=_make_blast_radius(0.15), + ), + ): + count = await queue_continuation_proposals( + data_dir, WS_ID, projections, dispatcher, + ) + + assert count == 1 + actions = read_actions(data_dir, WS_ID) + assert len(actions) == 1 + assert actions[0]["kind"] == "continuation" + assert actions[0]["thread_id"] == "thread-abc" + assert actions[0]["status"] == STATUS_PENDING_REVIEW + assert "suggested_colony" in actions[0]["payload"] + + @pytest.mark.asyncio + async def test_skips_when_operator_active(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + summary = _make_summary(operator_active=True, candidates=[ + {"thread_id": "t1", "description": "x", "priority": "high", + "ready_for_autonomy": True, "blocked_reason": ""}, + ]) + with patch( + "formicos.surface.continuation.build_operations_summary", + return_value=summary, + ): + count = await queue_continuation_proposals( + data_dir, WS_ID, _make_projections(), _make_dispatcher(), + ) + assert count == 0 + assert read_actions(data_dir, WS_ID) == [] + + @pytest.mark.asyncio + async def test_deduplicates_by_thread_id(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + # Pre-seed a pending continuation for thread-abc + existing = create_action( + kind="continuation", title="Existing", + thread_id="thread-abc", + ) + append_action(data_dir, WS_ID, existing) + + candidates = [ + {"thread_id": "thread-abc", "description": "dup", + "ready_for_autonomy": True, "blocked_reason": "", "priority": "high"}, + {"thread_id": "thread-xyz", "description": "new", + "ready_for_autonomy": True, "blocked_reason": "", "priority": "medium"}, + ] + summary = _make_summary(candidates=candidates) + + with ( + patch( + "formicos.surface.continuation.build_operations_summary", + return_value=summary, + ), + patch( + "formicos.surface.continuation.estimate_blast_radius", + return_value=_make_blast_radius(0.1), + ), + ): + count = await queue_continuation_proposals( + 
data_dir, WS_ID, _make_projections(), _make_dispatcher(), + ) + + assert count == 1 # Only thread-xyz queued + actions = read_actions(data_dir, WS_ID) + assert len(actions) == 2 + thread_ids = {a["thread_id"] for a in actions} + assert thread_ids == {"thread-abc", "thread-xyz"} + + @pytest.mark.asyncio + async def test_empty_data_dir_returns_zero(self) -> None: + count = await queue_continuation_proposals( + "", WS_ID, _make_projections(), _make_dispatcher(), + ) + assert count == 0 + + +# --------------------------------------------------------------------------- +# Test 2: execute_idle_continuations +# --------------------------------------------------------------------------- + + +class TestExecuteIdleContinuations: + @pytest.mark.asyncio + async def test_executes_approved_continuation(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + dispatcher = _make_dispatcher( + autonomy_level="autonomous", daily_budget=5.0, + ) + + # Create an approved continuation action + action = create_action( + kind="continuation", + title="Continue thread work", + thread_id="thread-exec", + payload={ + "suggested_colony": { + "task": "Continue work", + "caste": "coder", + "strategy": "sequential", + "max_rounds": 3, + }, + }, + ) + action["status"] = STATUS_APPROVED + append_action(data_dir, WS_ID, action) + + summary = _make_summary(idle_for_minutes=120) + + with ( + patch( + "formicos.surface.continuation.build_operations_summary", + return_value=summary, + ), + patch( + "formicos.surface.continuation.estimate_blast_radius", + return_value=_make_blast_radius(0.2), + ), + patch( + "formicos.surface.continuation.append_journal_entry", + ) as mock_journal, + ): + executed = await execute_idle_continuations( + data_dir, WS_ID, _make_projections(), dispatcher, + ) + + assert executed == 1 + actions = read_actions(data_dir, WS_ID) + assert actions[0]["status"] == STATUS_EXECUTED + assert actions[0]["executed_at"] != "" + dispatcher._runtime.spawn_colony.assert_called_once() + 
mock_journal.assert_called_once() + + @pytest.mark.asyncio + async def test_blocks_when_not_autonomous(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + dispatcher = _make_dispatcher(autonomy_level="auto_notify") + + action = create_action( + kind="continuation", title="X", + payload={"suggested_colony": {"task": "X", "caste": "coder", + "strategy": "sequential", "max_rounds": 3}}, + ) + action["status"] = STATUS_APPROVED + append_action(data_dir, WS_ID, action) + + executed = await execute_idle_continuations( + data_dir, WS_ID, _make_projections(), dispatcher, + ) + assert executed == 0 + + @pytest.mark.asyncio + async def test_blocks_when_operator_not_idle_enough(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + dispatcher = _make_dispatcher(autonomy_level="autonomous") + + action = create_action( + kind="continuation", title="X", + payload={"suggested_colony": {"task": "X", "caste": "coder", + "strategy": "sequential", "max_rounds": 3}}, + ) + action["status"] = STATUS_APPROVED + append_action(data_dir, WS_ID, action) + + summary = _make_summary(idle_for_minutes=10) # Below threshold + + with patch( + "formicos.surface.continuation.build_operations_summary", + return_value=summary, + ): + executed = await execute_idle_continuations( + data_dir, WS_ID, _make_projections(), dispatcher, + ) + assert executed == 0 + + @pytest.mark.asyncio + async def test_blocks_when_pending_review_exists(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + dispatcher = _make_dispatcher(autonomy_level="autonomous") + + # An approved continuation + approved = create_action( + kind="continuation", title="Ready", + payload={"suggested_colony": {"task": "X", "caste": "coder", + "strategy": "sequential", "max_rounds": 3}}, + ) + approved["status"] = STATUS_APPROVED + append_action(data_dir, WS_ID, approved) + + # A pending review action (different kind) + pending = create_action(kind="maintenance", title="Needs review") + append_action(data_dir, WS_ID, pending) 
+ + summary = _make_summary(idle_for_minutes=120) + + with patch( + "formicos.surface.continuation.build_operations_summary", + return_value=summary, + ): + executed = await execute_idle_continuations( + data_dir, WS_ID, _make_projections(), dispatcher, + ) + assert executed == 0 + + @pytest.mark.asyncio + async def test_blocks_high_blast_radius(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + dispatcher = _make_dispatcher(autonomy_level="autonomous") + + action = create_action( + kind="continuation", title="Risky", + payload={"suggested_colony": {"task": "Risky", "caste": "coder", + "strategy": "sequential", "max_rounds": 3}}, + ) + action["status"] = STATUS_APPROVED + append_action(data_dir, WS_ID, action) + + summary = _make_summary(idle_for_minutes=120) + + with ( + patch( + "formicos.surface.continuation.build_operations_summary", + return_value=summary, + ), + patch( + "formicos.surface.continuation.estimate_blast_radius", + return_value=_make_blast_radius(0.7), + ), + ): + executed = await execute_idle_continuations( + data_dir, WS_ID, _make_projections(), dispatcher, + ) + assert executed == 0 + + @pytest.mark.asyncio + async def test_blocks_when_budget_exhausted(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + dispatcher = _make_dispatcher( + autonomy_level="autonomous", daily_budget=0.10, daily_spend=0.10, + ) + + action = create_action( + kind="continuation", title="Costly", + estimated_cost=0.36, + payload={"suggested_colony": {"task": "X", "caste": "coder", + "strategy": "sequential", "max_rounds": 3}}, + ) + action["status"] = STATUS_APPROVED + append_action(data_dir, WS_ID, action) + + summary = _make_summary(idle_for_minutes=120) + + with ( + patch( + "formicos.surface.continuation.build_operations_summary", + return_value=summary, + ), + patch( + "formicos.surface.continuation.estimate_blast_radius", + return_value=_make_blast_radius(0.1), + ), + ): + executed = await execute_idle_continuations( + data_dir, WS_ID, 
_make_projections(), dispatcher, + ) + assert executed == 0 + + @pytest.mark.asyncio + async def test_journals_autonomous_continuation(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + dispatcher = _make_dispatcher(autonomy_level="autonomous") + + action = create_action( + kind="continuation", title="Journal test", + thread_id="t-journal", + payload={"suggested_colony": {"task": "Work", "caste": "coder", + "strategy": "sequential", "max_rounds": 3}}, + ) + action["status"] = STATUS_APPROVED + append_action(data_dir, WS_ID, action) + + summary = _make_summary(idle_for_minutes=120) + + with ( + patch( + "formicos.surface.continuation.build_operations_summary", + return_value=summary, + ), + patch( + "formicos.surface.continuation.estimate_blast_radius", + return_value=_make_blast_radius(0.1), + ), + patch( + "formicos.surface.continuation.append_journal_entry", + ) as mock_journal, + ): + await execute_idle_continuations( + data_dir, WS_ID, _make_projections(), dispatcher, + ) + + mock_journal.assert_called_once() + call_kwargs = mock_journal.call_args + assert call_kwargs[1]["source"] == "continuation" + assert "Auto-executed" in call_kwargs[1]["message"] + + @pytest.mark.asyncio + async def test_max_per_sweep_limits_execution(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + dispatcher = _make_dispatcher(autonomy_level="autonomous") + + # Create 3 approved continuations + for i in range(3): + action = create_action( + kind="continuation", title=f"Work {i}", + thread_id=f"t-{i}", + payload={"suggested_colony": {"task": f"Work {i}", "caste": "coder", + "strategy": "sequential", "max_rounds": 3}}, + ) + action["status"] = STATUS_APPROVED + append_action(data_dir, WS_ID, action) + + summary = _make_summary(idle_for_minutes=120) + + with ( + patch( + "formicos.surface.continuation.build_operations_summary", + return_value=summary, + ), + patch( + "formicos.surface.continuation.estimate_blast_radius", + return_value=_make_blast_radius(0.1), + ), + 
patch( + "formicos.surface.continuation.append_journal_entry", + ), + ): + executed = await execute_idle_continuations( + data_dir, WS_ID, _make_projections(), dispatcher, + max_per_sweep=1, + ) + assert executed == 1 # Only 1 despite 3 available + + +# --------------------------------------------------------------------------- +# Test 3: build_warm_start_cue +# --------------------------------------------------------------------------- + + +class TestBuildWarmStartCue: + def test_builds_cue_from_candidates(self, tmp_path: Path) -> None: + projections = _make_projections() + candidates = [ + {"thread_id": "t1", "description": "Thread t1: 2/5 done", + "ready_for_autonomy": True, "blocked_reason": "", "priority": "high"}, + {"thread_id": "t2", "description": "Thread t2: 1 failed", + "ready_for_autonomy": False, "blocked_reason": "prior failures", + "priority": "medium"}, + ] + summary = _make_summary(candidates=candidates) + + with patch( + "formicos.surface.continuation.build_operations_summary", + return_value=summary, + ): + cue = build_warm_start_cue( + str(tmp_path), WS_ID, projections, + ) + + assert "Continuation Opportunities" in cue + assert "[READY]" in cue + assert "[BLOCKED: prior failures]" in cue + assert "Thread t1" in cue + + def test_empty_when_no_candidates(self, tmp_path: Path) -> None: + summary = _make_summary(candidates=[]) + with patch( + "formicos.surface.continuation.build_operations_summary", + return_value=summary, + ): + cue = build_warm_start_cue( + str(tmp_path), WS_ID, _make_projections(), + ) + assert cue == "" + + def test_empty_when_no_data_dir(self) -> None: + cue = build_warm_start_cue("", WS_ID, _make_projections()) + assert cue == "" + + def test_caps_candidates(self, tmp_path: Path) -> None: + candidates = [ + {"thread_id": f"t{i}", "description": f"Thread {i}", + "ready_for_autonomy": True, "blocked_reason": "", "priority": "medium"} + for i in range(10) + ] + summary = _make_summary(candidates=candidates) + + with patch( + 
"formicos.surface.continuation.build_operations_summary", + return_value=summary, + ): + cue = build_warm_start_cue( + str(tmp_path), WS_ID, _make_projections(), + max_candidates=3, + ) + + assert "+7 more" in cue + + +# --------------------------------------------------------------------------- +# Test 4: Integration — proposal flows through approve_action contract +# --------------------------------------------------------------------------- + + +class TestProposalApprovalIntegration: + @pytest.mark.asyncio + async def test_proposal_has_suggested_colony_for_approve_action( + self, tmp_path: Path, + ) -> None: + """Verify that queued continuation actions carry suggested_colony + so that approve_action() can dispatch them without a second mechanism. + """ + data_dir = str(tmp_path) + candidates = [ + {"thread_id": "t-int", "description": "Integration test thread", + "ready_for_autonomy": True, "blocked_reason": "", "priority": "high"}, + ] + summary = _make_summary(candidates=candidates) + + with ( + patch( + "formicos.surface.continuation.build_operations_summary", + return_value=summary, + ), + patch( + "formicos.surface.continuation.estimate_blast_radius", + return_value=_make_blast_radius(0.15), + ), + ): + await queue_continuation_proposals( + data_dir, WS_ID, _make_projections(), _make_dispatcher(), + ) + + actions = read_actions(data_dir, WS_ID) + assert len(actions) == 1 + sc = actions[0]["payload"]["suggested_colony"] + assert sc["caste"] == "coder" + assert sc["strategy"] == "sequential" + assert sc["max_rounds"] == 3 + assert sc["task"] == "Integration test thread" + + +# --------------------------------------------------------------------------- +# Test 5: Scheduler consolidation — proactive dispatch not in maintenance +# --------------------------------------------------------------------------- + + +class TestSchedulerConsolidation: + """Verify app.py structural assertions via source inspection.""" + + def 
test_maintenance_loop_does_not_call_proactive_dispatch(self) -> None: + """Maintenance loop should only run consolidation services.""" + import inspect + + from formicos.surface import app + + source = inspect.getsource(app) + # Find the maintenance loop function + maint_start = source.find("async def _maintenance_loop") + assert maint_start != -1, "_maintenance_loop not found" + # Find the next function after it + maint_end = source.find("_maint_task = asyncio.create_task", maint_start) + assert maint_end != -1 + maint_body = source[maint_start:maint_end] + assert "run_proactive_dispatch" not in maint_body, ( + "Proactive dispatch should not be in _maintenance_loop (Wave 72)" + ) + + def test_ops_sweep_calls_proactive_dispatch(self) -> None: + """Operational sweep should include proactive dispatch.""" + import inspect + + from formicos.surface import app + + source = inspect.getsource(app) + ops_start = source.find("async def _operational_sweep_loop") + assert ops_start != -1 + ops_end = source.find("_ops_sweep_task = asyncio.create_task", ops_start) + assert ops_end != -1 + ops_body = source[ops_start:ops_end] + assert "run_proactive_dispatch" in ops_body + assert "queue_continuation_proposals" in ops_body + assert "execute_idle_continuations" in ops_body diff --git a/tests/unit/surface/test_autonomy_guardrails.py b/tests/unit/surface/test_autonomy_guardrails.py new file mode 100644 index 0000000..b8adc84 --- /dev/null +++ b/tests/unit/surface/test_autonomy_guardrails.py @@ -0,0 +1,612 @@ +"""Unit tests for Wave 70.0 Team C: autonomy guardrails. + +Covers blast radius estimation, autonomy scoring, check_autonomy_budget +Queen tool, dispatch gate integration, and the autonomy-status endpoint. 
+""" + +from __future__ import annotations + +import json +from types import SimpleNamespace +from typing import Any +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from formicos.surface.self_maintenance import ( + AutonomyScore, + BlastRadiusEstimate, + compute_autonomy_score, + estimate_blast_radius, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_outcome( + *, + colony_id: str = "c1", + workspace_id: str = "ws1", + succeeded: bool = True, + total_cost: float = 0.10, + quality_score: float = 0.8, + strategy: str = "sequential", + caste_composition: list[str] | None = None, +) -> SimpleNamespace: + return SimpleNamespace( + colony_id=colony_id, + workspace_id=workspace_id, + thread_id="t1", + succeeded=succeeded, + total_rounds=3, + total_cost=total_cost, + duration_ms=5000, + entries_extracted=1, + entries_accessed=2, + quality_score=quality_score, + caste_composition=caste_composition or ["coder"], + strategy=strategy, + maintenance_source=None, + ) + + +def _make_projections( + outcomes: list[Any] | None = None, + *, + acted_on: dict[str, int] | None = None, + kills: int = 0, +) -> MagicMock: + proj = MagicMock() + colony_outcomes: dict[str, Any] = {} + if outcomes: + for o in outcomes: + colony_outcomes[o.colony_id] = o + proj.colony_outcomes = colony_outcomes + + # operator_behavior + behavior = MagicMock() + behavior.suggestion_categories_acted_on = acted_on or {} + behavior.kill_records = [MagicMock() for _ in range(kills)] + proj.operator_behavior = behavior + + # outcome_stats replicates the real implementation's shape + def _outcome_stats(ws_id: str) -> list[dict[str, Any]]: + ws_outcomes = [o for o in colony_outcomes.values() if o.workspace_id == ws_id] + if not ws_outcomes: + return [] + buckets: dict[tuple[str, str], list[Any]] = {} + for o in ws_outcomes: + key = (o.strategy, 
",".join(sorted(o.caste_composition))) + buckets.setdefault(key, []).append(o) + stats = [] + for (strategy, caste_mix), group in buckets.items(): + successes = sum(1 for o in group if o.succeeded) + stats.append({ + "strategy": strategy, + "caste_mix": caste_mix, + "total": len(group), + "success_rate": successes / len(group), + "avg_rounds": sum(o.total_rounds for o in group) / len(group), + "avg_cost": sum(o.total_cost for o in group) / len(group), + }) + return stats + + proj.outcome_stats = _outcome_stats + return proj + + +def _make_runtime_with_workspace( + *, + policy: dict[str, Any] | None = None, + outcomes: list[Any] | None = None, + acted_on: dict[str, int] | None = None, + kills: int = 0, + daily_spend: float = 0.0, +) -> MagicMock: + """Build a mock runtime with workspace, projections, and maintenance dispatcher.""" + runtime = MagicMock() + runtime.parse_tool_input = MagicMock(side_effect=lambda tc: tc.get("input", {})) + + proj = _make_projections(outcomes, acted_on=acted_on, kills=kills) + runtime.projections = proj + + ws_config: dict[str, Any] = {} + if policy: + ws_config["maintenance_policy"] = json.dumps(policy) + + ws = SimpleNamespace( + config=ws_config, + budget=SimpleNamespace(total_cost=1.50), + ) + proj.workspaces = {"ws1": ws} + + # Maintenance dispatcher mock + dispatcher = MagicMock() + dispatcher._daily_spend = {"ws1": daily_spend} + dispatcher._reset_daily_budget_if_needed = MagicMock() + dispatcher._count_active_maintenance_colonies = MagicMock(return_value=1) + runtime.maintenance_dispatcher = dispatcher + + return runtime + + +# --------------------------------------------------------------------------- +# Track 8: Blast Radius Estimator +# --------------------------------------------------------------------------- + + +class TestEstimateBlastRadius: + def test_low_blast_radius(self) -> None: + """Simple task, researcher caste, 2 rounds -> low.""" + result = estimate_blast_radius( + task="Check test coverage", + 
caste="researcher", + max_rounds=2, + ) + assert isinstance(result, BlastRadiusEstimate) + assert result.score < 0.3 + assert result.level == "low" + assert result.recommendation == "proceed" + + def test_high_blast_radius(self) -> None: + """Long task with danger keywords, coder caste, high rounds, stigmergic -> high.""" + long_task = ( + "Delete all database tables and migrate the schema. " + "Also refactor the auth module and deploy to production. " * 5 + ) + result = estimate_blast_radius( + task=long_task, + caste="coder", + max_rounds=8, + strategy="stigmergic", + ) + assert result.score >= 0.6 + assert result.level == "high" + assert result.recommendation == "escalate" + + def test_medium_blast_radius(self) -> None: + """Moderate task, coder caste, 3 rounds -> medium.""" + result = estimate_blast_radius( + task="Update the logging configuration", + caste="coder", + max_rounds=3, + ) + assert 0.3 <= result.score < 0.6 + assert result.level == "medium" + assert result.recommendation == "notify" + + def test_uses_outcome_history(self) -> None: + """Low historical success rate increases score.""" + outcomes = [ + _make_outcome(colony_id=f"c{i}", succeeded=False) + for i in range(4) + ] + proj = _make_projections(outcomes) + + result_with = estimate_blast_radius( + task="Fix a bug", + caste="coder", + max_rounds=3, + strategy="sequential", + workspace_id="ws1", + projections=proj, + ) + result_without = estimate_blast_radius( + task="Fix a bug", + caste="coder", + max_rounds=3, + strategy="sequential", + ) + assert result_with.score > result_without.score + + def test_score_clamped_to_1(self) -> None: + """Score is never above 1.0 even with many risk factors.""" + long_task = "delete database schema deploy production auth security " * 20 + result = estimate_blast_radius( + task=long_task, + caste="coder", + max_rounds=10, + strategy="stigmergic", + ) + assert result.score <= 1.0 + + def test_factors_populated(self) -> None: + """Factors list explains what 
contributed to the score.""" + result = estimate_blast_radius( + task="delete the database", + caste="coder", + max_rounds=2, + ) + assert any("delete" in f.lower() for f in result.factors) + assert any("coder" in f.lower() for f in result.factors) + + +# --------------------------------------------------------------------------- +# Track 9: Graduated Autonomy Scoring +# --------------------------------------------------------------------------- + + +class TestComputeAutonomyScore: + def test_no_outcomes(self) -> None: + """Empty outcome history -> score 0, grade F.""" + proj = _make_projections([]) + result = compute_autonomy_score("ws1", proj) + assert isinstance(result, AutonomyScore) + assert result.score == 0 + assert result.grade == "F" + assert "No outcome history" in result.recommendation + + def test_perfect_track_record(self) -> None: + """All successes, many colonies, low cost, positive trust -> A.""" + outcomes = [ + _make_outcome(colony_id=f"c{i}", total_cost=0.02) + for i in range(30) + ] + proj = _make_projections( + outcomes, + acted_on={"coverage": 10, "staleness": 5}, + kills=0, + ) + result = compute_autonomy_score("ws1", proj) + assert result.score >= 80 + assert result.grade == "A" + + def test_mixed_results(self) -> None: + """50% success, moderate volume -> C or D.""" + outcomes = [ + _make_outcome(colony_id=f"c{i}", succeeded=(i % 2 == 0)) + for i in range(10) + ] + proj = _make_projections(outcomes, acted_on={"coverage": 3}, kills=3) + result = compute_autonomy_score("ws1", proj) + assert result.score < 80 + # With 50% success, moderate volume, 50/50 trust: should be C or D + assert result.grade in ("C", "D") + + def test_components_present(self) -> None: + """All four components are returned.""" + outcomes = [_make_outcome()] + proj = _make_projections(outcomes) + result = compute_autonomy_score("ws1", proj) + assert "success_rate" in result.components + assert "volume" in result.components + assert "cost_efficiency" in result.components + 
assert "operator_trust" in result.components + + def test_wrong_workspace_ignored(self) -> None: + """Outcomes from other workspaces are ignored.""" + outcomes = [_make_outcome(workspace_id="ws_other")] + proj = _make_projections(outcomes) + result = compute_autonomy_score("ws1", proj) + assert result.score == 0 + + +# --------------------------------------------------------------------------- +# Track 7: check_autonomy_budget Queen tool +# --------------------------------------------------------------------------- + + +class TestCheckAutonomyBudget: + def test_returns_budget_status(self) -> None: + """Tool returns daily budget info and autonomy level.""" + from formicos.surface.queen_tools import QueenToolDispatcher + + runtime = _make_runtime_with_workspace( + policy={ + "autonomy_level": "auto_notify", + "daily_maintenance_budget": 5.0, + "auto_actions": ["coverage", "staleness"], + }, + outcomes=[_make_outcome()], + daily_spend=1.50, + ) + + dispatcher = QueenToolDispatcher(runtime) + result_text, _action = dispatcher._check_autonomy_budget( + {}, "ws1", "t1", + ) + + assert "auto_notify" in result_text + assert "$5.00" in result_text # daily budget + assert "$1.50" in result_text # spent today + assert "$3.50" in result_text # remaining + assert "Autonomy Score" in result_text + + def test_budget_exhausted_message(self) -> None: + """When daily spend equals budget, show exhausted warning.""" + from formicos.surface.queen_tools import QueenToolDispatcher + + runtime = _make_runtime_with_workspace( + policy={"daily_maintenance_budget": 2.0}, + daily_spend=2.0, + ) + + dispatcher = QueenToolDispatcher(runtime) + result_text, _ = dispatcher._check_autonomy_budget({}, "ws1", "t1") + + assert "exhausted" in result_text.lower() + + def test_blast_radius_in_output_when_task_provided(self) -> None: + """When task is provided, blast radius estimate is included.""" + from formicos.surface.queen_tools import QueenToolDispatcher + + runtime = _make_runtime_with_workspace( 
+ outcomes=[_make_outcome()], + ) + + dispatcher = QueenToolDispatcher(runtime) + result_text, _ = dispatcher._check_autonomy_budget( + {"task": "delete the database schema"}, + "ws1", "t1", + ) + + assert "Blast Radius Estimate" in result_text + assert "Score:" in result_text + + def test_workspace_not_found(self) -> None: + """Returns error when workspace doesn't exist.""" + from formicos.surface.queen_tools import QueenToolDispatcher + + runtime = _make_runtime_with_workspace() + runtime.projections.workspaces = {} + + dispatcher = QueenToolDispatcher(runtime) + result_text, _ = dispatcher._check_autonomy_budget({}, "ws1", "t1") + + assert "not found" in result_text.lower() + + +# --------------------------------------------------------------------------- +# Track 8: Blast radius blocks dispatch +# --------------------------------------------------------------------------- + + +class TestBlastRadiusDispatchGate: + @pytest.mark.anyio() + async def test_high_blast_radius_blocks_dispatch(self) -> None: + """Autonomous dispatch is skipped when blast radius is 'escalate'.""" + from formicos.addons.proactive_intelligence.rules import ( + KnowledgeInsight, + SuggestedColony, + ) + from formicos.surface.self_maintenance import MaintenanceDispatcher + + runtime = MagicMock() + # Build workspace with autonomous policy + ws = SimpleNamespace( + config={ + "maintenance_policy": json.dumps({ + "autonomy_level": "autonomous", + "daily_maintenance_budget": 10.0, + "auto_actions": ["coverage"], + }), + }, + ) + runtime.projections.workspaces = {"ws1": ws} + runtime.projections.colonies = MagicMock() + runtime.projections.colonies.values = MagicMock(return_value=[]) + runtime.projections.colony_outcomes = {} + runtime.projections.operator_behavior = MagicMock() + runtime.projections.operator_behavior.suggestion_categories_acted_on = {} + runtime.projections.operator_behavior.kill_records = [] + + # outcome_stats returns empty + runtime.projections.outcome_stats = 
MagicMock(return_value=[]) + + dispatcher = MaintenanceDispatcher(runtime) + runtime.spawn_colony = AsyncMock(return_value="new_colony") + + # Create a high-risk insight + long_task = ( + "Delete all database tables and migrate the schema to new format. " + "Refactor authentication module and deploy to production. " * 5 + ) + from formicos.addons.proactive_intelligence.rules import ProactiveBriefing + + briefing = ProactiveBriefing( + workspace_id="ws1", + generated_at="2026-03-26T00:00:00Z", + total_entries=10, + entries_by_status={"verified": 5, "candidate": 5}, + avg_confidence=0.7, + prediction_error_rate=0.1, + active_clusters=2, + insights=[ + KnowledgeInsight( + severity="attention", + category="coverage", + title="High-risk task", + detail="Test", + affected_entries=[], + suggested_action="Check", + suggested_colony=SuggestedColony( + task=long_task, + caste="coder", + strategy="stigmergic", + max_rounds=8, + rationale="Test", + estimated_cost=0.50, + ), + ), + ], + ) + + dispatched = await dispatcher.evaluate_and_dispatch("ws1", briefing) + # Colony should NOT be spawned due to high blast radius + assert len(dispatched) == 0 + runtime.spawn_colony.assert_not_called() + + @pytest.mark.anyio() + async def test_low_blast_radius_allows_dispatch(self) -> None: + """Autonomous dispatch proceeds when blast radius is low.""" + from formicos.addons.proactive_intelligence.rules import ( + KnowledgeInsight, + ProactiveBriefing, + SuggestedColony, + ) + from formicos.surface.self_maintenance import MaintenanceDispatcher + + runtime = MagicMock() + ws = SimpleNamespace( + config={ + "maintenance_policy": json.dumps({ + "autonomy_level": "autonomous", + "daily_maintenance_budget": 10.0, + }), + }, + ) + runtime.projections.workspaces = {"ws1": ws} + runtime.projections.colonies = MagicMock() + runtime.projections.colonies.values = MagicMock(return_value=[]) + runtime.projections.colony_outcomes = {} + runtime.projections.operator_behavior = MagicMock() + 
runtime.projections.operator_behavior.suggestion_categories_acted_on = {} + runtime.projections.operator_behavior.kill_records = [] + runtime.projections.outcome_stats = MagicMock(return_value=[]) + + dispatcher = MaintenanceDispatcher(runtime) + runtime.spawn_colony = AsyncMock(return_value="new_colony") + + briefing = ProactiveBriefing( + workspace_id="ws1", + generated_at="2026-03-26T00:00:00Z", + total_entries=10, + entries_by_status={"verified": 5, "candidate": 5}, + avg_confidence=0.7, + prediction_error_rate=0.1, + active_clusters=2, + insights=[ + KnowledgeInsight( + severity="info", + category="staleness", + title="Check coverage", + detail="Test", + affected_entries=[], + suggested_action="Check", + suggested_colony=SuggestedColony( + task="Check test coverage", + caste="researcher", + strategy="sequential", + max_rounds=2, + rationale="Test", + estimated_cost=0.10, + ), + ), + ], + ) + + dispatched = await dispatcher.evaluate_and_dispatch("ws1", briefing) + assert len(dispatched) == 1 + runtime.spawn_colony.assert_called_once() + + +# --------------------------------------------------------------------------- +# Track 8: Proposal metadata carries blast-radius truth +# --------------------------------------------------------------------------- + + +class TestProposalMetadata: + def test_propose_plan_carries_blast_radius(self) -> None: + """The action dict from _propose_plan includes blast_radius and autonomy_score.""" + from formicos.surface.queen_tools import QueenToolDispatcher + + runtime = _make_runtime_with_workspace( + outcomes=[_make_outcome()], + ) + runtime.settings.system.data_dir = "" + runtime.settings.governance.max_rounds_per_colony = 20 + runtime.settings.models.defaults.coder = "local/qwen3" + runtime.settings.models.registry = [] + + dispatcher = QueenToolDispatcher(runtime) + + # Call _propose_plan directly (sync part only) + result_text, action = dispatcher._propose_plan( + { + "summary": "Add unit tests for auth module", + "options": 
[], + "questions": [], + "recommendation": "Sequential coder colony", + }, + "ws1", + "t1", + ) + + assert action is not None + assert "blast_radius" in action + br = action["blast_radius"] + assert "score" in br + assert "level" in br + assert "factors" in br + assert "recommendation" in br + + assert "autonomy_score" in action + auto = action["autonomy_score"] + assert "score" in auto + assert "grade" in auto + assert "components" in auto + + +# --------------------------------------------------------------------------- +# Track 9: Autonomy status endpoint +# --------------------------------------------------------------------------- + + +class TestAutonomyStatusEndpoint: + def test_autonomy_status_response_shape(self) -> None: + """Autonomy status computation returns expected shape.""" + # Test the scoring/budget logic that the endpoint would return, + # without needing the full Starlette route wiring. + from formicos.surface.self_maintenance import compute_autonomy_score + + runtime = _make_runtime_with_workspace( + policy={ + "autonomy_level": "auto_notify", + "daily_maintenance_budget": 5.0, + "auto_actions": ["coverage"], + "max_maintenance_colonies": 3, + }, + outcomes=[_make_outcome()], + daily_spend=1.20, + ) + + # Simulate what the endpoint does + ws = runtime.projections.workspaces["ws1"] + raw_policy = ws.config.get("maintenance_policy") + + from formicos.core.types import MaintenancePolicy + + policy = MaintenancePolicy(**json.loads(raw_policy)) + + dispatcher = runtime.maintenance_dispatcher + daily_spend = dispatcher._daily_spend.get("ws1", 0.0) + remaining = max(0.0, policy.daily_maintenance_budget - daily_spend) + + auto_score = compute_autonomy_score("ws1", runtime.projections) + + data = { + "level": str(policy.autonomy_level), + "score": auto_score.score, + "grade": auto_score.grade, + "daily_budget": policy.daily_maintenance_budget, + "daily_spend": round(daily_spend, 4), + "remaining": round(remaining, 4), + "components": 
auto_score.components, + "recommendation": auto_score.recommendation, + "auto_actions": policy.auto_actions, + "recent_actions": [], + } + + assert data["level"] == "auto_notify" + assert data["daily_budget"] == 5.0 + assert data["daily_spend"] == 1.20 + assert data["remaining"] == 3.80 + assert "score" in data + assert "grade" in data + assert "components" in data + assert "recommendation" in data + assert "recent_actions" in data + assert data["auto_actions"] == ["coverage"] diff --git a/tests/unit/surface/test_deliberation_frame.py b/tests/unit/surface/test_deliberation_frame.py new file mode 100644 index 0000000..062109f --- /dev/null +++ b/tests/unit/surface/test_deliberation_frame.py @@ -0,0 +1,283 @@ +"""Tests for _build_deliberation_frame (Wave 68 Track 4).""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any +from unittest.mock import MagicMock, patch + +from formicos.surface.projections import ColonyOutcome, ThreadProjection + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +WS_ID = "ws-1" +THREAD_ID = "thread-1" + + +def _make_entry( + workspace_id: str = WS_ID, + domains: list[str] | None = None, + alpha: float = 8.0, + beta: float = 2.0, +) -> dict[str, Any]: + return { + "workspace_id": workspace_id, + "domains": domains or ["python"], + "conf_alpha": alpha, + "conf_beta": beta, + } + + +def _make_outcome( + workspace_id: str = WS_ID, + succeeded: bool = True, + strategy: str = "stigmergic", + total_rounds: int = 3, + total_cost: float = 0.0012, +) -> ColonyOutcome: + return ColonyOutcome( + colony_id="col-1", + workspace_id=workspace_id, + thread_id=THREAD_ID, + succeeded=succeeded, + total_rounds=total_rounds, + total_cost=total_cost, + duration_ms=1000, + entries_extracted=1, + entries_accessed=2, + quality_score=0.8, + caste_composition=["coder"], + strategy=strategy, + 
maintenance_source=None, + ) + + +def _make_thread( + goal: str = "", + colony_count: int = 0, + completed: int = 0, + failed: int = 0, +) -> ThreadProjection: + return ThreadProjection( + id=THREAD_ID, + workspace_id=WS_ID, + name="test", + goal=goal, + colony_count=colony_count, + completed_colony_count=completed, + failed_colony_count=failed, + ) + + +@dataclass +class FakeProjections: + memory_entries: dict[str, dict[str, Any]] = field( + default_factory=dict, + ) + colony_outcomes: dict[str, ColonyOutcome] = field( + default_factory=dict, + ) + _thread: ThreadProjection | None = None + + def get_thread( + self, + workspace_id: str, + thread_id: str, + ) -> ThreadProjection | None: + return self._thread + + +def _make_queen( + projections: Any = None, + app_state: Any = None, +) -> Any: + """Build a minimal QueenAgent with mocked runtime.""" + from formicos.surface.queen_runtime import QueenAgent + + runtime = MagicMock() + runtime.projections = projections or FakeProjections() + runtime.castes = None + runtime.settings.models.registry = [] + + if app_state is not None: + runtime.app.state = app_state + else: + runtime.app = None + + queen = QueenAgent.__new__(QueenAgent) + queen._runtime = runtime + return queen + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestDeliberationFrame: + def test_frame_includes_domains_and_outcomes(self) -> None: + proj = FakeProjections( + memory_entries={ + "e1": _make_entry(domains=["python", "testing"]), + "e2": _make_entry(domains=["python"]), + }, + colony_outcomes={ + "c1": _make_outcome(succeeded=True), + "c2": _make_outcome(succeeded=False), + }, + ) + queen = _make_queen(projections=proj) + frame = queen._build_deliberation_frame(WS_ID, THREAD_ID) + + assert "## Institutional Memory Coverage" in frame + assert "python" in frame + assert "## Recent Colony Outcomes" in frame + assert "[ok]" 
in frame + assert "[FAIL]" in frame + + def test_frame_caps_at_budget(self) -> None: + """Frame must be cappable (caller enforces budget).""" + proj = FakeProjections( + memory_entries={ + f"e{i}": _make_entry( + domains=[f"domain-{i}"], + ) + for i in range(50) + }, + ) + queen = _make_queen(projections=proj) + frame = queen._build_deliberation_frame(WS_ID, THREAD_ID) + # Frame is finite and non-empty + assert len(frame) > 0 + # Caller caps at budget.thread_context * 4; here we just + # verify the frame itself is bounded (top-10 domains). + assert frame.count("entries, avg confidence") <= 10 + + def test_deliberation_triggers_on_exploratory_message( + self, + ) -> None: + """_DELIBERATION_RE should match exploratory phrasing.""" + from formicos.adapters.queen_intent_parser import ( + _DELIBERATION_RE, + ) + + assert _DELIBERATION_RE.search("I think we could try X") + assert _DELIBERATION_RE.search( + "here are some options for the migration" + ) + assert not _DELIBERATION_RE.search("run tests now") + + def test_frame_empty_for_bare_workspace(self) -> None: + proj = FakeProjections() + queen = _make_queen(projections=proj) + frame = queen._build_deliberation_frame(WS_ID, THREAD_ID) + assert frame == "" + + def test_frame_prefers_capability_metadata(self) -> None: + """When addon manifests have content_kinds/path_globs, + the frame labels corpus coverage by source type.""" + + @dataclass + class FakeTool: + name: str = "search_docs" + + @dataclass + class FakeManifest: + name: str = "docs-index" + description: str = "Documentation index" + content_kinds: list[str] = field( + default_factory=lambda: ["documentation"], + ) + path_globs: list[str] = field( + default_factory=lambda: [ + "**/*.md", + "**/*.rst", + ], + ) + search_tool: str = "search_docs" + tools: list[Any] = field(default_factory=list) + + app_state = MagicMock() + app_state.addon_manifests = [FakeManifest()] + + proj = FakeProjections() + queen = _make_queen( + projections=proj, app_state=app_state, + 
) + frame = queen._build_deliberation_frame(WS_ID, THREAD_ID) + + assert "## Addon Corpus Coverage" in frame + assert "docs-index" in frame + assert "documentation" in frame + assert "**/*.md" in frame + assert "search via search_docs" in frame + + def test_frame_falls_back_to_tool_descriptions(self) -> None: + """Without capability metadata, falls back to tool names.""" + + @dataclass + class FakeTool: + name: str = "semantic_search_code" + + @dataclass + class FakeManifest: + name: str = "codebase-index" + description: str = "Code search" + tools: list[Any] = field( + default_factory=lambda: [FakeTool()], + ) + + app_state = MagicMock() + app_state.addon_manifests = [FakeManifest()] + + proj = FakeProjections() + queen = _make_queen( + projections=proj, app_state=app_state, + ) + frame = queen._build_deliberation_frame(WS_ID, THREAD_ID) + + assert "codebase-index" in frame + assert "semantic_search_code" in frame + + def test_thread_progress_included(self) -> None: + proj = FakeProjections( + _thread=_make_thread( + goal="Migrate auth", + colony_count=5, + completed=3, + failed=1, + ), + ) + queen = _make_queen(projections=proj) + frame = queen._build_deliberation_frame(WS_ID, THREAD_ID) + + assert "## Thread Progress" in frame + assert "Migrate auth" in frame + assert "5 total" in frame + assert "3 completed" in frame + + @patch( + "formicos.surface.queen_runtime.generate_briefing", + ) + def test_active_alerts_included( + self, mock_briefing: MagicMock, + ) -> None: + @dataclass + class FakeInsight: + severity: str = "warning" + title: str = "Stale cluster" + detail: str = "3 entries need refresh" + category: str = "knowledge_health" + + mock_result = MagicMock() + mock_result.insights = [FakeInsight()] + mock_briefing.return_value = mock_result + + proj = FakeProjections() + queen = _make_queen(projections=proj) + frame = queen._build_deliberation_frame(WS_ID, THREAD_ID) + + assert "## Active Alerts" in frame + assert "Stale cluster" in frame diff --git 
a/tests/unit/surface/test_domain_normalization.py b/tests/unit/surface/test_domain_normalization.py new file mode 100644 index 0000000..a76c43a --- /dev/null +++ b/tests/unit/surface/test_domain_normalization.py @@ -0,0 +1,107 @@ +"""Tests for Wave 67 domain normalization in extraction prompt.""" + +from __future__ import annotations + +from typing import Any + +from formicos.surface.memory_extractor import build_extraction_prompt + + +def _entries_with_domains(*domain_lists: list[str]) -> list[dict[str, Any]]: + """Build minimal existing entries with specified domain tag lists.""" + return [ + { + "id": f"mem-{i}", + "title": f"entry {i}", + "confidence": 0.6, + "access_count": 1, + "content": "some content", + "domains": domains, + } + for i, domains in enumerate(domain_lists) + ] + + +class TestDomainNormalization: + def test_extraction_prompt_includes_existing_domains(self) -> None: + entries = _entries_with_domains( + ["python", "testing"], + ["auth"], + ["python", "networking"], + ) + prompt = build_extraction_prompt( + task="test task", + final_output="test output", + artifacts=[], + colony_status="completed", + failure_reason=None, + contract_result=None, + existing_entries=entries, + ) + assert "Use one of these existing domain tags" in prompt + assert "auth" in prompt + assert "python" in prompt + assert "testing" in prompt + assert "networking" in prompt + + def test_extraction_prompt_caps_domains_at_20(self) -> None: + # Create entries with 30 unique domains + domains_per_entry = [ + [f"domain_{i}", f"domain_{i + 10}", f"domain_{i + 20}"] + for i in range(10) + ] + entries = _entries_with_domains(*domains_per_entry) + prompt = build_extraction_prompt( + task="test task", + final_output="test output", + artifacts=[], + colony_status="completed", + failure_reason=None, + contract_result=None, + existing_entries=entries, + ) + assert "Use one of these existing domain tags" in prompt + # Count how many domain_N tags appear in the hint line + hint_line = [ + 
line for line in prompt.split("\n") + if "Use one of these existing domain tags" in line + ][0] + domain_count = hint_line.count("domain_") + assert domain_count <= 20 + + def test_extraction_prompt_no_domains_without_existing(self) -> None: + prompt = build_extraction_prompt( + task="test task", + final_output="test output", + artifacts=[], + colony_status="completed", + failure_reason=None, + contract_result=None, + existing_entries=None, + ) + assert "Use one of these existing domain tags" not in prompt + + def test_extraction_prompt_no_domains_with_empty_entries(self) -> None: + prompt = build_extraction_prompt( + task="test task", + final_output="test output", + artifacts=[], + colony_status="completed", + failure_reason=None, + contract_result=None, + existing_entries=[], + ) + assert "Use one of these existing domain tags" not in prompt + + def test_extraction_prompt_no_hint_when_entries_lack_domains(self) -> None: + entries = _entries_with_domains([], []) + prompt = build_extraction_prompt( + task="test task", + final_output="test output", + artifacts=[], + colony_status="completed", + failure_reason=None, + contract_result=None, + existing_entries=entries, + ) + assert "Use one of these existing domain tags" not in prompt diff --git a/tests/unit/surface/test_hierarchy.py b/tests/unit/surface/test_hierarchy.py new file mode 100644 index 0000000..2578aab --- /dev/null +++ b/tests/unit/surface/test_hierarchy.py @@ -0,0 +1,198 @@ +"""Tests for Wave 67 knowledge hierarchy — materialized paths + branch confidence. + +See ADR-049 for design rationale. 
+""" + +from __future__ import annotations + +from datetime import datetime, timezone + +from formicos.core.events import ( + MemoryEntryCreated, + WorkspaceCreated, +) +from formicos.core.events import WorkspaceConfigSnapshot +from formicos.surface.hierarchy import build_knowledge_tree, compute_branch_confidence +from formicos.surface.projections import ProjectionStore + +_NOW = datetime(2026, 3, 25, tzinfo=timezone.utc) +_WS = "ws-hier" +_WS_CONFIG = WorkspaceConfigSnapshot(budget=5.0, strategy="stigmergic") + + +def _store_with_workspace() -> ProjectionStore: + store = ProjectionStore() + store.apply(WorkspaceCreated( + seq=1, timestamp=_NOW, address=_WS, + name=_WS, config=_WS_CONFIG, + )) + return store + + +def _add_entry( + store: ProjectionStore, + entry_id: str, + domains: list[str] | None = None, + conf_alpha: float = 5.0, + conf_beta: float = 5.0, + workspace_id: str = _WS, +) -> None: + store.apply(MemoryEntryCreated( + seq=10, timestamp=_NOW, address=f"{workspace_id}/t-1", + workspace_id=workspace_id, + entry={ + "id": entry_id, + "entry_type": "skill", + "status": "candidate", + "polarity": "positive", + "title": f"Entry {entry_id}", + "content": f"Content for {entry_id}", + "source_colony_id": "col-1", + "source_artifact_ids": [], + "workspace_id": workspace_id, + "thread_id": "t-1", + "domains": domains or [], + "conf_alpha": conf_alpha, + "conf_beta": conf_beta, + "confidence": conf_alpha / (conf_alpha + conf_beta), + }, + )) + + +class TestHierarchyPathOnProjection: + """_on_memory_entry_created sets hierarchy_path from primary domain.""" + + def test_sets_hierarchy_path_from_primary_domain(self) -> None: + store = _store_with_workspace() + _add_entry(store, "e-1", domains=["Python Testing", "CI"]) + entry = store.memory_entries["e-1"] + assert entry["hierarchy_path"] == "/python_testing/" + assert entry["parent_id"] == "" + + def test_normalizes_hyphens_and_spaces(self) -> None: + store = _store_with_workspace() + _add_entry(store, "e-2", 
domains=["web-development"]) + entry = store.memory_entries["e-2"] + assert entry["hierarchy_path"] == "/web_development/" + + def test_no_domains_gets_uncategorized(self) -> None: + store = _store_with_workspace() + _add_entry(store, "e-3", domains=[]) + entry = store.memory_entries["e-3"] + assert entry["hierarchy_path"] == "/uncategorized/" + + def test_multiple_spaces_collapse_to_single_underscore(self) -> None: + store = _store_with_workspace() + _add_entry(store, "e-4", domains=["python testing"]) + entry = store.memory_entries["e-4"] + assert entry["hierarchy_path"] == "/python_testing/" + + +class TestBranchConfidenceAggregation: + """compute_branch_confidence aggregates children's Beta posteriors.""" + + def test_aggregates_children_evidence(self) -> None: + store = _store_with_workspace() + # 3 entries under /engineering/ with known alpha/beta + _add_entry(store, "e-1", domains=["engineering"], conf_alpha=10.0, conf_beta=3.0) + _add_entry(store, "e-2", domains=["engineering"], conf_alpha=8.0, conf_beta=4.0) + _add_entry(store, "e-3", domains=["engineering"], conf_alpha=7.0, conf_beta=2.0) + + result = compute_branch_confidence(store, "/engineering/") + assert result["count"] == 3 + # Evidence: alpha_ev = (10-5)+(8-5)+(7-5) = 10, beta_ev = (3-5)+(4-5)+(2-5) = -6 + # Aggregated: alpha = 5+10 = 15, beta = max(5+(-6), 1.0) = 1.0 (clamped) + assert result["alpha"] == 15.0 + assert result["beta"] == 1.0 # clamped from -1.0 + assert 0.0 <= result["mean"] <= 1.0 # valid probability + + def test_negative_evidence_clamps_to_floor(self) -> None: + """When many children have conf < prior, aggregated params stay valid.""" + store = _store_with_workspace() + # 5 entries with very low beta (1.0 each) -> beta evidence = 5*(1-5) = -20 + for i in range(5): + _add_entry( + store, f"e-{i}", domains=["lowbeta"], + conf_alpha=10.0, conf_beta=1.0, + ) + result = compute_branch_confidence(store, "/lowbeta/") + assert result["count"] == 5 + assert result["alpha"] >= 1.0 + assert 
result["beta"] >= 1.0 + assert 0.0 <= result["mean"] <= 1.0 + + def test_ess_cap_at_150(self) -> None: + store = _store_with_workspace() + # Create entries with high alpha/beta to exceed ESS 150 + _add_entry(store, "e-1", domains=["big"], conf_alpha=50.0, conf_beta=30.0) + _add_entry(store, "e-2", domains=["big"], conf_alpha=45.0, conf_beta=25.0) + _add_entry(store, "e-3", domains=["big"], conf_alpha=40.0, conf_beta=20.0) + + result = compute_branch_confidence(store, "/big/") + ess = result["alpha"] + result["beta"] + assert ess <= 150.0 + 0.01 # float tolerance + + def test_ess_cap_preserves_mean(self) -> None: + store = _store_with_workspace() + _add_entry(store, "e-1", domains=["cap"], conf_alpha=80.0, conf_beta=20.0) + _add_entry(store, "e-2", domains=["cap"], conf_alpha=70.0, conf_beta=15.0) + + result = compute_branch_confidence(store, "/cap/") + # Mean should be close to the uncapped mean + # Uncapped: alpha = 5+(80-5)+(70-5) = 145, beta = 5+(20-5)+(15-5) = 30 + # Uncapped mean = 145/175 ≈ 0.829 + # Capped: should preserve the ratio + assert abs(result["mean"] - 145.0 / 175.0) < 0.01 + + def test_excludes_topic_nodes(self) -> None: + store = _store_with_workspace() + _add_entry(store, "e-1", domains=["mixed"], conf_alpha=10.0, conf_beta=5.0) + # Manually inject a synthetic topic node + store.memory_entries["topic-1"] = { + "entry_type": "topic", + "hierarchy_path": "/mixed/", + "conf_alpha": 50.0, + "conf_beta": 10.0, + "workspace_id": _WS, + } + result = compute_branch_confidence(store, "/mixed/") + assert result["count"] == 1 # only the real entry, not the topic + + def test_empty_prefix_returns_default(self) -> None: + store = _store_with_workspace() + result = compute_branch_confidence(store, "/nonexistent/") + assert result["count"] == 0 + assert result["mean"] == 0.5 # default prior + + +class TestBuildKnowledgeTree: + """build_knowledge_tree builds tree from hierarchy paths.""" + + def test_builds_root_branches(self) -> None: + store = 
_store_with_workspace() + _add_entry(store, "e-1", domains=["engineering"]) + _add_entry(store, "e-2", domains=["engineering"]) + _add_entry(store, "e-3", domains=["testing"]) + + tree = build_knowledge_tree(store, _WS) + labels = [b["label"] for b in tree] + assert "engineering" in labels + assert "testing" in labels + eng = next(b for b in tree if b["label"] == "engineering") + assert eng["entryCount"] == 2 + + def test_filters_by_workspace(self) -> None: + store = _store_with_workspace() + _add_entry(store, "e-1", domains=["eng"], workspace_id=_WS) + _add_entry(store, "e-2", domains=["eng"], workspace_id="other-ws") + + tree = build_knowledge_tree(store, _WS) + if tree: + eng = next((b for b in tree if b["label"] == "eng"), None) + assert eng is not None + assert eng["entryCount"] == 1 + + def test_empty_workspace_returns_empty(self) -> None: + store = _store_with_workspace() + tree = build_knowledge_tree(store, _WS) + assert tree == [] diff --git a/tests/unit/surface/test_knowledge_review.py b/tests/unit/surface/test_knowledge_review.py new file mode 100644 index 0000000..16b992b --- /dev/null +++ b/tests/unit/surface/test_knowledge_review.py @@ -0,0 +1,204 @@ +"""Wave 72 Track 1: knowledge_review scanner tests.""" + +from __future__ import annotations + +from datetime import UTC, datetime, timedelta +from pathlib import Path +from typing import Any +from unittest.mock import MagicMock + +import pytest + +from formicos.surface.action_queue import ( + STATUS_PENDING_REVIEW, + append_action, + create_action, + read_actions, +) +from formicos.surface.knowledge_review import scan_knowledge_for_review + + +def _make_projections( + entries: dict[str, dict[str, Any]] | None = None, + usage: dict[str, dict[str, Any]] | None = None, + outcomes: dict[str, Any] | None = None, + pinned: set[str] | None = None, +) -> MagicMock: + """Build a minimal mock ProjectionStore.""" + proj = MagicMock() + proj.memory_entries = entries or {} + proj.knowledge_entry_usage = usage or {} 
+ proj.colony_outcomes = outcomes or {} + proj.operator_overlays = MagicMock() + proj.operator_overlays.pinned_entries = pinned or set() + return proj + + +def _make_entry( + *, + entry_id: str = "e1", + title: str = "Test entry", + workspace_id: str = "ws1", + conf_alpha: float = 5.0, + conf_beta: float = 5.0, + created_at: str = "", + decay_class: str = "stable", + created_by: str = "extraction", +) -> dict[str, Any]: + if not created_at: + created_at = datetime.now(UTC).isoformat() + return { + "entry_id": entry_id, + "title": title, + "workspace_id": workspace_id, + "content": f"Content for {title}", + "conf_alpha": conf_alpha, + "conf_beta": conf_beta, + "created_at": created_at, + "decay_class": decay_class, + "created_by": created_by, + } + + +def _make_outcome( + colony_id: str, + workspace_id: str = "ws1", + succeeded: bool = True, +) -> MagicMock: + outcome = MagicMock() + outcome.colony_id = colony_id + outcome.workspace_id = workspace_id + outcome.succeeded = succeeded + outcome.entries_accessed = 1 + return outcome + + +class TestOutcomeCorrelatedFailure: + @pytest.mark.asyncio + async def test_failure_correlated_entry_queues_review(self, tmp_path: Path) -> None: + entry = _make_entry(entry_id="e1") + # 4 colonies, 3 failed + outcomes = { + f"c{i}": _make_outcome(f"c{i}", succeeded=(i == 0)) + for i in range(4) + } + usage = { + "e1": {"count": 4, "last_accessed": datetime.now(UTC).isoformat(), + "colonies": ["c0", "c1", "c2", "c3"]}, + } + proj = _make_projections( + entries={"e1": entry}, + usage=usage, + outcomes=outcomes, + ) + + count = await scan_knowledge_for_review(str(tmp_path), "ws1", proj) + assert count >= 1 + actions = read_actions(str(tmp_path), "ws1") + review_actions = [a for a in actions if a["kind"] == "knowledge_review"] + assert len(review_actions) >= 1 + assert review_actions[0]["payload"]["review_reason"] == "outcome_correlated_failure" + + +class TestContradiction: + @pytest.mark.asyncio + async def 
test_contradiction_insight_becomes_review(self, tmp_path: Path) -> None: + entry = _make_entry(entry_id="e1") + proj = _make_projections(entries={"e1": entry}) + insights: list[dict[str, object]] = [ + { + "category": "contradiction", + "detail": f"Entries e1 contradict each other", + "entry_ids": ["e1"], + }, + ] + count = await scan_knowledge_for_review( + str(tmp_path), "ws1", proj, + briefing_insights=insights, + ) + assert count >= 1 + actions = read_actions(str(tmp_path), "ws1") + review_actions = [a for a in actions if a["kind"] == "knowledge_review"] + assert any(a["payload"]["review_reason"] == "contradiction" for a in review_actions) + + +class TestStaleAuthority: + @pytest.mark.asyncio + async def test_stale_authority_queues_review(self, tmp_path: Path) -> None: + old_date = (datetime.now(UTC) - timedelta(days=120)).isoformat() + entry = _make_entry( + entry_id="e1", conf_alpha=20.0, conf_beta=3.0, + created_at=old_date, + ) + usage = {"e1": {"count": 10, "last_accessed": old_date}} + proj = _make_projections(entries={"e1": entry}, usage=usage) + + count = await scan_knowledge_for_review(str(tmp_path), "ws1", proj) + assert count >= 1 + actions = read_actions(str(tmp_path), "ws1") + review_actions = [a for a in actions if a["kind"] == "knowledge_review"] + assert any(a["payload"]["review_reason"] == "stale_authority" for a in review_actions) + + @pytest.mark.asyncio + async def test_permanent_entries_excluded_from_stale(self, tmp_path: Path) -> None: + old_date = (datetime.now(UTC) - timedelta(days=120)).isoformat() + entry = _make_entry( + entry_id="e1", conf_alpha=20.0, conf_beta=3.0, + created_at=old_date, decay_class="permanent", + ) + usage = {"e1": {"count": 10, "last_accessed": old_date}} + proj = _make_projections(entries={"e1": entry}, usage=usage) + + count = await scan_knowledge_for_review(str(tmp_path), "ws1", proj) + # Should not queue stale review for permanent entries + actions = read_actions(str(tmp_path), "ws1") + stale = [ + a for a in 
actions + if a.get("kind") == "knowledge_review" + and a.get("payload", {}).get("review_reason") == "stale_authority" + ] + assert len(stale) == 0 + + +class TestUnconfirmedMachine: + @pytest.mark.asyncio + async def test_unconfirmed_machine_generated_queues_review(self, tmp_path: Path) -> None: + entry = _make_entry(entry_id="e1", created_by="extraction") + usage = {"e1": {"count": 10, "last_accessed": datetime.now(UTC).isoformat()}} + proj = _make_projections(entries={"e1": entry}, usage=usage) + + count = await scan_knowledge_for_review(str(tmp_path), "ws1", proj) + assert count >= 1 + actions = read_actions(str(tmp_path), "ws1") + review_actions = [a for a in actions if a["kind"] == "knowledge_review"] + assert any( + a["payload"]["review_reason"] == "unconfirmed_machine_generated" + for a in review_actions + ) + + +class TestDedupe: + @pytest.mark.asyncio + async def test_dedupe_skips_existing_pending_review(self, tmp_path: Path) -> None: + entry = _make_entry(entry_id="e1", created_by="extraction") + usage = {"e1": {"count": 10, "last_accessed": datetime.now(UTC).isoformat()}} + proj = _make_projections(entries={"e1": entry}, usage=usage) + + # Pre-create a pending review for e1 + existing = create_action( + kind="knowledge_review", + title="Existing review", + payload={"entry_id": "e1"}, + created_by="knowledge_review_scanner", + ) + append_action(str(tmp_path), "ws1", existing) + + count = await scan_knowledge_for_review(str(tmp_path), "ws1", proj) + # Should not queue a second review for e1 + actions = read_actions(str(tmp_path), "ws1") + review_actions = [ + a for a in actions + if a["kind"] == "knowledge_review" + and a["payload"].get("entry_id") == "e1" + ] + assert len(review_actions) == 1 # only the pre-existing one diff --git a/tests/unit/surface/test_operational_state.py b/tests/unit/surface/test_operational_state.py new file mode 100644 index 0000000..789c651 --- /dev/null +++ b/tests/unit/surface/test_operational_state.py @@ -0,0 +1,212 @@ 
+"""Wave 71.0 Team A: operational_state helper tests.""" + +from __future__ import annotations + +from pathlib import Path + +from formicos.surface.operational_state import ( + append_journal_entry, + append_procedure_rule, + get_journal_summary, + get_procedures_summary, + journal_path, + load_procedures, + parse_journal_entries, + procedures_path, + read_journal_tail, + render_journal_for_queen, + render_procedures_for_queen, + save_procedures, +) + + +# --------------------------------------------------------------------------- +# Path resolution +# --------------------------------------------------------------------------- + + +class TestPaths: + def test_journal_path(self, tmp_path: Path) -> None: + p = journal_path(str(tmp_path), "ws1") + assert p == tmp_path / ".formicos" / "operations" / "ws1" / "queen_journal.md" + + def test_procedures_path(self, tmp_path: Path) -> None: + p = procedures_path(str(tmp_path), "ws1") + assert p == tmp_path / ".formicos" / "operations" / "ws1" / "operating_procedures.md" + + +# --------------------------------------------------------------------------- +# Procedures +# --------------------------------------------------------------------------- + + +class TestProcedures: + def test_load_absent(self, tmp_path: Path) -> None: + assert load_procedures(str(tmp_path), "ws1") == "" + + def test_save_and_load(self, tmp_path: Path) -> None: + save_procedures(str(tmp_path), "ws1", "## Rules\n- Do X\n") + text = load_procedures(str(tmp_path), "ws1") + assert "Do X" in text + + def test_append_rule_new_heading(self, tmp_path: Path) -> None: + result = append_procedure_rule( + str(tmp_path), "ws1", "Coding", "Always run tests", + ) + assert "## Coding" in result + assert "- Always run tests" in result + + def test_append_rule_existing_heading(self, tmp_path: Path) -> None: + save_procedures(str(tmp_path), "ws1", "## Coding\n- Existing rule\n") + result = append_procedure_rule( + str(tmp_path), "ws1", "Coding", "New rule", + ) + assert "- 
Existing rule" in result + assert "- New rule" in result + + def test_get_summary_empty(self, tmp_path: Path) -> None: + summary = get_procedures_summary(str(tmp_path), "ws1") + assert summary["exists"] is False + + def test_get_summary_with_content(self, tmp_path: Path) -> None: + save_procedures(str(tmp_path), "ws1", "## Rules\n- Be safe\n") + summary = get_procedures_summary(str(tmp_path), "ws1") + assert summary["exists"] is True + assert "Be safe" in summary["content"] + + +# --------------------------------------------------------------------------- +# Journal +# --------------------------------------------------------------------------- + + +class TestJournal: + def test_append_and_read(self, tmp_path: Path) -> None: + append_journal_entry(str(tmp_path), "ws1", "session", "Started work") + append_journal_entry(str(tmp_path), "ws1", "queen", "Spawned colony") + tail = read_journal_tail(str(tmp_path), "ws1") + assert "Started work" in tail + assert "Spawned colony" in tail + + def test_tail_limit(self, tmp_path: Path) -> None: + for i in range(50): + append_journal_entry(str(tmp_path), "ws1", "test", f"Entry {i}") + tail = read_journal_tail(str(tmp_path), "ws1", max_lines=5) + lines = tail.strip().splitlines() + assert len(lines) == 5 + assert "Entry 49" in lines[-1] + + def test_parse_entries(self) -> None: + text = ( + "- [2026-03-26 10:00] [session] First entry\n" + "- [2026-03-26 10:05] [queen] Second entry\n" + ) + entries = parse_journal_entries(text) + assert len(entries) == 2 + assert entries[0]["source"] == "session" + assert entries[1]["message"] == "Second entry" + + def test_get_summary_empty(self, tmp_path: Path) -> None: + summary = get_journal_summary(str(tmp_path), "ws1") + assert summary["exists"] is False + + def test_get_summary_with_entries(self, tmp_path: Path) -> None: + append_journal_entry(str(tmp_path), "ws1", "test", "Hello") + summary = get_journal_summary(str(tmp_path), "ws1") + assert summary["exists"] is True + assert 
summary["totalEntries"] == 1 + assert summary["entries"][0]["body"] == "Hello" + + +# --------------------------------------------------------------------------- +# Queen rendering +# --------------------------------------------------------------------------- + + +class TestQueenRendering: + def test_procedures_render_empty(self, tmp_path: Path) -> None: + assert render_procedures_for_queen(str(tmp_path), "ws1") == "" + + def test_procedures_render(self, tmp_path: Path) -> None: + save_procedures(str(tmp_path), "ws1", "## Rules\n- Be safe\n") + text = render_procedures_for_queen(str(tmp_path), "ws1") + assert text.startswith("# Operating Procedures") + assert "Be safe" in text + + def test_journal_render_empty(self, tmp_path: Path) -> None: + assert render_journal_for_queen(str(tmp_path), "ws1") == "" + + def test_journal_render(self, tmp_path: Path) -> None: + append_journal_entry(str(tmp_path), "ws1", "session", "Did things") + text = render_journal_for_queen(str(tmp_path), "ws1") + assert text.startswith("# Queen Journal") + assert "Did things" in text + + +# --------------------------------------------------------------------------- +# REST endpoint integration +# --------------------------------------------------------------------------- + + +class TestEndpoints: + def _make_client(self, tmp_path: Path): # noqa: ANN202 + from types import SimpleNamespace + from unittest.mock import MagicMock + + from starlette.applications import Starlette + from starlette.testclient import TestClient + + from formicos.surface.routes.api import routes + + settings_mock = MagicMock() + settings_mock.system = SimpleNamespace(data_dir=str(tmp_path)) + + route_list = routes( + runtime=MagicMock(), + settings=settings_mock, + castes=None, + castes_path="", + config_path="", + vector_store=None, + kg_adapter=None, + embed_client=None, + skill_collection="", + ws_manager=MagicMock(), + ) + app = Starlette(routes=route_list) + return TestClient(app) + + def 
test_get_journal_empty(self, tmp_path: Path) -> None: + client = self._make_client(tmp_path) + resp = client.get("/api/v1/workspaces/ws1/queen-journal") + assert resp.status_code == 200 + assert resp.json()["exists"] is False + + def test_get_journal_with_entries(self, tmp_path: Path) -> None: + append_journal_entry(str(tmp_path), "ws1", "test", "Hello world") + client = self._make_client(tmp_path) + resp = client.get("/api/v1/workspaces/ws1/queen-journal") + data = resp.json() + assert data["exists"] is True + assert data["totalEntries"] == 1 + + def test_get_procedures_empty(self, tmp_path: Path) -> None: + client = self._make_client(tmp_path) + resp = client.get("/api/v1/workspaces/ws1/operating-procedures") + assert resp.status_code == 200 + assert resp.json()["exists"] is False + + def test_put_procedures(self, tmp_path: Path) -> None: + client = self._make_client(tmp_path) + resp = client.put( + "/api/v1/workspaces/ws1/operating-procedures", + json={"content": "## Rules\n- Always test\n"}, + ) + assert resp.status_code == 200 + assert resp.json()["updated"] is True + + # Verify it persisted + resp2 = client.get("/api/v1/workspaces/ws1/operating-procedures") + data = resp2.json() + assert data["exists"] is True + assert "Always test" in data["content"] diff --git a/tests/unit/surface/test_operations_coordinator.py b/tests/unit/surface/test_operations_coordinator.py new file mode 100644 index 0000000..fc7202e --- /dev/null +++ b/tests/unit/surface/test_operations_coordinator.py @@ -0,0 +1,342 @@ +"""Tests for Wave 71.0 Track 7-9: thread_plan + operations_coordinator.""" + +from __future__ import annotations + +import textwrap +from pathlib import Path +from typing import Any + +import pytest + +from formicos.surface.thread_plan import ( + load_all_thread_plans, + load_thread_plan, + parse_thread_plan, + render_for_queen, + thread_plan_path, +) +from formicos.surface.operations_coordinator import ( + build_operations_summary, + render_continuity_block, +) + + +# 
--------------------------------------------------------------------------- +# thread_plan.py tests +# --------------------------------------------------------------------------- + + +class TestThreadPlanPath: + def test_canonical_path(self, tmp_path: Path) -> None: + p = thread_plan_path(str(tmp_path), "thr_abc123") + assert p == tmp_path / ".formicos" / "plans" / "thr_abc123.md" + + +class TestParseThreadPlan: + SAMPLE_PLAN = textwrap.dedent("""\ + # Thread Plan: Build the knowledge graph + Thread: thr_abc123 + + ## Steps + - [0] [completed] Set up database schema + - [1] [completed] Implement entity extraction + - [2] [pending] Wire up retrieval endpoint + - [3] [pending] Add integration tests + """) + + def test_parse_basic(self) -> None: + plan = parse_thread_plan(self.SAMPLE_PLAN) + assert plan["exists"] is True + assert plan["goal"] == "Build the knowledge graph" + assert plan["thread_id"] == "thr_abc123" + assert len(plan["steps"]) == 4 + + def test_parse_step_structure(self) -> None: + plan = parse_thread_plan(self.SAMPLE_PLAN) + step0 = plan["steps"][0] + assert step0["index"] == 0 + assert step0["status"] == "completed" + assert step0["description"] == "Set up database schema" + + def test_summary_counts(self) -> None: + plan = parse_thread_plan(self.SAMPLE_PLAN) + summary = plan["summary"] + assert summary["total"] == 4 + assert summary["completed"] == 2 + assert summary["pending"] == 2 + assert summary["failed"] == 0 + + def test_empty_text(self) -> None: + plan = parse_thread_plan("") + assert plan["exists"] is True + assert plan["steps"] == [] + assert plan["summary"]["total"] == 0 + + def test_plan_prefix(self) -> None: + text = "# Plan: Simple goal\n- [0] [pending] Do thing\n" + plan = parse_thread_plan(text) + assert plan["goal"] == "Simple goal" + assert len(plan["steps"]) == 1 + + +class TestLoadThreadPlan: + def test_load_existing(self, tmp_path: Path) -> None: + plans_dir = tmp_path / ".formicos" / "plans" + plans_dir.mkdir(parents=True) + 
(plans_dir / "thr_xyz.md").write_text( + "# Thread Plan: Test\n- [0] [pending] Do it\n", + encoding="utf-8", + ) + plan = load_thread_plan(str(tmp_path), "thr_xyz") + assert plan["exists"] is True + assert plan["thread_id"] == "thr_xyz" + assert len(plan["steps"]) == 1 + + def test_load_missing(self, tmp_path: Path) -> None: + plan = load_thread_plan(str(tmp_path), "thr_missing") + assert plan["exists"] is False + + def test_load_empty_args(self) -> None: + assert load_thread_plan("", "thr_a")["exists"] is False + assert load_thread_plan("/tmp", "")["exists"] is False + + +class TestLoadAllThreadPlans: + def test_load_multiple(self, tmp_path: Path) -> None: + plans_dir = tmp_path / ".formicos" / "plans" + plans_dir.mkdir(parents=True) + (plans_dir / "thr_a.md").write_text( + "# Thread Plan: A\n- [0] [pending] Step A\n", + encoding="utf-8", + ) + (plans_dir / "thr_b.md").write_text( + "# Thread Plan: B\n- [0] [completed] Step B\n", + encoding="utf-8", + ) + plans = load_all_thread_plans(str(tmp_path)) + assert len(plans) == 2 + thread_ids = {p["thread_id"] for p in plans} + assert "thr_a" in thread_ids + assert "thr_b" in thread_ids + + def test_empty_dir(self, tmp_path: Path) -> None: + assert load_all_thread_plans(str(tmp_path)) == [] + + +class TestRenderForQueen: + def test_render_basic(self) -> None: + plan = parse_thread_plan( + "# Thread Plan: Test\nThread: thr_abc\n" + "- [0] [completed] Done\n- [1] [pending] Next\n", + ) + text = render_for_queen(plan) + assert "[Plan:thr_abc]" in text + assert "Test" in text + assert "1/2" in text + # Only pending steps shown + assert "Next" in text + assert "Done" not in text + + def test_render_empty(self) -> None: + assert render_for_queen({"exists": False}) == "" + assert render_for_queen({"exists": True, "steps": []}) == "" + + +# --------------------------------------------------------------------------- +# operations_coordinator.py tests +# --------------------------------------------------------------------------- 
+ + +def _make_workspace_dir( + tmp_path: Path, + *, + project_plan: str = "", + thread_plans: dict[str, str] | None = None, + sessions: dict[str, str] | None = None, +) -> str: + """Create a minimal .formicos directory structure for testing.""" + formicos = tmp_path / ".formicos" + + if project_plan: + (formicos / "project_plan.md").parent.mkdir(parents=True, exist_ok=True) + (formicos / "project_plan.md").write_text(project_plan, encoding="utf-8") + + if thread_plans: + plans_dir = formicos / "plans" + plans_dir.mkdir(parents=True, exist_ok=True) + for tid, content in thread_plans.items(): + (plans_dir / f"{tid}.md").write_text(content, encoding="utf-8") + + if sessions: + sessions_dir = formicos / "sessions" + sessions_dir.mkdir(parents=True, exist_ok=True) + for tid, content in sessions.items(): + (sessions_dir / f"{tid}.md").write_text(content, encoding="utf-8") + + return str(tmp_path) + + +class TestBuildOperationsSummary: + def test_empty_data_dir(self) -> None: + result = build_operations_summary("", "ws_1") + assert result["workspace_id"] == "ws_1" + assert result["pending_review_count"] == 0 + assert result["continuation_candidates"] == [] + + def test_with_project_plan(self, tmp_path: Path) -> None: + data_dir = _make_workspace_dir( + tmp_path, + project_plan=( + "# Project Plan: Test\n" + "- [0] [pending] First milestone\n" + "- [1] [completed] Second milestone\n" + ), + ) + result = build_operations_summary(data_dir, "ws_1") + assert result["active_milestone_count"] == 1 + + def test_continuation_with_pending_steps(self, tmp_path: Path) -> None: + data_dir = _make_workspace_dir( + tmp_path, + thread_plans={ + "thr_a": ( + "# Thread Plan: Alpha\nThread: thr_a\n" + "- [0] [completed] Step 1\n" + "- [1] [pending] Step 2\n" + ), + }, + sessions={ + "thr_a": "# Session Summary: Alpha\n", + }, + ) + result = build_operations_summary(data_dir, "ws_1") + candidates = result["continuation_candidates"] + assert len(candidates) >= 1 + assert 
candidates[0]["ready_for_autonomy"] is True + + def test_failed_steps_block_autonomy(self, tmp_path: Path) -> None: + data_dir = _make_workspace_dir( + tmp_path, + thread_plans={ + "thr_b": ( + "# Thread Plan: Beta\nThread: thr_b\n" + "- [0] [completed] Step 1\n" + "- [1] [failed] Step 2\n" + "- [2] [pending] Step 3\n" + ), + }, + ) + result = build_operations_summary(data_dir, "ws_1") + candidates = result["continuation_candidates"] + assert len(candidates) >= 1 + assert candidates[0]["ready_for_autonomy"] is False + assert "failures" in candidates[0]["blocked_reason"] + + def test_sync_issue_milestone_plan_mismatch(self, tmp_path: Path) -> None: + data_dir = _make_workspace_dir( + tmp_path, + project_plan=( + "# Project Plan: Test\n" + "- [0] [pending] Build alpha (thread thr_a)\n" + ), + thread_plans={ + "thr_a": ( + "# Thread Plan: Alpha\nThread: thr_a\n" + "- [0] [completed] Step 1\n" + "- [1] [completed] Step 2\n" + ), + }, + ) + result = build_operations_summary(data_dir, "ws_1") + assert len(result["sync_issues"]) >= 1 + assert result["sync_issues"][0]["type"] == "milestone_plan_mismatch" + + def test_stalled_thread_count(self, tmp_path: Path) -> None: + data_dir = _make_workspace_dir( + tmp_path, + thread_plans={ + "thr_a": ( + "# Thread Plan: A\nThread: thr_a\n" + "- [0] [pending] Waiting\n" + ), + "thr_b": ( + "# Thread Plan: B\nThread: thr_b\n" + "- [0] [completed] Done\n" + ), + }, + ) + result = build_operations_summary(data_dir, "ws_1") + assert result["stalled_thread_count"] == 1 + + def test_no_projections(self, tmp_path: Path) -> None: + data_dir = _make_workspace_dir(tmp_path) + result = build_operations_summary(data_dir, "ws_1", projections=None) + assert result["last_operator_activity_at"] is None + assert result["operator_active"] is False + + +class TestRenderContinuityBlock: + def test_empty_summary(self) -> None: + summary: dict[str, Any] = { + "pending_review_count": 0, + "active_milestone_count": 0, + "stalled_thread_count": 0, + 
"idle_for_minutes": None, + "continuation_candidates": [], + "sync_issues": [], + "recent_progress": [], + } + assert render_continuity_block(summary) == "" + + def test_with_counts(self) -> None: + summary: dict[str, Any] = { + "pending_review_count": 2, + "active_milestone_count": 1, + "stalled_thread_count": 0, + "idle_for_minutes": 47, + "continuation_candidates": [], + "sync_issues": [], + "recent_progress": [], + } + text = render_continuity_block(summary) + assert "# Operational Loop Summary" in text + assert "2 pending review" in text + assert "operator idle 47m" in text + + def test_with_candidates(self) -> None: + summary: dict[str, Any] = { + "pending_review_count": 0, + "active_milestone_count": 1, + "stalled_thread_count": 0, + "idle_for_minutes": None, + "continuation_candidates": [ + { + "description": "Thread thr_abc: 2/3 steps done", + "ready_for_autonomy": True, + "blocked_reason": "", + }, + ], + "sync_issues": [], + "recent_progress": [], + } + text = render_continuity_block(summary) + assert "Continuations:" in text + assert "[READY]" in text + + def test_with_sync_issues(self) -> None: + summary: dict[str, Any] = { + "pending_review_count": 0, + "active_milestone_count": 0, + "stalled_thread_count": 0, + "idle_for_minutes": None, + "continuation_candidates": [], + "sync_issues": [ + {"description": "Milestone pending but plan complete"}, + ], + "recent_progress": [ + {"description": "Thread thr_x: 3/3 steps completed"}, + ], + } + text = render_continuity_block(summary) + assert "Sync issues:" in text + assert "Recent:" in text diff --git a/tests/unit/surface/test_plan_attention.py b/tests/unit/surface/test_plan_attention.py new file mode 100644 index 0000000..993bfcc --- /dev/null +++ b/tests/unit/surface/test_plan_attention.py @@ -0,0 +1,229 @@ +"""Tests for Wave 68 plan file persistence and attention injection.""" + +from __future__ import annotations + +from pathlib import Path +from types import SimpleNamespace +from typing import Any 
+from unittest.mock import MagicMock + +from formicos.core.types import ModelRecord +from formicos.surface.queen_tools import QueenToolDispatcher + + +def _make_model_record( + address: str = "local/qwen3", +) -> ModelRecord: + provider = address.split("/", 1)[0] if "/" in address else address + return ModelRecord( + address=address, + provider=provider, + endpoint="http://localhost:8080", + context_window=32768, + supports_tools=True, + cost_per_input_token=0.0, + cost_per_output_token=0.0, + ) + + +def _make_runtime( + tmp_path: Path, +) -> MagicMock: + runtime = MagicMock() + runtime.settings.governance.max_rounds_per_colony = 20 + runtime.settings.models.defaults.coder = "local/qwen3" + runtime.settings.models.registry = [_make_model_record()] + runtime.settings.system.data_dir = str(tmp_path) + runtime.projections.queen_notes = {} + runtime.projections.outcome_stats.return_value = [] + return runtime + + +class TestProposePlanWritesPlanFile: + def test_propose_plan_writes_plan_file(self, tmp_path: Path) -> None: + runtime = _make_runtime(tmp_path) + dispatcher = QueenToolDispatcher(runtime) + thread_id = "thr-plan-1" + + dispatcher._propose_plan( # pyright: ignore[reportPrivateUsage] + { + "summary": "Build a CSV parser with validation", + "options": [ + {"label": "Quick", "description": "Single coder"}, + {"label": "Thorough", "description": "Coder + reviewer"}, + ], + "recommendation": "Quick is sufficient", + }, + workspace_id="ws-1", + thread_id=thread_id, + ) + + plan_path = tmp_path / ".formicos" / "plans" / f"{thread_id}.md" + assert plan_path.is_file() + content = plan_path.read_text(encoding="utf-8") + assert "# Plan: Build a CSV parser" in content + assert "**Approach:** Quick is sufficient" in content + assert "## Options" in content + assert "**Quick:**" in content + assert "**Thorough:**" in content + assert "## Steps" in content + + def test_propose_plan_without_thread_id_skips_file( + self, tmp_path: Path, + ) -> None: + runtime = 
_make_runtime(tmp_path) + dispatcher = QueenToolDispatcher(runtime) + + dispatcher._propose_plan( # pyright: ignore[reportPrivateUsage] + {"summary": "Test plan"}, + workspace_id="ws-1", + thread_id="", + ) + + plan_dir = tmp_path / ".formicos" / "plans" + assert not plan_dir.exists() or not list(plan_dir.iterdir()) + + +class TestMarkPlanStep: + def _write_plan(self, tmp_path: Path, thread_id: str) -> Path: + plan_dir = tmp_path / ".formicos" / "plans" + plan_dir.mkdir(parents=True, exist_ok=True) + plan_path = plan_dir / f"{thread_id}.md" + plan_path.write_text( + "# Plan: Test\n\n## Steps\n" + "- [0] [pending] Write auth module\n" + "- [1] [pending] Write tests\n", + encoding="utf-8", + ) + return plan_path + + def test_mark_plan_step_updates_file(self, tmp_path: Path) -> None: + runtime = _make_runtime(tmp_path) + dispatcher = QueenToolDispatcher(runtime) + thread_id = "thr-step-1" + plan_path = self._write_plan(tmp_path, thread_id) + + result, _ = dispatcher._mark_plan_step( # pyright: ignore[reportPrivateUsage] + {"step_index": 0, "status": "completed", "note": "Done"}, + workspace_id="ws-1", + thread_id=thread_id, + ) + + assert "marked as [completed]" in result + content = plan_path.read_text(encoding="utf-8") + assert "[0] [completed] Write auth module" in content + assert "Done" in content + # Step 1 unchanged + assert "[1] [pending] Write tests" in content + + def test_mark_plan_step_adds_new_step(self, tmp_path: Path) -> None: + runtime = _make_runtime(tmp_path) + dispatcher = QueenToolDispatcher(runtime) + thread_id = "thr-step-2" + plan_path = self._write_plan(tmp_path, thread_id) + + result, _ = dispatcher._mark_plan_step( # pyright: ignore[reportPrivateUsage] + { + "step_index": 2, + "status": "pending", + "description": "Deploy to staging", + }, + workspace_id="ws-1", + thread_id=thread_id, + ) + + assert "marked as [pending]" in result + content = plan_path.read_text(encoding="utf-8") + assert "[2] [pending] Deploy to staging" in content + + def 
test_mark_plan_step_no_plan_file(self, tmp_path: Path) -> None: + runtime = _make_runtime(tmp_path) + dispatcher = QueenToolDispatcher(runtime) + + result, _ = dispatcher._mark_plan_step( # pyright: ignore[reportPrivateUsage] + {"step_index": 0, "status": "started"}, + workspace_id="ws-1", + thread_id="nonexistent", + ) + + assert "No plan file" in result + + +class TestBuildThreadContextIncludesPlan: + def test_build_thread_context_includes_plan( + self, tmp_path: Path, + ) -> None: + """Plan file content appears in _build_thread_context output.""" + from formicos.surface.queen_runtime import QueenAgent + + runtime = MagicMock() + runtime.settings.system.data_dir = str(tmp_path) + + # Set up a thread projection + thread = SimpleNamespace( + name="Test Thread", + goal="Build something", + status="active", + expected_outputs=[], + colony_count=0, + completed_colony_count=0, + failed_colony_count=0, + artifact_types_produced={}, + workflow_steps=[], + ) + ws = SimpleNamespace(threads={"thr-1": thread}, config={}) + runtime.projections.workspaces = {"ws-1": ws} + + # Write plan file + plan_dir = tmp_path / ".formicos" / "plans" + plan_dir.mkdir(parents=True, exist_ok=True) + plan_path = plan_dir / "thr-1.md" + plan_path.write_text( + "# Plan: Build CSV parser\n\n## Steps\n" + "- [0] [started] Implement parser\n", + encoding="utf-8", + ) + + agent = QueenAgent(runtime) + ctx = agent._build_thread_context("thr-1", "ws-1") # pyright: ignore[reportPrivateUsage] + + assert "Plan: Build CSV parser" in ctx + assert "[0] [started] Implement parser" in ctx + + def test_plan_injection_caps_at_2000_chars( + self, tmp_path: Path, + ) -> None: + """Oversized plan files are truncated to 2000 chars.""" + from formicos.surface.queen_runtime import QueenAgent + + runtime = MagicMock() + runtime.settings.system.data_dir = str(tmp_path) + + thread = SimpleNamespace( + name="Big Plan Thread", + goal="Test truncation", + status="active", + expected_outputs=[], + colony_count=0, + 
completed_colony_count=0, + failed_colony_count=0, + artifact_types_produced={}, + workflow_steps=[], + ) + ws = SimpleNamespace(threads={"thr-big": thread}, config={}) + runtime.projections.workspaces = {"ws-1": ws} + + # Write oversized plan file + plan_dir = tmp_path / ".formicos" / "plans" + plan_dir.mkdir(parents=True, exist_ok=True) + plan_path = plan_dir / "thr-big.md" + plan_path.write_text("X" * 5000, encoding="utf-8") + + agent = QueenAgent(runtime) + ctx = agent._build_thread_context("thr-big", "ws-1") # pyright: ignore[reportPrivateUsage] + + # The plan injection is capped at 2000 chars + plan_portion = ctx.split("\n")[-1] if ctx else "" + # Total injected plan text should be at most 2000 chars + # (we just verify it's less than 5000, confirming truncation) + assert len(ctx) < 5000 diff --git a/tests/unit/surface/test_plan_read_endpoint.py b/tests/unit/surface/test_plan_read_endpoint.py new file mode 100644 index 0000000..c019403 --- /dev/null +++ b/tests/unit/surface/test_plan_read_endpoint.py @@ -0,0 +1,117 @@ +"""Wave 69 Track 4: GET /workspaces/{id}/threads/{id}/plan endpoint tests.""" + +from __future__ import annotations + +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import MagicMock + +from starlette.testclient import TestClient + +from formicos.surface.routes.api import routes + + +def _make_client(tmp_path: Path) -> TestClient: + from starlette.applications import Starlette + + runtime = MagicMock() + runtime.projections = MagicMock() + + settings = MagicMock() + settings.system = SimpleNamespace(data_dir=str(tmp_path)) + + route_list = routes( + runtime=runtime, + settings=settings, + castes=None, + castes_path="", + config_path="", + vector_store=None, + kg_adapter=None, + embed_client=None, + skill_collection="", + ws_manager=MagicMock(), + ) + app = Starlette(routes=route_list) + return TestClient(app) + + +def _write_plan(tmp_path: Path, thread_id: str, content: str) -> Path: + plan_dir = tmp_path / 
".formicos" / "plans" + plan_dir.mkdir(parents=True, exist_ok=True) + plan_path = plan_dir / f"{thread_id}.md" + plan_path.write_text(content, encoding="utf-8") + return plan_path + + +class TestPlanReadEndpoint: + def test_plan_endpoint_returns_parsed_steps( + self, tmp_path: Path, + ) -> None: + _write_plan( + tmp_path, + "thr-1", + "# Plan: Implement auth module\n\n" + "**Approach:** Use OAuth2 with JWT tokens\n\n" + "## Steps\n" + "- [0] [completed] Set up OAuth provider (colony abc123)\n" + "- [1] [started] Write integration tests\n" + "- [2] [pending] Update API docs\n", + ) + client = _make_client(tmp_path) + resp = client.get("/api/v1/workspaces/ws-1/threads/thr-1/plan") + assert resp.status_code == 200 + data = resp.json() + assert data["exists"] is True + assert data["title"] == "Implement auth module" + assert data["approach"] == "Use OAuth2 with JWT tokens" + assert len(data["steps"]) == 3 + assert data["steps"][0]["index"] == 0 + assert data["steps"][0]["status"] == "completed" + assert "Set up OAuth provider" in data["steps"][0]["description"] + assert data["steps"][1]["status"] == "started" + assert data["steps"][2]["status"] == "pending" + + def test_plan_endpoint_no_file_returns_not_exists( + self, tmp_path: Path, + ) -> None: + client = _make_client(tmp_path) + resp = client.get("/api/v1/workspaces/ws-1/threads/thr-none/plan") + assert resp.status_code == 200 + data = resp.json() + assert data["exists"] is False + + def test_plan_endpoint_parses_colony_ids( + self, tmp_path: Path, + ) -> None: + _write_plan( + tmp_path, + "thr-2", + "# Plan: Test\n\n## Steps\n" + "- [0] [started] Implement parser (colony abc123)\n", + ) + client = _make_client(tmp_path) + resp = client.get("/api/v1/workspaces/ws-1/threads/thr-2/plan") + data = resp.json() + assert data["steps"][0]["colony_id"] == "abc123" + + def test_plan_endpoint_handles_malformed_gracefully( + self, tmp_path: Path, + ) -> None: + _write_plan( + tmp_path, + "thr-3", + "This is not a valid plan 
file\n" + "Some random content here\n" + "- [0] [completed] Only valid step\n", + ) + client = _make_client(tmp_path) + resp = client.get("/api/v1/workspaces/ws-1/threads/thr-3/plan") + assert resp.status_code == 200 + data = resp.json() + assert data["exists"] is True + # Title defaults when not found + assert data["title"] == "Plan" + # The valid step is still parsed + assert len(data["steps"]) == 1 + assert data["steps"][0]["status"] == "completed" diff --git a/tests/unit/surface/test_project_plan.py b/tests/unit/surface/test_project_plan.py new file mode 100644 index 0000000..89b31da --- /dev/null +++ b/tests/unit/surface/test_project_plan.py @@ -0,0 +1,262 @@ +"""Tests for Wave 70.0 Team B: Project plan helper, tools, endpoint, budget.""" + +from __future__ import annotations + +import textwrap +from pathlib import Path +from typing import Any +from unittest.mock import MagicMock + +import pytest +from starlette.testclient import TestClient + +from formicos.surface.project_plan import ( + add_milestone, + complete_milestone, + load_project_plan, + parse_project_plan, + render_for_queen, +) + + +# --------------------------------------------------------------------------- +# Test 1: Parser returns structured milestones from markdown +# --------------------------------------------------------------------------- + + +class TestParser: + def test_parse_milestones(self) -> None: + text = textwrap.dedent("""\ + # Project Plan: Build the thing + Updated: 2026-03-26T10:00:00Z + + ## Milestones + - [0] [completed] Set up repo (thread t-1) [completed_at 2026-03-25T09:00:00Z] + - [1] [pending] Implement core logic (thread t-2) + - [2] [pending] Write tests \u2014 unit + integration + """) + plan = parse_project_plan(text) + assert plan["exists"] is True + assert plan["goal"] == "Build the thing" + assert plan["updated"] == "2026-03-26T10:00:00Z" + assert len(plan["milestones"]) == 3 + + ms0 = plan["milestones"][0] + assert ms0["index"] == 0 + assert ms0["status"] == 
"completed" + assert ms0["thread_id"] == "t-1" + assert ms0["completed_at"] == "2026-03-25T09:00:00Z" + + ms1 = plan["milestones"][1] + assert ms1["index"] == 1 + assert ms1["status"] == "pending" + assert ms1["thread_id"] == "t-2" + + ms2 = plan["milestones"][2] + assert ms2["index"] == 2 + assert ms2.get("note") == "unit + integration" + + def test_malformed_markdown_handled_gracefully(self) -> None: + """Garbage input returns exists=True with empty milestones.""" + plan = parse_project_plan("random garbage\nno milestones here\n") + assert plan["exists"] is True + assert plan["milestones"] == [] + assert plan["goal"] == "" + + def test_empty_input(self) -> None: + plan = parse_project_plan("") + assert plan["exists"] is True + assert plan["milestones"] == [] + + +# --------------------------------------------------------------------------- +# Test 2: Milestone tools create/update the plan file correctly +# --------------------------------------------------------------------------- + + +class TestMilestoneTools: + def test_add_milestone_creates_file(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + plan = add_milestone( + data_dir, "First milestone", goal="My project", + ) + assert plan["exists"] is True + assert plan["goal"] == "My project" + assert len(plan["milestones"]) == 1 + assert plan["milestones"][0]["status"] == "pending" + assert plan["milestones"][0]["description"].startswith( + "First milestone", + ) + # File actually exists + from formicos.surface.project_plan import project_plan_path + + assert project_plan_path(data_dir).is_file() + + def test_add_multiple_milestones(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + add_milestone(data_dir, "Step A", goal="Plan") + plan = add_milestone(data_dir, "Step B") + assert len(plan["milestones"]) == 2 + assert plan["milestones"][0]["index"] == 0 + assert plan["milestones"][1]["index"] == 1 + + def test_complete_milestone(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + 
add_milestone(data_dir, "Do the thing", goal="G") + plan = complete_milestone(data_dir, 0, note="Done!") + assert plan["milestones"][0]["status"] == "completed" + assert plan["milestones"][0].get("completed_at") is not None + + def test_complete_missing_milestone(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + add_milestone(data_dir, "Only one", goal="G") + result = complete_milestone(data_dir, 99) + assert "error" in result + + def test_complete_no_plan_file(self, tmp_path: Path) -> None: + result = complete_milestone(str(tmp_path), 0) + assert result["exists"] is False + assert "error" in result + + def test_add_milestone_stamps_thread_id(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + plan = add_milestone( + data_dir, "Threaded work", + thread_id="th-abc", goal="G", + ) + ms = plan["milestones"][0] + assert ms.get("thread_id") == "th-abc" + + +# --------------------------------------------------------------------------- +# Test 3: GET /api/v1/project-plan returns helper-derived JSON +# --------------------------------------------------------------------------- + + +class TestEndpoint: + def _make_app(self, data_dir: str) -> Any: + from starlette.applications import Starlette + from starlette.requests import Request + from starlette.responses import JSONResponse + from starlette.routing import Route + + from formicos.surface.project_plan import load_project_plan + + async def get_project_plan(request: Request) -> JSONResponse: + return JSONResponse(load_project_plan(data_dir)) + + return Starlette(routes=[ + Route("/api/v1/project-plan", get_project_plan, methods=["GET"]), + ]) + + def test_endpoint_returns_plan(self, tmp_path: Path) -> None: + data_dir = str(tmp_path) + add_milestone(data_dir, "MS1", goal="Test project") + app = self._make_app(data_dir) + client = TestClient(app) + resp = client.get("/api/v1/project-plan") + assert resp.status_code == 200 + data = resp.json() + assert data["exists"] is True + assert data["goal"] == "Test 
project" + assert len(data["milestones"]) == 1 + + def test_endpoint_no_plan(self, tmp_path: Path) -> None: + app = self._make_app(str(tmp_path)) + client = TestClient(app) + resp = client.get("/api/v1/project-plan") + assert resp.status_code == 200 + data = resp.json() + assert data["exists"] is False + + +# --------------------------------------------------------------------------- +# Test 4: Malformed markdown handled gracefully (covered in TestParser) +# --------------------------------------------------------------------------- + +# See TestParser.test_malformed_markdown_handled_gracefully above. + + +# --------------------------------------------------------------------------- +# Test 5: Queen budget includes a dedicated project_plan slot +# --------------------------------------------------------------------------- + + +class TestBudgetSlot: + def test_budget_has_project_plan_field(self) -> None: + from formicos.surface.queen_budget import ( + FALLBACK_BUDGET, + _FALLBACKS, + _FRACTIONS, + compute_queen_budget, + ) + + # Verify the slot exists in fractions and fallbacks + assert "project_plan" in _FRACTIONS + assert "project_plan" in _FALLBACKS + assert _FALLBACKS["project_plan"] == 400 + assert _FRACTIONS["project_plan"] == 0.05 + + # Verify fractions sum to 1.0 + assert abs(sum(_FRACTIONS.values()) - 1.0) < 1e-9 + + # Verify fallback budget has the field + assert hasattr(FALLBACK_BUDGET, "project_plan") + assert FALLBACK_BUDGET.project_plan == 400 + + def test_computed_budget_includes_project_plan(self) -> None: + from formicos.surface.queen_budget import compute_queen_budget + + budget = compute_queen_budget(200_000, 4096) + assert budget.project_plan > 0 + # 5% of (200000 - 4096) = 9795, should be above fallback + assert budget.project_plan >= 400 + + def test_project_plan_separate_from_project_context(self) -> None: + from formicos.surface.queen_budget import compute_queen_budget + + budget = compute_queen_budget(100_000, 4096) + # They should be 
different slots with different allocations + assert budget.project_plan != budget.project_context + + +# --------------------------------------------------------------------------- +# Test 6: Project-plan injection uses the project-plan budget +# --------------------------------------------------------------------------- + + +class TestInjection: + def test_render_for_queen_output(self) -> None: + plan = { + "exists": True, + "goal": "Ship v1", + "milestones": [ + {"index": 0, "status": "completed", "description": "Setup"}, + {"index": 1, "status": "pending", "description": "Core"}, + ], + } + text = render_for_queen(plan) + assert "# Project Plan (cross-thread)" in text + assert "Goal: Ship v1" in text + assert "\u2713" in text # completed marker + assert "\u25cb" in text # pending marker + + def test_render_empty_plan(self) -> None: + assert render_for_queen({"exists": False}) == "" + assert render_for_queen({"exists": True, "milestones": []}) == "" + + def test_render_respects_budget_cap(self) -> None: + """Rendered text is truncatable by budget * 4 chars.""" + plan = { + "exists": True, + "goal": "A" * 500, + "milestones": [ + {"index": i, "status": "pending", "description": f"M{i} " + "x" * 200} + for i in range(20) + ], + } + text = render_for_queen(plan) + # Budget cap of 400 tokens * 4 chars = 1600 chars + capped = text[:400 * 4] + assert len(capped) <= 1600 + assert capped.startswith("# Project Plan") diff --git a/tests/unit/surface/test_provenance_chain.py b/tests/unit/surface/test_provenance_chain.py new file mode 100644 index 0000000..fcbb965 --- /dev/null +++ b/tests/unit/surface/test_provenance_chain.py @@ -0,0 +1,195 @@ +"""Tests for Wave 67.5 provenance chain on projections.""" + +from __future__ import annotations + +from datetime import datetime, timezone + +from formicos.core.events import ( + KnowledgeEntryAnnotated, + KnowledgeEntryOperatorAction, + MemoryConfidenceUpdated, + MemoryEntryCreated, + MemoryEntryMerged, + WorkspaceCreated, +) +from 
formicos.core.events import WorkspaceConfigSnapshot +from formicos.surface.projections import ProjectionStore + +_NOW = datetime(2026, 3, 25, tzinfo=timezone.utc) +_WS = "ws-prov" +_WS_CONFIG = WorkspaceConfigSnapshot(budget=5.0, strategy="stigmergic") + + +def _store_with_workspace() -> ProjectionStore: + store = ProjectionStore() + store.apply(WorkspaceCreated( + seq=1, timestamp=_NOW, address=_WS, + name=_WS, config=_WS_CONFIG, + )) + return store + + +def _add_entry( + store: ProjectionStore, + entry_id: str = "e-1", + source_colony_id: str = "col-1", +) -> None: + store.apply(MemoryEntryCreated( + seq=10, timestamp=_NOW, address=f"{_WS}/t-1", + workspace_id=_WS, + entry={ + "id": entry_id, + "entry_type": "skill", + "status": "candidate", + "polarity": "positive", + "title": f"Entry {entry_id}", + "content": f"Content for {entry_id}", + "source_colony_id": source_colony_id, + "source_artifact_ids": [], + "workspace_id": _WS, + "thread_id": "t-1", + "domains": ["testing"], + "conf_alpha": 5.0, + "conf_beta": 5.0, + "confidence": 0.5, + }, + )) + + +class TestProvenanceChainOnCreation: + def test_memory_entry_created_seeds_provenance_chain(self) -> None: + store = _store_with_workspace() + _add_entry(store, "e-1", source_colony_id="col-1") + entry = store.memory_entries["e-1"] + chain = entry.get("provenance_chain", []) + assert len(chain) == 1 + item = chain[0] + assert item["event_type"] == "MemoryEntryCreated" + assert item["actor_id"] == "col-1" + assert "Created by colony col-1" in item["detail"] + assert item["confidence_delta"] is None + + +class TestProvenanceChainOnConfidenceUpdate: + def test_memory_confidence_updated_appends_delta(self) -> None: + store = _store_with_workspace() + _add_entry(store, "e-1") + store.apply(MemoryConfidenceUpdated( + seq=20, timestamp=_NOW, address=f"{_WS}/t-1/col-2", + entry_id="e-1", + colony_id="col-2", + colony_succeeded=True, + old_alpha=5.0, + old_beta=5.0, + new_alpha=6.0, + new_beta=5.0, + new_confidence=6.0 / 
11.0, + workspace_id=_WS, + thread_id="t-1", + reason="colony_outcome", + )) + entry = store.memory_entries["e-1"] + chain = entry.get("provenance_chain", []) + assert len(chain) == 2 # created + confidence update + conf_item = chain[1] + assert conf_item["event_type"] == "MemoryConfidenceUpdated" + assert conf_item["actor_id"] == "col-2" + assert "colony_outcome" in conf_item["detail"] + assert conf_item["confidence_delta"] is not None + # old mean = 5/10 = 0.5, new mean = 6/11 ≈ 0.5455 + assert abs(conf_item["confidence_delta"] - (6.0 / 11.0 - 0.5)) < 0.001 + + +class TestProvenanceChainOnMerge: + def test_memory_entry_merged_updates_target_and_source_chains(self) -> None: + store = _store_with_workspace() + _add_entry(store, "e-target") + _add_entry(store, "e-source") + store.apply(MemoryEntryMerged( + seq=30, timestamp=_NOW, address=f"{_WS}/t-1", + target_id="e-target", + source_id="e-source", + merged_content="merged content", + merged_domains=["testing"], + merged_from=["e-source"], + content_strategy="keep_longer", + similarity=0.95, + merge_source="dedup", + workspace_id=_WS, + )) + + target_chain = store.memory_entries["e-target"].get("provenance_chain", []) + source_chain = store.memory_entries["e-source"].get("provenance_chain", []) + + # Target gets creation + merge + assert len(target_chain) == 2 + assert target_chain[1]["event_type"] == "MemoryEntryMerged" + assert "e-source" in target_chain[1]["detail"] + + # Source gets creation + merge + assert len(source_chain) == 2 + assert source_chain[1]["event_type"] == "MemoryEntryMerged" + assert "e-target" in source_chain[1]["detail"] + + +class TestProvenanceEndpoint: + def test_provenance_endpoint_returns_chain(self) -> None: + """Verify provenance_chain is accessible from projections (endpoint is thin wrapper).""" + store = _store_with_workspace() + _add_entry(store, "e-1", source_colony_id="col-1") + store.apply(MemoryConfidenceUpdated( + seq=20, timestamp=_NOW, address=f"{_WS}/t-1/col-2", + 
entry_id="e-1", + colony_id="col-2", + colony_succeeded=True, + old_alpha=5.0, old_beta=5.0, + new_alpha=6.0, new_beta=5.0, + new_confidence=6.0 / 11.0, + workspace_id=_WS, + thread_id="t-1", + reason="colony_outcome", + )) + entry = store.memory_entries["e-1"] + chain = entry.get("provenance_chain", []) + # Simulate endpoint response shape + response = {"entry_id": "e-1", "chain": chain, "total": len(chain)} + assert response["total"] == 2 + assert response["chain"][0]["event_type"] == "MemoryEntryCreated" + assert response["chain"][1]["event_type"] == "MemoryConfidenceUpdated" + + +class TestProvenanceOperatorAnnotation: + def test_operator_annotation_appends_provenance_item(self) -> None: + store = _store_with_workspace() + _add_entry(store, "e-1") + store.apply(KnowledgeEntryAnnotated( + seq=40, timestamp=_NOW, address=f"{_WS}/e-1", + entry_id="e-1", + workspace_id=_WS, + annotation_text="Reviewed and confirmed", + tag="reviewed", + actor="operator", + )) + chain = store.memory_entries["e-1"].get("provenance_chain", []) + assert len(chain) == 2 # created + annotation + ann = chain[1] + assert ann["event_type"] == "KnowledgeEntryAnnotated" + assert ann["actor_id"] == "operator" + assert "[reviewed]" in ann["detail"] + + def test_operator_action_appends_provenance_item(self) -> None: + store = _store_with_workspace() + _add_entry(store, "e-1") + store.apply(KnowledgeEntryOperatorAction( + seq=50, timestamp=_NOW, address=f"{_WS}/e-1", + entry_id="e-1", + workspace_id=_WS, + action="pin", + actor="operator", + reason="important entry", + )) + chain = store.memory_entries["e-1"].get("provenance_chain", []) + assert len(chain) == 2 # created + operator action + act = chain[1] + assert act["event_type"] == "KnowledgeEntryOperatorAction" + assert "pin" in act["detail"] diff --git a/tests/unit/surface/test_queen_budget.py b/tests/unit/surface/test_queen_budget.py new file mode 100644 index 0000000..369a73f --- /dev/null +++ b/tests/unit/surface/test_queen_budget.py @@ 
-0,0 +1,110 @@ +"""Tests for queen_budget.py (Wave 68 Track 3, ADR-051; Wave 71.0 9-slot expansion).""" + +from __future__ import annotations + +from formicos.surface.queen_budget import ( + FALLBACK_BUDGET, + QueenContextBudget, + compute_queen_budget, +) + + +class TestQueenContextBudget: + """QueenContextBudget dataclass basics.""" + + def test_frozen(self) -> None: + b = QueenContextBudget( + system_prompt=100, + memory_retrieval=100, + project_context=100, + project_plan=100, + operating_procedures=100, + queen_journal=100, + thread_context=100, + tool_memory=100, + conversation_history=100, + ) + assert b.system_prompt == 100 + assert b.operating_procedures == 100 + assert b.queen_journal == 100 + + def test_fallback_values(self) -> None: + assert FALLBACK_BUDGET.system_prompt == 2000 + assert FALLBACK_BUDGET.memory_retrieval == 1500 + assert FALLBACK_BUDGET.project_context == 500 + assert FALLBACK_BUDGET.operating_procedures == 400 + assert FALLBACK_BUDGET.queen_journal == 300 + assert FALLBACK_BUDGET.thread_context == 1500 + assert FALLBACK_BUDGET.tool_memory == 4000 + assert FALLBACK_BUDGET.conversation_history == 6000 + + +class TestComputeQueenBudget: + """compute_queen_budget proportional scaling.""" + + def test_none_context_window_returns_fallback(self) -> None: + result = compute_queen_budget(None, 4096) + assert result is FALLBACK_BUDGET + + def test_zero_context_window_returns_fallback(self) -> None: + result = compute_queen_budget(0, 4096) + assert result is FALLBACK_BUDGET + + def test_negative_context_window_returns_fallback(self) -> None: + result = compute_queen_budget(-1, 4096) + assert result is FALLBACK_BUDGET + + def test_small_window_uses_floors(self) -> None: + """8K model with 4096 reserve -> available=4096, all floors.""" + result = compute_queen_budget(8192, 4096) + assert result.system_prompt == 2000 + assert result.memory_retrieval == 1500 + assert result.project_context == 500 + assert result.operating_procedures == 400 + assert 
result.queen_journal == 300 + assert result.thread_context == 1500 + assert result.tool_memory == 4000 + assert result.conversation_history == 6000 + + def test_large_window_scales_up(self) -> None: + """200K model -> proportional values exceed floors.""" + result = compute_queen_budget(200_000, 4096) + # available = 200_000 - 4096 = 195904 + # conversation_history = 28% of 195904 = 54853 + assert result.conversation_history >= 54000 + assert result.conversation_history > 6000 + # thread_context = 13% of 195904 = 25467 + assert result.thread_context >= 25000 + assert result.thread_context > 1500 + + def test_no_regression_guarantee(self) -> None: + """Every slot must use max(fallback, proportional).""" + result = compute_queen_budget(32768, 4096) + assert result.system_prompt >= 2000 + assert result.memory_retrieval >= 1500 + assert result.project_context >= 500 + assert result.operating_procedures >= 400 + assert result.queen_journal >= 300 + assert result.thread_context >= 1500 + assert result.tool_memory >= 4000 + assert result.conversation_history >= 6000 + + def test_output_reserve_exceeds_window(self) -> None: + """When reserve >= window, available=0 -> fallback.""" + result = compute_queen_budget(4096, 4096) + assert result is FALLBACK_BUDGET + + def test_32k_model_example(self) -> None: + """ADR-051 example: 32K model, 4096 reserve.""" + result = compute_queen_budget(32768, 4096) + available = 32768 - 4096 # 28672 + # conversation_history = 28% of 28672 = 8028 + assert result.conversation_history == max( + 6000, int(available * 0.28), + ) + # thread_context = 13% of 28672 = 3727 + assert result.thread_context == max( + 1500, int(available * 0.13), + ) + # tool_memory = 9% of 28672 = 2580 < 4000 floor + assert result.tool_memory == 4000 diff --git a/tests/unit/surface/test_queen_runtime.py b/tests/unit/surface/test_queen_runtime.py index 2afe804..2ac5799 100644 --- a/tests/unit/surface/test_queen_runtime.py +++ b/tests/unit/surface/test_queen_runtime.py @@ 
-1285,7 +1285,7 @@ async def test_tool_memory_collected(self) -> None: runtime.llm_router.complete = AsyncMock( side_effect=[llm_resp_with_tools, llm_final], ) - runtime.retrieve_relevant_memory = AsyncMock(return_value="") + runtime.retrieve_relevant_memory = AsyncMock(return_value=("", [])) queen = QueenAgent(runtime) # Mock the tool dispatcher to return a result diff --git a/tests/unit/surface/test_queen_runtime_intent_fallback.py b/tests/unit/surface/test_queen_runtime_intent_fallback.py index 248a27e..1aa8157 100644 --- a/tests/unit/surface/test_queen_runtime_intent_fallback.py +++ b/tests/unit/surface/test_queen_runtime_intent_fallback.py @@ -28,7 +28,7 @@ def _make_runtime_with_thread(thread: object, response: LLMResponse) -> MagicMoc runtime.settings.governance.max_redirects_per_colony = 1 runtime.settings.routing.tau_threshold = 0.5 runtime.vector_store = None - runtime.retrieve_relevant_memory = AsyncMock(return_value="") + runtime.retrieve_relevant_memory = AsyncMock(return_value=("", [])) return runtime diff --git a/tests/unit/surface/test_session_continuity.py b/tests/unit/surface/test_session_continuity.py new file mode 100644 index 0000000..69c6507 --- /dev/null +++ b/tests/unit/surface/test_session_continuity.py @@ -0,0 +1,140 @@ +"""Tests for Wave 68 session continuity — summary emission and injection.""" + +from __future__ import annotations + +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import MagicMock + +from formicos.surface.queen_runtime import QueenAgent + + +def _make_thread( + *, + name: str = "Test Thread", + status: str = "active", + queen_messages: list[SimpleNamespace] | None = None, + colony_count: int = 3, + completed_count: int = 2, + failed_count: int = 1, + workflow_steps: list[dict[str, str]] | None = None, +) -> SimpleNamespace: + return SimpleNamespace( + name=name, + status=status, + goal="Build a thing", + queen_messages=queen_messages or [], + colony_count=colony_count, + 
completed_colony_count=completed_count, + failed_colony_count=failed_count, + workflow_steps=workflow_steps or [], + expected_outputs=[], + artifact_types_produced={}, + ) + + +def _make_runtime(tmp_path: Path) -> MagicMock: + runtime = MagicMock() + runtime.settings.system.data_dir = str(tmp_path) + return runtime + + +class TestEmitSessionSummary: + def test_emit_session_summary_writes_file( + self, tmp_path: Path, + ) -> None: + runtime = _make_runtime(tmp_path) + thread = _make_thread( + queen_messages=[ + SimpleNamespace( + role="queen", + content="I recommend option A for this task.", + timestamp="2026-03-25T12:00:00+00:00", + ), + SimpleNamespace( + role="operator", + content="Go ahead", + timestamp="2026-03-25T12:01:00+00:00", + ), + SimpleNamespace( + role="queen", + content="Colony spawned for auth module.", + timestamp="2026-03-25T12:02:00+00:00", + ), + ], + workflow_steps=[ + {"status": "completed", "description": "Step 1"}, + {"status": "pending", "description": "Step 2"}, + ], + ) + runtime.projections.get_thread.return_value = thread + + agent = QueenAgent(runtime) + agent.emit_session_summary("ws-1", "thr-1") + + session_path = ( + tmp_path / ".formicos" / "sessions" / "thr-1.md" + ) + assert session_path.is_file() + content = session_path.read_text(encoding="utf-8") + assert "# Session Summary: Test Thread" in content + assert "**Status:** active" in content + assert "2 completed, 1 failed, 3 total" in content + assert "1 steps completed, 1 pending" in content + assert "## Recent Queen Activity" in content + assert "I recommend option A" in content + assert "Colony spawned" in content + + def test_emit_session_summary_includes_plan_state( + self, tmp_path: Path, + ) -> None: + runtime = _make_runtime(tmp_path) + thread = _make_thread() + runtime.projections.get_thread.return_value = thread + + # Write a plan file + plan_dir = tmp_path / ".formicos" / "plans" + plan_dir.mkdir(parents=True, exist_ok=True) + (plan_dir / "thr-1.md").write_text( + "# 
Plan: Build CSV parser\n\n## Steps\n" + "- [0] [completed] Parse headers\n", + encoding="utf-8", + ) + + agent = QueenAgent(runtime) + agent.emit_session_summary("ws-1", "thr-1") + + session_path = ( + tmp_path / ".formicos" / "sessions" / "thr-1.md" + ) + content = session_path.read_text(encoding="utf-8") + assert "## Active Plan" in content + assert "Plan: Build CSV parser" in content + + def test_emit_session_summary_no_thread( + self, tmp_path: Path, + ) -> None: + runtime = _make_runtime(tmp_path) + runtime.projections.get_thread.return_value = None + + agent = QueenAgent(runtime) + agent.emit_session_summary("ws-1", "thr-none") + + session_dir = tmp_path / ".formicos" / "sessions" + assert not session_dir.exists() + + def test_session_injection_caps_at_4000_chars( + self, tmp_path: Path, + ) -> None: + """Session file content is truncated to 4000 chars when injected.""" + # Write an oversized session file + session_dir = tmp_path / ".formicos" / "sessions" + session_dir.mkdir(parents=True, exist_ok=True) + (session_dir / "thr-big.md").write_text( + "X" * 8000, encoding="utf-8", + ) + + # Verify the file read + truncation logic + session_path = session_dir / "thr-big.md" + text = session_path.read_text(encoding="utf-8")[:4000] + assert len(text) == 4000 diff --git a/tests/unit/surface/test_two_pass_retrieval.py b/tests/unit/surface/test_two_pass_retrieval.py new file mode 100644 index 0000000..72da5c1 --- /dev/null +++ b/tests/unit/surface/test_two_pass_retrieval.py @@ -0,0 +1,426 @@ +"""Tests for Wave 67.5 — Two-Pass Retrieval with Personalized PageRank (ADR-050).""" + +from __future__ import annotations + +from typing import Any +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from formicos.adapters.knowledge_graph import KnowledgeGraphAdapter +from formicos.surface.knowledge_catalog import KnowledgeCatalog + +# --------------------------------------------------------------------------- +# Helpers +# 
--------------------------------------------------------------------------- + +_INSERT_NODE = ( + "INSERT INTO kg_nodes" + " (id, name, entity_type, summary, workspace_id)" + " VALUES (?, ?, ?, ?, ?)" +) +_INSERT_NODE_SHORT = ( + "INSERT INTO kg_nodes" + " (id, name, entity_type, workspace_id)" + " VALUES (?, ?, ?, ?)" +) +_INSERT_EDGE = ( + "INSERT INTO kg_edges" + " (id, from_node, to_node, predicate, workspace_id)" + " VALUES (?, ?, ?, ?, ?)" +) + + +def _make_kg_adapter( + *, + entities: list[dict[str, Any]] | None = None, + edges: dict[str, list[dict[str, Any]]] | None = None, +) -> MagicMock: + """Build a mock KnowledgeGraphAdapter with controllable behavior.""" + adapter = MagicMock(spec=KnowledgeGraphAdapter) + _edges = edges or {} + + async def _match( + query: str, workspace_id: str, *, limit: int = 5, + ) -> list[dict[str, Any]]: + return (entities or [])[:limit] + + adapter.match_entities_by_embedding = AsyncMock(side_effect=_match) + + async def _get_neighbors( + entity_id: str, depth: int = 1, + workspace_id: str | None = None, + *, include_invalidated: bool = False, + valid_before: str | None = None, + ) -> list[dict[str, Any]]: + return _edges.get(entity_id, []) + + adapter.get_neighbors = AsyncMock(side_effect=_get_neighbors) + + async def _ppr( + seed_ids: list[str], workspace_id: str, + *, damping: float = 0.5, iterations: int = 20, + ) -> dict[str, float]: + result: dict[str, float] = {} + for sid in seed_ids: + result[sid] = 1.0 + for nbr in _edges.get(sid, []): + other = ( + nbr["to_node"] + if nbr["from_node"] == sid + else nbr["from_node"] + ) + result.setdefault(other, 0.5) + max_s = max(result.values()) if result else 1.0 + if max_s > 0: + result = {k: v / max_s for k, v in result.items()} + return result + + adapter.personalized_pagerank = AsyncMock(side_effect=_ppr) + return adapter + + +def _make_projections( + *, + entries: dict[str, dict[str, Any]] | None = None, + entry_kg_nodes: dict[str, str] | None = None, +) -> MagicMock: + 
"""Build a mock ProjectionStore.""" + proj = MagicMock() + proj.memory_entries = entries or {} + proj.entry_kg_nodes = entry_kg_nodes or {} + proj.cooccurrence_weights = {} + proj.workspace_configs = {} + return proj + + +def _make_memory_store( + results: list[dict[str, Any]] | None = None, +) -> MagicMock: + """Build a mock MemoryStore.""" + store = MagicMock() + + async def _search(**kwargs: Any) -> list[dict[str, Any]]: + return results or [] + + store.search = AsyncMock(side_effect=_search) + return store + + +def _make_catalog( + *, + memory_results: list[dict[str, Any]] | None = None, + kg_entities: list[dict[str, Any]] | None = None, + kg_edges: dict[str, list[dict[str, Any]]] | None = None, + entries: dict[str, dict[str, Any]] | None = None, + entry_kg_nodes: dict[str, str] | None = None, +) -> KnowledgeCatalog: + """Build a KnowledgeCatalog with mocked dependencies.""" + ms = _make_memory_store(memory_results) + proj = _make_projections( + entries=entries, entry_kg_nodes=entry_kg_nodes, + ) + kg = _make_kg_adapter(entities=kg_entities, edges=kg_edges) + return KnowledgeCatalog( + memory_store=ms, + vector_port=None, + skill_collection="test", + projections=proj, + kg_adapter=kg, + ) + + +def _mock_institutional( + mem_results: list[dict[str, Any]], +) -> AsyncMock: + """Return an AsyncMock for _search_institutional.""" + from formicos.surface.knowledge_catalog import ( # noqa: PLC0415 + _normalize_institutional, + ) + + async def _impl( + *args: Any, **kwargs: Any, + ) -> list[dict[str, Any]]: + return [ + _normalize_institutional(r, score=float(r.get("score", 0))) + for r in mem_results + ] + + return AsyncMock(side_effect=_impl) + + +# --------------------------------------------------------------------------- +# Step 1: match_entities_by_embedding +# --------------------------------------------------------------------------- + + +class TestMatchEntitiesByEmbedding: + """ADR-050 D2: entity matching for PPR seeding.""" + + @pytest.mark.asyncio + async 
def test_returns_semantically_relevant( + self, tmp_path: Any, + ) -> None: + """Embedding path sorts by cosine similarity.""" + db_path = tmp_path / "test.db" + + def sync_embed(texts: list[str]) -> list[list[float]]: + vecs = [] + for t in texts: + if "auth" in t.lower(): + vecs.append([0.9, 0.44]) + elif "logging" in t.lower(): + vecs.append([0.1, 0.99]) + else: + vecs.append([1.0, 0.0]) + return vecs + + kg = KnowledgeGraphAdapter(db_path, embed_fn=sync_embed) + await kg._ensure_db() + db = kg._db + assert db is not None + await db.execute( + _INSERT_NODE, + ["e1", "AuthMiddleware", "MODULE", "auth handler", "ws1"], + ) + await db.execute( + _INSERT_NODE, + ["e2", "LoggingService", "MODULE", "logging infra", "ws1"], + ) + await db.commit() + + results = await kg.match_entities_by_embedding( + "auth validation", "ws1", + ) + assert len(results) == 2 + assert results[0]["id"] == "e1" + assert results[0]["score"] > results[1]["score"] + await kg.close() + + @pytest.mark.asyncio + async def test_falls_back_to_substring( + self, tmp_path: Any, + ) -> None: + """Without embedding function, falls back to substring match.""" + db_path = tmp_path / "test.db" + kg = KnowledgeGraphAdapter(db_path) + await kg._ensure_db() + db = kg._db + assert db is not None + await db.execute( + _INSERT_NODE, + ["e1", "auth", "CONCEPT", "authentication", "ws1"], + ) + await db.execute( + _INSERT_NODE, + ["e2", "logging", "CONCEPT", "log system", "ws1"], + ) + await db.commit() + + results = await kg.match_entities_by_embedding( + "check auth handler", "ws1", + ) + assert len(results) == 1 + assert results[0]["id"] == "e1" + await kg.close() + + +# --------------------------------------------------------------------------- +# Step 2: Personalized PageRank +# --------------------------------------------------------------------------- + + +class TestPersonalizedPageRank: + """ADR-050 D1: PPR replaces BFS with hop-decay.""" + + @pytest.mark.asyncio + async def test_seed_nodes_highest(self, 
tmp_path: Any) -> None: + """Seed nodes should have the highest PPR score.""" + db_path = tmp_path / "test.db" + kg = KnowledgeGraphAdapter(db_path) + await kg._ensure_db() + db = kg._db + assert db is not None + + for eid, name in [("e1", "A"), ("e2", "B"), ("e3", "C")]: + await db.execute( + _INSERT_NODE_SHORT, [eid, name, "CONCEPT", "ws1"], + ) + await db.execute( + _INSERT_EDGE, + ["edge1", "e1", "e2", "RELATED_TO", "ws1"], + ) + await db.execute( + _INSERT_EDGE, + ["edge2", "e2", "e3", "RELATED_TO", "ws1"], + ) + await db.commit() + + scores = await kg.personalized_pagerank(["e1"], "ws1") + assert scores.get("e1", 0.0) == pytest.approx(1.0) + assert scores.get("e2", 0.0) > scores.get("e3", 0.0) + for v in scores.values(): + assert 0.0 <= v <= 1.0 + await kg.close() + + @pytest.mark.asyncio + async def test_empty_seeds_returns_empty( + self, tmp_path: Any, + ) -> None: + db_path = tmp_path / "test.db" + kg = KnowledgeGraphAdapter(db_path) + scores = await kg.personalized_pagerank([], "ws1") + assert scores == {} + await kg.close() + + +# --------------------------------------------------------------------------- +# Step 4: _search_vector populates graph proximity +# --------------------------------------------------------------------------- + + +class TestSearchVectorGraphProximity: + """Wave 67.5: standard retrieval path gets real graph proximity.""" + + @pytest.mark.asyncio + async def test_populates_graph_proximity(self) -> None: + """Non-thread results carry non-zero _graph_proximity.""" + mem_results = [ + { + "id": "entry1", "entry_type": "skill", + "status": "verified", "confidence": 0.8, + "title": "Auth patterns", "summary": "JWT auth", + "content": "...", "source_colony_id": "c1", + "domains": ["auth"], + "created_at": "2026-01-01T00:00:00+00:00", + "conf_alpha": 10.0, "conf_beta": 2.0, "score": 0.9, + }, + ] + catalog = _make_catalog( + memory_results=mem_results, + kg_entities=[{ + "id": "kg1", "name": "AuthMiddleware", + "entity_type": "MODULE", 
"score": 0.95, + }], + entry_kg_nodes={"entry1": "kg1"}, + ) + catalog._search_institutional = _mock_institutional(mem_results) # type: ignore[method-assign] + + async def _mock_gs( + query: str, workspace_id: str, + ) -> dict[str, float]: + return {"entry1": 0.85} + + catalog._compute_graph_scores = AsyncMock(side_effect=_mock_gs) # type: ignore[method-assign] + + results = await catalog.search( + "auth validation", workspace_id="ws1", + ) + assert len(results) > 0 + assert results[0].get("_graph_proximity", 0.0) == pytest.approx( + 0.85, + ) + + @pytest.mark.asyncio + async def test_emits_score_breakdown_parity(self) -> None: + """Standard path results include _score_breakdown.""" + mem_results = [ + { + "id": "entry1", "entry_type": "skill", + "status": "verified", "confidence": 0.8, + "title": "Test", "summary": "test", "content": "...", + "source_colony_id": "c1", "domains": [], + "created_at": "2026-01-01T00:00:00+00:00", + "conf_alpha": 10.0, "conf_beta": 2.0, "score": 0.9, + }, + ] + catalog = _make_catalog(memory_results=mem_results) + catalog._search_institutional = _mock_institutional(mem_results) # type: ignore[method-assign] + + async def _mock_gs( + query: str, workspace_id: str, + ) -> dict[str, float]: + return {"entry1": 0.42} + + catalog._compute_graph_scores = AsyncMock(side_effect=_mock_gs) # type: ignore[method-assign] + + results = await catalog.search("test query", workspace_id="ws1") + assert len(results) > 0 + breakdown = results[0].get("_score_breakdown") + assert breakdown is not None + assert breakdown["graph_proximity"] == pytest.approx(0.42) + assert "semantic" in breakdown + assert "thompson" in breakdown + assert "weights" in breakdown + + +# --------------------------------------------------------------------------- +# Step 5: thread-boosted uses shared graph enrichment +# --------------------------------------------------------------------------- + + +class TestSearchThreadBoostedSharedHelper: + """Wave 67.5: thread path uses 
shared _enrich_with_graph_scores.""" + + @pytest.mark.asyncio + async def test_uses_shared_graph_enrichment(self) -> None: + """Thread retrieval calls _enrich_with_graph_scores.""" + mem_results = [ + { + "id": "entry1", "entry_type": "skill", + "status": "verified", "confidence": 0.8, + "title": "Auth", "summary": "auth patterns", + "content": "...", "source_colony_id": "c1", + "domains": ["auth"], + "created_at": "2026-01-01T00:00:00+00:00", + "conf_alpha": 10.0, "conf_beta": 2.0, "score": 0.9, + }, + ] + catalog = _make_catalog( + memory_results=mem_results, + entry_kg_nodes={"entry1": "kg1"}, + entries={"entry1": mem_results[0]}, + ) + catalog._search_institutional = _mock_institutional(mem_results) # type: ignore[method-assign] + + original_enrich = catalog._enrich_with_graph_scores + catalog._enrich_with_graph_scores = AsyncMock( # type: ignore[method-assign] + side_effect=original_enrich, + ) + + await catalog.search( + "auth validation", + workspace_id="ws1", + thread_id="thread1", + ) + + catalog._enrich_with_graph_scores.assert_awaited() # type: ignore[union-attr] + + +# --------------------------------------------------------------------------- +# Graceful degradation +# --------------------------------------------------------------------------- + + +class TestGracefulDegradation: + """Graph scoring degrades gracefully when KG adapter is unavailable.""" + + @pytest.mark.asyncio + async def test_no_kg_adapter_returns_empty(self) -> None: + """Without KG adapter, _compute_graph_scores returns {}.""" + catalog = KnowledgeCatalog( + memory_store=None, + vector_port=None, + skill_collection="test", + projections=None, + kg_adapter=None, + ) + scores = await catalog._compute_graph_scores("test query", "ws1") + assert scores == {} + + @pytest.mark.asyncio + async def test_enrich_with_no_seeds_returns_empty(self) -> None: + """_enrich_with_graph_scores with empty seeds returns {}.""" + catalog = _make_catalog() + scores = await 
catalog._enrich_with_graph_scores([], "ws1") + assert scores == {} diff --git a/tests/unit/surface/test_unified_search.py b/tests/unit/surface/test_unified_search.py new file mode 100644 index 0000000..21922e1 --- /dev/null +++ b/tests/unit/surface/test_unified_search.py @@ -0,0 +1,283 @@ +"""Tests for Wave 69 Track 6: Unified search endpoint.""" + +from __future__ import annotations + +import asyncio +from typing import Any +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from starlette.testclient import TestClient + +from formicos.surface.routes.knowledge_api import ( + _parse_addon_results, + routes, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +WS_ID = "ws-test-1" + + +def _make_app( + *, + catalog_results: list[dict[str, Any]] | None = None, + addon_manifests: list[Any] | None = None, + addon_registrations: list[Any] | None = None, +) -> Any: + """Build a minimal Starlette app with unified search wired up.""" + from starlette.applications import Starlette + from starlette.routing import Route + + catalog = None + if catalog_results is not None: + catalog = AsyncMock() + catalog.search = AsyncMock(return_value=catalog_results) + + route_list = routes( + knowledge_catalog=catalog, + runtime=None, + projections=None, + ) + app = Starlette(routes=route_list) + app.state.addon_manifests = addon_manifests or [] # type: ignore[attr-defined] + app.state.addon_registrations = addon_registrations or [] # type: ignore[attr-defined] + return app + + +def _make_manifest( + name: str = "docs-index", + description: str = "Documentation index", + content_kinds: list[str] | None = None, + path_globs: list[str] | None = None, + search_tool: str = "semantic_search_docs", + tools: list[Any] | None = None, +) -> Any: + """Build a fake addon manifest.""" + m = MagicMock() + m.name = name + m.description = description + 
m.content_kinds = content_kinds or ["documentation"] + m.path_globs = path_globs or ["**/*.md"] + m.search_tool = search_tool + + if tools is None: + tool = MagicMock() + tool.name = search_tool + tool.handler = "search.py::handle_semantic_search" + m.tools = [tool] + else: + m.tools = tools + return m + + +def _make_registration(manifest: Any) -> Any: + """Build a fake addon registration.""" + reg = MagicMock() + reg.manifest = manifest + reg.runtime_context = {"vector_port": MagicMock()} + return reg + + +# --------------------------------------------------------------------------- +# Tests: Memory results shape +# --------------------------------------------------------------------------- + + +class TestMemoryResults: + def test_memory_results_correct_shape(self) -> None: + memory_items = [ + { + "id": "entry-1", + "title": "Python best practices", + "summary": "Use type hints and docstrings.", + "score": 0.85, + "confidence": 0.72, + "status": "verified", + "domains": ["python", "testing"], + "sub_type": "convention", + }, + { + "id": "entry-2", + "title": "Git workflow", + "summary": "Use feature branches.", + "score": 0.65, + "confidence": 0.55, + "status": "candidate", + "domains": ["git"], + "sub_type": "learning", + }, + ] + app = _make_app(catalog_results=memory_items) + client = TestClient(app) + resp = client.get( + f"/api/v1/workspaces/{WS_ID}/search?q=python", + ) + assert resp.status_code == 200 + data = resp.json() + assert "results" in data + assert "total" in data + assert data["total"] >= 2 + + # Check first result shape + r = data["results"][0] + assert r["source"] == "memory" + assert r["source_label"] == "Institutional Memory" + assert r["id"] == "entry-1" + assert r["title"] == "Python best practices" + assert len(r["snippet"]) <= 200 + assert "metadata" in r + assert r["metadata"]["confidence"] == 0.72 + assert r["metadata"]["status"] == "verified" + assert "python" in r["metadata"]["domains"] + + def 
test_sources_memory_only_skips_addons(self) -> None: + """?sources=memory should not call addon handlers.""" + manifest = _make_manifest() + reg = _make_registration(manifest) + + app = _make_app( + catalog_results=[{"id": "e1", "title": "t", "score": 0.5}], + addon_manifests=[manifest], + addon_registrations=[reg], + ) + client = TestClient(app) + resp = client.get( + f"/api/v1/workspaces/{WS_ID}/search?q=test&sources=memory", + ) + assert resp.status_code == 200 + data = resp.json() + # Should have memory results only + sources = {r["source"] for r in data["results"]} + assert sources == {"memory"} + + +# --------------------------------------------------------------------------- +# Tests: Addon markdown parsing +# --------------------------------------------------------------------------- + + +class TestAddonMarkdownParsing: + def test_parse_code_block_results(self) -> None: + """Markdown with bold path + code block is parsed correctly.""" + raw = ( + "**src/main.py:10-25** (score: 0.832)\n" + "```python\n" + "def hello():\n" + " return 'world'\n" + "```\n" + "\n" + "**src/utils.py:5-8** (score: 0.710)\n" + "```python\n" + "import os\n" + "```" + ) + results = _parse_addon_results( + raw, "codebase-index", "Code search", + ["source_code"], 10, + ) + assert len(results) == 2 + assert results[0]["source"] == "codebase-index" + assert results[0]["title"] == "src/main.py:10-25" + assert results[0]["score"] == 0.832 + assert results[0]["metadata"]["file_path"] == "src/main.py" + assert results[0]["metadata"]["line_range"] == "10-25" + assert "hello" in results[0]["snippet"] + + def test_parse_limit_respected(self) -> None: + raw = "\n\n".join( + f"**file{i}.py** (score: 0.5)\ncontent {i}" + for i in range(20) + ) + results = _parse_addon_results( + raw, "test", "Test", [], 5, + ) + assert len(results) <= 5 + + +# --------------------------------------------------------------------------- +# Tests: Addon handler failure resilience +# 
--------------------------------------------------------------------------- + + +class TestAddonFailureResilience: + @patch( + "formicos.surface.addon_loader._resolve_handler", + ) + def test_addon_handler_raises_memory_still_returned( + self, mock_resolve: MagicMock, + ) -> None: + """When addon handler raises, memory results are still returned.""" + async def _failing_handler(*args: Any, **kwargs: Any) -> str: + msg = "index not available" + raise RuntimeError(msg) + + mock_resolve.return_value = _failing_handler + + manifest = _make_manifest() + reg = _make_registration(manifest) + + app = _make_app( + catalog_results=[ + {"id": "e1", "title": "good entry", "score": 0.9}, + ], + addon_manifests=[manifest], + addon_registrations=[reg], + ) + client = TestClient(app) + resp = client.get( + f"/api/v1/workspaces/{WS_ID}/search?q=test", + ) + assert resp.status_code == 200 + data = resp.json() + # Memory results survived even though addon failed + assert data["total"] >= 1 + assert any(r["source"] == "memory" for r in data["results"]) + + +# --------------------------------------------------------------------------- +# Tests: Source grouping +# --------------------------------------------------------------------------- + + +class TestSourceGrouping: + def test_results_grouped_by_source(self) -> None: + """Memory results come first, then addon groups.""" + memory_items = [ + {"id": "m1", "title": "mem", "score": 0.5}, + {"id": "m2", "title": "mem2", "score": 0.3}, + ] + app = _make_app(catalog_results=memory_items) + client = TestClient(app) + resp = client.get( + f"/api/v1/workspaces/{WS_ID}/search?q=test", + ) + data = resp.json() + # All memory results should appear before any addon results + sources = [r["source"] for r in data["results"]] + memory_indices = [i for i, s in enumerate(sources) if s == "memory"] + other_indices = [i for i, s in enumerate(sources) if s != "memory"] + if memory_indices and other_indices: + assert max(memory_indices) < 
min(other_indices) + + +# --------------------------------------------------------------------------- +# Tests: Missing query +# --------------------------------------------------------------------------- + + +class TestMissingQuery: + def test_empty_query_returns_error(self) -> None: + app = _make_app(catalog_results=[]) + client = TestClient(app) + resp = client.get(f"/api/v1/workspaces/{WS_ID}/search?q=") + assert resp.status_code >= 400 + + def test_no_q_param_returns_error(self) -> None: + app = _make_app(catalog_results=[]) + client = TestClient(app) + resp = client.get(f"/api/v1/workspaces/{WS_ID}/search") + assert resp.status_code >= 400 diff --git a/tests/unit/surface/test_wave65_queen_agency.py b/tests/unit/surface/test_wave65_queen_agency.py index 56ff6ac..d35aded 100644 --- a/tests/unit/surface/test_wave65_queen_agency.py +++ b/tests/unit/surface/test_wave65_queen_agency.py @@ -240,15 +240,29 @@ def test_draft_document_prepend_mode(self, tmp_path: Path) -> None: class TestListAddons: def test_list_addons_shows_addon_tools(self) -> None: - """list_addons reports registered addon tool specs.""" + """list_addons reports registered addon manifests with capabilities.""" + from formicos.surface.addon_loader import AddonManifest, AddonToolSpec + runtime = _make_runtime() dispatcher = QueenToolDispatcher(runtime) - dispatcher._addon_tool_specs = [ # pyright: ignore[reportPrivateUsage] - {"name": "semantic_search", "description": "Search code"}, + dispatcher._addon_manifests = [ # pyright: ignore[reportPrivateUsage] + AddonManifest( + name="codebase-index", + description="Search code", + content_kinds=["source_code"], + search_tool="semantic_search", + tools=[ + AddonToolSpec( + name="semantic_search", + description="Search code", + handler="search.py::handle", + ), + ], + ), ] result, meta = dispatcher._list_addons() # pyright: ignore[reportPrivateUsage] assert "semantic_search" in result - assert "Addon Tools (1)" in result + assert "codebase-index" in result 
assert "Search code" in result diff --git a/tests/unit/surface/test_workflow_learning.py b/tests/unit/surface/test_workflow_learning.py new file mode 100644 index 0000000..ad0d090 --- /dev/null +++ b/tests/unit/surface/test_workflow_learning.py @@ -0,0 +1,208 @@ +"""Tests for workflow_learning — Track 8 (patterns) and Track 9 (procedures).""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from pathlib import Path + +import pytest + +from formicos.surface.workflow_learning import ( + _MIN_BEHAVIOR_COUNT, + _MIN_DISTINCT_THREADS, + _MIN_SUCCESS_COUNT, + detect_operator_patterns, + extract_workflow_patterns, +) + + +@pytest.fixture() +def data_dir(tmp_path: Path) -> str: + return str(tmp_path) + + +WS = "ws-test" + + +def _outcome( + *, + succeeded: bool = True, + strategy: str = "stigmergic", + castes: dict[str, int] | None = None, + total_cost: float = 0.01, + thread_id: str = "", + colony_id: str = "col-1", +) -> dict[str, Any]: + return { + "succeeded": succeeded, + "strategy": strategy, + "caste_composition": castes or {"coder": 1}, + "total_cost": total_cost, + "thread_id": thread_id, + "colony_id": colony_id, + } + + +# ── Track 8: Workflow pattern recognition ── + + +class TestExtractWorkflowPatterns: + def test_no_outcomes_returns_empty(self, data_dir: str) -> None: + assert extract_workflow_patterns(data_dir, WS, []) == [] + + def test_below_threshold_returns_empty(self, data_dir: str) -> None: + outcomes = [ + _outcome(thread_id="t1"), + _outcome(thread_id="t2"), + ] + assert len(outcomes) < _MIN_SUCCESS_COUNT + assert extract_workflow_patterns(data_dir, WS, outcomes) == [] + + def test_single_thread_returns_empty(self, data_dir: str) -> None: + """Even with enough count, need distinct threads.""" + outcomes = [ + _outcome(thread_id="t1") for _ in range(_MIN_SUCCESS_COUNT) + ] + assert extract_workflow_patterns(data_dir, WS, outcomes) == [] + + def test_successful_pattern_proposed(self, data_dir: str) 
-> None: + outcomes = [] + for i in range(_MIN_SUCCESS_COUNT): + tid = f"t{i % _MIN_DISTINCT_THREADS}" + outcomes.append(_outcome(thread_id=tid, colony_id=f"col-{i}")) + + proposals = extract_workflow_patterns(data_dir, WS, outcomes) + assert len(proposals) == 1 + p = proposals[0] + assert p["kind"] == "workflow_template" + assert p["payload"]["strategy"] == "stigmergic" + assert "coder" in p["payload"]["castes"] + + def test_failed_outcomes_ignored(self, data_dir: str) -> None: + outcomes = [ + _outcome(succeeded=False, thread_id=f"t{i % 2}", colony_id=f"col-{i}") + for i in range(_MIN_SUCCESS_COUNT + 1) + ] + assert extract_workflow_patterns(data_dir, WS, outcomes) == [] + + def test_deduplicates_against_existing_templates(self, data_dir: str) -> None: + outcomes = [ + _outcome(thread_id=f"t{i % _MIN_DISTINCT_THREADS}", colony_id=f"col-{i}") + for i in range(_MIN_SUCCESS_COUNT) + ] + + class FakeTemplate: + strategy = "stigmergic" + castes = ["coder"] + + templates = [FakeTemplate()] + proposals = extract_workflow_patterns(data_dir, WS, outcomes, existing_templates=templates) + assert proposals == [] + + def test_deduplicates_against_pending_actions(self, data_dir: str) -> None: + # First call creates the proposal + outcomes = [ + _outcome(thread_id=f"t{i % _MIN_DISTINCT_THREADS}", colony_id=f"col-{i}") + for i in range(_MIN_SUCCESS_COUNT) + ] + first = extract_workflow_patterns(data_dir, WS, outcomes) + assert len(first) == 1 + + # Second call with same data should not duplicate + second = extract_workflow_patterns(data_dir, WS, outcomes) + assert second == [] + + def test_empty_data_dir_returns_empty(self) -> None: + assert extract_workflow_patterns("", WS, [_outcome()]) == [] + + def test_empty_workspace_returns_empty(self, data_dir: str) -> None: + assert extract_workflow_patterns(data_dir, "", [_outcome()]) == [] + + def test_multiple_distinct_patterns(self, data_dir: str) -> None: + outcomes = [] + for i in range(_MIN_SUCCESS_COUNT): + tid = f"t{i % 
_MIN_DISTINCT_THREADS}" + outcomes.append(_outcome( + strategy="stigmergic", castes={"coder": 1}, + thread_id=tid, colony_id=f"a-{i}", + )) + outcomes.append(_outcome( + strategy="sequential", castes={"researcher": 1}, + thread_id=tid, colony_id=f"b-{i}", + )) + + proposals = extract_workflow_patterns(data_dir, WS, outcomes) + assert len(proposals) == 2 + kinds = {p["payload"]["strategy"] for p in proposals} + assert kinds == {"stigmergic", "sequential"} + + +# ── Track 9: Procedure suggestions ── + + +def _action( + *, + kind: str = "maintenance", + status: str = "rejected", + source_category: str = "proactive_intelligence", +) -> dict[str, Any]: + return { + "action_id": "act-1", + "kind": kind, + "status": status, + "source_category": source_category, + "payload": {}, + } + + +class TestDetectOperatorPatterns: + def test_no_actions_returns_empty(self, data_dir: str) -> None: + assert detect_operator_patterns(data_dir, WS, actions=[]) == [] + + def test_below_threshold_returns_empty(self, data_dir: str) -> None: + actions = [_action() for _ in range(_MIN_BEHAVIOR_COUNT - 1)] + assert detect_operator_patterns(data_dir, WS, actions=actions) == [] + + def test_rejection_pattern_proposed(self, data_dir: str) -> None: + actions = [ + _action(status="rejected", source_category="health_check") + for _ in range(_MIN_BEHAVIOR_COUNT) + ] + proposals = detect_operator_patterns(data_dir, WS, actions=actions) + assert len(proposals) == 1 + p = proposals[0] + assert p["kind"] == "procedure_suggestion" + assert p["payload"]["pattern_type"] == "rejection" + assert p["payload"]["category"] == "health_check" + + def test_review_pattern_proposed(self, data_dir: str) -> None: + actions = [ + _action(kind="maintenance", status="approved", source_category="stale_sweep") + for _ in range(_MIN_BEHAVIOR_COUNT) + ] + proposals = detect_operator_patterns(data_dir, WS, actions=actions) + assert len(proposals) == 1 + p = proposals[0] + assert p["kind"] == "procedure_suggestion" + assert 
p["payload"]["pattern_type"] == "review" + + def test_deduplicates_pending_suggestions(self, data_dir: str) -> None: + actions = [ + _action(status="rejected", source_category="health_check") + for _ in range(_MIN_BEHAVIOR_COUNT) + ] + first = detect_operator_patterns(data_dir, WS, actions=actions) + assert len(first) == 1 + + # Simulate the pending suggestion by including it in actions + pending = first[0] + pending["status"] = "pending_review" + actions.append(pending) + second = detect_operator_patterns(data_dir, WS, actions=actions) + assert second == [] + + def test_empty_guards(self) -> None: + assert detect_operator_patterns("", WS) == [] + assert detect_operator_patterns("/tmp", "") == [] diff --git a/tests/unit/surface/test_workspace_taxonomy.py b/tests/unit/surface/test_workspace_taxonomy.py new file mode 100644 index 0000000..34dbb90 --- /dev/null +++ b/tests/unit/surface/test_workspace_taxonomy.py @@ -0,0 +1,197 @@ +"""Tests for Wave 68 Track 6: soft workspace taxonomy.""" + +from __future__ import annotations + +import json +from unittest.mock import AsyncMock, MagicMock + +import pytest + + +class TestSetWorkspaceTags: + @pytest.mark.asyncio + async def test_emits_config_event(self) -> None: + """set_workspace_tags emits WorkspaceConfigChanged with correct fields.""" + from formicos.surface.queen_tools import QueenToolDispatcher + + runtime = MagicMock() + ws = MagicMock() + ws.config = {} + runtime.projections.workspaces = {"ws-1": ws} + runtime.emit_and_broadcast = AsyncMock() + + dispatcher = QueenToolDispatcher(runtime) + result_text, meta = await dispatcher._set_workspace_tags( + {"tags": ["python", "web-api"]}, + workspace_id="ws-1", + thread_id="th-1", + ) + + runtime.emit_and_broadcast.assert_called_once() + event = runtime.emit_and_broadcast.call_args[0][0] + assert event.type == "WorkspaceConfigChanged" + assert event.field == "taxonomy_tags" + assert event.workspace_id == "ws-1" + tags = json.loads(event.new_value) + assert tags == 
["python", "web-api"] + assert "python" in result_text + assert "web-api" in result_text + + @pytest.mark.asyncio + async def test_tags_normalized_and_capped(self) -> None: + """Tags are lowercased, stripped, deduped, capped at 20/50chars.""" + from formicos.surface.queen_tools import QueenToolDispatcher + + runtime = MagicMock() + ws = MagicMock() + ws.config = {} + runtime.projections.workspaces = {"ws-1": ws} + runtime.emit_and_broadcast = AsyncMock() + + dispatcher = QueenToolDispatcher(runtime) + + # Test normalization + result_text, _ = await dispatcher._set_workspace_tags( + {"tags": [" Python ", "PYTHON", "Auth", "a" * 100]}, + workspace_id="ws-1", + thread_id="th-1", + ) + + event = runtime.emit_and_broadcast.call_args[0][0] + tags = json.loads(event.new_value) + # Dedup: "python" appears once (case-insensitive) + assert tags.count("python") == 1 + assert "auth" in tags + # Long tag capped at 50 chars + long_tag = [t for t in tags if len(t) == 50] + assert len(long_tag) == 1 + + @pytest.mark.asyncio + async def test_max_20_tags(self) -> None: + """No more than 20 tags accepted.""" + from formicos.surface.queen_tools import QueenToolDispatcher + + runtime = MagicMock() + ws = MagicMock() + ws.config = {} + runtime.projections.workspaces = {"ws-1": ws} + runtime.emit_and_broadcast = AsyncMock() + + dispatcher = QueenToolDispatcher(runtime) + await dispatcher._set_workspace_tags( + {"tags": [f"tag-{i}" for i in range(30)]}, + workspace_id="ws-1", + thread_id="th-1", + ) + + event = runtime.emit_and_broadcast.call_args[0][0] + tags = json.loads(event.new_value) + assert len(tags) == 20 + + +class TestThreadContextIncludesTags: + def test_tags_injected(self) -> None: + """Thread context includes tags when workspace has them.""" + from formicos.surface.queen_runtime import QueenAgent + + runtime = MagicMock() + ws = MagicMock() + ws.config = {"taxonomy_tags": json.dumps(["python", "auth"])} + thread = MagicMock() + thread.name = "test-thread" + thread.goal = 
"Build auth system" + thread.status = "active" + thread.expected_outputs = [] + thread.colony_count = 0 + thread.completed_colony_count = 0 + thread.failed_colony_count = 0 + thread.workflow_steps = [] + ws.threads = {"th-1": thread} + runtime.projections.workspaces = {"ws-1": ws} + runtime.settings.system.data_dir = "" + + agent = QueenAgent.__new__(QueenAgent) + agent._runtime = runtime + + ctx = agent._build_thread_context("th-1", "ws-1") + assert "Tags: python, auth" in ctx + + def test_no_tags_no_line(self) -> None: + """No tags line when workspace has no taxonomy_tags.""" + from formicos.surface.queen_runtime import QueenAgent + + runtime = MagicMock() + ws = MagicMock() + ws.config = {} + thread = MagicMock() + thread.name = "test-thread" + thread.goal = "Do something" + thread.status = "active" + thread.expected_outputs = [] + thread.colony_count = 0 + thread.completed_colony_count = 0 + thread.failed_colony_count = 0 + thread.workflow_steps = [] + ws.threads = {"th-1": thread} + runtime.projections.workspaces = {"ws-1": ws} + runtime.settings.system.data_dir = "" + + agent = QueenAgent.__new__(QueenAgent) + agent._runtime = runtime + + ctx = agent._build_thread_context("th-1", "ws-1") + assert "Tags:" not in ctx + + +class TestAutoSuggestNudge: + def test_nudge_for_tagless_workspace(self) -> None: + """Tagless workspace with < 3 threads gets nudge.""" + from formicos.surface.queen_runtime import QueenAgent + + runtime = MagicMock() + ws = MagicMock() + ws.config = {} + thread = MagicMock() + thread.name = "t1" + thread.goal = "Test" + thread.status = "active" + thread.expected_outputs = [] + thread.colony_count = 0 + thread.completed_colony_count = 0 + thread.failed_colony_count = 0 + thread.workflow_steps = [] + ws.threads = {"th-1": thread} + runtime.projections.workspaces = {"ws-1": ws} + runtime.settings.system.data_dir = "" + + agent = QueenAgent.__new__(QueenAgent) + agent._runtime = runtime + + ctx = agent._build_thread_context("th-1", "ws-1") + 
assert "set_workspace_tags" in ctx + + def test_no_nudge_when_tags_exist(self) -> None: + """No nudge when workspace has tags.""" + from formicos.surface.queen_runtime import QueenAgent + + runtime = MagicMock() + ws = MagicMock() + ws.config = {"taxonomy_tags": json.dumps(["python"])} + thread = MagicMock() + thread.name = "t1" + thread.goal = "Test" + thread.status = "active" + thread.expected_outputs = [] + thread.colony_count = 0 + thread.completed_colony_count = 0 + thread.failed_colony_count = 0 + thread.workflow_steps = [] + ws.threads = {"th-1": thread} + runtime.projections.workspaces = {"ws-1": ws} + runtime.settings.system.data_dir = "" + + agent = QueenAgent.__new__(QueenAgent) + agent._runtime = runtime + + ctx = agent._build_thread_context("th-1", "ws-1") + assert "set_workspace_tags" not in ctx