From 029f8a77d8c14290bf2d29506f3afee16f55acd1 Mon Sep 17 00:00:00 2001 From: jSydorowicz21 Date: Wed, 24 Jun 2026 23:33:55 -0500 Subject: [PATCH 01/76] feat(pianola): add Encore flag and pure classifier/policy core Pianola is a new Encore-gated autonomous manager agent that watches agent tabs, detects when an agent is awaiting the user, classifies the ask and its risk, and either auto-answers low-risk prompts from the user's rules or escalates uncertain/high-risk ones. Foundation (steps 0-1 of Plans/pianola-implementation-plan.md): - Encore flag `pianola`, off by default, end-to-end: EncoreFeatureFlags type, DEFAULT_ENCORE_FEATURES and settingsMetadata defaults, CLI encore FEATURES + aliases (auto-pilot/autopilot/pilot/manager), and a Settings -> Encore toggle. - Shared contracts in src/shared/pianola/types.ts. - Pure, I/O-free classifier: prefers a structured AwaitingInputSignal, falls back to conservative heuristics, rates risk via a focused lexicon. - Pure, safety-first policy engine: high-risk always escalates; auto-answer only on an explicit matching rule; default is escalate. - 31 classifier/policy unit tests, plus CLI alias/default coverage. Nothing consumes the flag yet, so the feature is fully inert when off. Plans/ captures the verified codebase investigation and the build plan. --- .../autonomous-manager-agent-investigation.md | 300 ++++++++++++++++++ Plans/autopilot-codebase-findings.md | 184 +++++++++++ Plans/pianola-implementation-plan.md | 76 +++++ src/__tests__/cli/commands/encore.test.ts | 17 + .../main/pianola/pianola-classifier.test.ts | 143 +++++++++ .../main/pianola/pianola-policy.test.ts | 177 +++++++++++ src/cli/commands/encore.ts | 6 + src/main/pianola/pianola-classifier.ts | 263 +++++++++++++++ src/main/pianola/pianola-policy.ts | 133 ++++++++ .../components/Settings/tabs/EncoreTab.tsx | 64 ++++ src/renderer/stores/settingsStore.ts | 1 + src/renderer/types/index.ts | 1 + src/shared/pianola/types.ts | 116 +++++++ src/shared/settingsMetadata.ts | 8 +- 14 files changed, 1488 insertions(+), 1 deletion(-) create mode 100644 Plans/autonomous-manager-agent-investigation.md create mode 100644 Plans/autopilot-codebase-findings.md create mode 100644 Plans/pianola-implementation-plan.md create mode 100644 src/__tests__/main/pianola/pianola-classifier.test.ts create mode 100644 src/__tests__/main/pianola/pianola-policy.test.ts create mode 100644 src/main/pianola/pianola-classifier.ts create mode 100644 src/main/pianola/pianola-policy.ts create mode 100644 src/shared/pianola/types.ts diff --git a/Plans/autonomous-manager-agent-investigation.md b/Plans/autonomous-manager-agent-investigation.md new file mode 100644 index 0000000000..7b3d62e3b3 --- /dev/null +++ b/Plans/autonomous-manager-agent-investigation.md @@ -0,0 +1,300 @@ +# Autonomous Manager Agent Investigation + +Date: 2026-06-24 +Branch: `feat/autonomous-manager-agent` +Worktree: `C:\Users\sydor\Software\Maestro\.worktrees\autonomous-manager-agent` + +## Goal + +Expand Maestro from an orchestration desktop app into a progressively autonomous “manager agent” that can watch agent sessions, decide when to answer/escalate, dispatch follow-up work, trigger recipes/workflows, and eventually integrate external agent/provider protocols. + +The user-provided rough order is directionally good: + +1. Autopilot watcher MVP using `maestro-cli session show` + `dispatch`. +2. Preference/decision memory store with editable rules. +3. Risk classifier + answer/escalate policy. +4. UI panel for pending escalations and past auto-answers. +5. Recipe abstraction that wraps Autopilot, Cue, and Auto Run. +6. ACP client support. +7. Generic CLI adapter generator. +8. Webhook Cue trigger. +9. Existing provider-agent import. +10. Advanced control markers and delegate/retry/spawn-worktree actions. + +This document captures the initial codebase investigation and a concrete path to implement the first milestone without prematurely redesigning everything. + +## Current building blocks + +### CLI session inspection and dispatch already exist + +Relevant files: + +- `src/cli/index.ts` +- `src/cli/commands/session.ts` +- `src/cli/commands/dispatch.ts` +- `src/cli/services/maestro-client.ts` +- `src/main/web-server/handlers/messageHandlers.ts` + +`maestro-cli session show ` is a read-only WebSocket command into the running desktop app. It returns a JSON payload with `tabId`, `sessionId`, `agentId`, `agentSessionId`, and `messages[]`. It supports: + +- `--since `: desktop-side cursor filtering. +- `--tail `: desktop-side truncation. +- `--json`: machine-readable output. + +`maestro-cli dispatch ` is the write side. It supports: + +- `--new-tab`: create a fresh AI tab and send the prompt atomically. +- `--tab `: dispatch into a known tab. +- `--force`: bypass busy-state guard when `allowConcurrentSend` is enabled. + +The desktop WebSocket handler routes these through: + +- `send_command` → `handleSendCommand()`. +- `new_ai_tab_with_prompt` → `handleNewAITabWithPrompt()`. +- `get_session_history` → `handleGetSessionHistory()`. + +This is enough for an external watcher MVP: poll `session show`, classify new messages, and call `dispatch` when safe. + +### Encore Feature gating is mandatory for Autopilot + +Autopilot/autonomous-manager behavior must be an Encore Feature from the first commit. This feature can dispatch prompts without a direct user action, so it is much closer to Maestro Cue than to a passive UI panel. It must be disabled by default and completely invisible/inert when off. + +Relevant files/docs: + +- `CLAUDE-PATTERNS.md` § “Encore Features (Feature Gating)” — canonical checklist. +- `docs/agent-guides/UI-PATTERNS.md` § “Encore Features”. +- `docs/encore-features.md` — user-facing documentation. +- `src/renderer/types/index.ts` — `EncoreFeatureFlags` interface. +- `src/renderer/stores/settingsStore.ts` — `DEFAULT_ENCORE_FEATURES` source of truth in the current codebase. +- `src/renderer/hooks/settings/useSettings.ts` — exposes `encoreFeatures` to app surfaces. +- `src/renderer/App.tsx` — reference cleanup/gating when an Encore flag is turned off. +- `src/cli/commands/encore.ts` and `src/cli/index.ts` — `maestro-cli encore list|set` support. +- `src/main/cue/cue-telemetry.ts` — reference for runtime gating through an injected `isEncoreEnabled()` predicate. + +Current Encore flags are: + +- `directorNotes` +- `usageStats` +- `symphony` +- `maestroCue` + +Autopilot should add a new flag, tentatively: + +```ts +autopilot: boolean; +``` + +Default must be `false` in `DEFAULT_ENCORE_FEATURES`. + +Gating requirements: + +1. **Type/default** — add `autopilot` to `EncoreFeatureFlags` and `DEFAULT_ENCORE_FEATURES` with `false`. +2. **Settings UI** — add an Encore Features toggle labelled something like “Autopilot / Manager Agent”. Because this feature can send messages automatically, the description should explicitly say it can auto-answer low-risk prompts and escalate uncertain/high-risk prompts. +3. **CLI management** — add `autopilot` to `src/cli/commands/encore.ts` `FEATURES` and aliases such as `manager`, `manager-agent`, and `auto-pilot`. +4. **CLI command hard gate** — every `maestro-cli autopilot ...` command must check `readSettingValue('encoreFeatures.autopilot')` before doing work. If disabled, return a clear error such as: `Autopilot is not enabled. Enable it with: maestro-cli encore set autopilot on`. +5. **Main-process hard gate** — any future in-app daemon/service must receive an `isEncoreEnabled` predicate or read settings at the boundary before starting watchers or dispatching. Treat this like Cue telemetry: read on every start/dispatch-relevant path so toggles apply live. +6. **Renderer/UI gate** — no panel, modal, right-bar item, shortcut, hamburger menu item, or command-palette entry should render unless `encoreFeatures.autopilot` is true. If the flag is turned off while surfaces/watchers are open, close/stop them like App.tsx currently does for Symphony, Usage Dashboard, and Cue. +7. **No background work when off** — do not poll `session show`, do not classify, do not write decision memory, do not record telemetry, and do not register webhook/Cue recipe runtime surfaces when disabled. +8. **Tests** — include tests for disabled behavior at the CLI/service boundary, not just hidden UI. + +Security/safety note: an Encore flag is necessary but not sufficient. Autopilot still needs per-tab/session policy, risk classification, audit logs, and user-visible controls. The flag only controls feature availability. + +### Cue is the event-driven automation engine + +Relevant files/docs: + +- `CLAUDE-CUE.md` +- `docs/agent-guides/CUE-PIPELINE.md` +- `src/main/cue/cue-engine.ts` +- `src/main/cue/cue-dispatch-service.ts` +- `src/main/cue/cue-run-manager.ts` +- `src/main/cue/triggers/*` + +Cue already provides: + +- Event sources: app startup, heartbeat, schedule, file changes, GitHub PR/issues, markdown task scanner, agent completion. +- Dispatch and fan-out/fan-in. +- SQLite journal/queue persistence. +- Concurrency gating and run lifecycle. +- Cue UI/dashboard. + +Autopilot should not be bolted directly into Cue at first. It should start as a narrow service/CLI that uses the stable session/dispatch commands. Once behavior is proven, Cue can trigger Autopilot recipes, and a `webhook.received` trigger can be added as another Cue trigger source. + +### Auto Run / Playbooks already cover task execution loops + +Relevant docs/files: + +- `docs/agent-guides/CLI-PLAYBOOKS.md` +- `src/cli/commands/run-doc.ts` +- `src/cli/services/batch-processor.ts` +- `src/cli/services/goal-runner.ts` +- `src/shared/goalDriven/*` +- `src/renderer/hooks/batch/*` + +Auto Run is already a robust execution primitive: checklist documents, goal-driven iterations, resumable CLI headless execution, history, and busy-state handling. The “recipe abstraction” should wrap this rather than replace it. + +### Preference and settings infrastructure exists, but decision memory does not + +Relevant files: + +- `src/shared/settingsMetadata.ts` +- `src/renderer/stores/settingsStore.ts` +- `src/cli/commands/settings-*` +- `src/main/ipc/handlers/settings.ts` + +Settings are a good place for simple enablement flags and default policy knobs. Decision memory should be its own storage domain because it needs audit/history semantics, editable rules, confidence, examples, and possibly per-project scoping. + +Recommended storage shape for first pass: + +- Main-process JSON or SQLite store under userData, not renderer-only state. +- `rules[]`: editable user preferences, e.g. “Always answer dependency version questions from package.json without asking me”. +- `decisions[]`: append-only observed decisions/auto-answers/escalations with timestamps and evidence. +- `scopes`: global, project root, agent/session/tab. + +### Agent/provider abstraction is mature enough for imports and adapter generation + +Relevant files/docs: + +- `AGENT_SUPPORT.md` +- `docs/agent-guides/AGENT-INFRA.md` +- `src/shared/agentIds.ts` +- `src/main/agents/definitions.ts` +- `src/main/agents/capabilities.ts` +- `src/main/storage/index.ts` +- `src/main/parsers/*` + +Adding a first-class provider still requires several coordinated edits. A generic CLI adapter generator should produce a new adapter definition plus tests/docs from a declarative spec, but that is not the first milestone. + +## Proposed architecture + +### Phase 1: External Autopilot watcher MVP + +Add a new CLI command, tentatively: + +```bash +maestro-cli autopilot watch --agent [--interval 2s] [--dry-run] [--rules ] +``` + +Before any polling begins, the command must hard-check `encoreFeatures.autopilot`. This is not just a UI feature flag; it prevents a headless CLI from running autonomous behavior on installs that have not explicitly opted in. + +Responsibilities: + +1. Poll `session show --since --json`. +2. Detect unresolved assistant questions or blocked states. +3. Classify into one of: + - `auto_answer`: safe answer can be generated from rules/static context. + - `escalate`: needs user approval/input. + - `ignore`: no actionable question. +4. For `auto_answer`, call `runDispatch(agentId, answer, { tab: tabId })` or shell out to `maestro-cli dispatch`. +5. Record every decision to a local log. + +Why CLI first: + +- Avoids renderer lifecycle/state complexity. +- Reuses the already-stable desktop WebSocket contract. +- Can be tested as a normal Node CLI/service. +- Creates a migration path for a future in-app daemon. + +Minimum classifier should be deterministic first, LLM-assisted later: + +- Identify question marks and phrases like “which would you prefer”, “should I”, “need confirmation”, “please choose”, “blocked”, “I need”. +- Ignore tool output/thinking sources unless final assistant content is asking. +- Risk label by keyword/intent: + - Low: formatting, naming, obvious convention, docs wording, non-destructive choices covered by explicit rules. + - Medium: package upgrades, test strategy, file organization, multiple plausible implementation paths. + - High: destructive changes, secrets, auth/payment/legal/security, deleting data, force push, production deploy. + +Initial policy: + +- Low + matching rule → auto-answer. +- Medium → escalate unless rule explicitly allows. +- High → always escalate. + +### Phase 2: Main-process Autopilot service and storage + +Move the watcher into the app as `src/main/autopilot/*` with IPC and CLI commands. Suggested modules: + +- `autopilot-types.ts`: shared contracts. +- `autopilot-store.ts`: persisted rules, decisions, escalations. +- `autopilot-classifier.ts`: deterministic classifier and policy engine. +- `autopilot-watcher.ts`: polling/session-history cursor logic. +- `autopilot-dispatcher.ts`: safe dispatch wrapper. +- `autopilot-ipc.ts`: renderer APIs. + +Service construction should mirror the Cue pattern: inject or provide a small `isEncoreEnabled()` function and check it on start, resume, watcher registration, and before dispatching any auto-answer. Disabling `encoreFeatures.autopilot` should stop active watchers and reject new IPC/CLI requests with a clear disabled-feature error. + +The service can initially still call the same internal callbacks behind `get_session_history`/`send_command`, then later avoid WebSocket hop entirely. + +### Phase 3: UI panel for control and audit + +Add a right-panel or modal surface showing: + +- Active watched tabs. +- Pending escalations. +- Past auto-answers. +- Rule that matched each auto-answer. +- “Approve and remember”, “Answer once”, “Edit rule”, “Disable autopilot for tab”. + +Use existing Zustand/modal patterns from: + +- `docs/agent-guides/STATE-PATTERNS.md` +- `docs/agent-guides/UI-PATTERNS.md` +- Cue modal/dashboard patterns. + +This entire surface must be gated by `encoreFeatures.autopilot`. When disabled, it should disappear from all access points rather than showing an empty/disabled shell. + +### Phase 4: Recipes as orchestration manifests + +Define a recipe as a durable YAML/JSON manifest that can reference existing primitives: + +- Autopilot watch policy. +- Cue subscriptions/triggers. +- Auto Run docs/playbooks. +- Goal-run iterations. +- Worktree spawn/delegate actions. + +Keep recipes declarative and compile them into existing engines rather than inventing another runner immediately. + +### Later phases + +1. **ACP client support**: add as a provider/protocol layer after Autopilot’s internal contracts are stable. +2. **Generic CLI adapter generator**: generate files currently listed in `AGENT_SUPPORT.md` from a manifest. +3. **Webhook Cue trigger**: add `webhook.received` trigger source under `src/main/cue/triggers/`, backed by Fastify route/token validation and Cue event dispatch. +4. **Existing provider-agent import**: map external provider configs into `SessionInfo` plus agent definitions/capabilities. +5. **Advanced control markers**: extend existing goal-driven/Auto Run marker parsing to support `delegate`, `retry`, `spawn-worktree`, and possibly `requires-human`. + +## First implementation slice recommendation + +Implement and test only the CLI MVP first: + +0. Add the `autopilot` Encore flag and expose it through Settings + `maestro-cli encore` first. +1. Add `src/cli/commands/autopilot-watch.ts` with an early disabled-feature check. +2. Add `src/cli/services/autopilot/` with: + - classifier/policy pure functions, + - cursor state, + - decision log writer, + - dispatch wrapper using existing `runDispatch()`. +3. Register `maestro-cli autopilot watch` in `src/cli/index.ts`. +4. Add unit tests for disabled-feature behavior plus classifier/policy/cursor behavior. +5. Manual validation with a running desktop app: + - verify `maestro-cli autopilot watch ...` fails while `encoreFeatures.autopilot` is off, + - enable with `maestro-cli encore set autopilot on`, + - create or identify a tab, + - run watcher in `--dry-run`, + - verify detection, + - run watcher without dry-run against a low-risk fixture/rule. + +## Open questions before coding beyond MVP + +1. Should the Encore feature be named `autopilot`, `managerAgent`, or `autonomousManager` in code? `autopilot` is short and matches the requested MVP, but “Manager Agent” may be clearer in UI. +2. Should Autopilot be per-tab opt-in only after the global Encore flag, or allow project/global defaults? +3. Should the first memory store be JSON for user-editability or SQLite for audit/querying? +4. Should auto-answers be generated by a local deterministic template, by a designated manager agent, or by the same target agent using a meta-prompt? +5. Should escalations be desktop-only initially, or expose CLI/web/mobile notifications from day one? +6. How close should “goose parity” be to goose concepts versus Maestro-native naming/UX? + +## Investigation notes + +- The main checkout had unrelated uncommitted changes, so the worktree was created from `rc` HEAD and left isolated. +- `rg` is not installed in this Windows environment; targeted PowerShell and tool-based searches were used instead. +- No source code implementation was attempted yet beyond this planning artifact. diff --git a/Plans/autopilot-codebase-findings.md b/Plans/autopilot-codebase-findings.md new file mode 100644 index 0000000000..4e43ec1709 --- /dev/null +++ b/Plans/autopilot-codebase-findings.md @@ -0,0 +1,184 @@ +# Autopilot - Verified Codebase Findings + +Date: 2026-06-24 +Companion to: `autonomous-manager-agent-investigation.md` (this file is additive, it does not replace it) +Method: six parallel read-only crawlers over `src/`, plus direct source verification of contested claims. + +> Purpose: replace the first writeup's assumptions with grep-verified facts, and pin down the +> single most maintainable, additive way to build Pianola. Where this doc and the original +> disagree, this doc wins (it was checked against source). + +## Locked decisions (2026-06-24) + +- **Name: Pianola** (the self-playing piano). Encore flag key + module dir + CLI verb all use `pianola`. + "Autopilot" appears below as the prior codename; read it as Pianola. Final rename pass before code. +- **Awaiting-input detection: structured signal, narrow scope** (option B in section 0). Build a real + per-agent marker for the unambiguous cases first; heuristics only for the long tail. +- Storage: hybrid (rules JSON, audit SQLite) - confirmed. +- Architecture: standalone `src/main/pianola/` service mirroring Cue's structure, not a Cue trigger. + +--- + +## 0. The one finding that reframes everything + +**There is no structured "agent is asking a question / awaiting input" signal in Maestro.** + +- `SessionState` in `src/renderer/types/index.ts:61` lists `'waiting_input'`, but it is a **dead enum value**: a repo-wide search for any assignment of `'waiting_input'` returns zero hits. Nothing ever sets it. +- The desktop renderer only ever distinguishes `busy` vs not. The web/CLI contract collapses further: `src/main/web-server/web-server-factory.ts:275` maps `tab.state === 'busy' ? 'busy' : 'idle'`, and both `DesktopSessionEntry.state` and `SessionHistoryResult` expose only `'idle' | 'busy'` (`src/main/web-server/types.ts:52,520`). +- `LogEntry` has `interactive?: boolean` and `options?: string[]` (`src/renderer/types/index.ts:211-212`), which is the closest thing to a "this needs an answer" marker, but we must confirm which parsers actually populate it (Claude `--print`/JSON mode likely never does, since permission prompts don't surface as structured output in non-interactive mode). + +**Consequence:** the classifier's hardest job (knowing an agent is actually blocked on the user, vs still working, vs done) has no ready-made input. We have two ways forward, and this is decision #1 below: + +- **(A) Heuristic inference** from message text + `busy→idle` transitions + idle-time thresholds. Cheap, additive, but brittle and exactly the kind of nondeterminism the codebase tries to avoid. +- **(B) Add a real signal at the parser layer** (`ParsedEvent` gains an `awaitingInput`/`question` discriminant; parsers set it; it flows through the log entry and the WebSocket history payload). More upfront work, but deterministic, reusable beyond Autopilot, and the "code before prompts / as deterministic as possible" way. + +Recommendation: **(B), scoped narrowly.** Start by detecting the unambiguous cases per-agent (Claude plan-mode confirmation, explicit `[y/n]`-style prompts, known permission strings via the existing `error-patterns.ts` regex infra) and emit a structured marker. Fall back to heuristics only for the long tail. This is the foundation; everything else is plumbing. + +--- + +## 1. CLI + desktop contract (what we can drive today) + +All verified in `src/cli/` and `src/main/web-server/handlers/messageHandlers.ts`. + +| Capability | Command / verb | Entry point | Notes | +| -------------------- | --------------------------------------------- | ------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------ | -------------- | +| Read transcript | `session show --since --tail --json` | `sessionShow()` `src/cli/commands/session.ts:163` | WS `get_session_history` -> `session_history_result`. Returns `{tabId, sessionId, agentId, agentSessionId, messages[]}`. `--since` is the poll cursor (ISO or epoch). | +| List open tabs | `session list` | `sessionList()` `src/cli/commands/session.ts:117` | WS `list_desktop_sessions` -> `desktop_sessions_list`. Each entry: `DesktopSessionEntry` (`web-server/types.ts:509`) incl. `state: 'idle' | 'busy'`, `agentSessionId`, `starred`. | +| Send to existing tab | `dispatch --tab [--force]` | `runDispatch()` `src/cli/commands/dispatch.ts:36` | WS `send_command`. `--force` requires `allowConcurrentSend` setting; busy guard at `messageHandlers.ts:870`. | +| New tab + prompt | `dispatch --new-tab` | same | WS `new_ai_tab_with_prompt` -> returns `tabId` (escalation surface). | +| Read/write settings | `encore list | set`, `settings get | set` | `src/cli/commands/encore.ts`, `storage.ts` | See section 4. | + +**Transport:** `MaestroClient` (`src/cli/services/maestro-client.ts`), `withMaestroClient()` wrapper, request/response matched by `requestId`, 10s default timeout. Discovery via `cli-server.json` in the config dir (`src/shared/cli-server-discovery.ts`); throws if app not running / PID stale. + +**`runDispatch()` is importable and returns a structured `DispatchResponse`** - the Autopilot service can call it directly rather than shelling out. This is the reusable action primitive; we do **not** need to build dispatch. + +**Gap for a watcher:** every `session show` is independent. There is no subscribe/stream for "new message arrived". MVP = poll `--since `. A later improvement is a WS subscription verb, but it is not required for v1. + +--- + +## 2. Architecture decision: standalone service, not a Cue trigger + +The Cue crawler recommended building Autopilot as a new Cue trigger type (reuse 70% of Cue). **I disagree for v1, and recommend a standalone `src/main/autopilot/` service that mirrors Cue's structure without depending on its engine.** Reasoning, grounded in what Cue actually is: + +- **Cue is project-root + YAML-config + external-event oriented.** Triggers are file/schedule/GitHub/task/completion sources defined per project in `.maestro/cue.yaml`. Autopilot is **per-tab/per-session, state-reactive, and policy+memory driven**. Forcing it into a YAML subscription model fights the grain. +- **Autopilot's "action" is a one-shot dispatch**, not a managed long-running spawned run. Cue's heavy machinery (CueRunManager concurrency gating, queue persistence, fan-in tracker, chain-depth guard, two-phase output runs) is mostly irrelevant to "answer this question in this tab". Inheriting it means inheriting its constraints and its 10 gotchas (CLAUDE-CUE.md) for little gain. +- **Coupling risk:** a Cue trigger means Autopilot can't evolve its dispatch/queue semantics without touching shared Cue code, and vice versa. The user explicitly asked for additive + maintainable + extendable. A decoupled sibling is more additive than a graft. + +**What we reuse from Cue (pattern, not code-dependency):** + +- Service-oriented decomposition: thin engine facade + focused single-responsibility services. Mirror, don't import. +- The Encore runtime-gating pattern: inject an `isEncoreEnabled()` predicate, check it on every start/dispatch path (`cue-telemetry.ts`, `cue-stats.ts:81` `isCueStatsEnabled`). +- The storage split (section 3). +- The dispatch primitive itself (`runDispatch()` from the CLI service, or the same internal `send_command` path). + +**What we leave a door open for (extensibility):** once Autopilot's internal contracts are stable, Cue can gain an `agent.awaiting` trigger that fires off the _same_ structured signal from section 0, and an Autopilot action. That is the right time to integrate, not now. + +Proposed module layout (mirrors Cue's shape): + +``` +src/main/autopilot/ + autopilot-types.ts # shared contracts (shared/ if renderer needs them) + autopilot-engine.ts # thin facade: start/stop/refresh, owns isEncoreEnabled gate + autopilot-watcher.ts # per-tab poll loop + cursor state (session show --since) + autopilot-classifier.ts # PURE functions: message[] -> {kind, risk, topic} + autopilot-policy.ts # PURE functions: (classification, rules) -> action + autopilot-dispatcher.ts # safe wrapper over runDispatch / send_command + autopilot-rules-store.ts # JSON (electron-store) - editable rules + autopilot-decisions-db.ts # SQLite - append-only audit log + autopilot-ipc.ts # renderer APIs (later phase) +``` + +Classifier and policy as **pure functions** is the key maintainability move: they are the brain, they are the part most likely to change, and they are trivially unit-testable with fixture transcripts (no app, no WS). + +--- + +## 3. Storage: hybrid (verified patterns) + +Two distinct needs, two proven patterns already in the repo: + +- **Editable rules -> JSON via `electron-store`.** Small, human-editable, version-stable. Copy the store pattern in `src/main/stores/instances.ts`; for concurrent-safe file mutation use `atomicWriteJson` + `createKeyedWriteQueue` from `src/main/utils/atomic-json-store.ts`. Path under the synced data dir so rules follow the user. +- **Append-only decision audit -> SQLite via `better-sqlite3`.** Unbounded, time-indexed, queryable. Copy `src/main/stats/stats-db.ts` (WAL mode, corruption recovery, backups) + the versioned migration system in `src/main/stats/migrations.ts` + schema-as-constants in `src/main/stats/schema.ts`. `cue-db.ts` is the lighter reference. DB file under `app.getPath('userData')`. + +Scopes (`global | project | agent-session-tab`) are **data, not schema**: store a `scope` + `scopeId` column/field and resolve applicable rules in app logic (priority-sorted). Do not model scopes as separate tables. + +Director's Notes (`src/main/ipc/handlers/director-notes.ts`) is the freshest end-to-end feature template (IPC + storage + progress streaming) if we want a recent example to copy wholesale. + +--- + +## 4. Encore gating: exact checklist (verified file paths) + +Autopilot MUST be Encore-gated, default `false`, inert when off. Trace of an existing flag end-to-end: + +1. **Type:** add `autopilot: boolean` to `EncoreFeatureFlags`, `src/renderer/types/index.ts:1064`. +2. **Defaults (KEEP IN SYNC - duplication trap):** + - `DEFAULT_ENCORE_FEATURES`, `src/renderer/stores/settingsStore.ts:210` -> `autopilot: false`. + - `SETTINGS_METADATA.encoreFeatures.default`, `src/shared/settingsMetadata.ts:1014` -> add `autopilot: false`. + - (Confirm whether `src/main/stores/defaults.ts` also mirrors this; main reads persisted settings, so only needed if a main default is referenced before first persist.) +3. **Store plumbing:** already generic - `setEncoreFeatures` persists the whole object (`settingsStore.ts:1398`). No per-flag code. Flows through `useSettings()` automatically (`src/renderer/hooks/settings/useSettings.ts:368`). +4. **Settings UI toggle:** add a section in `src/renderer/components/Settings/tabs/EncoreTab.tsx` (copy the maestroCue block). Description must state it can auto-send messages. +5. **CLI:** add `autopilot` to `FEATURES` and aliases in `src/cli/commands/encore.ts:11-31` (`autopilot`, `auto-pilot`, maybe `pilot`). +6. **CLI hard gate:** every `autopilot` CLI command checks `readSettingValue('encoreFeatures.autopilot')` first; if off, error: enable with `maestro-cli encore set autopilot on`. +7. **Main service gate:** in `src/main/index.ts` startup (pattern near the Cue start, ~line 2415), only `autopilotEngine.start()` when the flag is on; pass an `isEncoreEnabled()` predicate the engine re-reads on every poll/dispatch so toggles apply live. +8. **Renderer UI gate + cleanup:** in `App.tsx` add a cleanup effect closing any Autopilot surface when the flag flips off (mirror the Cue/Symphony effects ~line 477-490); gate menu items (`HamburgerMenuContent.tsx`), modal render (`AppStandaloneModals.tsx`), shortcuts. +9. **No background work when off:** no poll, no classify, no DB writes, no IPC work unless enabled (gate at handler entry, throw an `AutopilotDisabled` sentinel like `cue-stats.ts:110`). +10. **Tests:** assert disabled behavior at the CLI/service boundary, not just hidden UI (pattern: `src/__tests__/renderer/hooks/useCueAutoDiscovery.test.ts`). + +--- + +## 5. Classifier inputs (what's actually available) + +What the classifier can read per message (`LogEntry`, `src/renderer/types/index.ts:206`): `source` (`user|ai|thinking|tool|system|error|stdout|stderr`), `text`, `interactive?`, `options?`, `metadata.toolState.status`, `agentError`. Over the WS contract, `SessionHistoryMessage` is flattened to `{id, role, source, content, timestamp}` (`web-server/types.ts:533`) - note `interactive`/`options` are **not** in the WS payload today, so option (B) in section 0 would also mean threading those through the history serializer. + +Reusable prior art for the decision loop (do not reinvent): + +- **Error pattern matching:** `src/main/parsers/error-patterns.ts` - regex infra with `recoverable` flags and typed `AgentErrorType` (`auth_expired`, `rate_limited`, `token_exhaustion`, ...). Reuse for risk classification. +- **Goal-driven exit logic:** `src/shared/goalDriven/goalExitEvaluator.ts` `evaluateGoalExit()` + markers (``) in `goalMarkers.ts`. This is a working autonomous continue/stop decision engine - the closest existing analog to Autopilot's policy core. Study `STALL_THRESHOLD` and the priority-ordered decision. +- **Halt marker:** `detectHaltMarker()` `src/cli/services/batch-processor.ts:42`. + +Risk policy (from original doc, still sound): low+matching-rule -> auto-answer; medium -> escalate unless a rule allows; high (destructive/secrets/auth/deploy) -> always escalate. + +--- + +## 6. UI surface (later phase, patterns verified) + +- **Recommended placement:** a **Right Bar tab** for the live escalation list + quick actions (always-visible, lightweight), plus a **Modal** for the rule editor / decision-log drill-down. Right Bar tab type at `src/renderer/types/index.ts` (`RightPanelTab`), switch in `RightPanel.tsx`; modal pattern from `CueModal`. +- **IPC:** new `src/main/ipc/handlers/autopilot.ts` + `src/main/preload/autopilot.ts`, registered in the respective index files; copy `autorun.ts`. Main->renderer escalation pushes via `createSafeSend`. +- **Store:** `src/renderer/stores/autopilotStore.ts` (Zustand, copy `batchStore.ts`). +- **Notifications:** `notifyToast({color:'orange', dismissible:true, clickAction:{kind:'jump-session', sessionId, tabId}})` is purpose-built for "agent needs you" escalations. Center flash for "rule saved" acks. +- **Modal priority:** add to `src/renderer/constants/modalPriorities.ts`. Apply `select-none` to the click-driven root. + +--- + +## 7. Recommended first slice (revised) + +Ordered for additivity and to de-risk the hard part first: + +0. **Encore flag** end-to-end (section 4) - including the CLI hard gate and a disabled-behavior test. Nothing else runs until this exists. +1. **Structured awaiting-input signal (narrow):** extend `ParsedEvent` with an `awaitingInput`/`question` discriminant; populate it in the Claude parser for the unambiguous cases (plan-mode confirm, explicit prompts, known permission strings); thread it through `LogEntry` and the WS history payload. Unit-test against captured transcripts. _This is the keystone; if we punt to pure heuristics, say so explicitly and accept the brittleness._ +2. **Pure classifier + policy** functions (`autopilot-classifier.ts`, `autopilot-policy.ts`) with fixture tests. No app, no I/O. +3. **CLI `autopilot watch --agent --interval --dry-run --rules`**, gated, polling `session show --since`, calling the classifier, and (non-dry-run) `runDispatch()` for low-risk auto-answers. Decision-log writes to SQLite. +4. **Rules store (JSON)** + **decision audit (SQLite)** wired behind IPC-free service calls first. +5. Manual validation against a running app (dry-run -> low-risk rule -> real auto-answer), then the UI phase. + +Defer: main-process daemon (CLI service first), UI panel, Cue integration, ACP/adapter generator, webhook trigger. All remain clean follow-ons because the service is decoupled. + +--- + +## 8. Decisions + +Resolved: + +1. **Awaiting-input signal: structured, narrow (B).** LOCKED. +2. **Name: Pianola.** LOCKED. Flag key / module / CLI verb = `pianola`. +3. **Storage: hybrid** (rules JSON, audit SQLite). LOCKED. + +Still open (do not block the first slice; default chosen for v1): 4. Who generates auto-answers: deterministic template (v1 default), dedicated manager agent, or target agent via meta-prompt? Revisit after the classifier exists. 5. Per-tab opt-in after the global flag, vs project/global default policy. Default v1: global flag + per-tab opt-in. + +--- + +## 9. Corrections to the original writeup + +- "session state exposes busy/idle" - correct, and `waiting_input` is **dead**; the original implied richer state was available. It is not. +- The original treated the classifier as straightforward keyword matching on text. Verified reality: there is no reliable structured signal, so this is the single hardest and most important part, not an afterthought. +- The original left storage as an open question (JSON vs SQLite). Verified: both patterns exist and mature; hybrid is the clear answer. +- The original suggested Cue could later "trigger Autopilot recipes". Endorsed - but as a _later_ integration via a shared structured signal, not as the v1 substrate. diff --git a/Plans/pianola-implementation-plan.md b/Plans/pianola-implementation-plan.md new file mode 100644 index 0000000000..ec83ec4b65 --- /dev/null +++ b/Plans/pianola-implementation-plan.md @@ -0,0 +1,76 @@ +# Pianola - Implementation Plan + +Date: 2026-06-24 +Branch: `feat/autonomous-manager-agent` +Grounded in: `autopilot-codebase-findings.md` (verified findings). Name = **Pianola**. + +Pianola is a standalone, Encore-gated manager agent that watches agent tabs, detects when an +agent is awaiting the user, classifies the ask + its risk, and either auto-answers low-risk +prompts from rules or escalates. Built additively as `src/main/pianola/`, decoupled from Cue, +reusing the existing dispatch primitive, parser infra, and storage patterns. + +## Module layout (target) + +``` +src/shared/pianola/ + types.ts # contracts shared renderer<->main<->cli (classification, rules, decisions) +src/main/pianola/ + pianola-engine.ts # thin facade; owns isEncoreEnabled gate, start/stop + pianola-watcher.ts # per-tab poll loop + cursor (session history --since) + pianola-classifier.ts # PURE: messages -> { kind, risk, topic, confidence } + pianola-policy.ts # PURE: (classification, rules) -> action (auto-answer|escalate|ignore) + pianola-rules-store.ts # JSON via electron-store (editable rules) + pianola-decisions-db.ts # SQLite via better-sqlite3 (append-only audit) + pianola-dispatcher.ts # safe wrapper over runDispatch / send_command + pianola-ipc.ts # renderer APIs (UI phase) +src/cli/commands/pianola-watch.ts # gated `maestro-cli pianola watch` +``` + +## Build order (each step independently shippable + tested) + +### Step 0 - Encore flag `pianola` (foundation) [THIS SESSION] + +Files (verified line refs): + +- `src/renderer/types/index.ts:1064` - add `pianola: boolean` to `EncoreFeatureFlags`. +- `src/renderer/stores/settingsStore.ts:210` - add `pianola: false` to `DEFAULT_ENCORE_FEATURES`. +- `src/shared/settingsMetadata.ts:1014` - add `pianola: false` to `encoreFeatures.default`. +- `src/cli/commands/encore.ts:11,18` - add to `FEATURES` + `ALIASES` (`pianola`, `auto-pilot`, `pilot`, `manager`). +- `src/renderer/components/Settings/tabs/EncoreTab.tsx` - add toggle block (insert before final close, ~1258). Uses `Music` icon (already imported). Description states it can auto-send messages. +- Test: extend `src/__tests__/.../encore` (or add) - default off + alias resolution. + Inert-when-off is automatic: nothing consumes the flag yet. + +### Step 1 - Shared contracts + PURE classifier & policy [THIS SESSION] + +- `src/shared/pianola/types.ts` - `AwaitingSignal`, `PianolaClassification`, `PianolaRule`, `PianolaDecision`, `RiskLevel`, `ActionKind`. +- `src/main/pianola/pianola-classifier.ts` - pure fn over a normalized message list + optional structured signal -> classification. Reuses `error-patterns.ts` regex infra for risk. +- `src/main/pianola/pianola-policy.ts` - pure fn: low+matching-rule -> auto-answer; medium -> escalate unless rule allows; high -> always escalate. +- Tests: fixture transcripts in `src/__tests__/main/pianola/` covering question/blocked/none + low/med/high. + Pure functions, no I/O, no app - the brain, fully unit-tested first. + +### Step 2 - Structured awaiting-input signal (narrow) [NEXT] + +- Extend `ParsedEvent` (`src/main/parsers/agent-output-parser.ts`) with `awaitingInput?: { kind; prompt?; options? }`. +- Populate in `claude-output-parser.ts` for unambiguous cases (plan-mode confirm, explicit `[y/n]`, known permission strings). +- Thread through `LogEntry` and the WS `SessionHistoryMessage` payload (`web-server/types.ts`) so the watcher sees it. +- Tests against captured transcripts. + +### Step 3 - Storage [NEXT] + +- `pianola-rules-store.ts` (electron-store + atomic-json-store), `pianola-decisions-db.ts` (copy stats-db.ts pattern + migrations). + +### Step 4 - CLI `pianola watch` [NEXT] + +- Gated command polling `session show --since`, classify -> policy -> `runDispatch` (low-risk) / record. `--dry-run`, `--interval`, `--rules`, `--agent`, `--tab`. + +### Step 5 - Engine/daemon + IPC + UI panel [LATER] + +- Main-process engine mirroring Cue gating; Right Bar tab + modal; Zustand store; toast escalations. + +### Later - Cue integration (shared signal), ACP, adapter generator, webhook trigger. + +## Conventions + +- Tabs for indentation. No em/en dashes. Immutable updates. Files < 800 lines. +- Pure functions for classifier/policy. Let unexpected exceptions bubble (Sentry); handle known cases. +- Validate before push: `npm run lint`, `npm run lint:eslint`, `npm run test` for touched areas. diff --git a/src/__tests__/cli/commands/encore.test.ts b/src/__tests__/cli/commands/encore.test.ts index 8340b44401..175f7a5817 100644 --- a/src/__tests__/cli/commands/encore.test.ts +++ b/src/__tests__/cli/commands/encore.test.ts @@ -75,6 +75,23 @@ describe('encore commands', () => { expect((getPayload().value as Record).symphony).toBe(true); }); + it('lists pianola, defaulting off when unset', () => { + encoreList({ json: true }); + const parsed = JSON.parse(consoleSpy.mock.calls[0][0]); + expect(parsed.features).toHaveProperty('pianola'); + expect(parsed.features.pianola).toBe(false); + }); + + it('resolves pianola aliases (e.g. "auto-pilot" / "manager" -> pianola)', async () => { + const getPayload = mockSend({ success: true }); + await encoreSet('auto-pilot', true, {}); + expect((getPayload().value as Record).pianola).toBe(true); + + const getPayload2 = mockSend({ success: true }); + await encoreSet('manager', true, {}); + expect((getPayload2().value as Record).pianola).toBe(true); + }); + it('rejects an unknown feature without connecting', async () => { await expect(encoreSet('telepathy', true, {})).rejects.toThrow('__exit__'); expect(formatError).toHaveBeenCalledWith(expect.stringContaining('Unknown Encore feature')); diff --git a/src/__tests__/main/pianola/pianola-classifier.test.ts b/src/__tests__/main/pianola/pianola-classifier.test.ts new file mode 100644 index 0000000000..11b935ea5c --- /dev/null +++ b/src/__tests__/main/pianola/pianola-classifier.test.ts @@ -0,0 +1,143 @@ +/** + * @file pianola-classifier.test.ts + * @description Unit tests for the pure Pianola classifier. + */ + +import { describe, it, expect } from 'vitest'; +import { classifyMessages, riskAtMost, maxRisk } from '../../../main/pianola/pianola-classifier'; +import type { AwaitingInputSignal, PianolaMessage } from '../../../shared/pianola/types'; + +let seq = 0; +function msg( + role: PianolaMessage['role'], + content: string, + awaitingInput?: AwaitingInputSignal +): PianolaMessage { + seq += 1; + return { + id: `m${seq}`, + role, + source: role === 'assistant' ? 'ai' : role, + content, + timestamp: new Date(Date.UTC(2026, 0, 1, 0, 0, seq)).toISOString(), + awaitingInput, + }; +} + +describe('risk helpers', () => { + it('orders risk low < medium < high', () => { + expect(riskAtMost('low', 'high')).toBe(true); + expect(riskAtMost('high', 'low')).toBe(false); + expect(riskAtMost('medium', 'medium')).toBe(true); + expect(maxRisk('low', 'high')).toBe('high'); + expect(maxRisk('medium', 'low')).toBe('medium'); + }); +}); + +describe('classifyMessages - edge cases', () => { + it('returns none for an empty transcript', () => { + expect(classifyMessages([]).kind).toBe('none'); + }); + + it('returns none when there is no assistant message', () => { + const c = classifyMessages([msg('user', 'hello?'), msg('tool', 'ran something')]); + expect(c.kind).toBe('none'); + }); + + it('returns none when the user already replied after the assistant asked', () => { + const c = classifyMessages([ + msg('assistant', 'Which database should I use?'), + msg('user', 'postgres'), + ]); + expect(c.kind).toBe('none'); + expect(c.evidence.reason).toContain('user has replied'); + }); +}); + +describe('classifyMessages - structured signal (authoritative)', () => { + it('treats a permission signal as blocked, at least medium risk, high confidence', () => { + const signal: AwaitingInputSignal = { kind: 'permission', prompt: 'Allow reading config.ts?' }; + const c = classifyMessages([msg('assistant', 'May I?', signal)]); + expect(c.kind).toBe('blocked'); + expect(c.confidence).toBe('high'); + expect(c.evidence.structured).toBe(true); + expect(riskAtMost('medium', c.risk)).toBe(true); // medium or higher + }); + + it('escalates structured permission for a destructive action to high risk', () => { + const signal: AwaitingInputSignal = { + kind: 'permission', + prompt: 'Allow running rm -rf build to delete the output?', + }; + const c = classifyMessages([msg('assistant', 'ok?', signal)]); + expect(c.risk).toBe('high'); + }); + + it('maps a question signal to kind question', () => { + const signal: AwaitingInputSignal = { kind: 'question', prompt: 'What name do you want?' }; + const c = classifyMessages([msg('assistant', '...', signal)]); + expect(c.kind).toBe('question'); + expect(c.confidence).toBe('high'); + }); +}); + +describe('classifyMessages - heuristics', () => { + it('detects a question phrase with medium confidence', () => { + const c = classifyMessages([msg('assistant', 'Should I use tabs or spaces for the new file?')]); + expect(c.kind).toBe('question'); + expect(c.confidence).toBe('medium'); + expect(c.evidence.structured).toBe(false); + expect(c.topic.length).toBeGreaterThan(0); + }); + + it('detects an explicit choice marker', () => { + const c = classifyMessages([msg('assistant', 'Proceed with the rename? [y/n]')]); + expect(c.kind).toBe('question'); + expect(c.confidence).toBe('medium'); + }); + + it('detects a blocked phrase', () => { + const c = classifyMessages([msg('assistant', 'I am blocked: I need the API key to continue.')]); + expect(c.kind).toBe('blocked'); + }); + + it('treats a trailing question mark alone as low-confidence question', () => { + const c = classifyMessages([msg('assistant', 'That file looks odd, right?')]); + expect(c.kind).toBe('question'); + expect(c.confidence).toBe('low'); + }); + + it('returns none for a plain statement', () => { + const c = classifyMessages([msg('assistant', 'I finished updating the README.')]); + expect(c.kind).toBe('none'); + }); +}); + +describe('classifyMessages - risk rating', () => { + it('rates destructive prompts high', () => { + const c = classifyMessages([ + msg('assistant', 'Should I force push to production and drop the old table?'), + ]); + expect(c.risk).toBe('high'); + }); + + it('rates dependency changes medium', () => { + const c = classifyMessages([msg('assistant', 'Should I upgrade the react dependency?')]); + expect(c.risk).toBe('medium'); + }); + + it('rates a cosmetic choice low', () => { + const c = classifyMessages([msg('assistant', 'Should I name the variable count or total?')]); + expect(c.risk).toBe('low'); + }); + + it('uses the most recent assistant turn', () => { + const c = classifyMessages([ + msg('assistant', 'Working on it.'), + msg('tool', 'edited file'), + msg('assistant', 'Should I delete the secret from the .env file?'), + ]); + expect(c.kind).toBe('question'); + expect(c.risk).toBe('high'); + }); +}); diff --git a/src/__tests__/main/pianola/pianola-policy.test.ts b/src/__tests__/main/pianola/pianola-policy.test.ts new file mode 100644 index 0000000000..66102857b1 --- /dev/null +++ b/src/__tests__/main/pianola/pianola-policy.test.ts @@ -0,0 +1,177 @@ +/** + * @file pianola-policy.test.ts + * @description Unit tests for the pure Pianola policy engine. + */ + +import { describe, it, expect } from 'vitest'; +import { + decide, + selectRule, + ruleAppliesToScope, + ruleMatchesClassification, +} from '../../../main/pianola/pianola-policy'; +import type { + PianolaClassification, + PianolaRisk, + PianolaRule, + PianolaSignalKind, +} from '../../../shared/pianola/types'; + +function classification( + overrides: Partial & { kind: PianolaSignalKind; risk: PianolaRisk } +): PianolaClassification { + return { + topic: 'should i use tabs', + confidence: 'medium', + evidence: { messageId: 'm1', reason: 'test', structured: false }, + ...overrides, + }; +} + +let ruleSeq = 0; +function rule(overrides: Partial): PianolaRule { + ruleSeq += 1; + return { + id: `r${ruleSeq}`, + enabled: true, + scope: 'global', + match: {}, + action: 'auto_answer', + answer: 'Use tabs.', + priority: 100, + createdAt: ruleSeq, + updatedAt: ruleSeq, + ...overrides, + }; +} + +describe('decide - safety defaults', () => { + it('ignores a none classification', () => { + const d = decide(classification({ kind: 'none', risk: 'low' }), [rule({})]); + expect(d.action).toBe('ignore'); + expect(d.matchedRuleId).toBeNull(); + }); + + it('escalates when no rule matches', () => { + const d = decide(classification({ kind: 'question', risk: 'low' }), []); + expect(d.action).toBe('escalate'); + expect(d.matchedRuleId).toBeNull(); + }); + + it('never auto-answers a high-risk prompt even if a rule says to', () => { + const r = rule({ match: { maxRisk: 'high' }, action: 'auto_answer', answer: 'go' }); + const d = decide(classification({ kind: 'question', risk: 'high' }), [r]); + expect(d.action).toBe('escalate'); + expect(d.matchedRuleId).toBe(r.id); + expect(d.reason).toContain('high-risk'); + }); +}); + +describe('decide - rule actions', () => { + it('auto-answers a low-risk prompt matched by a rule', () => { + const r = rule({ match: { maxRisk: 'low' }, action: 'auto_answer', answer: 'Use tabs.' }); + const d = decide(classification({ kind: 'question', risk: 'low' }), [r]); + expect(d.action).toBe('auto_answer'); + expect(d.answer).toBe('Use tabs.'); + expect(d.matchedRuleId).toBe(r.id); + }); + + it('escalates when matched auto-answer rule has no answer text', () => { + const r = rule({ action: 'auto_answer', answer: ' ' }); + const d = decide(classification({ kind: 'question', risk: 'low' }), [r]); + expect(d.action).toBe('escalate'); + expect(d.reason).toContain('no answer'); + }); + + it('honors an explicit escalate rule', () => { + const r = rule({ action: 'escalate' }); + const d = decide(classification({ kind: 'question', risk: 'low' }), [r]); + expect(d.action).toBe('escalate'); + expect(d.matchedRuleId).toBe(r.id); + }); + + it('honors an explicit ignore rule', () => { + const r = rule({ action: 'ignore' }); + const d = decide(classification({ kind: 'blocked', risk: 'low' }), [r]); + expect(d.action).toBe('ignore'); + expect(d.matchedRuleId).toBe(r.id); + }); +}); + +describe('rule matching', () => { + it('respects maxRisk', () => { + const r = rule({ match: { maxRisk: 'low' } }); + expect(ruleMatchesClassification(r, classification({ kind: 'question', risk: 'low' }))).toBe( + true + ); + expect(ruleMatchesClassification(r, classification({ kind: 'question', risk: 'medium' }))).toBe( + false + ); + }); + + it('respects kinds filter', () => { + const r = rule({ match: { kinds: ['blocked'] } }); + expect(ruleMatchesClassification(r, classification({ kind: 'blocked', risk: 'low' }))).toBe( + true + ); + expect(ruleMatchesClassification(r, classification({ kind: 'question', risk: 'low' }))).toBe( + false + ); + }); + + it('respects topicIncludes (case-insensitive)', () => { + const r = rule({ match: { topicIncludes: ['TABS'] } }); + expect( + ruleMatchesClassification( + r, + classification({ kind: 'question', risk: 'low', topic: 'should i use tabs' }) + ) + ).toBe(true); + expect( + ruleMatchesClassification( + r, + classification({ kind: 'question', risk: 'low', topic: 'rename the module' }) + ) + ).toBe(false); + }); +}); + +describe('scope filtering', () => { + it('global rules always apply', () => { + expect(ruleAppliesToScope(rule({ scope: 'global' }), {})).toBe(true); + }); + + it('project rules apply only for the matching project path', () => { + const r = rule({ scope: 'project', scopeId: '/repo/a' }); + expect(ruleAppliesToScope(r, { projectPath: '/repo/a' })).toBe(true); + expect(ruleAppliesToScope(r, { projectPath: '/repo/b' })).toBe(false); + expect(ruleAppliesToScope(r, {})).toBe(false); + }); + + it('tab rules apply only for the matching tab id', () => { + const r = rule({ scope: 'tab', scopeId: 'tab-1' }); + expect(ruleAppliesToScope(r, { tabId: 'tab-1' })).toBe(true); + expect(ruleAppliesToScope(r, { tabId: 'tab-2' })).toBe(false); + }); +}); + +describe('selectRule precedence', () => { + it('picks the lowest priority number among matches', () => { + const low = rule({ priority: 10, answer: 'low-pri-wins' }); + const high = rule({ priority: 50, answer: 'high-pri' }); + const picked = selectRule(classification({ kind: 'question', risk: 'low' }), [high, low], {}); + expect(picked?.id).toBe(low.id); + }); + + it('skips disabled and out-of-scope rules', () => { + const disabled = rule({ priority: 1, enabled: false }); + const wrongScope = rule({ priority: 2, scope: 'project', scopeId: '/other' }); + const ok = rule({ priority: 3 }); + const picked = selectRule(classification({ kind: 'question', risk: 'low' }), [ + disabled, + wrongScope, + ok, + ]); + expect(picked?.id).toBe(ok.id); + }); +}); diff --git a/src/cli/commands/encore.ts b/src/cli/commands/encore.ts index 2c8d79e09a..404041a463 100644 --- a/src/cli/commands/encore.ts +++ b/src/cli/commands/encore.ts @@ -13,6 +13,7 @@ const FEATURES: Record = { usageStats: 'Usage Dashboard', symphony: 'Symphony (Group Chat)', maestroCue: 'Maestro Cue', + pianola: 'Pianola (Manager Agent)', }; const ALIASES: Record = { @@ -28,6 +29,11 @@ const ALIASES: Record = { groupchat: 'symphony', cue: 'maestroCue', maestrocue: 'maestroCue', + 'auto-pilot': 'pianola', + autopilot: 'pianola', + pilot: 'pianola', + manager: 'pianola', + 'manager-agent': 'pianola', }; interface EncoreOptions { diff --git a/src/main/pianola/pianola-classifier.ts b/src/main/pianola/pianola-classifier.ts new file mode 100644 index 0000000000..6696a6b83f --- /dev/null +++ b/src/main/pianola/pianola-classifier.ts @@ -0,0 +1,263 @@ +/** + * Pianola classifier - PURE functions. + * + * Given the tail of an agent transcript, decide whether the agent is asking the + * user something / is blocked, summarize the topic, and rate the risk of acting + * on it. No I/O, no Electron, no app state: this is the brain, and it is unit + * tested against fixture transcripts. + * + * Detection prefers a structured AwaitingInputSignal (authoritative, high + * confidence) and falls back to conservative heuristics over message text. + */ + +import type { + AwaitingInputSignal, + PianolaClassification, + PianolaMessage, + PianolaRisk, + PianolaSignalKind, +} from '../../shared/pianola/types'; + +/** Phrases that strongly suggest the assistant is asking the user to decide. */ +const QUESTION_PHRASES = [ + 'which would you prefer', + 'would you like', + 'do you want', + 'should i', + 'shall i', + 'let me know', + 'please choose', + 'please confirm', + 'please advise', + 'need your input', + 'need confirmation', + 'waiting for your', + 'how would you like', + 'can you confirm', + 'are you sure', +]; + +/** Phrases that suggest the agent is blocked / cannot proceed without the user. */ +const BLOCKED_PHRASES = [ + 'i need', + 'i am blocked', + "i'm blocked", + 'i cannot proceed', + "i can't proceed", + 'unable to proceed', + 'requires your', + 'permission to', + 'awaiting approval', + 'needs approval', +]; + +/** High-risk: destructive, security-sensitive, or irreversible/outward-facing. */ +const HIGH_RISK_TERMS = [ + 'delete', + 'rm -rf', + 'drop table', + 'drop database', + 'truncate', + 'force push', + 'force-push', + 'git push --force', + 'reset --hard', + 'deploy', + 'production', + 'prod ', + 'secret', + 'secrets', + 'password', + 'credential', + 'api key', + 'api-key', + 'token', + 'auth', + 'payment', + 'charge', + 'invoice', + 'migrate', + 'migration', + 'wipe', + 'overwrite', + 'revoke', + 'sudo', +]; + +/** Medium-risk: meaningful but recoverable engineering choices. */ +const MEDIUM_RISK_TERMS = [ + 'install', + 'upgrade', + 'downgrade', + 'bump', + 'dependency', + 'dependencies', + 'package', + 'refactor', + 'rename', + 'move file', + 'delete comment', + 'test strategy', + 'restructure', + 'schema', + 'config', +]; + +const RISK_ORDER: Record = { low: 0, medium: 1, high: 2 }; + +/** True if risk `a` is at most as severe as `b`. */ +export function riskAtMost(a: PianolaRisk, b: PianolaRisk): boolean { + return RISK_ORDER[a] <= RISK_ORDER[b]; +} + +/** The more severe of two risks. */ +export function maxRisk(a: PianolaRisk, b: PianolaRisk): PianolaRisk { + return RISK_ORDER[a] >= RISK_ORDER[b] ? a : b; +} + +function containsAny(haystack: string, needles: readonly string[]): boolean { + return needles.some((n) => haystack.includes(n)); +} + +/** Rate risk from free text. */ +function rateRisk(text: string): PianolaRisk { + const lower = text.toLowerCase(); + if (containsAny(lower, HIGH_RISK_TERMS)) return 'high'; + if (containsAny(lower, MEDIUM_RISK_TERMS)) return 'medium'; + return 'low'; +} + +/** Build a short topic summary from a prompt: first sentence/line, trimmed. */ +function summarizeTopic(text: string): string { + const firstLine = text + .split('\n') + .map((l) => l.trim()) + .find((l) => l.length > 0); + const base = firstLine ?? text.trim(); + const sentence = base.split(/(?<=[.?!])\s/)[0] ?? base; + const trimmed = sentence.trim(); + return trimmed.length > 140 ? `${trimmed.slice(0, 137)}...` : trimmed; +} + +/** Is this the assistant speaking (vs user/tool/system)? */ +function isAssistant(message: PianolaMessage): boolean { + return message.role === 'assistant' || message.source === 'ai'; +} + +/** Find the last assistant message in chronological order. */ +function lastAssistantMessage(messages: readonly PianolaMessage[]): PianolaMessage | null { + for (let i = messages.length - 1; i >= 0; i -= 1) { + if (isAssistant(messages[i])) return messages[i]; + } + return null; +} + +/** + * If the last message is from the user (chronologically after the last + * assistant turn), the agent is not currently waiting on the user. + */ +function userHasRepliedSince( + messages: readonly PianolaMessage[], + assistant: PianolaMessage +): boolean { + const idx = messages.lastIndexOf(assistant); + if (idx < 0) return false; + return messages.slice(idx + 1).some((m) => m.role === 'user'); +} + +function classifyFromStructured( + message: PianolaMessage, + signal: AwaitingInputSignal +): PianolaClassification { + const promptText = signal.prompt ?? message.content; + const kind: PianolaSignalKind = signal.kind === 'question' ? 'question' : 'blocked'; + // Permission/plan-review prompts inherit risk from what is being requested. + let risk = rateRisk(promptText); + if (signal.kind === 'permission' || signal.kind === 'plan_review') { + risk = maxRisk(risk, 'medium'); + } + return { + kind, + risk, + topic: summarizeTopic(promptText), + confidence: 'high', + evidence: { + messageId: message.id, + reason: `structured awaiting-input signal: ${signal.kind}`, + structured: true, + }, + }; +} + +function classifyHeuristic(message: PianolaMessage): PianolaClassification { + const content = message.content ?? ''; + const lower = content.toLowerCase(); + const hasQuestionMark = content.includes('?'); + const hasQuestionPhrase = containsAny(lower, QUESTION_PHRASES); + const hasBlockedPhrase = containsAny(lower, BLOCKED_PHRASES); + // Bracketed prompts like [y/n], (yes/no), 1) ... 2) ... + const hasChoiceMarker = /\[[^\]]*\/[^\]]*\]|\((?:y\/n|yes\/no)\)/i.test(content); + + let kind: PianolaSignalKind = 'none'; + let confidence: PianolaClassification['confidence'] = 'low'; + let reason = 'no question or blocked signal detected'; + + if (hasQuestionPhrase || hasChoiceMarker) { + kind = 'question'; + confidence = 'medium'; + reason = hasChoiceMarker ? 'explicit choice marker in text' : 'question phrase in text'; + } else if (hasBlockedPhrase) { + kind = 'blocked'; + confidence = 'medium'; + reason = 'blocked phrase in text'; + } else if (hasQuestionMark) { + kind = 'question'; + confidence = 'low'; + reason = 'trailing question mark only'; + } + + return { + kind, + risk: kind === 'none' ? 'low' : rateRisk(content), + topic: kind === 'none' ? '' : summarizeTopic(content), + confidence, + evidence: { messageId: kind === 'none' ? null : message.id, reason, structured: false }, + }; +} + +/** A classification meaning "nothing actionable". */ +export function noneClassification(reason: string): PianolaClassification { + return { + kind: 'none', + risk: 'low', + topic: '', + confidence: 'high', + evidence: { messageId: null, reason, structured: false }, + }; +} + +/** + * Classify the tail of a transcript. Messages must be in chronological order + * (oldest first), matching `maestro-cli session show --json`. + */ +export function classifyMessages(messages: readonly PianolaMessage[]): PianolaClassification { + if (messages.length === 0) { + return noneClassification('empty transcript'); + } + + const assistant = lastAssistantMessage(messages); + if (!assistant) { + return noneClassification('no assistant message in transcript'); + } + + // If the user already replied after the last assistant turn, not waiting. + if (userHasRepliedSince(messages, assistant)) { + return noneClassification('user has replied since the last assistant turn'); + } + + if (assistant.awaitingInput) { + return classifyFromStructured(assistant, assistant.awaitingInput); + } + + return classifyHeuristic(assistant); +} diff --git a/src/main/pianola/pianola-policy.ts b/src/main/pianola/pianola-policy.ts new file mode 100644 index 0000000000..658afbafc4 --- /dev/null +++ b/src/main/pianola/pianola-policy.ts @@ -0,0 +1,133 @@ +/** + * Pianola policy engine - PURE functions. + * + * Given a classification and the user's rules, decide the action. Safety-first: + * - kind 'none' -> ignore + * - high risk -> always escalate (a rule can never auto-answer it) + * - matched rule, auto_answer -> auto-answer (only when risk is below high) + * - matched rule, escalate/ignore-> as the rule says + * - no matching rule -> escalate (we never auto-answer without an explicit rule) + * + * No I/O, no Electron, no app state. Unit tested against fixtures. + */ + +import type { + PianolaClassification, + PianolaDecision, + PianolaRule, +} from '../../shared/pianola/types'; +import { riskAtMost } from './pianola-classifier'; + +/** Context needed to scope-filter rules to the current tab/project. */ +export interface PianolaPolicyContext { + projectPath?: string; + tabId?: string; +} + +/** True if a rule's scope applies in the given context. */ +export function ruleAppliesToScope(rule: PianolaRule, ctx: PianolaPolicyContext): boolean { + switch (rule.scope) { + case 'global': + return true; + case 'project': + return !!rule.scopeId && rule.scopeId === ctx.projectPath; + case 'tab': + return !!rule.scopeId && rule.scopeId === ctx.tabId; + default: + return false; + } +} + +/** True if a rule's match conditions are satisfied by the classification. */ +export function ruleMatchesClassification( + rule: PianolaRule, + classification: PianolaClassification +): boolean { + const { match } = rule; + + if (match.maxRisk && !riskAtMost(classification.risk, match.maxRisk)) { + return false; + } + + if (match.kinds && match.kinds.length > 0 && !match.kinds.includes(classification.kind)) { + return false; + } + + if (match.topicIncludes && match.topicIncludes.length > 0) { + const topic = classification.topic.toLowerCase(); + const anyHit = match.topicIncludes.some((needle) => topic.includes(needle.toLowerCase())); + if (!anyHit) return false; + } + + return true; +} + +/** + * Select the highest-precedence enabled rule that applies to scope and matches + * the classification. Lower `priority` wins; ties break by earlier `createdAt`. + */ +export function selectRule( + classification: PianolaClassification, + rules: readonly PianolaRule[], + ctx: PianolaPolicyContext = {} +): PianolaRule | null { + const candidates = rules + .filter((r) => r.enabled) + .filter((r) => ruleAppliesToScope(r, ctx)) + .filter((r) => ruleMatchesClassification(r, classification)) + .sort((a, b) => a.priority - b.priority || a.createdAt - b.createdAt); + return candidates[0] ?? null; +} + +/** Decide what Pianola should do about a classified prompt. */ +export function decide( + classification: PianolaClassification, + rules: readonly PianolaRule[], + ctx: PianolaPolicyContext = {} +): PianolaDecision { + if (classification.kind === 'none') { + return { action: 'ignore', matchedRuleId: null, reason: 'nothing actionable' }; + } + + const rule = selectRule(classification, rules, ctx); + + if (!rule) { + return { + action: 'escalate', + matchedRuleId: null, + reason: 'no matching rule; escalating by default', + }; + } + + if (rule.action === 'ignore') { + return { action: 'ignore', matchedRuleId: rule.id, reason: 'rule says ignore' }; + } + + if (rule.action === 'escalate') { + return { action: 'escalate', matchedRuleId: rule.id, reason: 'rule says escalate' }; + } + + // rule.action === 'auto_answer' + if (classification.risk === 'high') { + return { + action: 'escalate', + matchedRuleId: rule.id, + reason: 'high-risk prompt always escalates, overriding auto-answer rule', + }; + } + + if (!rule.answer || rule.answer.trim().length === 0) { + return { + action: 'escalate', + matchedRuleId: rule.id, + reason: 'auto-answer rule matched but has no answer text; escalating', + }; + } + + return { + action: 'auto_answer', + answer: rule.answer, + matchedRuleId: rule.id, + reason: 'matched auto-answer rule', + }; +} diff --git a/src/renderer/components/Settings/tabs/EncoreTab.tsx b/src/renderer/components/Settings/tabs/EncoreTab.tsx index 58c291192b..d9a937ed14 100644 --- a/src/renderer/components/Settings/tabs/EncoreTab.tsx +++ b/src/renderer/components/Settings/tabs/EncoreTab.tsx @@ -1256,6 +1256,70 @@ export function EncoreTab({ theme, isOpen }: EncoreTabProps) { )} + + {/* Pianola Feature Section */} +
+ +
); } diff --git a/src/renderer/stores/settingsStore.ts b/src/renderer/stores/settingsStore.ts index afe3bb319a..ba22bb4479 100644 --- a/src/renderer/stores/settingsStore.ts +++ b/src/renderer/stores/settingsStore.ts @@ -212,6 +212,7 @@ const DEFAULT_ENCORE_FEATURES: EncoreFeatureFlags = { usageStats: true, symphony: true, maestroCue: false, + pianola: false, }; // File Preview / Edit toolbar buttons. Each key maps to a visibility toggle in diff --git a/src/renderer/types/index.ts b/src/renderer/types/index.ts index 9230c67bf5..bba8fcb988 100644 --- a/src/renderer/types/index.ts +++ b/src/renderer/types/index.ts @@ -1066,6 +1066,7 @@ export interface EncoreFeatureFlags { usageStats: boolean; symphony: boolean; maestroCue: boolean; + pianola: boolean; } // Director's Notes settings for synopsis generation diff --git a/src/shared/pianola/types.ts b/src/shared/pianola/types.ts new file mode 100644 index 0000000000..8660d6ba16 --- /dev/null +++ b/src/shared/pianola/types.ts @@ -0,0 +1,116 @@ +/** + * Pianola - shared contracts. + * + * Pianola is Maestro's autonomous manager agent (Encore-gated, flag `pianola`). + * It watches agent tabs, detects when an agent is awaiting the user, classifies + * the ask and its risk, then either auto-answers low-risk prompts from the user's + * rules or escalates. These contracts are shared across main, renderer, and CLI. + * + * The classifier and policy engine that consume these types are pure functions + * (see src/main/pianola/pianola-classifier.ts and pianola-policy.ts). + */ + +/** Risk of acting on a detected prompt, ordered low < medium < high. */ +export type PianolaRisk = 'low' | 'medium' | 'high'; + +/** What Pianola decides to do about a detected prompt. */ +export type PianolaActionKind = 'auto_answer' | 'escalate' | 'ignore'; + +/** What the classifier detected in the transcript tail. */ +export type PianolaSignalKind = 'question' | 'blocked' | 'none'; + +/** Role of a message, mirroring the WS SessionHistoryMessage contract. */ +export type PianolaMessageRole = + | 'user' + | 'assistant' + | 'system' + | 'tool' + | 'thinking' + | 'error' + | 'unknown'; + +/** + * Structured signal emitted by the parser layer when an agent is unambiguously + * waiting on the user. Narrow by design: only set for high-confidence cases + * (permission prompts, plan-mode review, explicit choices). Populated in a later + * step; the classifier treats its presence as authoritative. + */ +export interface AwaitingInputSignal { + kind: 'permission' | 'plan_review' | 'choice' | 'question'; + /** The prompt text shown to the user, if extractable. */ + prompt?: string; + /** Discrete options the user may pick from, if any. */ + options?: string[]; +} + +/** + * A normalized message the classifier reads. Mirrors the WebSocket + * `SessionHistoryMessage` shape so the CLI watcher can feed + * `maestro-cli session show --json` output in directly. + */ +export interface PianolaMessage { + id: string; + role: PianolaMessageRole; + source: string; + content: string; + /** ISO-8601 timestamp. */ + timestamp: string; + /** Structured awaiting-input marker, when the parser layer provides one. */ + awaitingInput?: AwaitingInputSignal; +} + +/** Result of classifying a transcript tail. */ +export interface PianolaClassification { + kind: PianolaSignalKind; + risk: PianolaRisk; + /** Short human-readable summary of what is being asked. */ + topic: string; + confidence: 'low' | 'medium' | 'high'; + /** Why this classification was reached, for the audit log and UI. */ + evidence: { + messageId: string | null; + reason: string; + /** True if derived from a structured AwaitingInputSignal, false if heuristic. */ + structured: boolean; + }; +} + +/** Scope a rule applies to. */ +export type PianolaRuleScope = 'global' | 'project' | 'tab'; + +/** + * Editable user rule. Matched against a classification to decide an action. + * Kept declarative and simple for v1 - no embedded code, just match conditions. + */ +export interface PianolaRule { + id: string; + enabled: boolean; + scope: PianolaRuleScope; + /** Project path (scope 'project') or tab id (scope 'tab'); omit for global. */ + scopeId?: string; + match: { + /** Only fire when classification.risk is at most this level. */ + maxRisk?: PianolaRisk; + /** Restrict to these signal kinds. */ + kinds?: PianolaSignalKind[]; + /** Case-insensitive substring match against the classification topic. */ + topicIncludes?: string[]; + }; + action: PianolaActionKind; + /** Reply text for an auto_answer action (template, v1). */ + answer?: string; + /** Lower runs first. */ + priority: number; + createdAt: number; + updatedAt: number; + description?: string; +} + +/** Decision the policy engine produced for a classification. */ +export interface PianolaDecision { + action: PianolaActionKind; + /** Present iff action === 'auto_answer'. */ + answer?: string; + matchedRuleId: string | null; + reason: string; +} diff --git a/src/shared/settingsMetadata.ts b/src/shared/settingsMetadata.ts index 6d9017065e..70db2637db 100644 --- a/src/shared/settingsMetadata.ts +++ b/src/shared/settingsMetadata.ts @@ -1011,7 +1011,13 @@ export const SETTINGS_METADATA: Record = { encoreFeatures: { description: 'Feature flags for experimental/encore features. Object with boolean flags.', type: 'object', - default: { directorNotes: false, usageStats: true, symphony: true, maestroCue: false }, + default: { + directorNotes: false, + usageStats: true, + symphony: true, + maestroCue: false, + pianola: false, + }, category: 'advanced', }, directorNotesSettings: { From cabddc0aff1931d4d0d401ca20e51f59835a411d Mon Sep 17 00:00:00 2001 From: jSydorowicz21 Date: Wed, 24 Jun 2026 23:57:02 -0500 Subject: [PATCH 02/76] refactor(pianola): harden classifier/policy per codex review Addresses all findings from the codex review of the foundation: - CRITICAL: high-risk prompts now always escalate before any rule action, so a broad `ignore` rule can no longer suppress the most important alerts. Added a regression test. - Risk lexicon extracted into pianola-risk.ts with word-boundary regexes instead of raw substrings, fixing false positives (`auth` in `author`, `token` in `tokenizer`) and adding outward-facing/destructive coverage (push/publish/ release/merge PR/deploy/email/.env/private key/payment). Boundary tests added. - Empty-match `auto_answer` rules now escalate (an auto-answer rule must narrow its scope), preventing an accidental catch-all from auto-answering everything. - Numbered-choice detection (1) .. 2) ..) implemented to match the documented behavior; trailing-question-mark check tightened to end-of-message only. - Project/tab rule scoping normalizes path separators and casing for cross-platform robustness. - PianolaDecision is now a discriminated union so `answer` only exists on an auto_answer decision. - Added a compile-time drift guard tying PianolaMessage to the web-server SessionHistoryMessage contract. Typecheck clean; 58 Pianola/encore tests pass. --- .../main/pianola/pianola-classifier.test.ts | 61 +++++++++++ .../pianola/pianola-message-contract.test.ts | 28 +++++ .../main/pianola/pianola-policy.test.ts | 26 +++-- src/main/pianola/pianola-classifier.ts | 101 +++++------------- src/main/pianola/pianola-policy.ts | 56 +++++++--- src/main/pianola/pianola-risk.ts | 96 +++++++++++++++++ src/shared/pianola/types.ts | 17 +-- 7 files changed, 282 insertions(+), 103 deletions(-) create mode 100644 src/__tests__/main/pianola/pianola-message-contract.test.ts create mode 100644 src/main/pianola/pianola-risk.ts diff --git a/src/__tests__/main/pianola/pianola-classifier.test.ts b/src/__tests__/main/pianola/pianola-classifier.test.ts index 11b935ea5c..8605ce501d 100644 --- a/src/__tests__/main/pianola/pianola-classifier.test.ts +++ b/src/__tests__/main/pianola/pianola-classifier.test.ts @@ -141,3 +141,64 @@ describe('classifyMessages - risk rating', () => { expect(c.risk).toBe('high'); }); }); + +describe('classifyMessages - risk lexicon coverage', () => { + const highCases = [ + 'Should I push to origin and deploy to production?', + 'Should I publish the release now?', + 'Should I merge the PR?', + 'Should I send an email to the team?', + 'Should I commit the .env file?', + 'Should I add the private key to the repo?', + 'Should I force push?', + 'Should I run git push?', + ]; + for (const text of highCases) { + it(`rates high: "${text}"`, () => { + expect(classifyMessages([msg('assistant', text)]).risk).toBe('high'); + }); + } + + const mediumCases = [ + 'Should I rename the helper function?', + 'Should I install the dependency?', + 'Should I refactor this module?', + ]; + for (const text of mediumCases) { + it(`rates medium: "${text}"`, () => { + expect(classifyMessages([msg('assistant', text)]).risk).toBe('medium'); + }); + } + + it('does not treat "author" as auth/high risk (word boundary)', () => { + const c = classifyMessages([msg('assistant', 'Should I credit the author in the header?')]); + expect(c.risk).toBe('low'); + }); + + it('does not treat "tokenizer" as token/high risk (word boundary)', () => { + const c = classifyMessages([msg('assistant', 'Should I add a tokenizer to the parser?')]); + expect(c.risk).toBe('low'); + }); +}); + +describe('classifyMessages - choice and question-mark precision', () => { + it('detects two or more numbered options as a question', () => { + const c = classifyMessages([ + msg('assistant', 'How to handle this. Options: 1) keep it 2) remove it 3) rename it'), + ]); + expect(c.kind).toBe('question'); + expect(c.confidence).toBe('medium'); + }); + + it('does not treat a single numbered item as a choice', () => { + const c = classifyMessages([msg('assistant', 'I did step 1) refactor the parser.')]); + expect(c.kind).toBe('none'); + }); + + it('ignores a question mark that is not at the end of the message', () => { + const c = classifyMessages([ + msg('assistant', 'Is the value correct? I updated the file accordingly.'), + ]); + expect(c.kind).toBe('none'); + }); +}); diff --git a/src/__tests__/main/pianola/pianola-message-contract.test.ts b/src/__tests__/main/pianola/pianola-message-contract.test.ts new file mode 100644 index 0000000000..cd080c7813 --- /dev/null +++ b/src/__tests__/main/pianola/pianola-message-contract.test.ts @@ -0,0 +1,28 @@ +/** + * @file pianola-message-contract.test.ts + * @description Compile-time drift guard: the WebSocket session-history message + * shape (what `maestro-cli session show --json` returns) must remain assignable + * to PianolaMessage, so the watcher can feed it straight into the classifier. + * If src/main/web-server/types.ts SessionHistoryMessage drifts, this stops + * compiling under `npm run lint`. + */ + +import { describe, it, expect } from 'vitest'; +import type { SessionHistoryMessage } from '../../../main/web-server/types'; +import type { PianolaMessage } from '../../../shared/pianola/types'; + +describe('Pianola message contract', () => { + it('accepts a SessionHistoryMessage as a PianolaMessage', () => { + const wire: SessionHistoryMessage = { + id: 'm1', + role: 'assistant', + source: 'ai', + content: 'hi', + timestamp: '2026-01-01T00:00:00.000Z', + }; + // Compile-time assignability check; the runtime assertions keep vitest happy. + const message: PianolaMessage = wire; + expect(message.id).toBe('m1'); + expect(message.awaitingInput).toBeUndefined(); + }); +}); diff --git a/src/__tests__/main/pianola/pianola-policy.test.ts b/src/__tests__/main/pianola/pianola-policy.test.ts index 66102857b1..9890043b50 100644 --- a/src/__tests__/main/pianola/pianola-policy.test.ts +++ b/src/__tests__/main/pianola/pianola-policy.test.ts @@ -65,19 +65,33 @@ describe('decide - safety defaults', () => { expect(d.matchedRuleId).toBe(r.id); expect(d.reason).toContain('high-risk'); }); + + it('never lets an ignore rule suppress a high-risk prompt', () => { + // Regression: high-risk override must run before rule actions, so a broad + // ignore rule cannot silence the most important alerts. + const r = rule({ action: 'ignore', match: {} }); + const d = decide(classification({ kind: 'blocked', risk: 'high' }), [r]); + expect(d.action).toBe('escalate'); + expect(d.reason).toContain('high-risk'); + }); }); describe('decide - rule actions', () => { it('auto-answers a low-risk prompt matched by a rule', () => { const r = rule({ match: { maxRisk: 'low' }, action: 'auto_answer', answer: 'Use tabs.' }); const d = decide(classification({ kind: 'question', risk: 'low' }), [r]); - expect(d.action).toBe('auto_answer'); - expect(d.answer).toBe('Use tabs.'); - expect(d.matchedRuleId).toBe(r.id); + expect(d).toMatchObject({ action: 'auto_answer', answer: 'Use tabs.', matchedRuleId: r.id }); + }); + + it('escalates an auto-answer rule that has no narrowing predicate (too broad)', () => { + const r = rule({ match: {}, action: 'auto_answer', answer: 'sure' }); + const d = decide(classification({ kind: 'question', risk: 'low' }), [r]); + expect(d.action).toBe('escalate'); + expect(d.reason).toContain('narrowing predicate'); }); it('escalates when matched auto-answer rule has no answer text', () => { - const r = rule({ action: 'auto_answer', answer: ' ' }); + const r = rule({ match: { maxRisk: 'low' }, action: 'auto_answer', answer: ' ' }); const d = decide(classification({ kind: 'question', risk: 'low' }), [r]); expect(d.action).toBe('escalate'); expect(d.reason).toContain('no answer'); @@ -90,8 +104,8 @@ describe('decide - rule actions', () => { expect(d.matchedRuleId).toBe(r.id); }); - it('honors an explicit ignore rule', () => { - const r = rule({ action: 'ignore' }); + it('honors an explicit ignore rule for a non-high-risk prompt', () => { + const r = rule({ action: 'ignore', match: { maxRisk: 'low' } }); const d = decide(classification({ kind: 'blocked', risk: 'low' }), [r]); expect(d.action).toBe('ignore'); expect(d.matchedRuleId).toBe(r.id); diff --git a/src/main/pianola/pianola-classifier.ts b/src/main/pianola/pianola-classifier.ts index 6696a6b83f..f0ce1750b4 100644 --- a/src/main/pianola/pianola-classifier.ts +++ b/src/main/pianola/pianola-classifier.ts @@ -7,16 +7,20 @@ * tested against fixture transcripts. * * Detection prefers a structured AwaitingInputSignal (authoritative, high - * confidence) and falls back to conservative heuristics over message text. + * confidence) and falls back to conservative heuristics over message text. Risk + * rating lives in pianola-risk.ts. */ import type { AwaitingInputSignal, PianolaClassification, PianolaMessage, - PianolaRisk, PianolaSignalKind, } from '../../shared/pianola/types'; +import { maxRisk, rateRisk } from './pianola-risk'; + +// Re-exported for convenience so callers can pull risk helpers from the classifier. +export { riskAtMost, maxRisk } from './pianola-risk'; /** Phrases that strongly suggest the assistant is asking the user to decide. */ const QUESTION_PHRASES = [ @@ -51,80 +55,21 @@ const BLOCKED_PHRASES = [ 'needs approval', ]; -/** High-risk: destructive, security-sensitive, or irreversible/outward-facing. */ -const HIGH_RISK_TERMS = [ - 'delete', - 'rm -rf', - 'drop table', - 'drop database', - 'truncate', - 'force push', - 'force-push', - 'git push --force', - 'reset --hard', - 'deploy', - 'production', - 'prod ', - 'secret', - 'secrets', - 'password', - 'credential', - 'api key', - 'api-key', - 'token', - 'auth', - 'payment', - 'charge', - 'invoice', - 'migrate', - 'migration', - 'wipe', - 'overwrite', - 'revoke', - 'sudo', -]; - -/** Medium-risk: meaningful but recoverable engineering choices. */ -const MEDIUM_RISK_TERMS = [ - 'install', - 'upgrade', - 'downgrade', - 'bump', - 'dependency', - 'dependencies', - 'package', - 'refactor', - 'rename', - 'move file', - 'delete comment', - 'test strategy', - 'restructure', - 'schema', - 'config', -]; - -const RISK_ORDER: Record = { low: 0, medium: 1, high: 2 }; - -/** True if risk `a` is at most as severe as `b`. */ -export function riskAtMost(a: PianolaRisk, b: PianolaRisk): boolean { - return RISK_ORDER[a] <= RISK_ORDER[b]; -} +/** Slash/paren choice markers: [y/n], (yes/no), [approve/cancel]. */ +const SLASH_CHOICE_RE = /\[[^\]]*\/[^\]]*\]|\((?:y\/n|yes\/no)\)/i; -/** The more severe of two risks. */ -export function maxRisk(a: PianolaRisk, b: PianolaRisk): PianolaRisk { - return RISK_ORDER[a] >= RISK_ORDER[b] ? a : b; -} +/** Two or more numbered options: "1) approve", "2. cancel" at line/segment starts. */ +const NUMBERED_CHOICE_RE = /(?:^|\n|\s)\d+[.)]\s+\S/g; function containsAny(haystack: string, needles: readonly string[]): boolean { return needles.some((n) => haystack.includes(n)); } -/** Rate risk from free text. */ -function rateRisk(text: string): PianolaRisk { - const lower = text.toLowerCase(); - if (containsAny(lower, HIGH_RISK_TERMS)) return 'high'; - if (containsAny(lower, MEDIUM_RISK_TERMS)) return 'medium'; - return 'low'; +/** True if the text presents an explicit set of choices. */ +function hasChoiceMarker(content: string): boolean { + if (SLASH_CHOICE_RE.test(content)) return true; + const numbered = content.match(NUMBERED_CHOICE_RE); + return !!numbered && numbered.length >= 2; } /** Build a short topic summary from a prompt: first sentence/line, trimmed. */ @@ -171,7 +116,8 @@ function classifyFromStructured( ): PianolaClassification { const promptText = signal.prompt ?? message.content; const kind: PianolaSignalKind = signal.kind === 'question' ? 'question' : 'blocked'; - // Permission/plan-review prompts inherit risk from what is being requested. + // Permission/plan-review prompts inherit risk from what is being requested, + // but are never less than medium since they gate an action. let risk = rateRisk(promptText); if (signal.kind === 'permission' || signal.kind === 'plan_review') { risk = maxRisk(risk, 'medium'); @@ -192,25 +138,26 @@ function classifyFromStructured( function classifyHeuristic(message: PianolaMessage): PianolaClassification { const content = message.content ?? ''; const lower = content.toLowerCase(); - const hasQuestionMark = content.includes('?'); + // "trailing question mark only" means the message ends with '?', not that it + // contains one somewhere (diagnostic text often contains stray '?'). + const endsWithQuestion = content.trimEnd().endsWith('?'); const hasQuestionPhrase = containsAny(lower, QUESTION_PHRASES); const hasBlockedPhrase = containsAny(lower, BLOCKED_PHRASES); - // Bracketed prompts like [y/n], (yes/no), 1) ... 2) ... - const hasChoiceMarker = /\[[^\]]*\/[^\]]*\]|\((?:y\/n|yes\/no)\)/i.test(content); + const choiceMarker = hasChoiceMarker(content); let kind: PianolaSignalKind = 'none'; let confidence: PianolaClassification['confidence'] = 'low'; let reason = 'no question or blocked signal detected'; - if (hasQuestionPhrase || hasChoiceMarker) { + if (hasQuestionPhrase || choiceMarker) { kind = 'question'; confidence = 'medium'; - reason = hasChoiceMarker ? 'explicit choice marker in text' : 'question phrase in text'; + reason = choiceMarker ? 'explicit choice marker in text' : 'question phrase in text'; } else if (hasBlockedPhrase) { kind = 'blocked'; confidence = 'medium'; reason = 'blocked phrase in text'; - } else if (hasQuestionMark) { + } else if (endsWithQuestion) { kind = 'question'; confidence = 'low'; reason = 'trailing question mark only'; diff --git a/src/main/pianola/pianola-policy.ts b/src/main/pianola/pianola-policy.ts index 658afbafc4..c14c781b52 100644 --- a/src/main/pianola/pianola-policy.ts +++ b/src/main/pianola/pianola-policy.ts @@ -1,14 +1,16 @@ /** * Pianola policy engine - PURE functions. * - * Given a classification and the user's rules, decide the action. Safety-first: - * - kind 'none' -> ignore - * - high risk -> always escalate (a rule can never auto-answer it) - * - matched rule, auto_answer -> auto-answer (only when risk is below high) - * - matched rule, escalate/ignore-> as the rule says - * - no matching rule -> escalate (we never auto-answer without an explicit rule) + * Given a classification and the user's rules, decide the action. Safety-first, + * in this exact precedence: + * 1. kind 'none' -> ignore + * 2. risk 'high' -> ALWAYS escalate (no rule can suppress it) + * 3. matched auto_answer rule -> auto-answer (rule must narrow its scope + have answer text) + * 4. matched escalate/ignore rule -> as the rule says + * 5. no matching rule -> escalate (we never auto-answer without an explicit rule) * - * No I/O, no Electron, no app state. Unit tested against fixtures. + * The high-risk guard runs before rule actions so an `ignore` rule can never + * silence a high-risk prompt. No I/O, no Electron, no app state. */ import type { @@ -16,7 +18,7 @@ import type { PianolaDecision, PianolaRule, } from '../../shared/pianola/types'; -import { riskAtMost } from './pianola-classifier'; +import { riskAtMost } from './pianola-risk'; /** Context needed to scope-filter rules to the current tab/project. */ export interface PianolaPolicyContext { @@ -24,20 +26,40 @@ export interface PianolaPolicyContext { tabId?: string; } +/** + * Normalize a scope identifier for comparison: trim, unify path separators, + * drop a trailing slash, and lowercase. This makes project-path matching robust + * to Windows casing and slash differences; tab ids (uuids) are unaffected. + */ +function normalizeScopeId(value: string | undefined): string { + if (!value) return ''; + return value.trim().replace(/\\/g, '/').replace(/\/+$/, '').toLowerCase(); +} + /** True if a rule's scope applies in the given context. */ export function ruleAppliesToScope(rule: PianolaRule, ctx: PianolaPolicyContext): boolean { switch (rule.scope) { case 'global': return true; case 'project': - return !!rule.scopeId && rule.scopeId === ctx.projectPath; + return !!rule.scopeId && normalizeScopeId(rule.scopeId) === normalizeScopeId(ctx.projectPath); case 'tab': - return !!rule.scopeId && rule.scopeId === ctx.tabId; + return !!rule.scopeId && normalizeScopeId(rule.scopeId) === normalizeScopeId(ctx.tabId); default: return false; } } +/** True if a rule constrains what it matches (vs matching everything). */ +export function hasNarrowingPredicate(rule: PianolaRule): boolean { + const { match } = rule; + return ( + !!match.maxRisk || + (!!match.kinds && match.kinds.length > 0) || + (!!match.topicIncludes && match.topicIncludes.length > 0) + ); +} + /** True if a rule's match conditions are satisfied by the classification. */ export function ruleMatchesClassification( rule: PianolaRule, @@ -91,6 +113,16 @@ export function decide( const rule = selectRule(classification, rules, ctx); + // High risk always escalates, before any rule action is applied. An ignore or + // auto_answer rule must never be able to suppress a high-risk prompt. + if (classification.risk === 'high') { + return { + action: 'escalate', + matchedRuleId: rule?.id ?? null, + reason: 'high-risk prompt always escalates', + }; + } + if (!rule) { return { action: 'escalate', @@ -108,11 +140,11 @@ export function decide( } // rule.action === 'auto_answer' - if (classification.risk === 'high') { + if (!hasNarrowingPredicate(rule)) { return { action: 'escalate', matchedRuleId: rule.id, - reason: 'high-risk prompt always escalates, overriding auto-answer rule', + reason: 'auto-answer rule has no narrowing predicate; too broad to auto-answer', }; } diff --git a/src/main/pianola/pianola-risk.ts b/src/main/pianola/pianola-risk.ts new file mode 100644 index 0000000000..c3a9f1f968 --- /dev/null +++ b/src/main/pianola/pianola-risk.ts @@ -0,0 +1,96 @@ +/** + * Pianola risk rating - PURE. + * + * Rates how risky it would be to act on a detected prompt, ordered + * low < medium < high. Kept in its own module so the taxonomy is easy to audit, + * extend, and test in isolation from the classifier. + * + * Matching uses word-boundary regexes, not raw substrings, so `auth` does not + * match `author` and `token` does not match `tokenizer`. High is checked before + * medium, so the most severe match wins. + * + * Safety bias: when in doubt we rate higher. The policy escalates anything high + * unconditionally and only auto-answers on an explicit rule, so an over-estimate + * costs an extra escalation (safe) while an under-estimate could auto-answer + * something dangerous (unsafe). + */ + +import type { PianolaRisk } from '../../shared/pianola/types'; + +const RISK_ORDER: Record = { low: 0, medium: 1, high: 2 }; + +/** True if risk `a` is at most as severe as `b`. */ +export function riskAtMost(a: PianolaRisk, b: PianolaRisk): boolean { + return RISK_ORDER[a] <= RISK_ORDER[b]; +} + +/** The more severe of two risks. */ +export function maxRisk(a: PianolaRisk, b: PianolaRisk): PianolaRisk { + return RISK_ORDER[a] >= RISK_ORDER[b] ? a : b; +} + +/** + * High risk: destructive, irreversible, security-sensitive, financial, or + * outward-facing actions. Single common words (push, merge, send, delete) are + * qualified so everyday prose does not trip them. + */ +const HIGH_RISK_PATTERNS: readonly RegExp[] = [ + // Destructive filesystem / data + /\brm\s+-rf\b/i, + /\bdrop\s+(table|database)\b/i, + /\b(wipe|wiping|truncate|truncating|destroy|destroying|purge|purging)\b/i, + /\bdelete\s+(the\s+|all\s+|every\s+)?(file|files|data|database|table|branch|repo|repository|directory|folder|account|user|everything)\b/i, + // Version control - irreversible or outward-facing + /\bforce[-\s]?push(ing)?\b/i, + /\bgit\s+push\b/i, + /\bpush\s+(to|--force|origin|remote|upstream)\b/i, + /\breset\s+--hard\b/i, + /\b(rebase|rebasing)\b/i, + /\bmerge\s+(the\s+)?(pr|pull\s+request|branch|into\s+\w+|to\s+main|to\s+master)\b/i, + /\b(revert|reverting|revoke|revoking)\b/i, + // Deploy / publish / release + /\b(deploy|deploying|deployment|publish|publishing|release|releasing)\b/i, + /\bproduction\b/i, + /\bprod\b/i, + /\bship\s+(it|to)\b/i, + // Secrets / auth / permissions + /\b(secret|secrets|password|passwords|credential|credentials|passphrase)\b/i, + /\bapi[-\s]?keys?\b/i, + /\bprivate\s+keys?\b/i, + /\b(access|auth)\s+tokens?\b/i, + /\btokens?\b/i, + /\bauth(entication|orization|orize)?\b/i, + /\.env\b/i, + /\b(chmod|chown|sudo)\b/i, + // Financial / outward communication + /\b(payment|payments|charge|charging|invoice|billing|refund)\b/i, + /\bsend\s+(an?\s+)?(email|message|slack|text|dm)\b/i, + /\b(email|emailing)\b/i, + /\bpublish\s+(a\s+)?(post|article|comment)\b/i, +]; + +/** + * Medium risk: meaningful but recoverable engineering choices. + */ +const MEDIUM_RISK_PATTERNS: readonly RegExp[] = [ + /\b(install|installing|uninstall|upgrade|upgrading|downgrade|downgrading|bump|bumping)\b/i, + /\b(dependency|dependencies|package|packages)\b/i, + /\b(refactor|refactoring|rename|renaming|restructure|restructuring)\b/i, + /\b(migrate|migrating|migration|migrations)\b/i, + /\b(schema|config|configuration|configure)\b/i, + /\b(delete|deleting|remove|removing|overwrite|overwriting)\b/i, + /\b(move|moving|create|creating)\s+(a\s+|the\s+)?files?\b/i, + /\bcommit(ting)?\b/i, + /\btest\s+strategy\b/i, +]; + +function matchesAny(text: string, patterns: readonly RegExp[]): boolean { + return patterns.some((re) => re.test(text)); +} + +/** Rate the risk implied by free text. High is checked before medium. */ +export function rateRisk(text: string): PianolaRisk { + if (matchesAny(text, HIGH_RISK_PATTERNS)) return 'high'; + if (matchesAny(text, MEDIUM_RISK_PATTERNS)) return 'medium'; + return 'low'; +} diff --git a/src/shared/pianola/types.ts b/src/shared/pianola/types.ts index 8660d6ba16..a4ad1a2d3f 100644 --- a/src/shared/pianola/types.ts +++ b/src/shared/pianola/types.ts @@ -106,11 +106,12 @@ export interface PianolaRule { description?: string; } -/** Decision the policy engine produced for a classification. */ -export interface PianolaDecision { - action: PianolaActionKind; - /** Present iff action === 'auto_answer'. */ - answer?: string; - matchedRuleId: string | null; - reason: string; -} +/** + * Decision the policy engine produced for a classification. Discriminated on + * `action` so `answer` only exists on an auto_answer decision - impossible + * states (e.g. an escalate carrying an answer) are unrepresentable. + */ +export type PianolaDecision = + | { action: 'auto_answer'; answer: string; matchedRuleId: string | null; reason: string } + | { action: 'escalate'; matchedRuleId: string | null; reason: string } + | { action: 'ignore'; matchedRuleId: string | null; reason: string }; From 1de70103e6b0efd1debdd652c1a8bfb49be426e5 Mon Sep 17 00:00:00 2001 From: jSydorowicz21 Date: Thu, 25 Jun 2026 00:01:40 -0500 Subject: [PATCH 03/76] feat(pianola): add pure structured awaiting-input detector Step 2 of the Pianola plan, realized as a pure detector module rather than parser hot-path surgery (more maintainable; no parser/IPC/WebSocket contract changes, and the watcher derives the signal from session-show output it already has). - pianola-awaiting-detector.ts: detectAwaitingInput(content) returns a typed AwaitingInputSignal (precedence plan_review > permission > choice > question) with extracted options (numbered and slash/bracket forms); enrichWithAwaiting Input(messages) fills it onto assistant turns immutably before classification. - The classifier already treats a present signal as authoritative (high confidence), so structured prompts now upgrade from heuristic to structured. - Tests cover each kind, option extraction, null cases, immutability, and the detector+classifier integration. Typecheck clean; 64 Pianola tests pass. --- Plans/pianola-implementation-plan.md | 20 ++- .../pianola/pianola-awaiting-detector.test.ts | 123 ++++++++++++++++++ src/main/pianola/pianola-awaiting-detector.ts | 121 +++++++++++++++++ 3 files changed, 258 insertions(+), 6 deletions(-) create mode 100644 src/__tests__/main/pianola/pianola-awaiting-detector.test.ts create mode 100644 src/main/pianola/pianola-awaiting-detector.ts diff --git a/Plans/pianola-implementation-plan.md b/Plans/pianola-implementation-plan.md index ec83ec4b65..21c169e0cf 100644 --- a/Plans/pianola-implementation-plan.md +++ b/Plans/pianola-implementation-plan.md @@ -48,12 +48,20 @@ Files (verified line refs): - Tests: fixture transcripts in `src/__tests__/main/pianola/` covering question/blocked/none + low/med/high. Pure functions, no I/O, no app - the brain, fully unit-tested first. -### Step 2 - Structured awaiting-input signal (narrow) [NEXT] - -- Extend `ParsedEvent` (`src/main/parsers/agent-output-parser.ts`) with `awaitingInput?: { kind; prompt?; options? }`. -- Populate in `claude-output-parser.ts` for unambiguous cases (plan-mode confirm, explicit `[y/n]`, known permission strings). -- Thread through `LogEntry` and the WS `SessionHistoryMessage` payload (`web-server/types.ts`) so the watcher sees it. -- Tests against captured transcripts. +### Step 2 - Structured awaiting-input signal (narrow) [DONE] + +Refinement vs the original plan: implemented as a pure detector module +(`src/main/pianola/pianola-awaiting-detector.ts`) instead of surgery on the +parser hot path. Rationale (maintainability-first): the watcher consumes +`session show --json` (the `SessionHistoryMessage` shape, which has no +awaiting-input field), so deriving the signal in a pure, isolated, fully-tested +module keeps Pianola cohesive and avoids changing the parser / IPC / WebSocket +contracts. `detectAwaitingInput(content)` returns a typed `AwaitingInputSignal` +(plan_review > permission > choice > question) with extracted options; +`enrichWithAwaitingInput(messages)` fills it onto assistant turns before the +classifier runs (which already treats a present signal as authoritative). +Threading a signal through the parser/WS layers remains a possible future +optimization but is not needed for the feature to work. ### Step 3 - Storage [NEXT] diff --git a/src/__tests__/main/pianola/pianola-awaiting-detector.test.ts b/src/__tests__/main/pianola/pianola-awaiting-detector.test.ts new file mode 100644 index 0000000000..096dc31906 --- /dev/null +++ b/src/__tests__/main/pianola/pianola-awaiting-detector.test.ts @@ -0,0 +1,123 @@ +/** + * @file pianola-awaiting-detector.test.ts + * @description Unit tests for the pure structured awaiting-input detector. + */ + +import { describe, it, expect } from 'vitest'; +import { + detectAwaitingInput, + enrichWithAwaitingInput, +} from '../../../main/pianola/pianola-awaiting-detector'; +import { classifyMessages } from '../../../main/pianola/pianola-classifier'; +import type { PianolaMessage } from '../../../shared/pianola/types'; + +let seq = 0; +function msg(role: PianolaMessage['role'], content: string): PianolaMessage { + seq += 1; + return { + id: `m${seq}`, + role, + source: role === 'assistant' ? 'ai' : role, + content, + timestamp: new Date(Date.UTC(2026, 0, 1, 0, 0, seq)).toISOString(), + }; +} + +describe('detectAwaitingInput - kinds', () => { + it('detects plan review', () => { + const s = detectAwaitingInput("Here's the plan: refactor the parser. Ready to code?"); + expect(s?.kind).toBe('plan_review'); + }); + + it('detects a permission request', () => { + const s = detectAwaitingInput('Do you want me to run the migration now?'); + expect(s?.kind).toBe('permission'); + }); + + it('detects an explicit choice and extracts numbered options', () => { + const s = detectAwaitingInput('How should I proceed? 1) keep it 2) remove it 3) rename it'); + expect(s?.kind).toBe('choice'); + expect(s?.options).toEqual(['keep it', 'remove it', 'rename it']); + }); + + it('extracts slash-bracket options', () => { + const s = detectAwaitingInput('Proceed with the change? [keep/discard]'); + expect(s?.options).toEqual(['keep', 'discard']); + }); + + it('detects a direct question', () => { + const s = detectAwaitingInput('Should I use tabs or spaces?'); + expect(s?.kind).toBe('question'); + expect(s?.prompt).toContain('Should I use tabs or spaces?'); + }); + + it('returns null for a plain statement', () => { + expect(detectAwaitingInput('I updated the README and ran the tests.')).toBeNull(); + }); + + it('returns null for question intent that is not actually a question', () => { + // No trailing question mark -> left to the classifier heuristics, not structured. + expect(detectAwaitingInput('I will decide which option is best and continue.')).toBeNull(); + }); + + it('returns null for empty content', () => { + expect(detectAwaitingInput(' ')).toBeNull(); + }); + + it('prefers plan review over permission when both phrasings appear', () => { + const s = detectAwaitingInput('Here is the plan. May I proceed with the plan?'); + expect(s?.kind).toBe('plan_review'); + }); +}); + +describe('enrichWithAwaitingInput', () => { + it('fills awaitingInput on assistant messages without mutating inputs', () => { + const input = [msg('user', 'go'), msg('assistant', 'Do you want me to deploy to production?')]; + const frozenContent = input[1].content; + const out = enrichWithAwaitingInput(input); + expect(out[1].awaitingInput?.kind).toBe('permission'); + expect(input[1].awaitingInput).toBeUndefined(); // original untouched + expect(input[1].content).toBe(frozenContent); + }); + + it('leaves non-assistant messages and plain statements alone', () => { + const out = enrichWithAwaitingInput([ + msg('user', 'should I do it?'), + msg('assistant', 'Done, all tests pass.'), + ]); + expect(out[0].awaitingInput).toBeUndefined(); + expect(out[1].awaitingInput).toBeUndefined(); + }); + + it('does not overwrite a pre-existing signal', () => { + const pre: PianolaMessage = { + ...msg('assistant', 'anything?'), + awaitingInput: { kind: 'choice' }, + }; + const out = enrichWithAwaitingInput([pre]); + expect(out[0].awaitingInput?.kind).toBe('choice'); + }); +}); + +describe('detector + classifier integration', () => { + it('upgrades a permission prompt to a high-confidence structured classification', () => { + const enriched = enrichWithAwaitingInput([ + msg('assistant', 'Do you want me to force push the branch?'), + ]); + const c = classifyMessages(enriched); + expect(c.evidence.structured).toBe(true); + expect(c.confidence).toBe('high'); + expect(c.kind).toBe('blocked'); // permission maps to blocked + expect(c.risk).toBe('high'); // force push + }); + + it('keeps a low-risk auto-answerable choice as a structured choice signal', () => { + const enriched = enrichWithAwaitingInput([ + msg('assistant', 'Which name do you prefer? 1) count 2) total'), + ]); + const c = classifyMessages(enriched); + expect(c.evidence.structured).toBe(true); + expect(c.kind).toBe('blocked'); // choice maps to blocked (awaiting a pick) + expect(c.risk).toBe('low'); + }); +}); diff --git a/src/main/pianola/pianola-awaiting-detector.ts b/src/main/pianola/pianola-awaiting-detector.ts new file mode 100644 index 0000000000..c2bfa1eb44 --- /dev/null +++ b/src/main/pianola/pianola-awaiting-detector.ts @@ -0,0 +1,121 @@ +/** + * Pianola awaiting-input detector - PURE functions. + * + * Derives a structured AwaitingInputSignal from an assistant message's text: + * the unambiguous, high-confidence cases where an agent is clearly waiting on + * the user (permission requests, plan-mode review, explicit choices, direct + * questions), plus the extracted options. + * + * Why a detector module rather than the parser hot path: the watcher consumes + * `maestro-cli session show --json` (the SessionHistoryMessage shape, which has + * no awaiting-input field), so deriving the signal here keeps Pianola cohesive, + * avoids touching the parser/IPC/WebSocket contracts, and stays pure and + * testable. The classifier treats a returned signal as authoritative; weaker + * cues fall through to its heuristics. + * + * Precedence: plan_review > permission > choice > question. + */ + +import type { AwaitingInputSignal, PianolaMessage } from '../../shared/pianola/types'; + +/** Plan-mode review: agent is presenting a plan and asking to proceed. */ +const PLAN_REVIEW_RE = + /\b(here'?s\s+(the|my)\s+plan|ready\s+to\s+(code|implement|build)|exit\s+plan\s+mode|proceed\s+with\s+(this|the)\s+plan|approve\s+(the|this)\s+plan|does\s+this\s+plan\s+look|shall\s+i\s+implement\s+(this|the)\s+plan)\b/i; + +/** Permission request: agent is asking to be allowed to do something. */ +const PERMISSION_RE = + /\b(do you want me to|would you like me to|may i\b|can i\b|allow me to|permission to|do you approve|is it ok(?:ay)?\s+to|shall i (?:go ahead|proceed)|want me to proceed|ok to proceed|confirm before i)\b/i; + +/** Direct decision question intent (only counts when the text is a question). */ +const QUESTION_INTENT_RE = + /\b(should i|shall i|which (?:one|option|approach|do you)|would you prefer|do you want|how (?:should|would) (?:i|we)|what (?:should|would) (?:i|we))\b/i; + +/** Extract a discrete option list, if the text presents one. */ +function extractOptions(content: string): string[] { + // Slash/bracket form: [keep/discard], (yes/no) + const bracket = content.match(/[[(]([^\])]*\/[^\])]*)[\])]/); + if (bracket) { + const parts = bracket[1] + .split('/') + .map((s) => s.trim()) + .filter(Boolean); + if (parts.length >= 2) return parts; + } + // Numbered form: "1) keep it 2) remove it" (inline or across lines) + const numbered = [...content.matchAll(/(?:^|\n|\s)\d+[.)]\s+([^\n]+?)(?=(?:\s+\d+[.)]\s)|\n|$)/g)] + .map((m) => m[1].trim()) + .filter(Boolean); + if (numbered.length >= 2) return numbered; + + return []; +} + +/** A short prompt string: the last question sentence, else the first line. */ +function extractPrompt(content: string): string | undefined { + const trimmed = content.trim(); + if (!trimmed) return undefined; + const questions = trimmed.match(/[^.?!\n]*\?/g); + const lastQuestion = + questions && questions.length > 0 ? questions[questions.length - 1].trim() : ''; + const firstLine = trimmed + .split('\n') + .map((l) => l.trim()) + .find(Boolean); + const prompt = lastQuestion || firstLine || trimmed; + return prompt.length > 200 ? `${prompt.slice(0, 197)}...` : prompt; +} + +/** Build a signal, omitting an empty options array. */ +function signal( + kind: AwaitingInputSignal['kind'], + prompt: string | undefined, + options: string[] +): AwaitingInputSignal { + return options.length > 0 ? { kind, prompt, options } : { kind, prompt }; +} + +/** + * Detect a structured awaiting-input signal in assistant text, or null when + * there is no high-confidence signal. + */ +export function detectAwaitingInput(content: string): AwaitingInputSignal | null { + if (!content || !content.trim()) return null; + + const options = extractOptions(content); + const prompt = extractPrompt(content); + + if (PLAN_REVIEW_RE.test(content)) { + return signal('plan_review', prompt, options); + } + if (PERMISSION_RE.test(content)) { + return signal('permission', prompt, options); + } + if (options.length >= 2) { + return signal('choice', prompt, options); + } + // A question intent only becomes a structured signal when the text is + // actually a question; weaker phrasing is left to the classifier heuristics. + if (QUESTION_INTENT_RE.test(content) && content.trim().endsWith('?')) { + return signal('question', prompt, options); + } + + return null; +} + +/** True if the message is the assistant speaking. */ +function isAssistant(message: PianolaMessage): boolean { + return message.role === 'assistant' || message.source === 'ai'; +} + +/** + * Return a copy of the transcript with structured awaiting-input signals filled + * in for assistant messages that don't already carry one. Pure: inputs are not + * mutated. The watcher runs this before handing messages to the classifier. + */ +export function enrichWithAwaitingInput(messages: readonly PianolaMessage[]): PianolaMessage[] { + return messages.map((message) => { + if (message.awaitingInput || !isAssistant(message)) return message; + const detected = detectAwaitingInput(message.content); + return detected ? { ...message, awaitingInput: detected } : message; + }); +} From 73f96dd241c8cebced1b9c4907102f70fb52b35f Mon Sep 17 00:00:00 2001 From: jSydorowicz21 Date: Thu, 25 Jun 2026 00:14:19 -0500 Subject: [PATCH 04/76] feat(pianola): storage, watch loop, and gated CLI commands Steps 3-4 of the plan; Pianola is now runnable end-to-end from the CLI. Layering: moved the pure brain (classifier, policy, risk, awaiting-detector) from src/main/pianola to src/shared/pianola, since both the CLI watcher and the future in-app engine consume it. Tests moved alongside. Storage: - shared/pianola/storage.ts: filenames, the PianolaDecisionRecord type, and a pure rule validator (drops malformed rules so a bad hand-edit cannot break the engine). - cli/services/pianola-store.ts: reads the rules file and appends to the JSON Lines decision audit log in the Maestro config dir (shared with the desktop, no native deps). Watcher + CLI: - shared/pianola/pianola-watcher.ts: runWatchIteration ties enrich -> classify -> decide -> dispatch/record with all I/O injected, so it is fully unit tested and reusable by a future engine. Dedups by last-handled assistant message id. - cli/commands/pianola.ts: `pianola watch ` polls get_session_history and acts per rules (--dry-run, --interval, --once, --agent); `pianola rules` and `pianola log` are read views. Every command hard-gates on the pianola Encore flag. Typecheck clean across all configs; 108 Pianola/encore tests pass. --- src/__tests__/cli/commands/pianola.test.ts | 61 +++++ .../cli/services/pianola-store.test.ts | 152 ++++++++++++ .../pianola/pianola-awaiting-detector.test.ts | 4 +- .../pianola/pianola-classifier.test.ts | 2 +- .../pianola/pianola-message-contract.test.ts | 0 .../pianola/pianola-policy.test.ts | 2 +- .../shared/pianola/pianola-watcher.test.ts | 175 ++++++++++++++ src/__tests__/shared/pianola/storage.test.ts | 76 ++++++ src/cli/commands/pianola.ts | 220 ++++++++++++++++++ src/cli/index.ts | 33 ++- src/cli/services/pianola-store.ts | 91 ++++++++ .../pianola/pianola-awaiting-detector.ts | 2 +- .../pianola/pianola-classifier.ts | 2 +- .../pianola/pianola-policy.ts | 6 +- src/{main => shared}/pianola/pianola-risk.ts | 2 +- src/shared/pianola/pianola-watcher.ts | 141 +++++++++++ src/shared/pianola/storage.ts | 132 +++++++++++ src/shared/pianola/types.ts | 2 +- 18 files changed, 1089 insertions(+), 14 deletions(-) create mode 100644 src/__tests__/cli/commands/pianola.test.ts create mode 100644 src/__tests__/cli/services/pianola-store.test.ts rename src/__tests__/{main => shared}/pianola/pianola-awaiting-detector.test.ts (96%) rename src/__tests__/{main => shared}/pianola/pianola-classifier.test.ts (99%) rename src/__tests__/{main => shared}/pianola/pianola-message-contract.test.ts (100%) rename src/__tests__/{main => shared}/pianola/pianola-policy.test.ts (99%) create mode 100644 src/__tests__/shared/pianola/pianola-watcher.test.ts create mode 100644 src/__tests__/shared/pianola/storage.test.ts create mode 100644 src/cli/commands/pianola.ts create mode 100644 src/cli/services/pianola-store.ts rename src/{main => shared}/pianola/pianola-awaiting-detector.ts (98%) rename src/{main => shared}/pianola/pianola-classifier.ts (99%) rename src/{main => shared}/pianola/pianola-policy.ts (97%) rename src/{main => shared}/pianola/pianola-risk.ts (98%) create mode 100644 src/shared/pianola/pianola-watcher.ts create mode 100644 src/shared/pianola/storage.ts diff --git a/src/__tests__/cli/commands/pianola.test.ts b/src/__tests__/cli/commands/pianola.test.ts new file mode 100644 index 0000000000..34929114ef --- /dev/null +++ b/src/__tests__/cli/commands/pianola.test.ts @@ -0,0 +1,61 @@ +/** + * @file pianola.test.ts + * @description Tests for the Pianola CLI command gating and read commands. + */ + +import { describe, it, expect, vi, beforeEach, type MockInstance } from 'vitest'; + +vi.mock('../../../cli/services/storage', () => ({ readSettingValue: vi.fn() })); +vi.mock('../../../cli/services/pianola-store', () => ({ + readPianolaRules: vi.fn(() => []), + appendPianolaDecision: vi.fn(), + readPianolaDecisions: vi.fn(() => []), +})); + +import { pianolaRules, pianolaLog } from '../../../cli/commands/pianola'; +import { readSettingValue } from '../../../cli/services/storage'; +import { readPianolaRules, readPianolaDecisions } from '../../../cli/services/pianola-store'; + +describe('pianola command gating', () => { + let consoleSpy: MockInstance; + let errorSpy: MockInstance; + let exitSpy: MockInstance; + + beforeEach(() => { + vi.clearAllMocks(); + consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + exitSpy = vi.spyOn(process, 'exit').mockImplementation(() => { + throw new Error('__exit__'); + }); + }); + + it('blocks rules when the pianola Encore flag is off', () => { + vi.mocked(readSettingValue).mockReturnValue({ pianola: false }); + expect(() => pianolaRules({})).toThrow('__exit__'); + expect(errorSpy).toHaveBeenCalledWith(expect.stringContaining('encore set pianola on')); + expect(readPianolaRules).not.toHaveBeenCalled(); + expect(exitSpy).toHaveBeenCalledWith(1); + }); + + it('emits a JSON disabled error when --json is set', () => { + vi.mocked(readSettingValue).mockReturnValue(undefined); + expect(() => pianolaLog({ json: true })).toThrow('__exit__'); + const payload = JSON.parse(consoleSpy.mock.calls[0][0]); + expect(payload).toMatchObject({ success: false, code: 'PIANOLA_DISABLED' }); + }); + + it('lists rules when the flag is on', () => { + vi.mocked(readSettingValue).mockReturnValue({ pianola: true }); + pianolaRules({}); + expect(readPianolaRules).toHaveBeenCalled(); + expect(consoleSpy).toHaveBeenCalledWith('No Pianola rules defined.'); + }); + + it('shows the decision log when the flag is on', () => { + vi.mocked(readSettingValue).mockReturnValue({ pianola: true }); + pianolaLog({}); + expect(readPianolaDecisions).toHaveBeenCalled(); + expect(consoleSpy).toHaveBeenCalledWith('No Pianola decisions recorded yet.'); + }); +}); diff --git a/src/__tests__/cli/services/pianola-store.test.ts b/src/__tests__/cli/services/pianola-store.test.ts new file mode 100644 index 0000000000..2492304bf9 --- /dev/null +++ b/src/__tests__/cli/services/pianola-store.test.ts @@ -0,0 +1,152 @@ +/** + * @file pianola-store.test.ts + * @description Tests for the Pianola CLI storage (rules read + decision log). + * Uses a temp MAESTRO_USER_DATA dir so reads/writes are isolated. + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import * as fs from 'fs'; +import * as os from 'os'; +import * as path from 'path'; +import { + readPianolaRules, + appendPianolaDecision, + readPianolaDecisions, +} from '../../../cli/services/pianola-store'; +import { + PIANOLA_RULES_FILENAME, + PIANOLA_DECISIONS_FILENAME, + type PianolaDecisionRecord, +} from '../../../shared/pianola/storage'; + +let tmpDir: string; +let prevEnv: string | undefined; + +beforeEach(() => { + prevEnv = process.env.MAESTRO_USER_DATA; + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pianola-store-')); + process.env.MAESTRO_USER_DATA = tmpDir; +}); + +afterEach(() => { + if (prevEnv === undefined) delete process.env.MAESTRO_USER_DATA; + else process.env.MAESTRO_USER_DATA = prevEnv; + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +function writeRulesFile(content: string): void { + fs.writeFileSync(path.join(tmpDir, PIANOLA_RULES_FILENAME), content, 'utf-8'); +} + +function decisionRecord(id: string): PianolaDecisionRecord { + return { + id, + timestamp: '2026-01-01T00:00:00.000Z', + tabId: 'tab-1', + agentId: 'agent-1', + classification: { + kind: 'question', + risk: 'low', + topic: 'tabs', + confidence: 'medium', + evidence: { messageId: 'm1', reason: 'test', structured: false }, + }, + decision: { action: 'escalate', matchedRuleId: null, reason: 'default' }, + dispatched: false, + dryRun: true, + }; +} + +describe('readPianolaRules', () => { + it('returns [] when the rules file is missing', () => { + expect(readPianolaRules()).toEqual([]); + }); + + it('reads a bare array of rules', () => { + writeRulesFile( + JSON.stringify([ + { + id: 'r1', + enabled: true, + scope: 'global', + match: { maxRisk: 'low' }, + action: 'auto_answer', + answer: 'ok', + priority: 1, + createdAt: 1, + updatedAt: 1, + }, + ]) + ); + expect(readPianolaRules().map((r) => r.id)).toEqual(['r1']); + }); + + it('reads an electron-store style { rules: [...] } object', () => { + writeRulesFile( + JSON.stringify({ + rules: [ + { + id: 'r2', + enabled: true, + scope: 'global', + match: {}, + action: 'escalate', + priority: 1, + createdAt: 1, + updatedAt: 1, + }, + ], + }) + ); + expect(readPianolaRules().map((r) => r.id)).toEqual(['r2']); + }); + + it('returns [] for malformed JSON', () => { + writeRulesFile('{ not json'); + expect(readPianolaRules()).toEqual([]); + }); + + it('drops individual invalid rules', () => { + writeRulesFile( + JSON.stringify([ + { + id: 'good', + enabled: true, + scope: 'global', + action: 'escalate', + priority: 1, + createdAt: 1, + updatedAt: 1, + }, + { id: 'bad', scope: 'planet' }, + ]) + ); + expect(readPianolaRules().map((r) => r.id)).toEqual(['good']); + }); +}); + +describe('decision audit log', () => { + it('returns [] when the log is missing', () => { + expect(readPianolaDecisions()).toEqual([]); + }); + + it('appends and reads back records in order', () => { + appendPianolaDecision(decisionRecord('d1')); + appendPianolaDecision(decisionRecord('d2')); + expect(readPianolaDecisions().map((r) => r.id)).toEqual(['d1', 'd2']); + }); + + it('honors a tail limit', () => { + appendPianolaDecision(decisionRecord('d1')); + appendPianolaDecision(decisionRecord('d2')); + appendPianolaDecision(decisionRecord('d3')); + expect(readPianolaDecisions(2).map((r) => r.id)).toEqual(['d2', 'd3']); + }); + + it('skips a corrupt line without failing the read', () => { + appendPianolaDecision(decisionRecord('d1')); + fs.appendFileSync(path.join(tmpDir, PIANOLA_DECISIONS_FILENAME), 'not json\n', 'utf-8'); + appendPianolaDecision(decisionRecord('d2')); + expect(readPianolaDecisions().map((r) => r.id)).toEqual(['d1', 'd2']); + }); +}); diff --git a/src/__tests__/main/pianola/pianola-awaiting-detector.test.ts b/src/__tests__/shared/pianola/pianola-awaiting-detector.test.ts similarity index 96% rename from src/__tests__/main/pianola/pianola-awaiting-detector.test.ts rename to src/__tests__/shared/pianola/pianola-awaiting-detector.test.ts index 096dc31906..bd46c5eae6 100644 --- a/src/__tests__/main/pianola/pianola-awaiting-detector.test.ts +++ b/src/__tests__/shared/pianola/pianola-awaiting-detector.test.ts @@ -7,8 +7,8 @@ import { describe, it, expect } from 'vitest'; import { detectAwaitingInput, enrichWithAwaitingInput, -} from '../../../main/pianola/pianola-awaiting-detector'; -import { classifyMessages } from '../../../main/pianola/pianola-classifier'; +} from '../../../shared/pianola/pianola-awaiting-detector'; +import { classifyMessages } from '../../../shared/pianola/pianola-classifier'; import type { PianolaMessage } from '../../../shared/pianola/types'; let seq = 0; diff --git a/src/__tests__/main/pianola/pianola-classifier.test.ts b/src/__tests__/shared/pianola/pianola-classifier.test.ts similarity index 99% rename from src/__tests__/main/pianola/pianola-classifier.test.ts rename to src/__tests__/shared/pianola/pianola-classifier.test.ts index 8605ce501d..8ecc2152c2 100644 --- a/src/__tests__/main/pianola/pianola-classifier.test.ts +++ b/src/__tests__/shared/pianola/pianola-classifier.test.ts @@ -4,7 +4,7 @@ */ import { describe, it, expect } from 'vitest'; -import { classifyMessages, riskAtMost, maxRisk } from '../../../main/pianola/pianola-classifier'; +import { classifyMessages, riskAtMost, maxRisk } from '../../../shared/pianola/pianola-classifier'; import type { AwaitingInputSignal, PianolaMessage } from '../../../shared/pianola/types'; let seq = 0; diff --git a/src/__tests__/main/pianola/pianola-message-contract.test.ts b/src/__tests__/shared/pianola/pianola-message-contract.test.ts similarity index 100% rename from src/__tests__/main/pianola/pianola-message-contract.test.ts rename to src/__tests__/shared/pianola/pianola-message-contract.test.ts diff --git a/src/__tests__/main/pianola/pianola-policy.test.ts b/src/__tests__/shared/pianola/pianola-policy.test.ts similarity index 99% rename from src/__tests__/main/pianola/pianola-policy.test.ts rename to src/__tests__/shared/pianola/pianola-policy.test.ts index 9890043b50..4c975de1fd 100644 --- a/src/__tests__/main/pianola/pianola-policy.test.ts +++ b/src/__tests__/shared/pianola/pianola-policy.test.ts @@ -9,7 +9,7 @@ import { selectRule, ruleAppliesToScope, ruleMatchesClassification, -} from '../../../main/pianola/pianola-policy'; +} from '../../../shared/pianola/pianola-policy'; import type { PianolaClassification, PianolaRisk, diff --git a/src/__tests__/shared/pianola/pianola-watcher.test.ts b/src/__tests__/shared/pianola/pianola-watcher.test.ts new file mode 100644 index 0000000000..9330086e08 --- /dev/null +++ b/src/__tests__/shared/pianola/pianola-watcher.test.ts @@ -0,0 +1,175 @@ +/** + * @file pianola-watcher.test.ts + * @description Tests for the dependency-injected watch iteration. + */ + +import { describe, it, expect, vi } from 'vitest'; +import { + runWatchIteration, + initialWatchState, + type WatchDeps, + type WatchTarget, +} from '../../../shared/pianola/pianola-watcher'; +import type { PianolaMessage, PianolaRule } from '../../../shared/pianola/types'; +import type { PianolaDecisionRecord } from '../../../shared/pianola/storage'; + +let seq = 0; +function assistant(content: string): PianolaMessage { + seq += 1; + return { + id: `m${seq}`, + role: 'assistant', + source: 'ai', + content, + timestamp: new Date(Date.UTC(2026, 0, 1, 0, 0, seq)).toISOString(), + }; +} + +function autoAnswerRule(): PianolaRule { + return { + id: 'rule-1', + enabled: true, + scope: 'global', + match: { maxRisk: 'low', kinds: ['question'] }, + action: 'auto_answer', + answer: 'Use tabs.', + priority: 1, + createdAt: 1, + updatedAt: 1, + }; +} + +function makeDeps(over: Partial = {}): { + deps: WatchDeps; + records: PianolaDecisionRecord[]; + dispatch: ReturnType; +} { + const records: PianolaDecisionRecord[] = []; + const dispatch = vi.fn(async () => ({ + success: true as boolean, + error: undefined as string | undefined, + })); + const deps: WatchDeps = { + readRules: () => [], + dispatch, + recordDecision: (r) => records.push(r), + now: () => '2026-01-01T00:00:00.000Z', + genId: () => 'decision-id', + log: () => {}, + ...over, + }; + return { deps, records, dispatch }; +} + +const target: WatchTarget = { tabId: 'tab-1', agentId: 'agent-1' }; + +describe('runWatchIteration', () => { + it('does nothing for a non-actionable transcript', async () => { + const { deps, records, dispatch } = makeDeps(); + const { result } = await runWatchIteration( + [assistant('All tests pass and the build is green.')], + target, + initialWatchState(), + deps, + { dryRun: false } + ); + expect(result.acted).toBe(false); + expect(result.decision).toBeNull(); + expect(records).toHaveLength(0); + expect(dispatch).not.toHaveBeenCalled(); + }); + + it('auto-answers a low-risk question matched by a rule and records it dispatched', async () => { + const { deps, records, dispatch } = makeDeps({ readRules: () => [autoAnswerRule()] }); + const { result } = await runWatchIteration( + [assistant('Should I name it count or total?')], + target, + initialWatchState(), + deps, + { dryRun: false } + ); + expect(result.decision?.action).toBe('auto_answer'); + expect(dispatch).toHaveBeenCalledWith(target, 'Use tabs.'); + expect(records[0].dispatched).toBe(true); + expect(records[0].dryRun).toBe(false); + }); + + it('does not dispatch in dry-run mode but still records the decision', async () => { + const { deps, records, dispatch } = makeDeps({ readRules: () => [autoAnswerRule()] }); + const { result } = await runWatchIteration( + [assistant('Should I name it count or total?')], + target, + initialWatchState(), + deps, + { dryRun: true } + ); + expect(result.decision?.action).toBe('auto_answer'); + expect(dispatch).not.toHaveBeenCalled(); + expect(records[0].dispatched).toBe(false); + expect(records[0].dryRun).toBe(true); + }); + + it('escalates and records when no rule matches', async () => { + const { deps, records, dispatch } = makeDeps(); + const { result } = await runWatchIteration( + [assistant('Should I name it count or total?')], + target, + initialWatchState(), + deps, + { dryRun: false } + ); + expect(result.decision?.action).toBe('escalate'); + expect(dispatch).not.toHaveBeenCalled(); + expect(records).toHaveLength(1); + }); + + it('records a dispatch failure on the audit entry', async () => { + const dispatch = vi.fn(async () => ({ success: false, error: 'session busy' })); + const { deps, records } = makeDeps({ readRules: () => [autoAnswerRule()], dispatch }); + await runWatchIteration( + [assistant('Should I name it count or total?')], + target, + initialWatchState(), + deps, + { dryRun: false } + ); + expect(records[0].dispatched).toBe(false); + expect(records[0].error).toBe('session busy'); + }); + + it('does not re-handle the same prompt on the next iteration', async () => { + const { deps, records } = makeDeps(); + const messages = [assistant('Should I deploy to production?')]; + + const first = await runWatchIteration(messages, target, initialWatchState(), deps, { + dryRun: false, + }); + expect(first.result.acted).toBe(true); + expect(records).toHaveLength(1); + + const second = await runWatchIteration(messages, target, first.state, deps, { dryRun: false }); + expect(second.result.acted).toBe(false); + expect(second.result.skipped).toContain('already handled'); + expect(records).toHaveLength(1); // no new record + }); + + it('handles a fresh prompt after the previous one was handled', async () => { + const { deps, records } = makeDeps(); + const first = await runWatchIteration( + [assistant('Should I deploy to production?')], + target, + initialWatchState(), + deps, + { dryRun: false } + ); + const second = await runWatchIteration( + [assistant('Should I publish the release?')], + target, + first.state, + deps, + { dryRun: false } + ); + expect(second.result.acted).toBe(true); + expect(records).toHaveLength(2); + }); +}); diff --git a/src/__tests__/shared/pianola/storage.test.ts b/src/__tests__/shared/pianola/storage.test.ts new file mode 100644 index 0000000000..6b837c1e21 --- /dev/null +++ b/src/__tests__/shared/pianola/storage.test.ts @@ -0,0 +1,76 @@ +/** + * @file storage.test.ts + * @description Tests for the pure Pianola rule validator. + */ + +import { describe, it, expect } from 'vitest'; +import { validatePianolaRule, validatePianolaRules } from '../../../shared/pianola/storage'; + +function validRaw(overrides: Record = {}): Record { + return { + id: 'r1', + enabled: true, + scope: 'global', + match: { maxRisk: 'low', kinds: ['question'], topicIncludes: ['tabs'] }, + action: 'auto_answer', + answer: 'Use tabs.', + priority: 100, + createdAt: 1, + updatedAt: 2, + ...overrides, + }; +} + +describe('validatePianolaRule', () => { + it('accepts a well-formed rule', () => { + const rule = validatePianolaRule(validRaw()); + expect(rule).not.toBeNull(); + expect(rule?.id).toBe('r1'); + expect(rule?.match.kinds).toEqual(['question']); + }); + + it('accepts a minimal rule with an empty match', () => { + const rule = validatePianolaRule( + validRaw({ match: undefined, action: 'escalate', answer: undefined }) + ); + expect(rule?.match).toEqual({}); + }); + + it.each([ + ['missing id', { id: undefined }], + ['empty id', { id: '' }], + ['non-boolean enabled', { enabled: 'yes' }], + ['bad scope', { scope: 'planet' }], + ['bad action', { action: 'nuke' }], + ['non-numeric priority', { priority: 'high' }], + ['missing timestamps', { createdAt: undefined }], + ['bad maxRisk', { match: { maxRisk: 'extreme' } }], + ['bad kinds', { match: { kinds: ['banana'] } }], + ['non-string topicIncludes', { match: { topicIncludes: [1, 2] } }], + ['non-string scopeId', { scopeId: 42 }], + ])('rejects %s', (_label, overrides) => { + expect(validatePianolaRule(validRaw(overrides))).toBeNull(); + }); + + it('rejects non-object input', () => { + expect(validatePianolaRule(null)).toBeNull(); + expect(validatePianolaRule('rule')).toBeNull(); + expect(validatePianolaRule([])).toBeNull(); + }); +}); + +describe('validatePianolaRules', () => { + it('keeps valid rules and drops invalid ones', () => { + const rules = validatePianolaRules([ + validRaw({ id: 'a' }), + { junk: true }, + validRaw({ id: 'b' }), + ]); + expect(rules.map((r) => r.id)).toEqual(['a', 'b']); + }); + + it('returns an empty array for non-array input', () => { + expect(validatePianolaRules({})).toEqual([]); + expect(validatePianolaRules(undefined)).toEqual([]); + }); +}); diff --git a/src/cli/commands/pianola.ts b/src/cli/commands/pianola.ts new file mode 100644 index 0000000000..163fd353e9 --- /dev/null +++ b/src/cli/commands/pianola.ts @@ -0,0 +1,220 @@ +/** + * Pianola CLI commands. + * + * `pianola watch ` polls a desktop tab's transcript, and when the agent + * is awaiting the user it classifies the prompt, applies the user's rules, and + * either auto-answers (low-risk, explicit rule) or records an escalation. Every + * command hard-gates on the `pianola` Encore flag, so a headless CLI cannot run + * autonomous behavior on an install that has not opted in. + * + * The decision logic lives in the pure, tested watcher (shared/pianola); this + * file is the I/O shell: WebSocket polling, dispatch, and console output. + */ + +import { readSettingValue } from '../services/storage'; +import { + readPianolaRules, + appendPianolaDecision, + readPianolaDecisions, +} from '../services/pianola-store'; +import { MaestroClient } from '../services/maestro-client'; +import { runDispatch } from './dispatch'; +import { generateUUID } from '../../shared/uuid'; +import { + runWatchIteration, + initialWatchState, + type WatchDeps, + type WatchState, + type WatchTarget, +} from '../../shared/pianola/pianola-watcher'; +import type { PianolaMessage } from '../../shared/pianola/types'; + +const DEFAULT_INTERVAL_SECONDS = 5; +const POLL_TAIL = 40; + +export interface PianolaWatchOptions { + agent?: string; + interval?: string; + dryRun?: boolean; + once?: boolean; + json?: boolean; +} + +export interface PianolaListOptions { + json?: boolean; +} + +export interface PianolaLogOptions { + limit?: string; + json?: boolean; +} + +interface SessionHistoryResponse { + success?: boolean; + error?: string; + code?: string; + tabId?: string; + sessionId?: string; + agentId?: string; + agentSessionId?: string | null; + messages?: PianolaMessage[]; +} + +/** Exit with a clear message if the Pianola Encore feature is disabled. */ +function ensurePianolaEnabled(json?: boolean): void { + const flags = readSettingValue('encoreFeatures') as Record | undefined; + if (flags?.pianola === true) return; + const message = 'Pianola is not enabled. Enable it with: maestro-cli encore set pianola on'; + if (json) { + console.log(JSON.stringify({ success: false, error: message, code: 'PIANOLA_DISABLED' })); + } else { + console.error(message); + } + process.exit(1); +} + +/** Parse `--interval` as seconds ("5" or "5s"); defaults to 5, minimum 1. */ +function parseIntervalSeconds(raw?: string): number { + if (!raw) return DEFAULT_INTERVAL_SECONDS; + const match = raw.trim().match(/^(\d+)s?$/i); + if (!match) return DEFAULT_INTERVAL_SECONDS; + return Math.max(1, parseInt(match[1], 10)); +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +/** Watch a tab and act on awaiting-input prompts per the configured rules. */ +export async function pianolaWatch(tabId: string, options: PianolaWatchOptions): Promise { + ensurePianolaEnabled(options.json); + + const intervalMs = parseIntervalSeconds(options.interval) * 1000; + const dryRun = !!options.dryRun; + const once = !!options.once; + + const deps: WatchDeps = { + readRules: readPianolaRules, + dispatch: async (target, answer) => { + const res = await runDispatch(target.agentId, answer, { tab: target.tabId }); + return { success: !!res.success, error: res.error }; + }, + recordDecision: appendPianolaDecision, + now: () => new Date().toISOString(), + genId: () => generateUUID(), + log: (line) => console.log(line), + }; + + let state: WatchState = initialWatchState(); + let stopped = false; + const onSignal = (): void => { + stopped = true; + }; + process.on('SIGINT', onSignal); + + const client = new MaestroClient(); + try { + await client.connect(); + } catch (error) { + process.off('SIGINT', onSignal); + const message = error instanceof Error ? error.message : String(error); + console.error(`[pianola] could not connect to Maestro: ${message}`); + process.exit(1); + return; + } + + if (!once) { + console.log( + `[pianola] watching tab ${tabId} every ${intervalMs / 1000}s${dryRun ? ' (dry-run)' : ''}. Ctrl+C to stop.` + ); + } + + try { + for (;;) { + let resp: SessionHistoryResponse; + try { + resp = await client.sendCommand( + { type: 'get_session_history', tabId, tail: POLL_TAIL }, + 'session_history_result' + ); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + console.error(`[pianola] poll failed: ${message}`); + if (once || stopped) break; + await sleep(intervalMs); + continue; + } + + if (!resp.success) { + console.error(`[pianola] ${resp.error ?? 'session history unavailable'}`); + if (once || stopped) break; + await sleep(intervalMs); + continue; + } + + const agentId = options.agent ?? resp.agentId ?? ''; + if (!agentId) { + console.error('[pianola] could not resolve an agent id for this tab; pass --agent'); + break; + } + + const target: WatchTarget = { tabId, agentId }; + const messages = resp.messages ?? []; + const iteration = await runWatchIteration(messages, target, state, deps, { dryRun }); + state = iteration.state; + + if (once || stopped) break; + await sleep(intervalMs); + } + } finally { + process.off('SIGINT', onSignal); + client.disconnect(); + } +} + +/** List the configured Pianola rules (read-only from the CLI). */ +export function pianolaRules(options: PianolaListOptions): void { + ensurePianolaEnabled(options.json); + const rules = readPianolaRules(); + if (options.json) { + console.log(JSON.stringify({ rules })); + return; + } + if (rules.length === 0) { + console.log('No Pianola rules defined.'); + return; + } + console.log('Pianola rules:'); + for (const rule of rules) { + const scope = rule.scope === 'global' ? 'global' : `${rule.scope}:${rule.scopeId ?? '?'}`; + const label = rule.description ?? rule.id; + console.log( + ` ${rule.enabled ? 'on ' : 'off'} [p${rule.priority}] ${scope} ${rule.action} ${label}` + ); + } +} + +/** Show recent decisions from the audit log. */ +export function pianolaLog(options: PianolaLogOptions): void { + ensurePianolaEnabled(options.json); + const limit = options.limit ? Math.max(1, parseInt(options.limit, 10) || 20) : 20; + const records = readPianolaDecisions(limit); + if (options.json) { + console.log(JSON.stringify({ decisions: records })); + return; + } + if (records.length === 0) { + console.log('No Pianola decisions recorded yet.'); + return; + } + console.log(`Last ${records.length} Pianola decision(s):`); + for (const rec of records) { + const flags = [rec.dispatched ? 'sent' : '', rec.dryRun ? 'dry-run' : ''] + .filter(Boolean) + .join(','); + const suffix = flags ? ` (${flags})` : ''; + console.log( + ` ${rec.timestamp} ${rec.classification.kind}/${rec.classification.risk} -> ${rec.decision.action}${suffix} ${rec.classification.topic}` + ); + } +} diff --git a/src/cli/index.ts b/src/cli/index.ts index 2175a720c9..45faa0e91a 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -71,6 +71,7 @@ import { focusAgent, switchMode } from './commands/agent-control'; import { tabNew, tabClose, tabRename, tabStar } from './commands/tab'; import { setTheme } from './commands/set-theme'; import { encoreList, encoreSet } from './commands/encore'; +import { pianolaWatch, pianolaRules, pianolaLog } from './commands/pianola'; // Injected at build time by scripts/build-cli.mjs via esbuild `define`. // The typeof guard keeps non-esbuild execution paths (ts-node, plain tsc output) from @@ -793,7 +794,9 @@ encore encore .command('enable ') - .description('Enable an Encore feature (directorNotes, usageStats, symphony, maestroCue)') + .description( + 'Enable an Encore feature (directorNotes, usageStats, symphony, maestroCue, pianola)' + ) .option('--json', 'Output as JSON (for scripting)') .action((feature, options) => encoreSet(feature, true, options)); @@ -803,6 +806,34 @@ encore .option('--json', 'Output as JSON (for scripting)') .action((feature, options) => encoreSet(feature, false, options)); +// Pianola - the autonomous manager agent (Encore-gated, off by default). +const pianola = program + .command('pianola') + .description('Pianola manager agent: watch tabs, auto-answer or escalate per your rules'); + +pianola + .command('watch ') + .description('Watch a desktop tab and act on awaiting-input prompts per your rules') + .option('--agent ', 'Agent id to dispatch answers to (defaults to the tab owner)') + .option('--interval ', 'Polling interval in seconds (default 5)') + .option('--dry-run', 'Classify and record decisions but never send a message') + .option('--once', 'Run a single iteration instead of looping') + .option('--json', 'Reserved for scripting; affects the disabled-feature error only') + .action((tabId, options) => pianolaWatch(tabId, options)); + +pianola + .command('rules') + .description('List the configured Pianola rules') + .option('--json', 'Output as JSON (for scripting)') + .action((options) => pianolaRules(options)); + +pianola + .command('log') + .description('Show recent Pianola decisions from the audit log') + .option('--limit ', 'Maximum number of records to show (default 20)') + .option('--json', 'Output as JSON (for scripting)') + .action((options) => pianolaLog(options)); + // Prompts command — read Maestro's bundled or user-customized system prompts. // Designed for agent self-fetch: parent prompts reference includes via `{{REF:_name}}` // and the agent retrieves the full content on demand with `prompts get _name`. diff --git a/src/cli/services/pianola-store.ts b/src/cli/services/pianola-store.ts new file mode 100644 index 0000000000..ad5138cb0d --- /dev/null +++ b/src/cli/services/pianola-store.ts @@ -0,0 +1,91 @@ +/** + * Pianola CLI storage. + * + * Reads the editable rules file and appends to the decision audit log, both in + * the Maestro config dir (the same directory the CLI reads settings from, so the + * desktop app and CLI share them). Rules are read-only from the CLI; the desktop + * UI owns editing. The audit log is JSON Lines: append-only, human-readable, and + * writable from a plain Node process with no native dependencies. + */ + +import * as fs from 'fs'; +import * as path from 'path'; +import { getConfigDirectory } from './storage'; +import { + PIANOLA_RULES_FILENAME, + PIANOLA_DECISIONS_FILENAME, + validatePianolaRules, + type PianolaDecisionRecord, +} from '../../shared/pianola/storage'; +import type { PianolaRule } from '../../shared/pianola/types'; + +function rulesPath(): string { + return path.join(getConfigDirectory(), PIANOLA_RULES_FILENAME); +} + +function decisionsPath(): string { + return path.join(getConfigDirectory(), PIANOLA_DECISIONS_FILENAME); +} + +/** + * Read and validate the rules file. Returns an empty list when the file is + * missing or malformed, and drops any individual invalid rule, so a bad + * hand-edit cannot break the watcher. + */ +export function readPianolaRules(): PianolaRule[] { + let content: string; + try { + content = fs.readFileSync(rulesPath(), 'utf-8'); + } catch (error) { + if ((error as NodeJS.ErrnoException).code === 'ENOENT') return []; + throw error; + } + let parsed: unknown; + try { + parsed = JSON.parse(content); + } catch { + return []; + } + // Accept either a bare array or an electron-store style { rules: [...] }. + const raw = Array.isArray(parsed) + ? parsed + : ((parsed as { rules?: unknown } | null)?.rules ?? []); + return validatePianolaRules(raw); +} + +/** Append one decision record to the audit log as a JSON line. */ +export function appendPianolaDecision(record: PianolaDecisionRecord): void { + const dir = getConfigDirectory(); + if (!fs.existsSync(dir)) { + fs.mkdirSync(dir, { recursive: true }); + } + fs.appendFileSync(decisionsPath(), `${JSON.stringify(record)}\n`, 'utf-8'); +} + +/** + * Read recent decision records (most recent last). Malformed lines are skipped. + * When `limit` is given, only the last `limit` records are returned. + */ +export function readPianolaDecisions(limit?: number): PianolaDecisionRecord[] { + let content: string; + try { + content = fs.readFileSync(decisionsPath(), 'utf-8'); + } catch (error) { + if ((error as NodeJS.ErrnoException).code === 'ENOENT') return []; + throw error; + } + const records: PianolaDecisionRecord[] = []; + for (const line of content.split('\n')) { + const trimmed = line.trim(); + if (!trimmed) continue; + try { + records.push(JSON.parse(trimmed) as PianolaDecisionRecord); + } catch { + // Skip a corrupt line rather than failing the whole read. + } + } + if (limit !== undefined && limit >= 0 && records.length > limit) { + return records.slice(records.length - limit); + } + return records; +} diff --git a/src/main/pianola/pianola-awaiting-detector.ts b/src/shared/pianola/pianola-awaiting-detector.ts similarity index 98% rename from src/main/pianola/pianola-awaiting-detector.ts rename to src/shared/pianola/pianola-awaiting-detector.ts index c2bfa1eb44..6e986bee5b 100644 --- a/src/main/pianola/pianola-awaiting-detector.ts +++ b/src/shared/pianola/pianola-awaiting-detector.ts @@ -16,7 +16,7 @@ * Precedence: plan_review > permission > choice > question. */ -import type { AwaitingInputSignal, PianolaMessage } from '../../shared/pianola/types'; +import type { AwaitingInputSignal, PianolaMessage } from './types'; /** Plan-mode review: agent is presenting a plan and asking to proceed. */ const PLAN_REVIEW_RE = diff --git a/src/main/pianola/pianola-classifier.ts b/src/shared/pianola/pianola-classifier.ts similarity index 99% rename from src/main/pianola/pianola-classifier.ts rename to src/shared/pianola/pianola-classifier.ts index f0ce1750b4..396336cfd4 100644 --- a/src/main/pianola/pianola-classifier.ts +++ b/src/shared/pianola/pianola-classifier.ts @@ -16,7 +16,7 @@ import type { PianolaClassification, PianolaMessage, PianolaSignalKind, -} from '../../shared/pianola/types'; +} from './types'; import { maxRisk, rateRisk } from './pianola-risk'; // Re-exported for convenience so callers can pull risk helpers from the classifier. diff --git a/src/main/pianola/pianola-policy.ts b/src/shared/pianola/pianola-policy.ts similarity index 97% rename from src/main/pianola/pianola-policy.ts rename to src/shared/pianola/pianola-policy.ts index c14c781b52..c27fdeecd5 100644 --- a/src/main/pianola/pianola-policy.ts +++ b/src/shared/pianola/pianola-policy.ts @@ -13,11 +13,7 @@ * silence a high-risk prompt. No I/O, no Electron, no app state. */ -import type { - PianolaClassification, - PianolaDecision, - PianolaRule, -} from '../../shared/pianola/types'; +import type { PianolaClassification, PianolaDecision, PianolaRule } from './types'; import { riskAtMost } from './pianola-risk'; /** Context needed to scope-filter rules to the current tab/project. */ diff --git a/src/main/pianola/pianola-risk.ts b/src/shared/pianola/pianola-risk.ts similarity index 98% rename from src/main/pianola/pianola-risk.ts rename to src/shared/pianola/pianola-risk.ts index c3a9f1f968..b122ac9712 100644 --- a/src/main/pianola/pianola-risk.ts +++ b/src/shared/pianola/pianola-risk.ts @@ -15,7 +15,7 @@ * something dangerous (unsafe). */ -import type { PianolaRisk } from '../../shared/pianola/types'; +import type { PianolaRisk } from './types'; const RISK_ORDER: Record = { low: 0, medium: 1, high: 2 }; diff --git a/src/shared/pianola/pianola-watcher.ts b/src/shared/pianola/pianola-watcher.ts new file mode 100644 index 0000000000..73b606b8a5 --- /dev/null +++ b/src/shared/pianola/pianola-watcher.ts @@ -0,0 +1,141 @@ +/** + * Pianola watcher - one iteration of the watch loop, with all I/O injected. + * + * This is the orchestration that ties the brain together: enrich a transcript + * with structured awaiting-input signals, classify it, decide via the rules, + * and (for an auto-answer that isn't a dry run) dispatch. It records every + * actionable decision. All side effects come in through `WatchDeps`, so the loop + * is unit-testable without a desktop app, network, or filesystem, and the same + * logic can back both the CLI watcher and a future in-app engine. + */ + +import type { PianolaClassification, PianolaDecision, PianolaMessage, PianolaRule } from './types'; +import type { PianolaDecisionRecord } from './storage'; +import { classifyMessages } from './pianola-classifier'; +import { enrichWithAwaitingInput } from './pianola-awaiting-detector'; +import { decide } from './pianola-policy'; + +/** The tab Pianola is watching and the agent it would dispatch to. */ +export interface WatchTarget { + tabId: string; + agentId: string; + projectPath?: string; +} + +/** Injected side effects. */ +export interface WatchDeps { + readRules: () => PianolaRule[]; + /** Send an auto-answer to the target tab. */ + dispatch: (target: WatchTarget, answer: string) => Promise<{ success: boolean; error?: string }>; + recordDecision: (record: PianolaDecisionRecord) => void; + /** ISO-8601 timestamp source (injected for determinism in tests). */ + now: () => string; + /** Unique id source for audit records. */ + genId: () => string; + /** Human-readable progress line. */ + log: (line: string) => void; +} + +/** Per-tab loop state, carried between iterations. */ +export interface WatchState { + /** Id of the last assistant message we already acted on (dedup guard). */ + lastHandledMessageId: string | null; +} + +export function initialWatchState(): WatchState { + return { lastHandledMessageId: null }; +} + +export interface IterationResult { + classification: PianolaClassification; + /** The decision taken, or null when nothing actionable / already handled. */ + decision: PianolaDecision | null; + record: PianolaDecisionRecord | null; + /** True when this iteration produced a new decision. */ + acted: boolean; + /** Reason an actionable prompt was skipped (e.g. already handled). */ + skipped?: string; +} + +function describe(result: IterationResult): string { + const { classification: c, decision } = result; + if (!decision) { + return result.skipped + ? `[pianola] skip (${result.skipped})` + : `[pianola] none (${c.evidence.reason})`; + } + const detail = c.topic ? `: ${c.topic}` : ''; + return `[pianola] ${c.kind}/${c.risk} -> ${decision.action}${detail}`; +} + +/** + * Run one watch iteration over the latest transcript for a tab. Returns the next + * state and a structured result. Pure aside from the injected deps; never throws + * for an expected dispatch failure (it is recorded on the audit entry instead). + */ +export async function runWatchIteration( + messages: readonly PianolaMessage[], + target: WatchTarget, + state: WatchState, + deps: WatchDeps, + options: { dryRun: boolean } +): Promise<{ state: WatchState; result: IterationResult }> { + const enriched = enrichWithAwaitingInput(messages); + const classification = classifyMessages(enriched); + + if (classification.kind === 'none') { + const result: IterationResult = { classification, decision: null, record: null, acted: false }; + deps.log(describe(result)); + return { state, result }; + } + + const messageId = classification.evidence.messageId; + if (messageId && messageId === state.lastHandledMessageId) { + const result: IterationResult = { + classification, + decision: null, + record: null, + acted: false, + skipped: 'already handled this prompt', + }; + deps.log(describe(result)); + return { state, result }; + } + + const rules = deps.readRules(); + const decision = decide(classification, rules, { + projectPath: target.projectPath, + tabId: target.tabId, + }); + + let dispatched = false; + let error: string | undefined; + if (decision.action === 'auto_answer' && !options.dryRun) { + const res = await deps.dispatch(target, decision.answer); + dispatched = res.success; + if (!res.success) error = res.error ?? 'dispatch failed'; + } + + const record: PianolaDecisionRecord = { + id: deps.genId(), + timestamp: deps.now(), + tabId: target.tabId, + agentId: target.agentId, + projectPath: target.projectPath, + classification, + decision, + dispatched, + dryRun: options.dryRun, + ...(error ? { error } : {}), + }; + deps.recordDecision(record); + + const result: IterationResult = { classification, decision, record, acted: true }; + deps.log(describe(result) + (error ? ` (dispatch error: ${error})` : '')); + + // Advance the dedup cursor so we don't re-handle the same prompt next poll. + const nextState: WatchState = { + lastHandledMessageId: messageId ?? state.lastHandledMessageId, + }; + return { state: nextState, result }; +} diff --git a/src/shared/pianola/storage.ts b/src/shared/pianola/storage.ts new file mode 100644 index 0000000000..dc9478b55e --- /dev/null +++ b/src/shared/pianola/storage.ts @@ -0,0 +1,132 @@ +/** + * Pianola storage contract - shared constants, the audit record type, and a + * pure validator. The fs/electron specifics live in the CLI store + * (src/cli/services/pianola-store.ts) and, later, a desktop store; both import + * these so the filenames, record shape, and validation stay in one place. + */ + +import type { + PianolaClassification, + PianolaDecision, + PianolaRule, + PianolaRuleScope, + PianolaSignalKind, + PianolaActionKind, + PianolaRisk, +} from './types'; + +/** Editable rules file (JSON array of PianolaRule), in the Maestro config dir. */ +export const PIANOLA_RULES_FILENAME = 'maestro-pianola-rules.json'; + +/** Append-only decision audit log (JSON Lines), in the Maestro config dir. */ +export const PIANOLA_DECISIONS_FILENAME = 'pianola-decisions.jsonl'; + +/** One recorded decision in the audit log. */ +export interface PianolaDecisionRecord { + id: string; + /** ISO-8601 timestamp of when the decision was made. */ + timestamp: string; + tabId: string; + agentId: string; + projectPath?: string; + classification: PianolaClassification; + decision: PianolaDecision; + /** True if Pianola actually sent a message (auto-answer that was dispatched). */ + dispatched: boolean; + /** True if running in dry-run mode (never dispatches). */ + dryRun: boolean; + /** Populated when a dispatch attempt failed. */ + error?: string; +} + +const RULE_SCOPES: readonly PianolaRuleScope[] = ['global', 'project', 'tab']; +const ACTION_KINDS: readonly PianolaActionKind[] = ['auto_answer', 'escalate', 'ignore']; +const RISKS: readonly PianolaRisk[] = ['low', 'medium', 'high']; +const SIGNAL_KINDS: readonly PianolaSignalKind[] = ['question', 'blocked', 'none']; + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +function isStringArray(value: unknown): value is string[] { + return Array.isArray(value) && value.every((v) => typeof v === 'string'); +} + +function validateMatch(raw: unknown): PianolaRule['match'] | null { + if (raw === undefined) return {}; + if (!isRecord(raw)) return null; + const match: PianolaRule['match'] = {}; + + if (raw.maxRisk !== undefined) { + if (!RISKS.includes(raw.maxRisk as PianolaRisk)) return null; + match.maxRisk = raw.maxRisk as PianolaRisk; + } + if (raw.kinds !== undefined) { + if ( + !isStringArray(raw.kinds) || + !raw.kinds.every((k) => SIGNAL_KINDS.includes(k as PianolaSignalKind)) + ) + return null; + match.kinds = raw.kinds as PianolaSignalKind[]; + } + if (raw.topicIncludes !== undefined) { + if (!isStringArray(raw.topicIncludes)) return null; + match.topicIncludes = raw.topicIncludes; + } + + return match; +} + +/** + * Validate one untrusted rule object. Returns a typed PianolaRule, or null if + * the shape is invalid (callers drop invalid rules rather than throwing, so one + * bad hand-edited rule cannot break the whole engine). + */ +export function validatePianolaRule(raw: unknown): PianolaRule | null { + if (!isRecord(raw)) return null; + + if (typeof raw.id !== 'string' || raw.id.length === 0) return null; + if (typeof raw.enabled !== 'boolean') return null; + if (!RULE_SCOPES.includes(raw.scope as PianolaRuleScope)) return null; + if (!ACTION_KINDS.includes(raw.action as PianolaActionKind)) return null; + if (typeof raw.priority !== 'number' || !Number.isFinite(raw.priority)) return null; + if (typeof raw.createdAt !== 'number' || typeof raw.updatedAt !== 'number') return null; + + const match = validateMatch(raw.match); + if (match === null) return null; + + if (raw.scopeId !== undefined && typeof raw.scopeId !== 'string') return null; + if (raw.answer !== undefined && typeof raw.answer !== 'string') return null; + if (raw.description !== undefined && typeof raw.description !== 'string') return null; + + const rule: PianolaRule = { + id: raw.id, + enabled: raw.enabled, + scope: raw.scope as PianolaRuleScope, + match, + action: raw.action as PianolaActionKind, + priority: raw.priority, + createdAt: raw.createdAt, + updatedAt: raw.updatedAt, + }; + if (typeof raw.scopeId === 'string') rule.scopeId = raw.scopeId; + if (typeof raw.answer === 'string') rule.answer = raw.answer; + if (typeof raw.description === 'string') rule.description = raw.description; + + return rule; +} + +/** Validate an untrusted rules payload, dropping any malformed entries. */ +export function validatePianolaRules(raw: unknown): PianolaRule[] { + if (!Array.isArray(raw)) return []; + const rules: PianolaRule[] = []; + for (const item of raw) { + const rule = validatePianolaRule(item); + if (rule) rules.push(rule); + } + return rules; +} + +// Re-exported so storage consumers get the decision/classification types from +// one import site. +export type { PianolaClassification, PianolaDecision, PianolaRule }; diff --git a/src/shared/pianola/types.ts b/src/shared/pianola/types.ts index a4ad1a2d3f..56a3ba1bc0 100644 --- a/src/shared/pianola/types.ts +++ b/src/shared/pianola/types.ts @@ -7,7 +7,7 @@ * rules or escalates. These contracts are shared across main, renderer, and CLI. * * The classifier and policy engine that consume these types are pure functions - * (see src/main/pianola/pianola-classifier.ts and pianola-policy.ts). + * (see src/shared/pianola/pianola-classifier.ts and pianola-policy.ts). */ /** Risk of acting on a detected prompt, ordered low < medium < high. */ From e92235c46b6194913e1df5342fd708b3c252fec9 Mon Sep 17 00:00:00 2001 From: jSydorowicz21 Date: Thu, 25 Jun 2026 00:38:13 -0500 Subject: [PATCH 05/76] fix(pianola): harden watcher safety and detection per codex review Addresses the runnable-MVP review findings: - HIGH audit-before-dispatch: the watcher records the decision BEFORE sending a message (intent record), then appends the dispatch outcome under the same id. A pre-dispatch audit-write failure now fails closed (no message sent). - HIGH bounded retry: a failed dispatch no longer advances the dedup cursor, so the prompt is retried up to MAX_DISPATCH_ATTEMPTS, then Pianola gives up instead of either looping forever or skipping it forever. - HIGH injection hardening: option extraction is strict (clean short tokens; no file paths, markdown links, version numbers) and a `choice` requires real question/choice context, so prose cannot be shaped into an auto-answerable choice. The classifier reuses the same option/asking helpers (one source). - MEDIUM decision records are validated and folded by id on read, so a malformed JSONL line cannot crash `pianola log`. - MEDIUM projectPath is threaded through the get_session_history result so project-scoped rules work in the CLI watcher. - MEDIUM the watch loop now contains iteration errors (logs and continues). - LOW a malformed rules file surfaces a one-line warning instead of silently disabling all rules. - Added watcher safety tests (audit ordering, retry/give-up, fail-closed), detector false-positive fixtures, store folding/validation tests, and CLI watch tests (mocked client + dispatch). Typecheck clean across all configs; 123 Pianola/encore tests pass. --- src/__tests__/cli/commands/pianola.test.ts | 121 ++++++++++++- .../cli/services/pianola-store.test.ts | 17 ++ .../pianola/pianola-awaiting-detector.test.ts | 26 +++ .../shared/pianola/pianola-watcher.test.ts | 146 ++++++++++----- src/cli/commands/pianola.ts | 19 +- src/cli/services/pianola-store.ts | 43 +++-- .../web-server/handlers/messageHandlers.ts | 1 + src/main/web-server/types.ts | 2 + src/main/web-server/web-server-factory.ts | 1 + .../pianola/pianola-awaiting-detector.ts | 54 +++++- src/shared/pianola/pianola-classifier.ts | 17 +- src/shared/pianola/pianola-watcher.ts | 166 ++++++++++++++---- src/shared/pianola/storage.ts | 39 ++++ 13 files changed, 539 insertions(+), 113 deletions(-) diff --git a/src/__tests__/cli/commands/pianola.test.ts b/src/__tests__/cli/commands/pianola.test.ts index 34929114ef..4a86cd86ab 100644 --- a/src/__tests__/cli/commands/pianola.test.ts +++ b/src/__tests__/cli/commands/pianola.test.ts @@ -1,20 +1,73 @@ /** * @file pianola.test.ts - * @description Tests for the Pianola CLI command gating and read commands. + * @description Tests for the Pianola CLI commands: Encore gating, read views, + * and the watch loop (with the WebSocket client and dispatch mocked). */ import { describe, it, expect, vi, beforeEach, type MockInstance } from 'vitest'; +import type { PianolaRule } from '../../../shared/pianola/types'; + +const { connectMock, sendCommandMock, disconnectMock, runDispatchMock } = vi.hoisted(() => ({ + connectMock: vi.fn(), + sendCommandMock: vi.fn(), + disconnectMock: vi.fn(), + runDispatchMock: vi.fn(), +})); vi.mock('../../../cli/services/storage', () => ({ readSettingValue: vi.fn() })); vi.mock('../../../cli/services/pianola-store', () => ({ readPianolaRules: vi.fn(() => []), + readPianolaRulesResult: vi.fn(() => ({ rules: [], malformed: false })), appendPianolaDecision: vi.fn(), readPianolaDecisions: vi.fn(() => []), })); +vi.mock('../../../cli/services/maestro-client', () => ({ + MaestroClient: class { + connect = connectMock; + sendCommand = sendCommandMock; + disconnect = disconnectMock; + }, +})); +vi.mock('../../../cli/commands/dispatch', () => ({ runDispatch: runDispatchMock })); -import { pianolaRules, pianolaLog } from '../../../cli/commands/pianola'; +import { pianolaRules, pianolaLog, pianolaWatch } from '../../../cli/commands/pianola'; import { readSettingValue } from '../../../cli/services/storage'; -import { readPianolaRules, readPianolaDecisions } from '../../../cli/services/pianola-store'; +import { + readPianolaRules, + readPianolaDecisions, + appendPianolaDecision, +} from '../../../cli/services/pianola-store'; + +function autoAnswerRule(): PianolaRule { + return { + id: 'rule-1', + enabled: true, + scope: 'global', + match: { maxRisk: 'low', kinds: ['question'] }, + action: 'auto_answer', + answer: 'Use tabs.', + priority: 1, + createdAt: 1, + updatedAt: 1, + }; +} + +function questionResponse(over: Record = {}): Record { + return { + success: true, + agentId: 'a1', + messages: [ + { + id: 'm1', + role: 'assistant', + source: 'ai', + content: 'Should I name it count or total?', + timestamp: '2026-01-01T00:00:00.000Z', + }, + ], + ...over, + }; +} describe('pianola command gating', () => { let consoleSpy: MockInstance; @@ -59,3 +112,65 @@ describe('pianola command gating', () => { expect(consoleSpy).toHaveBeenCalledWith('No Pianola decisions recorded yet.'); }); }); + +describe('pianola watch', () => { + let exitSpy: MockInstance; + + beforeEach(() => { + vi.clearAllMocks(); + vi.spyOn(console, 'log').mockImplementation(() => {}); + vi.spyOn(console, 'error').mockImplementation(() => {}); + exitSpy = vi.spyOn(process, 'exit').mockImplementation(() => { + throw new Error('__exit__'); + }); + connectMock.mockResolvedValue(undefined); + disconnectMock.mockReturnValue(undefined); + vi.mocked(readSettingValue).mockReturnValue({ pianola: true }); + vi.mocked(readPianolaRules).mockReturnValue([]); + }); + + it('refuses to run when the Encore flag is off, before connecting', async () => { + vi.mocked(readSettingValue).mockReturnValue({ pianola: false }); + await expect(pianolaWatch('tab-1', { once: true })).rejects.toThrow('__exit__'); + expect(connectMock).not.toHaveBeenCalled(); + }); + + it('escalates a question with no matching rule and records it, without dispatching', async () => { + sendCommandMock.mockResolvedValue(questionResponse()); + await pianolaWatch('tab-1', { once: true }); + expect(appendPianolaDecision).toHaveBeenCalledTimes(1); + expect(runDispatchMock).not.toHaveBeenCalled(); + expect(disconnectMock).toHaveBeenCalled(); + }); + + it('auto-answers via runDispatch when a rule matches', async () => { + vi.mocked(readPianolaRules).mockReturnValue([autoAnswerRule()]); + sendCommandMock.mockResolvedValue(questionResponse()); + runDispatchMock.mockResolvedValue({ success: true }); + await pianolaWatch('tab-1', { once: true }); + expect(runDispatchMock).toHaveBeenCalledWith('a1', 'Use tabs.', { tab: 'tab-1' }); + // Intent + outcome records. + expect(appendPianolaDecision).toHaveBeenCalledTimes(2); + }); + + it('uses the --agent override as the dispatch target', async () => { + vi.mocked(readPianolaRules).mockReturnValue([autoAnswerRule()]); + sendCommandMock.mockResolvedValue(questionResponse({ agentId: 'a1' })); + runDispatchMock.mockResolvedValue({ success: true }); + await pianolaWatch('tab-1', { once: true, agent: 'a2' }); + expect(runDispatchMock).toHaveBeenCalledWith('a2', 'Use tabs.', { tab: 'tab-1' }); + }); + + it('logs and exits the single run on a poll failure', async () => { + sendCommandMock.mockRejectedValue(new Error('boom')); + await pianolaWatch('tab-1', { once: true }); + expect(appendPianolaDecision).not.toHaveBeenCalled(); + expect(disconnectMock).toHaveBeenCalled(); + }); + + it('exits when the connection cannot be established', async () => { + connectMock.mockRejectedValue(new Error('no server')); + await expect(pianolaWatch('tab-1', { once: true })).rejects.toThrow('__exit__'); + expect(exitSpy).toHaveBeenCalledWith(1); + }); +}); diff --git a/src/__tests__/cli/services/pianola-store.test.ts b/src/__tests__/cli/services/pianola-store.test.ts index 2492304bf9..5a626014a5 100644 --- a/src/__tests__/cli/services/pianola-store.test.ts +++ b/src/__tests__/cli/services/pianola-store.test.ts @@ -149,4 +149,21 @@ describe('decision audit log', () => { appendPianolaDecision(decisionRecord('d2')); expect(readPianolaDecisions().map((r) => r.id)).toEqual(['d1', 'd2']); }); + + it('skips a schema-invalid JSON line', () => { + appendPianolaDecision(decisionRecord('d1')); + fs.appendFileSync(path.join(tmpDir, PIANOLA_DECISIONS_FILENAME), '{"foo":1}\n', 'utf-8'); + expect(readPianolaDecisions().map((r) => r.id)).toEqual(['d1']); + }); + + it('folds an intent and outcome record sharing an id (last wins, position kept)', () => { + const intent = { ...decisionRecord('same'), dispatched: false }; + const outcome = { ...decisionRecord('same'), dispatched: true }; + appendPianolaDecision(decisionRecord('first')); + appendPianolaDecision(intent); + appendPianolaDecision(outcome); + const records = readPianolaDecisions(); + expect(records.map((r) => r.id)).toEqual(['first', 'same']); + expect(records[1].dispatched).toBe(true); + }); }); diff --git a/src/__tests__/shared/pianola/pianola-awaiting-detector.test.ts b/src/__tests__/shared/pianola/pianola-awaiting-detector.test.ts index bd46c5eae6..a7e84827b7 100644 --- a/src/__tests__/shared/pianola/pianola-awaiting-detector.test.ts +++ b/src/__tests__/shared/pianola/pianola-awaiting-detector.test.ts @@ -70,6 +70,32 @@ describe('detectAwaitingInput - kinds', () => { }); }); +describe('detectAwaitingInput - false-positive hardening', () => { + it('does not treat a markdown link as options', () => { + expect(detectAwaitingInput('See the guide at [docs/api](https://x.example). Done.')).toBeNull(); + }); + + it('does not treat a bracketed file path as options', () => { + expect(detectAwaitingInput('I edited [src/foo.ts] as requested.')).toBeNull(); + }); + + it('does not treat a version number as a numbered choice', () => { + expect(detectAwaitingInput('Bumped the package to 1.2.3 today.')).toBeNull(); + }); + + it('does not treat a changelog-style numbered list without a question as a choice', () => { + expect( + detectAwaitingInput('Changes in this release: 1) fixed a bug 2) added a feature') + ).toBeNull(); + }); + + it('still treats a genuine bracketed choice with asking context as a choice', () => { + const s = detectAwaitingInput('Proceed? [approve/cancel]'); + expect(s?.kind).toBe('choice'); + expect(s?.options).toEqual(['approve', 'cancel']); + }); +}); + describe('enrichWithAwaitingInput', () => { it('fills awaitingInput on assistant messages without mutating inputs', () => { const input = [msg('user', 'go'), msg('assistant', 'Do you want me to deploy to production?')]; diff --git a/src/__tests__/shared/pianola/pianola-watcher.test.ts b/src/__tests__/shared/pianola/pianola-watcher.test.ts index 9330086e08..fcdfc27ef5 100644 --- a/src/__tests__/shared/pianola/pianola-watcher.test.ts +++ b/src/__tests__/shared/pianola/pianola-watcher.test.ts @@ -1,13 +1,16 @@ /** * @file pianola-watcher.test.ts - * @description Tests for the dependency-injected watch iteration. + * @description Tests for the dependency-injected watch iteration, including the + * audit-before-dispatch and bounded-retry safety invariants. */ import { describe, it, expect, vi } from 'vitest'; import { runWatchIteration, initialWatchState, + MAX_DISPATCH_ATTEMPTS, type WatchDeps, + type WatchState, type WatchTarget, } from '../../../shared/pianola/pianola-watcher'; import type { PianolaMessage, PianolaRule } from '../../../shared/pianola/types'; @@ -45,6 +48,7 @@ function makeDeps(over: Partial = {}): { dispatch: ReturnType; } { const records: PianolaDecisionRecord[] = []; + let idCounter = 0; const dispatch = vi.fn(async () => ({ success: true as boolean, error: undefined as string | undefined, @@ -54,7 +58,10 @@ function makeDeps(over: Partial = {}): { dispatch, recordDecision: (r) => records.push(r), now: () => '2026-01-01T00:00:00.000Z', - genId: () => 'decision-id', + genId: () => { + idCounter += 1; + return `decision-${idCounter}`; + }, log: () => {}, ...over, }; @@ -63,7 +70,7 @@ function makeDeps(over: Partial = {}): { const target: WatchTarget = { tabId: 'tab-1', agentId: 'agent-1' }; -describe('runWatchIteration', () => { +describe('runWatchIteration - basics', () => { it('does nothing for a non-actionable transcript', async () => { const { deps, records, dispatch } = makeDeps(); const { result } = await runWatchIteration( @@ -74,13 +81,12 @@ describe('runWatchIteration', () => { { dryRun: false } ); expect(result.acted).toBe(false); - expect(result.decision).toBeNull(); expect(records).toHaveLength(0); expect(dispatch).not.toHaveBeenCalled(); }); - it('auto-answers a low-risk question matched by a rule and records it dispatched', async () => { - const { deps, records, dispatch } = makeDeps({ readRules: () => [autoAnswerRule()] }); + it('escalates and records once when no rule matches', async () => { + const { deps, records, dispatch } = makeDeps(); const { result } = await runWatchIteration( [assistant('Should I name it count or total?')], target, @@ -88,13 +94,12 @@ describe('runWatchIteration', () => { deps, { dryRun: false } ); - expect(result.decision?.action).toBe('auto_answer'); - expect(dispatch).toHaveBeenCalledWith(target, 'Use tabs.'); - expect(records[0].dispatched).toBe(true); - expect(records[0].dryRun).toBe(false); + expect(result.decision?.action).toBe('escalate'); + expect(dispatch).not.toHaveBeenCalled(); + expect(records).toHaveLength(1); }); - it('does not dispatch in dry-run mode but still records the decision', async () => { + it('dry-run never dispatches but records the decision', async () => { const { deps, records, dispatch } = makeDeps({ readRules: () => [autoAnswerRule()] }); const { result } = await runWatchIteration( [assistant('Should I name it count or total?')], @@ -105,71 +110,128 @@ describe('runWatchIteration', () => { ); expect(result.decision?.action).toBe('auto_answer'); expect(dispatch).not.toHaveBeenCalled(); - expect(records[0].dispatched).toBe(false); + expect(records).toHaveLength(1); expect(records[0].dryRun).toBe(true); + expect(records[0].dispatched).toBe(false); }); +}); - it('escalates and records when no rule matches', async () => { - const { deps, records, dispatch } = makeDeps(); - const { result } = await runWatchIteration( +describe('runWatchIteration - auto-answer dispatch', () => { + it('writes an audit record before dispatching, then an outcome record', async () => { + const order: string[] = []; + const dispatch = vi.fn(async () => { + order.push('dispatch'); + return { success: true, error: undefined }; + }); + const { deps, records } = makeDeps({ + readRules: () => [autoAnswerRule()], + dispatch, + recordDecision: () => order.push('record'), + }); + await runWatchIteration( [assistant('Should I name it count or total?')], target, initialWatchState(), deps, { dryRun: false } ); - expect(result.decision?.action).toBe('escalate'); - expect(dispatch).not.toHaveBeenCalled(); - expect(records).toHaveLength(1); + // Audit (intent) must be persisted before the message is sent. + expect(order).toEqual(['record', 'dispatch', 'record']); + void records; + expect(dispatch).toHaveBeenCalledWith(target, 'Use tabs.'); }); - it('records a dispatch failure on the audit entry', async () => { - const dispatch = vi.fn(async () => ({ success: false, error: 'session busy' })); - const { deps, records } = makeDeps({ readRules: () => [autoAnswerRule()], dispatch }); - await runWatchIteration( + it('records intent and a dispatched outcome under one id', async () => { + const { deps, records } = makeDeps({ readRules: () => [autoAnswerRule()] }); + const { result } = await runWatchIteration( [assistant('Should I name it count or total?')], target, initialWatchState(), deps, { dryRun: false } ); - expect(records[0].dispatched).toBe(false); - expect(records[0].error).toBe('session busy'); + expect(records).toHaveLength(2); + expect(records[0].id).toBe(records[1].id); // same id, folded by readers + expect(records[0].dispatched).toBe(false); // intent + expect(records[1].dispatched).toBe(true); // outcome + expect(result.dispatched).toBe(true); + }); + + it('does not dispatch if the pre-dispatch audit write fails (fails closed)', async () => { + const dispatch = vi.fn(async () => ({ success: true, error: undefined })); + const deps = makeDeps({ + readRules: () => [autoAnswerRule()], + dispatch, + recordDecision: () => { + throw new Error('disk full'); + }, + }).deps; + await expect( + runWatchIteration( + [assistant('Should I name it count or total?')], + target, + initialWatchState(), + deps, + { + dryRun: false, + } + ) + ).rejects.toThrow('disk full'); + expect(dispatch).not.toHaveBeenCalled(); }); +}); - it('does not re-handle the same prompt on the next iteration', async () => { +describe('runWatchIteration - dedup and retry', () => { + it('does not re-handle the same prompt after a successful decision', async () => { const { deps, records } = makeDeps(); const messages = [assistant('Should I deploy to production?')]; - const first = await runWatchIteration(messages, target, initialWatchState(), deps, { dryRun: false, }); expect(first.result.acted).toBe(true); - expect(records).toHaveLength(1); - const second = await runWatchIteration(messages, target, first.state, deps, { dryRun: false }); expect(second.result.acted).toBe(false); expect(second.result.skipped).toContain('already handled'); - expect(records).toHaveLength(1); // no new record + expect(records).toHaveLength(1); }); - it('handles a fresh prompt after the previous one was handled', async () => { - const { deps, records } = makeDeps(); - const first = await runWatchIteration( - [assistant('Should I deploy to production?')], + it('retries a failed dispatch on subsequent polls, then gives up at the cap', async () => { + const dispatch = vi.fn(async () => ({ success: false, error: 'session busy' })); + const { deps } = makeDeps({ readRules: () => [autoAnswerRule()], dispatch }); + const messages = [assistant('Should I name it count or total?')]; + + let state: WatchState = initialWatchState(); + // First MAX-1 failures keep retrying (cursor not advanced). + for (let attempt = 1; attempt < MAX_DISPATCH_ATTEMPTS; attempt += 1) { + const out = await runWatchIteration(messages, target, state, deps, { dryRun: false }); + state = out.state; + expect(state.lastHandledMessageId).toBeNull(); + expect(state.pendingRetry?.attempts).toBe(attempt); + } + // The capping attempt gives up: cursor advances, retry cleared. + const final = await runWatchIteration(messages, target, state, deps, { dryRun: false }); + expect(final.state.pendingRetry).toBeNull(); + expect(final.state.lastHandledMessageId).toBe('m' + seq); + expect(dispatch).toHaveBeenCalledTimes(MAX_DISPATCH_ATTEMPTS); + + // After giving up, the same prompt is skipped. + const skipped = await runWatchIteration(messages, target, final.state, deps, { dryRun: false }); + expect(skipped.result.skipped).toContain('already handled'); + expect(dispatch).toHaveBeenCalledTimes(MAX_DISPATCH_ATTEMPTS); + }); + + it('records the dispatch error on the outcome entry', async () => { + const dispatch = vi.fn(async () => ({ success: false, error: 'session busy' })); + const { deps, records } = makeDeps({ readRules: () => [autoAnswerRule()], dispatch }); + await runWatchIteration( + [assistant('Should I name it count or total?')], target, initialWatchState(), deps, { dryRun: false } ); - const second = await runWatchIteration( - [assistant('Should I publish the release?')], - target, - first.state, - deps, - { dryRun: false } - ); - expect(second.result.acted).toBe(true); - expect(records).toHaveLength(2); + const outcome = records[records.length - 1]; + expect(outcome.dispatched).toBe(false); + expect(outcome.error).toBe('session busy'); }); }); diff --git a/src/cli/commands/pianola.ts b/src/cli/commands/pianola.ts index 163fd353e9..5a17dd2be8 100644 --- a/src/cli/commands/pianola.ts +++ b/src/cli/commands/pianola.ts @@ -14,6 +14,7 @@ import { readSettingValue } from '../services/storage'; import { readPianolaRules, + readPianolaRulesResult, appendPianolaDecision, readPianolaDecisions, } from '../services/pianola-store'; @@ -57,6 +58,7 @@ interface SessionHistoryResponse { sessionId?: string; agentId?: string; agentSessionId?: string | null; + projectPath?: string; messages?: PianolaMessage[]; } @@ -123,6 +125,10 @@ export async function pianolaWatch(tabId: string, options: PianolaWatchOptions): return; } + if (readPianolaRulesResult().malformed) { + console.error('[pianola] warning: rules file is invalid JSON; no rules will apply until fixed'); + } + if (!once) { console.log( `[pianola] watching tab ${tabId} every ${intervalMs / 1000}s${dryRun ? ' (dry-run)' : ''}. Ctrl+C to stop.` @@ -158,10 +164,17 @@ export async function pianolaWatch(tabId: string, options: PianolaWatchOptions): break; } - const target: WatchTarget = { tabId, agentId }; + const target: WatchTarget = { tabId, agentId, projectPath: resp.projectPath }; const messages = resp.messages ?? []; - const iteration = await runWatchIteration(messages, target, state, deps, { dryRun }); - state = iteration.state; + try { + const iteration = await runWatchIteration(messages, target, state, deps, { dryRun }); + state = iteration.state; + } catch (error) { + // Unexpected failure (e.g. an audit write failed before dispatch). + // Fail closed: log and keep watching rather than crashing the loop. + const message = error instanceof Error ? error.message : String(error); + console.error(`[pianola] iteration error: ${message}`); + } if (once || stopped) break; await sleep(intervalMs); diff --git a/src/cli/services/pianola-store.ts b/src/cli/services/pianola-store.ts index ad5138cb0d..c7f363c9eb 100644 --- a/src/cli/services/pianola-store.ts +++ b/src/cli/services/pianola-store.ts @@ -15,10 +15,18 @@ import { PIANOLA_RULES_FILENAME, PIANOLA_DECISIONS_FILENAME, validatePianolaRules, + validatePianolaDecisionRecord, type PianolaDecisionRecord, } from '../../shared/pianola/storage'; import type { PianolaRule } from '../../shared/pianola/types'; +/** Result of loading rules, distinguishing "no rules" from "file is malformed". */ +export interface RulesLoadResult { + rules: PianolaRule[]; + /** True when the rules file exists but could not be parsed as JSON. */ + malformed: boolean; +} + function rulesPath(): string { return path.join(getConfigDirectory(), PIANOLA_RULES_FILENAME); } @@ -28,29 +36,34 @@ function decisionsPath(): string { } /** - * Read and validate the rules file. Returns an empty list when the file is - * missing or malformed, and drops any individual invalid rule, so a bad - * hand-edit cannot break the watcher. + * Read and validate the rules file, reporting whether the file was present but + * unparseable. Individual invalid rules are dropped, so a bad hand-edit cannot + * break the watcher; callers can surface `malformed` as a warning. */ -export function readPianolaRules(): PianolaRule[] { +export function readPianolaRulesResult(): RulesLoadResult { let content: string; try { content = fs.readFileSync(rulesPath(), 'utf-8'); } catch (error) { - if ((error as NodeJS.ErrnoException).code === 'ENOENT') return []; + if ((error as NodeJS.ErrnoException).code === 'ENOENT') return { rules: [], malformed: false }; throw error; } let parsed: unknown; try { parsed = JSON.parse(content); } catch { - return []; + return { rules: [], malformed: true }; } // Accept either a bare array or an electron-store style { rules: [...] }. const raw = Array.isArray(parsed) ? parsed : ((parsed as { rules?: unknown } | null)?.rules ?? []); - return validatePianolaRules(raw); + return { rules: validatePianolaRules(raw), malformed: false }; +} + +/** Read and validate the rules file. Returns [] when missing or malformed. */ +export function readPianolaRules(): PianolaRule[] { + return readPianolaRulesResult().rules; } /** Append one decision record to the audit log as a JSON line. */ @@ -63,8 +76,10 @@ export function appendPianolaDecision(record: PianolaDecisionRecord): void { } /** - * Read recent decision records (most recent last). Malformed lines are skipped. - * When `limit` is given, only the last `limit` records are returned. + * Read recent decision records (most recent last). Malformed and schema-invalid + * lines are skipped. Records sharing an id (an auto-answer's intent line and its + * dispatch-outcome line) are folded so the latest wins, keeping the original + * position. When `limit` is given, only the last `limit` records are returned. */ export function readPianolaDecisions(limit?: number): PianolaDecisionRecord[] { let content: string; @@ -74,16 +89,20 @@ export function readPianolaDecisions(limit?: number): PianolaDecisionRecord[] { if ((error as NodeJS.ErrnoException).code === 'ENOENT') return []; throw error; } - const records: PianolaDecisionRecord[] = []; + const byId = new Map(); for (const line of content.split('\n')) { const trimmed = line.trim(); if (!trimmed) continue; + let parsed: unknown; try { - records.push(JSON.parse(trimmed) as PianolaDecisionRecord); + parsed = JSON.parse(trimmed); } catch { - // Skip a corrupt line rather than failing the whole read. + continue; // skip a corrupt line rather than failing the whole read } + const record = validatePianolaDecisionRecord(parsed); + if (record) byId.set(record.id, record); // last write for an id wins, position kept } + const records = [...byId.values()]; if (limit !== undefined && limit >= 0 && records.length > limit) { return records.slice(records.length - limit); } diff --git a/src/main/web-server/handlers/messageHandlers.ts b/src/main/web-server/handlers/messageHandlers.ts index 4fdabcc048..c9c741f9ec 100644 --- a/src/main/web-server/handlers/messageHandlers.ts +++ b/src/main/web-server/handlers/messageHandlers.ts @@ -4908,6 +4908,7 @@ export class WebSocketMessageHandler { sessionId: result.sessionId, agentId: result.agentId, agentSessionId: result.agentSessionId, + projectPath: result.projectPath, messages: result.messages, requestId: message.requestId, }); diff --git a/src/main/web-server/types.ts b/src/main/web-server/types.ts index 867df58f41..fc25be287b 100644 --- a/src/main/web-server/types.ts +++ b/src/main/web-server/types.ts @@ -544,6 +544,8 @@ export interface SessionHistoryResult { sessionId: string; agentId: string; agentSessionId: string | null; + /** Agent working directory, when known. Used for project-scoped automation. */ + projectPath?: string; messages: SessionHistoryMessage[]; } diff --git a/src/main/web-server/web-server-factory.ts b/src/main/web-server/web-server-factory.ts index 96ebfc08b8..e26c3b458a 100644 --- a/src/main/web-server/web-server-factory.ts +++ b/src/main/web-server/web-server-factory.ts @@ -349,6 +349,7 @@ export function createWebServerFactory(deps: WebServerFactoryDependencies) { sessionId: tabId, agentId: s.id, agentSessionId: typeof tab.agentSessionId === 'string' ? tab.agentSessionId : null, + projectPath: typeof s.cwd === 'string' ? s.cwd : undefined, messages, }; } diff --git a/src/shared/pianola/pianola-awaiting-detector.ts b/src/shared/pianola/pianola-awaiting-detector.ts index 6e986bee5b..dc2c3d695f 100644 --- a/src/shared/pianola/pianola-awaiting-detector.ts +++ b/src/shared/pianola/pianola-awaiting-detector.ts @@ -13,6 +13,12 @@ * testable. The classifier treats a returned signal as authoritative; weaker * cues fall through to its heuristics. * + * Hardening: option extraction is deliberately strict (short, word-like tokens; + * no file paths, markdown links, version numbers, or sentences), and a "choice" + * is only emitted when the text actually reads like a question/choice. This + * stops ordinary prose - including an adversarial transcript - from being shaped + * into a structured choice that could trip an auto-answer rule. + * * Precedence: plan_review > permission > choice > question. */ @@ -30,18 +36,45 @@ const PERMISSION_RE = const QUESTION_INTENT_RE = /\b(should i|shall i|which (?:one|option|approach|do you)|would you prefer|do you want|how (?:should|would) (?:i|we)|what (?:should|would) (?:i|we))\b/i; -/** Extract a discrete option list, if the text presents one. */ -function extractOptions(content: string): string[] { - // Slash/bracket form: [keep/discard], (yes/no) - const bracket = content.match(/[[(]([^\])]*\/[^\])]*)[\])]/); +/** Words that signal the text is inviting a choice/decision. */ +const CHOICE_CONTEXT_RE = /\b(choose|choice|which|option|options|prefer|proceed|select|pick|or)\b/i; + +/** True if the text reads like it is asking the user to decide. */ +export function looksLikeAsking(content: string): boolean { + return ( + content.trim().endsWith('?') || + QUESTION_INTENT_RE.test(content) || + CHOICE_CONTEXT_RE.test(content) + ); +} + +/** + * A clean option token: starts with a letter, short, word-like (letters, + * digits, spaces, +/-). Rejects file paths (foo.ts), urls, version numbers, and + * full sentences so prose is not mistaken for a discrete option. + */ +function isCleanOptionToken(token: string): boolean { + return /^[a-zA-Z][\w +-]{0,23}$/.test(token); +} + +/** + * Extract a discrete option list, if the text presents one. Returns [] unless it + * finds 2-5 clean tokens in a slash/bracket form ([keep/discard], (y/n)) or a + * numbered list (1) a 2) b). Markdown links ([label](url)) are rejected. + */ +export function extractOptions(content: string): string[] { + // Slash/bracket form: [keep/discard], (yes/no). Not a markdown link. + const bracket = content.match(/[[(]\s*([^[\]()]*\/[^[\]()]*?)\s*[\])](?!\()/); if (bracket) { const parts = bracket[1] .split('/') .map((s) => s.trim()) .filter(Boolean); - if (parts.length >= 2) return parts; + if (parts.length >= 2 && parts.length <= 5 && parts.every(isCleanOptionToken)) { + return parts; + } } - // Numbered form: "1) keep it 2) remove it" (inline or across lines) + // Numbered form: "1) keep it 2) remove it" (inline or across lines). const numbered = [...content.matchAll(/(?:^|\n|\s)\d+[.)]\s+([^\n]+?)(?=(?:\s+\d+[.)]\s)|\n|$)/g)] .map((m) => m[1].trim()) .filter(Boolean); @@ -85,18 +118,21 @@ export function detectAwaitingInput(content: string): AwaitingInputSignal | null const prompt = extractPrompt(content); if (PLAN_REVIEW_RE.test(content)) { - return signal('plan_review', prompt, options); + // Plan steps are not pickable options; report the kind only. + return { kind: 'plan_review', prompt }; } if (PERMISSION_RE.test(content)) { return signal('permission', prompt, options); } - if (options.length >= 2) { + // A choice needs both a real option list AND a question/choice context, so + // changelog-style numbered lists and incidental brackets are not choices. + if (options.length >= 2 && looksLikeAsking(content)) { return signal('choice', prompt, options); } // A question intent only becomes a structured signal when the text is // actually a question; weaker phrasing is left to the classifier heuristics. if (QUESTION_INTENT_RE.test(content) && content.trim().endsWith('?')) { - return signal('question', prompt, options); + return { kind: 'question', prompt }; } return null; diff --git a/src/shared/pianola/pianola-classifier.ts b/src/shared/pianola/pianola-classifier.ts index 396336cfd4..a91a447073 100644 --- a/src/shared/pianola/pianola-classifier.ts +++ b/src/shared/pianola/pianola-classifier.ts @@ -18,6 +18,7 @@ import type { PianolaSignalKind, } from './types'; import { maxRisk, rateRisk } from './pianola-risk'; +import { extractOptions, looksLikeAsking } from './pianola-awaiting-detector'; // Re-exported for convenience so callers can pull risk helpers from the classifier. export { riskAtMost, maxRisk } from './pianola-risk'; @@ -55,21 +56,17 @@ const BLOCKED_PHRASES = [ 'needs approval', ]; -/** Slash/paren choice markers: [y/n], (yes/no), [approve/cancel]. */ -const SLASH_CHOICE_RE = /\[[^\]]*\/[^\]]*\]|\((?:y\/n|yes\/no)\)/i; - -/** Two or more numbered options: "1) approve", "2. cancel" at line/segment starts. */ -const NUMBERED_CHOICE_RE = /(?:^|\n|\s)\d+[.)]\s+\S/g; - function containsAny(haystack: string, needles: readonly string[]): boolean { return needles.some((n) => haystack.includes(n)); } -/** True if the text presents an explicit set of choices. */ +/** + * True if the text presents an explicit set of choices. Reuses the detector's + * hardened option extraction and asking-context check so both tiers agree (a + * changelog-style numbered list or an incidental bracket is not a choice). + */ function hasChoiceMarker(content: string): boolean { - if (SLASH_CHOICE_RE.test(content)) return true; - const numbered = content.match(NUMBERED_CHOICE_RE); - return !!numbered && numbered.length >= 2; + return extractOptions(content).length >= 2 && looksLikeAsking(content); } /** Build a short topic summary from a prompt: first sentence/line, trimmed. */ diff --git a/src/shared/pianola/pianola-watcher.ts b/src/shared/pianola/pianola-watcher.ts index 73b606b8a5..07cb7c4688 100644 --- a/src/shared/pianola/pianola-watcher.ts +++ b/src/shared/pianola/pianola-watcher.ts @@ -1,12 +1,19 @@ /** * Pianola watcher - one iteration of the watch loop, with all I/O injected. * - * This is the orchestration that ties the brain together: enrich a transcript - * with structured awaiting-input signals, classify it, decide via the rules, - * and (for an auto-answer that isn't a dry run) dispatch. It records every - * actionable decision. All side effects come in through `WatchDeps`, so the loop - * is unit-testable without a desktop app, network, or filesystem, and the same + * Ties the brain together: enrich a transcript with structured awaiting-input + * signals, classify it, decide via the rules, and (for a live auto-answer) + * dispatch. All side effects come in through `WatchDeps`, so the loop is + * unit-testable without a desktop app, network, or filesystem, and the same * logic can back both the CLI watcher and a future in-app engine. + * + * Safety invariants: + * - Audit before dispatch: the decision is recorded BEFORE any message is sent, + * so Pianola can never dispatch without an audit trail. A second record + * captures the dispatch outcome (readers fold the two by id). + * - Bounded retry: a failed dispatch does NOT advance the dedup cursor, so the + * same prompt is retried on the next poll, up to MAX_DISPATCH_ATTEMPTS, after + * which Pianola gives up on that prompt rather than looping forever. */ import type { PianolaClassification, PianolaDecision, PianolaMessage, PianolaRule } from './types'; @@ -15,6 +22,9 @@ import { classifyMessages } from './pianola-classifier'; import { enrichWithAwaitingInput } from './pianola-awaiting-detector'; import { decide } from './pianola-policy'; +/** Max dispatch attempts for a single prompt before giving up. */ +export const MAX_DISPATCH_ATTEMPTS = 3; + /** The tab Pianola is watching and the agent it would dispatch to. */ export interface WatchTarget { tabId: string; @@ -38,12 +48,14 @@ export interface WatchDeps { /** Per-tab loop state, carried between iterations. */ export interface WatchState { - /** Id of the last assistant message we already acted on (dedup guard). */ + /** Id of the last assistant message we already finished handling (dedup guard). */ lastHandledMessageId: string | null; + /** A prompt whose dispatch failed and is awaiting another attempt. */ + pendingRetry: { messageId: string; attempts: number } | null; } export function initialWatchState(): WatchState { - return { lastHandledMessageId: null }; + return { lastHandledMessageId: null, pendingRetry: null }; } export interface IterationResult { @@ -53,11 +65,13 @@ export interface IterationResult { record: PianolaDecisionRecord | null; /** True when this iteration produced a new decision. */ acted: boolean; + /** True when an auto-answer was sent successfully. */ + dispatched: boolean; /** Reason an actionable prompt was skipped (e.g. already handled). */ skipped?: string; } -function describe(result: IterationResult): string { +function describe(result: IterationResult, error?: string): string { const { classification: c, decision } = result; if (!decision) { return result.skipped @@ -65,13 +79,36 @@ function describe(result: IterationResult): string { : `[pianola] none (${c.evidence.reason})`; } const detail = c.topic ? `: ${c.topic}` : ''; - return `[pianola] ${c.kind}/${c.risk} -> ${decision.action}${detail}`; + const errSuffix = error ? ` (dispatch error: ${error})` : ''; + return `[pianola] ${c.kind}/${c.risk} -> ${decision.action}${detail}${errSuffix}`; +} + +function buildRecord( + deps: WatchDeps, + target: WatchTarget, + classification: PianolaClassification, + decision: PianolaDecision, + fields: { id: string; dispatched: boolean; dryRun: boolean; error?: string } +): PianolaDecisionRecord { + return { + id: fields.id, + timestamp: deps.now(), + tabId: target.tabId, + agentId: target.agentId, + projectPath: target.projectPath, + classification, + decision, + dispatched: fields.dispatched, + dryRun: fields.dryRun, + ...(fields.error ? { error: fields.error } : {}), + }; } /** * Run one watch iteration over the latest transcript for a tab. Returns the next - * state and a structured result. Pure aside from the injected deps; never throws - * for an expected dispatch failure (it is recorded on the audit entry instead). + * state and a structured result. Never throws for an expected dispatch failure + * (it is recorded on the audit entry and retried). An audit-write failure is + * unexpected and propagates BEFORE any dispatch, so the caller fails closed. */ export async function runWatchIteration( messages: readonly PianolaMessage[], @@ -84,7 +121,13 @@ export async function runWatchIteration( const classification = classifyMessages(enriched); if (classification.kind === 'none') { - const result: IterationResult = { classification, decision: null, record: null, acted: false }; + const result: IterationResult = { + classification, + decision: null, + record: null, + acted: false, + dispatched: false, + }; deps.log(describe(result)); return { state, result }; } @@ -96,6 +139,7 @@ export async function runWatchIteration( decision: null, record: null, acted: false, + dispatched: false, skipped: 'already handled this prompt', }; deps.log(describe(result)); @@ -108,34 +152,88 @@ export async function runWatchIteration( tabId: target.tabId, }); - let dispatched = false; - let error: string | undefined; - if (decision.action === 'auto_answer' && !options.dryRun) { - const res = await deps.dispatch(target, decision.answer); - dispatched = res.success; - if (!res.success) error = res.error ?? 'dispatch failed'; + const willDispatch = decision.action === 'auto_answer' && !options.dryRun; + + // Non-dispatch decisions (escalate / ignore / dry-run auto-answer): a single + // audit record, and the prompt is considered handled. + if (!willDispatch) { + const record = buildRecord(deps, target, classification, decision, { + id: deps.genId(), + dispatched: false, + dryRun: options.dryRun, + }); + deps.recordDecision(record); + const result: IterationResult = { + classification, + decision, + record, + acted: true, + dispatched: false, + }; + deps.log(describe(result)); + return { + state: { lastHandledMessageId: messageId ?? state.lastHandledMessageId, pendingRetry: null }, + result, + }; } - const record: PianolaDecisionRecord = { - id: deps.genId(), - timestamp: deps.now(), - tabId: target.tabId, - agentId: target.agentId, - projectPath: target.projectPath, + // Live auto-answer. Audit the intent BEFORE dispatching so a message is never + // sent without a record. The id is shared by the intent and outcome records. + const id = deps.genId(); + const priorAttempts = + state.pendingRetry && state.pendingRetry.messageId === messageId + ? state.pendingRetry.attempts + : 0; + const attempts = priorAttempts + 1; + + const intent = buildRecord(deps, target, classification, decision, { + id, + dispatched: false, + dryRun: false, + }); + deps.recordDecision(intent); // audit-before-dispatch; throws here => no dispatch + + const res = await deps.dispatch(target, decision.action === 'auto_answer' ? decision.answer : ''); + const error = res.success ? undefined : (res.error ?? 'dispatch failed'); + + const outcome = buildRecord(deps, target, classification, decision, { + id, + dispatched: res.success, + dryRun: false, + error, + }); + deps.recordDecision(outcome); + + const result: IterationResult = { classification, decision, - dispatched, - dryRun: options.dryRun, - ...(error ? { error } : {}), + record: outcome, + acted: true, + dispatched: res.success, }; - deps.recordDecision(record); + deps.log(describe(result, error)); - const result: IterationResult = { classification, decision, record, acted: true }; - deps.log(describe(result) + (error ? ` (dispatch error: ${error})` : '')); + if (res.success) { + return { + state: { lastHandledMessageId: messageId ?? state.lastHandledMessageId, pendingRetry: null }, + result, + }; + } - // Advance the dedup cursor so we don't re-handle the same prompt next poll. - const nextState: WatchState = { - lastHandledMessageId: messageId ?? state.lastHandledMessageId, + // Dispatch failed. Retry on the next poll until we hit the attempt cap, then + // give up on this prompt so we do not loop on it forever. + if (!messageId || attempts >= MAX_DISPATCH_ATTEMPTS) { + deps.log(`[pianola] giving up on prompt after ${attempts} dispatch attempt(s)`); + return { + state: { lastHandledMessageId: messageId ?? state.lastHandledMessageId, pendingRetry: null }, + result, + }; + } + return { + state: { + lastHandledMessageId: state.lastHandledMessageId, + pendingRetry: { messageId, attempts }, + }, + result, }; - return { state: nextState, result }; } diff --git a/src/shared/pianola/storage.ts b/src/shared/pianola/storage.ts index dc9478b55e..a316fd001d 100644 --- a/src/shared/pianola/storage.ts +++ b/src/shared/pianola/storage.ts @@ -116,6 +116,45 @@ export function validatePianolaRule(raw: unknown): PianolaRule | null { return rule; } +const CONFIDENCES = ['low', 'medium', 'high'] as const; + +function isValidClassification(raw: unknown): raw is PianolaClassification { + if (!isRecord(raw)) return false; + if (!SIGNAL_KINDS.includes(raw.kind as PianolaSignalKind)) return false; + if (!RISKS.includes(raw.risk as PianolaRisk)) return false; + if (typeof raw.topic !== 'string') return false; + if (!CONFIDENCES.includes(raw.confidence as (typeof CONFIDENCES)[number])) return false; + if (!isRecord(raw.evidence)) return false; + return true; +} + +function isValidDecision(raw: unknown): raw is PianolaDecision { + if (!isRecord(raw)) return false; + if (!ACTION_KINDS.includes(raw.action as PianolaActionKind)) return false; + if (raw.matchedRuleId !== null && typeof raw.matchedRuleId !== 'string') return false; + if (typeof raw.reason !== 'string') return false; + if (raw.action === 'auto_answer' && typeof raw.answer !== 'string') return false; + return true; +} + +/** + * Validate one untrusted decision-audit record. Returns the typed record or null + * so a malformed JSONL line (hand-edited or from an older schema) is skipped + * rather than crashing a reader that dereferences nested fields. + */ +export function validatePianolaDecisionRecord(raw: unknown): PianolaDecisionRecord | null { + if (!isRecord(raw)) return null; + if (typeof raw.id !== 'string' || raw.id.length === 0) return null; + if (typeof raw.timestamp !== 'string') return null; + if (typeof raw.tabId !== 'string' || typeof raw.agentId !== 'string') return null; + if (typeof raw.dispatched !== 'boolean' || typeof raw.dryRun !== 'boolean') return null; + if (raw.projectPath !== undefined && typeof raw.projectPath !== 'string') return null; + if (raw.error !== undefined && typeof raw.error !== 'string') return null; + if (!isValidClassification(raw.classification)) return null; + if (!isValidDecision(raw.decision)) return null; + return raw as unknown as PianolaDecisionRecord; +} + /** Validate an untrusted rules payload, dropping any malformed entries. */ export function validatePianolaRules(raw: unknown): PianolaRule[] { if (!Array.isArray(raw)) return []; From bd484b2420137682753eb18d09d65033224c54fc Mon Sep 17 00:00:00 2001 From: jSydorowicz21 Date: Thu, 25 Jun 2026 00:48:19 -0500 Subject: [PATCH 06/76] feat(pianola): desktop IPC layer for rules CRUD and decision log --- .../main/pianola/pianola-store-main.test.ts | 149 ++++++++++++++++++ src/main/ipc/handlers/index.ts | 7 + src/main/ipc/handlers/pianola.ts | 86 ++++++++++ src/main/pianola/pianola-store-main.ts | 112 +++++++++++++ src/main/preload/index.ts | 10 ++ src/main/preload/pianola.ts | 35 ++++ src/renderer/global.d.ts | 10 ++ 7 files changed, 409 insertions(+) create mode 100644 src/__tests__/main/pianola/pianola-store-main.test.ts create mode 100644 src/main/ipc/handlers/pianola.ts create mode 100644 src/main/pianola/pianola-store-main.ts create mode 100644 src/main/preload/pianola.ts diff --git a/src/__tests__/main/pianola/pianola-store-main.test.ts b/src/__tests__/main/pianola/pianola-store-main.test.ts new file mode 100644 index 0000000000..841ade7e5b --- /dev/null +++ b/src/__tests__/main/pianola/pianola-store-main.test.ts @@ -0,0 +1,149 @@ +/** + * @file pianola-store-main.test.ts + * @description Tests for the Pianola main-process store (rules read/write + + * decision log). Uses a temp MAESTRO_USER_DATA dir so reads/writes are isolated, + * and mocks electron's `app` (unused on this path but imported by the module). + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import * as fs from 'fs'; +import * as os from 'os'; +import * as path from 'path'; + +vi.mock('electron', () => ({ + app: { getPath: () => os.tmpdir() }, +})); + +import { + readRules, + writeRules, + appendDecision, + readDecisions, +} from '../../../main/pianola/pianola-store-main'; +import { + PIANOLA_RULES_FILENAME, + PIANOLA_DECISIONS_FILENAME, + type PianolaDecisionRecord, +} from '../../../shared/pianola/storage'; +import type { PianolaRule } from '../../../shared/pianola/types'; + +let tmpDir: string; +let prevEnv: string | undefined; + +beforeEach(() => { + prevEnv = process.env.MAESTRO_USER_DATA; + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pianola-main-')); + process.env.MAESTRO_USER_DATA = tmpDir; +}); + +afterEach(() => { + if (prevEnv === undefined) delete process.env.MAESTRO_USER_DATA; + else process.env.MAESTRO_USER_DATA = prevEnv; + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +function autoAnswerRule(id: string): PianolaRule { + return { + id, + enabled: true, + scope: 'global', + match: { maxRisk: 'low' }, + action: 'auto_answer', + answer: 'Use tabs.', + priority: 1, + createdAt: 1, + updatedAt: 1, + }; +} + +function decisionRecord(id: string): PianolaDecisionRecord { + return { + id, + timestamp: '2026-01-01T00:00:00.000Z', + tabId: 'tab-1', + agentId: 'agent-1', + classification: { + kind: 'question', + risk: 'low', + topic: 'tabs', + confidence: 'medium', + evidence: { messageId: 'm1', reason: 'test', structured: false }, + }, + decision: { action: 'escalate', matchedRuleId: null, reason: 'default' }, + dispatched: false, + dryRun: true, + }; +} + +describe('rules read/write', () => { + it('returns [] when no rules file exists', () => { + expect(readRules()).toEqual([]); + }); + + it('round-trips rules through writeRules/readRules', () => { + writeRules([autoAnswerRule('r1'), autoAnswerRule('r2')]); + expect(readRules().map((r) => r.id)).toEqual(['r1', 'r2']); + }); + + it('writeRules drops invalid entries before persisting', () => { + const bad = { id: 'bad', scope: 'planet' } as unknown as PianolaRule; + const saved = writeRules([autoAnswerRule('good'), bad]); + expect(saved.map((r) => r.id)).toEqual(['good']); + expect(readRules().map((r) => r.id)).toEqual(['good']); + }); + + it('writes atomically (no leftover .tmp file)', () => { + writeRules([autoAnswerRule('r1')]); + const tmp = path.join(tmpDir, `${PIANOLA_RULES_FILENAME}.tmp`); + expect(fs.existsSync(tmp)).toBe(false); + }); + + it('reads an electron-store style { rules: [...] } object', () => { + fs.writeFileSync( + path.join(tmpDir, PIANOLA_RULES_FILENAME), + JSON.stringify({ rules: [autoAnswerRule('r2')] }), + 'utf-8' + ); + expect(readRules().map((r) => r.id)).toEqual(['r2']); + }); + + it('returns [] for malformed JSON', () => { + fs.writeFileSync(path.join(tmpDir, PIANOLA_RULES_FILENAME), '{ not json', 'utf-8'); + expect(readRules()).toEqual([]); + }); +}); + +describe('decision audit log', () => { + it('returns [] when the log is missing', () => { + expect(readDecisions()).toEqual([]); + }); + + it('appends and reads back records in order', () => { + appendDecision(decisionRecord('d1')); + appendDecision(decisionRecord('d2')); + expect(readDecisions().map((r) => r.id)).toEqual(['d1', 'd2']); + }); + + it('honors a tail limit', () => { + appendDecision(decisionRecord('d1')); + appendDecision(decisionRecord('d2')); + appendDecision(decisionRecord('d3')); + expect(readDecisions(2).map((r) => r.id)).toEqual(['d2', 'd3']); + }); + + it('skips corrupt and schema-invalid lines', () => { + appendDecision(decisionRecord('d1')); + fs.appendFileSync(path.join(tmpDir, PIANOLA_DECISIONS_FILENAME), 'not json\n', 'utf-8'); + fs.appendFileSync(path.join(tmpDir, PIANOLA_DECISIONS_FILENAME), '{"foo":1}\n', 'utf-8'); + appendDecision(decisionRecord('d2')); + expect(readDecisions().map((r) => r.id)).toEqual(['d1', 'd2']); + }); + + it('folds an intent and outcome record sharing an id (last wins)', () => { + appendDecision({ ...decisionRecord('same'), dispatched: false }); + appendDecision({ ...decisionRecord('same'), dispatched: true }); + const records = readDecisions(); + expect(records.map((r) => r.id)).toEqual(['same']); + expect(records[0].dispatched).toBe(true); + }); +}); diff --git a/src/main/ipc/handlers/index.ts b/src/main/ipc/handlers/index.ts index c3e5418fa0..9229c026d2 100644 --- a/src/main/ipc/handlers/index.ts +++ b/src/main/ipc/handlers/index.ts @@ -63,6 +63,7 @@ import { registerTabNamingHandlers, TabNamingHandlerDependencies } from './tabNa import { registerDirectorNotesHandlers, DirectorNotesHandlerDependencies } from './director-notes'; import { registerCueHandlers, CueHandlerDependencies } from './cue'; import { registerCueBackupHandlers } from './cue-backup'; +import { registerPianolaHandlers, PianolaHandlerDependencies } from './pianola'; import { registerWakatimeHandlers } from './wakatime'; import { registerFeedbackHandlers } from './feedback'; import { registerMaestroCliHandlers } from './maestro-cli'; @@ -125,6 +126,8 @@ export type { DirectorNotesHandlerDependencies }; export { registerCueHandlers }; export type { CueHandlerDependencies }; export { registerCueBackupHandlers }; +export { registerPianolaHandlers }; +export type { PianolaHandlerDependencies }; export { registerWakatimeHandlers }; export { registerFeedbackHandlers }; export { registerMaestroCliHandlers }; @@ -324,6 +327,10 @@ export function registerAllHandlers(deps: HandlerDependencies): void { registerCueBackupHandlers({ sessionsStore: deps.sessionsStore, }); + // Register Pianola handlers (autonomous manager: rules CRUD + decision log) + registerPianolaHandlers({ + settingsStore: deps.settingsStore, + }); // Register Core Prompts handlers (no dependencies needed) registerPromptsHandlers(); // Register project Memory handlers (Claude Code per-project memory viewer) diff --git a/src/main/ipc/handlers/pianola.ts b/src/main/ipc/handlers/pianola.ts new file mode 100644 index 0000000000..580ba1c6d2 --- /dev/null +++ b/src/main/ipc/handlers/pianola.ts @@ -0,0 +1,86 @@ +/** + * Pianola IPC Handlers + * + * Exposes the Pianola rules CRUD and the decision audit log to the renderer. + * Thin transport that delegates to the main-process store + * (src/main/pianola/pianola-store-main.ts), which reads/writes the same files + * the CLI watcher uses. + * + * Gated at the handler on `encoreFeatures.pianola`. Pianola can auto-send + * messages to agents, so when the Encore flag is off every channel throws + * `'PianolaDisabled'` rather than returning empty data - the renderer needs to + * distinguish "feature off" from "no rules / no decisions yet". The gate runs + * OUTSIDE withIpcErrorLogging so the sentinel is not logged as an unexpected + * IPC failure. + */ + +import { ipcMain } from 'electron'; +import { withIpcErrorLogging, type CreateHandlerOptions } from '../../utils/ipcHandler'; +import { readRules, writeRules, readDecisions } from '../../pianola/pianola-store-main'; +import { validatePianolaRules } from '../../../shared/pianola/storage'; +import type { PianolaDecisionRecord } from '../../../shared/pianola/storage'; +import type { PianolaRule } from '../../../shared/pianola/types'; + +const LOG_CONTEXT = '[Pianola]'; + +const handlerOpts = (operation: string): Pick => ({ + context: LOG_CONTEXT, + operation, +}); + +/** + * Dependencies for Pianola handlers. Only the settings store is needed, for the + * Encore gate. + */ +export interface PianolaHandlerDependencies { + settingsStore: { + get: (key: string) => unknown; + }; +} + +/** + * Returns true only when `encoreFeatures.pianola` is explicitly enabled. Read on + * every call so a toggle change takes effect without an app restart. + */ +function isPianolaEnabled(settingsStore: { get: (key: string) => unknown }): boolean { + const ef = (settingsStore.get('encoreFeatures') ?? {}) as Record; + return ef.pianola === true; +} + +/** + * Register the Pianola IPC handlers. + */ +export function registerPianolaHandlers(deps: PianolaHandlerDependencies): void { + const { settingsStore } = deps; + + const wrappedGetRules = withIpcErrorLogging( + handlerOpts('getRules'), + async (): Promise => readRules() + ); + const wrappedSaveRules = withIpcErrorLogging( + handlerOpts('saveRules'), + async (rules: unknown): Promise => writeRules(validatePianolaRules(rules)) + ); + const wrappedGetDecisions = withIpcErrorLogging( + handlerOpts('getDecisions'), + async (limit?: number): Promise => readDecisions(limit) + ); + + ipcMain.handle('pianola:get-rules', async (event): Promise => { + if (!isPianolaEnabled(settingsStore)) throw new Error('PianolaDisabled'); + return wrappedGetRules(event); + }); + + ipcMain.handle('pianola:save-rules', async (event, rules: unknown): Promise => { + if (!isPianolaEnabled(settingsStore)) throw new Error('PianolaDisabled'); + return wrappedSaveRules(event, rules); + }); + + ipcMain.handle( + 'pianola:get-decisions', + async (event, limit?: number): Promise => { + if (!isPianolaEnabled(settingsStore)) throw new Error('PianolaDisabled'); + return wrappedGetDecisions(event, limit); + } + ); +} diff --git a/src/main/pianola/pianola-store-main.ts b/src/main/pianola/pianola-store-main.ts new file mode 100644 index 0000000000..87da2ad236 --- /dev/null +++ b/src/main/pianola/pianola-store-main.ts @@ -0,0 +1,112 @@ +/** + * Pianola main-process storage. + * + * Reads/writes the rules file and the decision audit log in the Maestro user + * data directory - the same files the CLI watcher uses (see + * src/cli/services/pianola-store.ts), so desktop and CLI stay in sync. The fs + * logic is intentionally duplicated rather than shared from src/shared, because + * src/shared is also bundled into the renderer where `fs` is unavailable; the + * validation and contracts ARE shared (src/shared/pianola/storage.ts). + */ + +import { app } from 'electron'; +import * as fs from 'fs'; +import * as path from 'path'; +import { + PIANOLA_RULES_FILENAME, + PIANOLA_DECISIONS_FILENAME, + validatePianolaRules, + validatePianolaDecisionRecord, + type PianolaDecisionRecord, +} from '../../shared/pianola/storage'; +import type { PianolaRule } from '../../shared/pianola/types'; + +/** Resolve the Maestro data dir, matching the CLI's getConfigDir semantics. */ +function pianolaDir(): string { + if (process.env.MAESTRO_USER_DATA) return path.resolve(process.env.MAESTRO_USER_DATA); + return app.getPath('userData'); +} + +function rulesPath(): string { + return path.join(pianolaDir(), PIANOLA_RULES_FILENAME); +} + +function decisionsPath(): string { + return path.join(pianolaDir(), PIANOLA_DECISIONS_FILENAME); +} + +/** Read and validate the rules, dropping malformed entries. */ +export function readRules(): PianolaRule[] { + let content: string; + try { + content = fs.readFileSync(rulesPath(), 'utf-8'); + } catch (error) { + if ((error as NodeJS.ErrnoException).code === 'ENOENT') return []; + throw error; + } + let parsed: unknown; + try { + parsed = JSON.parse(content); + } catch { + return []; + } + const raw = Array.isArray(parsed) + ? parsed + : ((parsed as { rules?: unknown } | null)?.rules ?? []); + return validatePianolaRules(raw); +} + +/** + * Persist the full rules list (validated first; invalid entries are rejected). + * Written atomically via a temp file + rename so a concurrent reader never sees + * a partial file. + */ +export function writeRules(rules: PianolaRule[]): PianolaRule[] { + const validated = validatePianolaRules(rules); + const dir = pianolaDir(); + if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); + const target = rulesPath(); + const tmp = `${target}.tmp`; + fs.writeFileSync(tmp, JSON.stringify(validated, null, '\t'), 'utf-8'); + fs.renameSync(tmp, target); + return validated; +} + +/** Append one decision record to the audit log as a JSON line. */ +export function appendDecision(record: PianolaDecisionRecord): void { + const dir = pianolaDir(); + if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); + fs.appendFileSync(decisionsPath(), `${JSON.stringify(record)}\n`, 'utf-8'); +} + +/** + * Read recent decision records (most recent last). Malformed/invalid lines are + * skipped; records sharing an id (intent + outcome) are folded, latest winning. + */ +export function readDecisions(limit?: number): PianolaDecisionRecord[] { + let content: string; + try { + content = fs.readFileSync(decisionsPath(), 'utf-8'); + } catch (error) { + if ((error as NodeJS.ErrnoException).code === 'ENOENT') return []; + throw error; + } + const byId = new Map(); + for (const line of content.split('\n')) { + const trimmed = line.trim(); + if (!trimmed) continue; + let parsed: unknown; + try { + parsed = JSON.parse(trimmed); + } catch { + continue; + } + const record = validatePianolaDecisionRecord(parsed); + if (record) byId.set(record.id, record); + } + const records = [...byId.values()]; + if (limit !== undefined && limit >= 0 && records.length > limit) { + return records.slice(records.length - limit); + } + return records; +} diff --git a/src/main/preload/index.ts b/src/main/preload/index.ts index 4feb0449b2..0941ffe96f 100644 --- a/src/main/preload/index.ts +++ b/src/main/preload/index.ts @@ -53,6 +53,7 @@ import { createTabNamingApi } from './tabNaming'; import { createDirectorNotesApi } from './directorNotes'; import { createCueApi } from './cue'; import { createCueBackupApi } from './cueBackup'; +import { createPianolaApi } from './pianola'; import { createWakatimeApi } from './wakatime'; import { createMaestroCliApi } from './maestroCli'; import { createPromptsApi } from './prompts'; @@ -209,6 +210,9 @@ contextBridge.exposeInMainWorld('maestro', { // Cue Backup API (Cue modal Backup tab — snapshot/restore cue.yaml + prompts) cueBackup: createCueBackupApi(), + // Pianola API (autonomous manager: rules + decision log) + pianola: createPianolaApi(), + // WakaTime API (CLI check, API key validation) wakatime: createWakatimeApi(), @@ -298,6 +302,8 @@ export { createCueApi, // Cue Backup createCueBackupApi, + // Pianola + createPianolaApi, // WakaTime createWakatimeApi, // Maestro CLI @@ -532,6 +538,10 @@ export type { CueEventType, CueRunStatus, } from './cue'; +export type { + // From pianola + PianolaApi, +} from './pianola'; export type { // From wakatime WakatimeApi, diff --git a/src/main/preload/pianola.ts b/src/main/preload/pianola.ts new file mode 100644 index 0000000000..c9ce906daa --- /dev/null +++ b/src/main/preload/pianola.ts @@ -0,0 +1,35 @@ +/** + * Preload API for Pianola (autonomous manager agent). + * + * Provides the window.maestro.pianola namespace for managing auto-answer rules + * and reading the decision audit log. All channels are gated in the main + * process on the `pianola` Encore flag; when it is off they reject with + * 'PianolaDisabled', which callers treat as "feature off". + */ + +import { ipcRenderer } from 'electron'; +import type { PianolaRule } from '../../shared/pianola/types'; +import type { PianolaDecisionRecord } from '../../shared/pianola/storage'; + +/** + * Creates the Pianola API object for contextBridge exposure. + */ +export function createPianolaApi() { + return { + /** Read all auto-answer rules (validated; malformed entries dropped). */ + getRules: (): Promise => ipcRenderer.invoke('pianola:get-rules'), + + /** Persist the full rules list. Returns the validated, saved rules. */ + saveRules: (rules: PianolaRule[]): Promise => + ipcRenderer.invoke('pianola:save-rules', rules), + + /** + * Read recent decision audit records (most recent last). Pass a limit to + * tail the log; omit it for the full history. + */ + getDecisions: (limit?: number): Promise => + ipcRenderer.invoke('pianola:get-decisions', limit), + }; +} + +export type PianolaApi = ReturnType; diff --git a/src/renderer/global.d.ts b/src/renderer/global.d.ts index 8876dfb5ee..7f0f825b9e 100644 --- a/src/renderer/global.d.ts +++ b/src/renderer/global.d.ts @@ -163,6 +163,8 @@ import type { CueStatsAggregation, CueStatsTimeRange } from '../shared/cue-stats import type { DurationPercentiles } from '../shared/percentiles'; import type { MaestroCliStatus, MaestroCliInstallResult } from '../shared/maestro-cli'; import type { GitWorktreeSetupResult, GitWorktreeCheckoutResult } from '../main/preload/git'; +import type { PianolaRule } from '../shared/pianola/types'; +import type { PianolaDecisionRecord } from '../shared/pianola/storage'; interface MaestroAPI { // Context merging API (for session context transfer and grooming) @@ -3476,6 +3478,14 @@ interface MaestroAPI { delete: (filePath: string) => Promise; }; + // Pianola API (autonomous manager: rules + decision log) + // All channels reject with 'PianolaDisabled' when the Encore flag is off. + pianola: { + getRules: () => Promise; + saveRules: (rules: PianolaRule[]) => Promise; + getDecisions: (limit?: number) => Promise; + }; + // WakaTime API (CLI check, API key validation) wakatime: { checkCli: () => Promise<{ available: boolean; version?: string }>; From a793c951c18342fd54355be16266d13f4b4c327e Mon Sep 17 00:00:00 2001 From: jSydorowicz21 Date: Thu, 25 Jun 2026 01:10:36 -0500 Subject: [PATCH 07/76] feat(pianola): desktop management UI (decision log + rules editor) --- .../renderer/components/PianolaModal.test.tsx | 140 +++++ src/__tests__/setup.ts | 6 + src/renderer/App.tsx | 7 + .../components/AppModals/AppModals.tsx | 5 + .../components/AppModals/AppUtilityModals.tsx | 5 + .../components/AppStandaloneModals.tsx | 12 + .../components/PianolaModal/PianolaModal.tsx | 589 ++++++++++++++++++ .../components/PianolaModal/RuleEditor.tsx | 370 +++++++++++ src/renderer/components/PianolaModal/index.ts | 3 + .../QuickActionsModal/QuickActionsModal.tsx | 2 + .../commands/featureCommands.ts | 14 + .../components/QuickActionsModal/types.ts | 1 + .../SessionList/HamburgerMenuContent.tsx | 20 + src/renderer/constants/modalPriorities.ts | 6 + src/renderer/stores/modalStore.ts | 12 +- 15 files changed, 1191 insertions(+), 1 deletion(-) create mode 100644 src/__tests__/renderer/components/PianolaModal.test.tsx create mode 100644 src/renderer/components/PianolaModal/PianolaModal.tsx create mode 100644 src/renderer/components/PianolaModal/RuleEditor.tsx create mode 100644 src/renderer/components/PianolaModal/index.ts diff --git a/src/__tests__/renderer/components/PianolaModal.test.tsx b/src/__tests__/renderer/components/PianolaModal.test.tsx new file mode 100644 index 0000000000..b396cfd4fd --- /dev/null +++ b/src/__tests__/renderer/components/PianolaModal.test.tsx @@ -0,0 +1,140 @@ +/** + * @fileoverview Tests for the Pianola modal: the pure rule-summary helpers and a + * render smoke test covering empty states, tab switching, and opening the rule + * editor. window.maestro.pianola is mocked globally in setup.ts. + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { render, screen, fireEvent, waitFor } from '@testing-library/react'; +import { + PianolaModal, + describeRuleMatch, + newBlankRule, +} from '../../../renderer/components/PianolaModal'; +import type { Theme } from '../../../renderer/types'; +import type { PianolaRule } from '../../../shared/pianola/types'; + +// useModalLayer pulls in the layer-stack context; stub it for isolated rendering. +vi.mock('../../../renderer/hooks/ui/useModalLayer', () => ({ + useModalLayer: vi.fn(), +})); + +const theme = { + colors: { + bgMain: '#1a1a2e', + bgSidebar: '#16213e', + bgActivity: '#0f3460', + textMain: '#e8e8e8', + textDim: '#888888', + accent: '#7b2cbf', + border: '#333355', + }, +} as unknown as Theme; + +function rule(over: Partial = {}): PianolaRule { + return { + id: 'r1', + enabled: true, + scope: 'global', + match: {}, + action: 'escalate', + priority: 100, + createdAt: 1, + updatedAt: 1, + ...over, + }; +} + +describe('describeRuleMatch', () => { + it('summarizes an unconstrained rule as "any prompt"', () => { + expect(describeRuleMatch(rule())).toBe('any prompt'); + }); + + it('summarizes risk, kinds, and topic', () => { + const summary = describeRuleMatch( + rule({ match: { maxRisk: 'low', kinds: ['question'], topicIncludes: ['naming'] } }) + ); + expect(summary).toContain('risk <= low'); + expect(summary).toContain('kind: question'); + expect(summary).toContain('topic ~ naming'); + }); +}); + +describe('newBlankRule', () => { + it('creates an enabled global auto-answer rule with a narrowing default', () => { + const r = newBlankRule(); + expect(r.enabled).toBe(true); + expect(r.scope).toBe('global'); + expect(r.action).toBe('auto_answer'); + // Must ship a narrowing condition so the policy will accept it once an answer is set. + expect((r.match.kinds?.length ?? 0) > 0 || (r.match.topicIncludes?.length ?? 0) > 0).toBe(true); + expect(typeof r.id).toBe('string'); + expect(r.id.length).toBeGreaterThan(0); + }); +}); + +describe('PianolaModal', () => { + beforeEach(() => { + vi.clearAllMocks(); + vi.mocked(window.maestro.pianola.getRules).mockResolvedValue([]); + vi.mocked(window.maestro.pianola.getDecisions).mockResolvedValue([]); + }); + + it('loads rules and decisions on mount and shows empty states', async () => { + render(); + + expect(screen.getByText('Pianola')).toBeInTheDocument(); + await waitFor(() => { + expect(window.maestro.pianola.getRules).toHaveBeenCalled(); + expect(window.maestro.pianola.getDecisions).toHaveBeenCalled(); + }); + expect(await screen.findByText('No decisions recorded yet.')).toBeInTheDocument(); + }); + + it('switches to the rules tab and shows the empty rules state', async () => { + render(); + await screen.findByText('No decisions recorded yet.'); + + fireEvent.click(screen.getByText('Rules (0)')); + expect( + await screen.findByText(/Without rules, Pianola escalates everything/) + ).toBeInTheDocument(); + }); + + it('opens the rule editor from the rules tab', async () => { + render(); + await screen.findByText('No decisions recorded yet.'); + fireEvent.click(screen.getByText('Rules (0)')); + + fireEvent.click(await screen.findByText('Add rule')); + expect(await screen.findByText('New rule')).toBeInTheDocument(); + // The default auto-answer needs reply text, so Save is blocked until provided. + expect(screen.getByText('An auto-answer rule needs reply text.')).toBeInTheDocument(); + }); + + it('renders a high-risk escalation decision', async () => { + vi.mocked(window.maestro.pianola.getDecisions).mockResolvedValue([ + { + id: 'd1', + timestamp: '2026-01-01T00:00:00.000Z', + tabId: 'tab-1', + agentId: 'agent-1', + classification: { + kind: 'question', + risk: 'high', + topic: 'deploy to production?', + confidence: 'high', + evidence: { messageId: 'm1', reason: 'high-risk', structured: false }, + }, + decision: { action: 'escalate', matchedRuleId: null, reason: 'high risk always escalates' }, + dispatched: false, + dryRun: false, + }, + ]); + + render(); + expect(await screen.findByText('deploy to production?')).toBeInTheDocument(); + expect(screen.getByText('high risk always escalates')).toBeInTheDocument(); + expect(screen.getAllByText('Escalated').length).toBeGreaterThan(0); + }); +}); diff --git a/src/__tests__/setup.ts b/src/__tests__/setup.ts index 357086b4e8..0fcfe72224 100644 --- a/src/__tests__/setup.ts +++ b/src/__tests__/setup.ts @@ -618,6 +618,12 @@ const mockMaestro = { validateYaml: vi.fn().mockResolvedValue({ valid: true, errors: [] }), onActivityUpdate: vi.fn().mockReturnValue(() => {}), }, + // Pianola API (autonomous manager: rules + decision log) + pianola: { + getRules: vi.fn().mockResolvedValue([]), + saveRules: vi.fn().mockImplementation((rules: unknown) => Promise.resolve(rules)), + getDecisions: vi.fn().mockResolvedValue([]), + }, // Core Prompts API (disk-based prompts loaded at runtime) prompts: { get: vi.fn().mockResolvedValue({ success: true, content: '' }), diff --git a/src/renderer/App.tsx b/src/renderer/App.tsx index 602c413f82..525e82dad6 100644 --- a/src/renderer/App.tsx +++ b/src/renderer/App.tsx @@ -350,6 +350,8 @@ function MaestroConsoleInner() { setDirectorNotesOpen, // Maestro Cue Modal — cueModalOpen now self-sourced in AppStandaloneModals setCueModalOpen, + // Pianola Modal — pianolaModalOpen now self-sourced in AppStandaloneModals + setPianolaModalOpen, // Maestro Cue YAML Editor — open state, sessionId, projectRoot self-sourced in AppStandaloneModals closeCueYamlEditor, } = useModalActions(); @@ -489,6 +491,10 @@ function MaestroConsoleInner() { } }, [encoreFeatures.maestroCue, setCueModalOpen, closeCueYamlEditor]); + useEffect(() => { + if (!encoreFeatures.pianola) setPianolaModalOpen(false); + }, [encoreFeatures.pianola, setPianolaModalOpen]); + // --- KEYBOARD SHORTCUT HELPERS --- const { isShortcut, isTabShortcut } = useKeyboardShortcutHelpers({ shortcuts, @@ -3144,6 +3150,7 @@ function MaestroConsoleInner() { encoreFeatures.directorNotes ? () => setDirectorNotesOpen(true) : undefined } onOpenMaestroCue={encoreFeatures.maestroCue ? () => setCueModalOpen(true) : undefined} + onOpenPianola={encoreFeatures.pianola ? () => setPianolaModalOpen(true) : undefined} onConfigureCue={encoreFeatures.maestroCue ? handleConfigureCue : undefined} onCloseTabSwitcher={handleCloseTabSwitcher} onTabSelect={handleUtilityTabSelect} diff --git a/src/renderer/components/AppModals/AppModals.tsx b/src/renderer/components/AppModals/AppModals.tsx index ee5223df07..1a7157de54 100644 --- a/src/renderer/components/AppModals/AppModals.tsx +++ b/src/renderer/components/AppModals/AppModals.tsx @@ -309,6 +309,8 @@ export interface AppModalsProps { onOpenDirectorNotes?: () => void; // Maestro Cue onOpenMaestroCue?: () => void; + // Pianola + onOpenPianola?: () => void; onConfigureCue?: (session: Session) => void; onCloseTabSwitcher: () => void; onTabSelect: (tabId: string) => void; @@ -770,6 +772,8 @@ export const AppModals = memo(function AppModals(props: AppModalsProps) { onOpenDirectorNotes, // Maestro Cue onOpenMaestroCue, + // Pianola + onOpenPianola, onConfigureCue, onCloseTabSwitcher, onTabSelect, @@ -1096,6 +1100,7 @@ export const AppModals = memo(function AppModals(props: AppModalsProps) { onOpenSymphony={onOpenSymphony} onOpenDirectorNotes={onOpenDirectorNotes} onOpenMaestroCue={onOpenMaestroCue} + onOpenPianola={onOpenPianola} onConfigureCue={onConfigureCue} lightboxImage={lightboxImage} lightboxImages={lightboxImages} diff --git a/src/renderer/components/AppModals/AppUtilityModals.tsx b/src/renderer/components/AppModals/AppUtilityModals.tsx index 39773938e8..25fd7e5583 100644 --- a/src/renderer/components/AppModals/AppUtilityModals.tsx +++ b/src/renderer/components/AppModals/AppUtilityModals.tsx @@ -168,6 +168,8 @@ export interface AppUtilityModalsProps { // Maestro Cue onOpenMaestroCue?: () => void; + // Pianola + onOpenPianola?: () => void; onConfigureCue?: (session: Session) => void; // LightboxModal @@ -404,6 +406,8 @@ export const AppUtilityModals = memo(function AppUtilityModals({ onOpenDirectorNotes, // Maestro Cue onOpenMaestroCue, + // Pianola + onOpenPianola, onConfigureCue, // LightboxModal lightboxImage, @@ -598,6 +602,7 @@ export const AppUtilityModals = memo(function AppUtilityModals({ onOpenSymphony={onOpenSymphony} onOpenDirectorNotes={onOpenDirectorNotes} onOpenMaestroCue={onOpenMaestroCue} + onOpenPianola={onOpenPianola} onConfigureCue={onConfigureCue} onOpenQueueBrowser={onOpenQueueBrowser} onNewTab={onQuickActionsNewTab} diff --git a/src/renderer/components/AppStandaloneModals.tsx b/src/renderer/components/AppStandaloneModals.tsx index fd33458398..d17015609a 100644 --- a/src/renderer/components/AppStandaloneModals.tsx +++ b/src/renderer/components/AppStandaloneModals.tsx @@ -60,6 +60,9 @@ const CueModal = lazy(() => import('./CueModal').then((m) => ({ default: m.CueMo const CueYamlEditor = lazy(() => import('./CueYamlEditor').then((m) => ({ default: m.CueYamlEditor })) ); +const PianolaModal = lazy(() => + import('./PianolaModal').then((m) => ({ default: m.PianolaModal })) +); /** * Props for the AppStandaloneModals component. @@ -240,6 +243,8 @@ function AppStandaloneModalsInner({ setDirectorNotesOpen, cueModalOpen, setCueModalOpen, + pianolaModalOpen, + setPianolaModalOpen, cueYamlEditorOpen, cueYamlEditorSessionId, cueYamlEditorProjectRoot, @@ -394,6 +399,13 @@ function AppStandaloneModalsInner({ )} + {/* --- PIANOLA MODAL (lazy-loaded, Encore Feature) --- */} + {encoreFeatures.pianola && pianolaModalOpen && ( + + setPianolaModalOpen(false)} /> + + )} + {/* --- MAESTRO CUE YAML EDITOR (standalone, lazy-loaded) --- */} {encoreFeatures.maestroCue && cueYamlEditorOpen && diff --git a/src/renderer/components/PianolaModal/PianolaModal.tsx b/src/renderer/components/PianolaModal/PianolaModal.tsx new file mode 100644 index 0000000000..ce45b234fd --- /dev/null +++ b/src/renderer/components/PianolaModal/PianolaModal.tsx @@ -0,0 +1,589 @@ +import { useCallback, useEffect, useMemo, useState } from 'react'; +import { createPortal } from 'react-dom'; +import { + X, + Music, + ShieldAlert, + Send, + Ban, + Plus, + Trash2, + Pencil, + RefreshCw, + AlertTriangle, +} from 'lucide-react'; +import type { Theme } from '../../types'; +import type { + PianolaRule, + PianolaRuleScope, + PianolaSignalKind, + PianolaActionKind, + PianolaRisk, +} from '../../../shared/pianola/types'; +import type { PianolaDecisionRecord } from '../../../shared/pianola/storage'; +import { useModalLayer } from '../../hooks/ui/useModalLayer'; +import { MODAL_PRIORITIES } from '../../constants/modalPriorities'; +import { generateId } from '../../utils/ids'; +import { notifyToast } from '../../stores/notificationStore'; +import { logger } from '../../utils/logger'; +import { RuleEditor } from './RuleEditor'; + +export interface PianolaModalProps { + theme: Theme; + onClose: () => void; +} + +type PianolaTab = 'decisions' | 'rules'; +type DecisionFilter = 'all' | 'escalate' | 'auto_answer'; + +const ACTION_META: Record = + { + auto_answer: { label: 'Auto-answered', color: '#22c55e', Icon: Send }, + escalate: { label: 'Escalated', color: '#f59e0b', Icon: ShieldAlert }, + ignore: { label: 'Ignored', color: '#94a3b8', Icon: Ban }, + }; + +const RISK_COLOR: Record = { + low: '#22c55e', + medium: '#f59e0b', + high: '#ef4444', +}; + +/** One-line, human-readable summary of a rule's match conditions. */ +export function describeRuleMatch(rule: PianolaRule): string { + const parts: string[] = []; + if (rule.match.maxRisk) parts.push(`risk <= ${rule.match.maxRisk}`); + if (rule.match.kinds && rule.match.kinds.length > 0) + parts.push(`kind: ${rule.match.kinds.join(', ')}`); + if (rule.match.topicIncludes && rule.match.topicIncludes.length > 0) + parts.push(`topic ~ ${rule.match.topicIncludes.join(' / ')}`); + return parts.length > 0 ? parts.join(' - ') : 'any prompt'; +} + +function scopeLabel(rule: PianolaRule): string { + if (rule.scope === 'global') return 'global'; + if (rule.scope === 'project') return `project: ${rule.scopeId ?? '(unset)'}`; + return `tab: ${rule.scopeId ?? '(unset)'}`; +} + +/** + * Pianola dashboard. Two tabs: the decision audit log (what Pianola did and what + * it escalated) and the editable auto-answer rules. Reads and writes the same + * files the CLI watcher uses, via the gated `window.maestro.pianola` bridge. + */ +export function PianolaModal({ theme, onClose }: PianolaModalProps) { + useModalLayer(MODAL_PRIORITIES.PIANOLA_MODAL, 'Pianola', onClose); + + const [tab, setTab] = useState('decisions'); + const [rules, setRules] = useState([]); + const [decisions, setDecisions] = useState([]); + const [filter, setFilter] = useState('all'); + const [loading, setLoading] = useState(true); + const [editing, setEditing] = useState(null); + const [creating, setCreating] = useState(false); + + const load = useCallback(async () => { + setLoading(true); + try { + const [loadedRules, loadedDecisions] = await Promise.all([ + window.maestro.pianola.getRules(), + window.maestro.pianola.getDecisions(500), + ]); + setRules(loadedRules); + setDecisions(loadedDecisions); + } catch (error) { + logger.error('[Pianola] Failed to load', undefined, error); + notifyToast({ + color: 'red', + title: 'Pianola', + message: 'Could not load rules and decisions.', + }); + } finally { + setLoading(false); + } + }, []); + + useEffect(() => { + void load(); + }, [load]); + + const persistRules = useCallback( + async (next: PianolaRule[]) => { + try { + const saved = await window.maestro.pianola.saveRules(next); + setRules(saved); + return true; + } catch (error) { + logger.error('[Pianola] Failed to save rules', undefined, error); + notifyToast({ color: 'red', title: 'Pianola', message: 'Could not save rules.' }); + void load(); + return false; + } + }, + [load] + ); + + const handleToggleRule = useCallback( + (id: string) => { + const next = rules.map((r) => + r.id === id ? { ...r, enabled: !r.enabled, updatedAt: Date.now() } : r + ); + void persistRules(next); + }, + [rules, persistRules] + ); + + const handleDeleteRule = useCallback( + (id: string) => { + void persistRules(rules.filter((r) => r.id !== id)); + }, + [rules, persistRules] + ); + + const handleSaveRule = useCallback( + async (rule: PianolaRule) => { + const exists = rules.some((r) => r.id === rule.id); + const next = exists ? rules.map((r) => (r.id === rule.id ? rule : r)) : [...rules, rule]; + const ok = await persistRules(next); + if (ok) { + setEditing(null); + setCreating(false); + } + }, + [rules, persistRules] + ); + + const sortedRules = useMemo( + () => [...rules].sort((a, b) => a.priority - b.priority || a.createdAt - b.createdAt), + [rules] + ); + + const filteredDecisions = useMemo(() => { + const view = + filter === 'all' ? decisions : decisions.filter((d) => d.decision.action === filter); + // Most recent first for reading. + return [...view].reverse(); + }, [decisions, filter]); + + const escalationCount = useMemo( + () => decisions.filter((d) => d.decision.action === 'escalate').length, + [decisions] + ); + + return createPortal( +
{ + if (e.target === e.currentTarget) onClose(); + }} + > +
+ +
+ {/* Header */} +
+
+ +

+ Pianola +

+ + autonomous manager + +
+
+ + +
+
+ + {/* Tabs */} +
+ {( + [ + ['decisions', `Decisions${escalationCount ? ` (${escalationCount} escalated)` : ''}`], + ['rules', `Rules (${rules.length})`], + ] as const + ).map(([id, label]) => ( + + ))} +
+ + {/* Body */} +
+ {tab === 'decisions' ? ( + + ) : ( + setCreating(true)} + onEdit={setEditing} + onToggle={handleToggleRule} + onDelete={handleDeleteRule} + /> + )} +
+
+ + {/* Rule editor (create or edit) */} + {(creating || editing) && ( + { + setEditing(null); + setCreating(false); + }} + onSave={handleSaveRule} + /> + )} +
, + document.body + ); +} + +interface DecisionsViewProps { + theme: Theme; + decisions: PianolaDecisionRecord[]; + filter: DecisionFilter; + onFilterChange: (f: DecisionFilter) => void; + loading: boolean; +} + +function DecisionsView({ theme, decisions, filter, onFilterChange, loading }: DecisionsViewProps) { + return ( +
+
+ {( + [ + ['all', 'All'], + ['escalate', 'Escalations'], + ['auto_answer', 'Auto-answered'], + ] as const + ).map(([id, label]) => ( + + ))} +
+ + {decisions.length === 0 ? ( +
+ {loading ? 'Loading...' : 'No decisions recorded yet.'} +
+ ) : ( +
+ {decisions.map((d) => { + const meta = ACTION_META[d.decision.action]; + return ( +
+
+
+ + + {meta.label} + + + {d.classification.kind} / {d.classification.risk} + + {d.dryRun && ( + + dry-run + + )} +
+ + {new Date(d.timestamp).toLocaleString()} + +
+ + {d.classification.topic && ( +
+ {d.classification.topic} +
+ )} + +
+ {d.decision.reason} + {d.decision.action === 'auto_answer' && ( + <> + {' '} + → replied:{' '} + + “{d.decision.answer}” + + + )} +
+ +
+ agent: {d.agentId} + tab: {d.tabId} + {d.decision.action === 'auto_answer' && ( + + {d.dispatched ? 'sent' : 'not sent'} + + )} +
+ + {d.error && ( +
+ + {d.error} +
+ )} +
+ ); + })} +
+ )} +
+ ); +} + +interface RulesViewProps { + theme: Theme; + rules: PianolaRule[]; + loading: boolean; + onAdd: () => void; + onEdit: (rule: PianolaRule) => void; + onToggle: (id: string) => void; + onDelete: (id: string) => void; +} + +function RulesView({ theme, rules, loading, onAdd, onEdit, onToggle, onDelete }: RulesViewProps) { + return ( +
+
+

+ Rules let Pianola auto-answer low-risk prompts. High-risk prompts always escalate to you, + no matter the rules. An auto-answer rule needs a narrowing condition (a kind or topic) and + reply text. +

+ +
+ + {rules.length === 0 ? ( +
+ {loading ? 'Loading...' : 'No rules yet. Without rules, Pianola escalates everything.'} +
+ ) : ( +
+ {rules.map((rule) => { + const meta = ACTION_META[rule.action]; + return ( +
+ {/* Enable toggle */} + + +
+
+ + + {meta.label} + + + {scopeLabel(rule)} + + + priority {rule.priority} + +
+
+ when {describeRuleMatch(rule)} +
+ {rule.action === 'auto_answer' && rule.answer && ( +
+ reply: “{rule.answer}” +
+ )} + {rule.description && ( +
+ {rule.description} +
+ )} +
+ +
+ + +
+
+ ); + })} +
+ )} +
+ ); +} + +/** Factory for a blank rule, used when creating. */ +export function newBlankRule(): PianolaRule { + const now = Date.now(); + return { + id: generateId(), + enabled: true, + scope: 'global', + match: { maxRisk: 'low', kinds: ['question'] }, + action: 'auto_answer', + answer: '', + priority: 100, + createdAt: now, + updatedAt: now, + }; +} + +// Re-export the option constants so RuleEditor and tests share one source. +export const RULE_SCOPES: readonly PianolaRuleScope[] = ['global', 'project', 'tab']; +export const RULE_ACTIONS: readonly PianolaActionKind[] = ['auto_answer', 'escalate', 'ignore']; +export const RULE_RISKS: readonly PianolaRisk[] = ['low', 'medium', 'high']; +export const RULE_KINDS: readonly PianolaSignalKind[] = ['question', 'blocked']; diff --git a/src/renderer/components/PianolaModal/RuleEditor.tsx b/src/renderer/components/PianolaModal/RuleEditor.tsx new file mode 100644 index 0000000000..9d19abbea0 --- /dev/null +++ b/src/renderer/components/PianolaModal/RuleEditor.tsx @@ -0,0 +1,370 @@ +import { useMemo, useState } from 'react'; +import { createPortal } from 'react-dom'; +import { X } from 'lucide-react'; +import type { Theme } from '../../types'; +import type { + PianolaRule, + PianolaRuleScope, + PianolaSignalKind, + PianolaActionKind, + PianolaRisk, +} from '../../../shared/pianola/types'; +import { useModalLayer } from '../../hooks/ui/useModalLayer'; +import { MODAL_PRIORITIES } from '../../constants/modalPriorities'; +import { RULE_SCOPES, RULE_ACTIONS, RULE_RISKS, RULE_KINDS, newBlankRule } from './PianolaModal'; + +export interface RuleEditorProps { + theme: Theme; + /** The rule to edit, or null to create a new one. */ + rule: PianolaRule | null; + onCancel: () => void; + onSave: (rule: PianolaRule) => void; +} + +/** Editable working copy: arrays become comma strings for text inputs. */ +interface Draft { + enabled: boolean; + scope: PianolaRuleScope; + scopeId: string; + maxRisk: PianolaRisk | ''; + kinds: PianolaSignalKind[]; + topicIncludes: string; + action: PianolaActionKind; + answer: string; + priority: number; + description: string; +} + +function toDraft(rule: PianolaRule): Draft { + return { + enabled: rule.enabled, + scope: rule.scope, + scopeId: rule.scopeId ?? '', + maxRisk: rule.match.maxRisk ?? '', + kinds: rule.match.kinds ?? [], + topicIncludes: (rule.match.topicIncludes ?? []).join(', '), + action: rule.action, + answer: rule.answer ?? '', + priority: rule.priority, + description: rule.description ?? '', + }; +} + +/** + * Modal form for creating or editing one Pianola rule. Layered above the Pianola + * dashboard. Mirrors the shared policy's safety contract in the UI: an + * auto-answer rule must have a narrowing condition (a kind or topic) and a reply, + * since the policy refuses to auto-answer an unconstrained rule. + */ +export function RuleEditor({ theme, rule, onCancel, onSave }: RuleEditorProps) { + useModalLayer(MODAL_PRIORITIES.PIANOLA_RULE_EDITOR, 'Pianola Rule Editor', onCancel); + + const base = useMemo(() => rule ?? newBlankRule(), [rule]); + const [draft, setDraft] = useState(() => toDraft(base)); + + const isAutoAnswer = draft.action === 'auto_answer'; + const hasNarrowing = + draft.kinds.length > 0 || + draft.topicIncludes + .split(',') + .map((s) => s.trim()) + .filter(Boolean).length > 0; + const answerTrimmed = draft.answer.trim(); + + const validationError = useMemo(() => { + if (draft.scope !== 'global' && draft.scopeId.trim().length === 0) { + return `A ${draft.scope} rule needs a ${draft.scope === 'project' ? 'project path' : 'tab id'}.`; + } + if (isAutoAnswer && !hasNarrowing) { + return 'An auto-answer rule needs a narrowing condition: pick a kind or add a topic.'; + } + if (isAutoAnswer && answerTrimmed.length === 0) { + return 'An auto-answer rule needs reply text.'; + } + return null; + }, [draft.scope, draft.scopeId, isAutoAnswer, hasNarrowing, answerTrimmed]); + + const handleSave = () => { + if (validationError) return; + const topicIncludes = draft.topicIncludes + .split(',') + .map((s) => s.trim()) + .filter(Boolean); + const match: PianolaRule['match'] = {}; + if (draft.maxRisk) match.maxRisk = draft.maxRisk; + if (draft.kinds.length > 0) match.kinds = draft.kinds; + if (topicIncludes.length > 0) match.topicIncludes = topicIncludes; + + const next: PianolaRule = { + id: base.id, + enabled: draft.enabled, + scope: draft.scope, + match, + action: draft.action, + priority: Number.isFinite(draft.priority) ? draft.priority : 100, + createdAt: base.createdAt, + updatedAt: Date.now(), + }; + if (draft.scope !== 'global' && draft.scopeId.trim()) next.scopeId = draft.scopeId.trim(); + if (isAutoAnswer && answerTrimmed) next.answer = answerTrimmed; + if (draft.description.trim()) next.description = draft.description.trim(); + + onSave(next); + }; + + const labelStyle = { color: theme.colors.textDim }; + const inputStyle = { + backgroundColor: theme.colors.bgMain, + borderColor: theme.colors.border, + color: theme.colors.textMain, + }; + + return createPortal( +
{ + if (e.target === e.currentTarget) onCancel(); + }} + > +
+ +
+
+

+ {rule ? 'Edit rule' : 'New rule'} +

+ +
+ +
+ {/* Action */} +
+ +
+ {RULE_ACTIONS.map((a) => ( + + ))} +
+
+ + {/* Scope */} +
+
+ + +
+ {draft.scope !== 'global' && ( +
+ + setDraft((d) => ({ ...d, scopeId: e.target.value }))} + className="w-full px-2 py-1.5 text-sm rounded-md border" + style={inputStyle} + placeholder={draft.scope === 'project' ? '/path/to/project' : 'tab-id'} + /> +
+ )} +
+ + {/* Match: maxRisk + priority */} +
+
+ + +
+
+ + setDraft((d) => ({ ...d, priority: Number(e.target.value) }))} + className="w-full px-2 py-1.5 text-sm rounded-md border" + style={inputStyle} + /> +
+
+ + {/* Match: kinds */} +
+ +
+ {RULE_KINDS.map((k) => { + const active = draft.kinds.includes(k); + return ( + + ); + })} +
+
+ + {/* Match: topicIncludes */} +
+ + setDraft((d) => ({ ...d, topicIncludes: e.target.value }))} + className="w-full px-2 py-1.5 text-sm rounded-md border" + style={inputStyle} + placeholder="naming, formatting" + /> +
+ + {/* Answer (auto_answer only) */} + {isAutoAnswer && ( +
+ +