From d4efebdc3515989fe7549342f5a18a607c87278d Mon Sep 17 00:00:00 2001 From: Val Alexander Date: Fri, 19 Jun 2026 22:03:31 -0500 Subject: [PATCH 1/6] feat: operationalize coven-github hosted MVP --- DESIGN.md | 98 +++++++++++++ FAMILIAR-CONTRACT.md | 57 ++++++++ HOSTED.md | 76 ++++++++++ README.md | 25 +++- ROADMAP.md | 74 ++++++++++ crates/github/src/lib.rs | 12 ++ crates/webhook/src/events.rs | 68 ++++++++- crates/webhook/src/routes.rs | 146 ++++++++++++++++++- crates/worker/src/lib.rs | 87 ++++++++++- docs/container-isolation.md | 63 ++++++++ docs/hosted-mvp-plan.md | 103 +++++++++++++ docs/hosted-vs-self-hosted.md | 91 ++++++++++++ docs/security.md | 82 +++++++++++ docs/self-hosting.md | 56 ++++++- examples/familiar-github-starter/README.md | 13 ++ examples/familiar-github-starter/config.toml | 34 +++++ 16 files changed, 1072 insertions(+), 13 deletions(-) create mode 100644 DESIGN.md create mode 100644 FAMILIAR-CONTRACT.md create mode 100644 HOSTED.md create mode 100644 ROADMAP.md create mode 100644 docs/container-isolation.md create mode 100644 docs/hosted-mvp-plan.md create mode 100644 docs/hosted-vs-self-hosted.md create mode 100644 docs/security.md create mode 100644 examples/familiar-github-starter/README.md create mode 100644 examples/familiar-github-starter/config.toml diff --git a/DESIGN.md b/DESIGN.md new file mode 100644 index 0000000..17f01b0 --- /dev/null +++ b/DESIGN.md @@ -0,0 +1,98 @@ +# coven-github Design + +`coven-github` is a thin GitHub ingress layer for trusted familiar work. It should not become a generic agent platform inside the GitHub App. The GitHub App accepts repository events, routes them to the right familiar, records task state, and keeps humans in control through Cave oversight. + +## Design Goal + +Assign GitHub work to a known familiar and get a draft PR back with visible context, evidence, and an oversight path. 
+ +The core design constraint is trust continuity: + +- The same familiar can return to the same repository with memory and team context. +- The team can see what the familiar used, changed, tested, and could not decide. +- The service can fail transparently without losing task state. +- The open-source adapter remains self-hostable so buyers can inspect the trust boundary. + +## Task Flow + +```text +GitHub event + -> webhook HMAC validation + -> event parsing and familiar routing + -> task record and queue + -> worker starts isolated session + -> coven-code --headless receives session brief + -> familiar drafts changes and result envelope + -> GitHub Check Run and PR are updated + -> Cave Board and session link expose oversight +``` + +## Routing Model + +The initial self-hosted adapter uses TOML familiar config: + +- `bot_username` routes issue assignment and mentions. +- `trigger_labels` route issue labels such as `coven:fix`. +- `skills` and `model` shape the familiar runtime. + +Hosted routing should move this into installation-scoped configuration: + +- installation id, +- organization, +- repository, +- familiar id, +- allowed trigger labels, +- memory scope, +- skill pack, +- model route, +- autonomy tier. + +## Cave Oversight Gate + +Cave oversight is the control plane. It should show: + +- task status and terminal state, +- familiar identity, +- repository and issue/PR links, +- Check Run link, +- session link, +- context and memory scope used, +- evidence collected, +- human decision points. + +Draft PRs are the default. The team should promote more autonomy only after the familiar earns trust through repeated visible work. + +## Trust Boundaries + +| Boundary | Rule | +|---|---| +| Webhook ingress | Validate HMAC before parsing or routing. | +| Tenant data | Scope task state by GitHub installation before hosted launch. | +| Worker execution | Run each task with a timeout and isolated workspace. 
| +| Git auth | Use installation tokens, not user credentials. | +| Memory | Make familiar memory opt-in, inspectable, and revocable. | +| Comments | Ignore familiar bot self-comments to avoid loops. | +| Output | Prefer draft PRs and explicit failure states. | + +## Hosted Reliability Requirements + +Hosted `coven-github` needs these before paid beta: + +1. Durable queue. +2. Persistent task store. +3. GitHub delivery idempotency. +4. Tenant-scoped task API auth. +5. Installation-scoped familiar routing. +6. Worker isolation and timeout enforcement. +7. Audit events for accepted, started, retried, timed out, failed, needs input, PR opened, and completed. + +## Why This Design Monetizes + +The paid product is not "an agent that can write code." The paid product is a managed trust pipeline: + +- Teams keep familiar context instead of re-explaining their standards. +- Managers get visibility instead of hidden automation. +- Security reviewers get a self-host path and a clear credential boundary. +- Engineers get PRs from a known actor with a history. + +That is what generic GitHub bots and one-shot coding agents do not provide. diff --git a/FAMILIAR-CONTRACT.md b/FAMILIAR-CONTRACT.md new file mode 100644 index 0000000..c8069c7 --- /dev/null +++ b/FAMILIAR-CONTRACT.md @@ -0,0 +1,57 @@ +# Familiar Contract for GitHub Work + +`coven-github` is not valuable because it can call the GitHub API. The value is that a team can deploy a familiar: a known, persistent, context-aware operator that understands the repo, the team's standards, and when to stop for human judgment. + +The GitHub App should make that trust visible. + +## Promise + +A GitHub familiar should: + +- Carry persistent identity across issues, PRs, repositories, and review cycles. +- Use the team's configured model, skills, memory, and operating rules. +- Explain what it changed and why in the familiar's voice. +- Prefer draft PRs and Cave oversight for non-trivial work. 
+- Treat ambiguity as a reason to ask, not a reason to guess. +- Preserve repo hygiene: small branches, tested changes, clear failure states. +- Make recovery easy when the task cannot be completed. + +## Behavioral Guarantees + +| Guarantee | Product behavior | +|---|---| +| Context continuity | The familiar can use organization/repo memory and prior task history when enabled. | +| Team fit | Routing and skills are configured per installation, repository, and familiar. | +| Human control | Cave oversight links appear in Check Runs, comments, and task state. | +| Failure transparency | Every task ends in a visible state: review, done, needs input, failed, or timed out. | +| Minimal surprise | Familiars open draft PRs by default until the team explicitly promotes automation. | +| No self-trigger loops | Bot-authored comments do not retrigger the same familiar. | +| Bounded execution | Worker timeout, retry, and isolation rules are enforced. | + +## Why This Beats Generic Agents + +Generic coding agents optimize for a single task. Familiars optimize for an ongoing working relationship. + +That matters most in PR clearing: + +- A generic agent can satisfy a prompt; a familiar can remember the team's release posture. +- A generic agent can run tests; a familiar can know which tests are trusted signal. +- A generic agent can make edits; a familiar can know when the change needs a design note, a migration path, or a human decision. +- A generic agent can produce output; a familiar can build trust over repeated work. + +## Operational Requirements + +To make the familiar promise real, hosted `coven-github` needs: + +1. Tenant-scoped familiar routing. +2. Durable task history and event idempotency. +3. Cave oversight as the default review surface. +4. Familiar memory boundaries that are opt-in, inspectable, and revocable. +5. Audit logs for task acceptance, execution, retries, timeout, PR creation, and human intervention. +6. 
Clear tier limits so teams know when a familiar is operating as a draft helper versus an autonomous maintainer. + +## Launch Rule + +Do not sell "autonomous code changes" first. Sell "a trusted familiar that drafts PRs under your team's control." + +Autonomy can expand after the service proves reliability through visible oversight, repeatable failure handling, and team-specific context. diff --git a/HOSTED.md b/HOSTED.md new file mode 100644 index 0000000..1a0bd40 --- /dev/null +++ b/HOSTED.md @@ -0,0 +1,76 @@ +# Hosted OpenCoven for GitHub + +Hosted OpenCoven is the managed version of `coven-github`: install the GitHub App, configure a familiar, assign an issue or label, and get a draft PR back with Cave oversight. + +The hosted tier should monetize managed reliability and familiar continuity, while the open-source adapter remains self-hostable for trust and inspection. + +## What Hosted Adds + +| Capability | Self-hosted adapter | Hosted OpenCoven | +|---|---|---| +| GitHub App ingress | You run it | Managed | +| Queue | In-process/dev path until configured | Durable queue | +| Task state | Local/in-memory unless extended | Persistent history | +| Worker isolation | Operator-managed | Managed worker pool | +| Familiar routing | Static config | Installation/repo scoped | +| Familiar memory | Local/operator-managed | Optional cloud memory | +| Cave oversight | Local Cave | Hosted-ready oversight links and dashboard | +| Usage limits | Operator-managed | Tiered limits and audit logs | +| Support | Community | Priority by tier | + +## Packaging + +| Tier | Buyer | Initial shape | +|---|---|---| +| Open / Self-host | OSS maintainers, security reviewers, local-first users | Free adapter, BYOM, one familiar, community support. | +| Hosted Starter | Small teams with backlog | Managed queue, one familiar, task caps, Cave oversight links. | +| Hosted Team | Product/platform teams | Multi-familiar routing, audit log, usage controls, priority queue, team memory. 
| +| Hosted Dedicated | Security-sensitive orgs | Dedicated workers, stronger retention controls, custom limits, SLA, onboarding support. | + +Launch with flat monthly tiers and task caps. Avoid pure usage billing until task duration and model-cost distribution are known. + +## Buyer Promise + +> Your familiar on your GitHub: the one that already knows your code, your team, and how you ship. + +The strongest buyer promise is trust continuity. A familiar should know the difference between "works" and "good enough for this repo." Hosted makes that reliable without making the customer operate workers, queues, or task history. + +## Data Boundaries + +Hosted should make these boundaries explicit before beta: + +- GitHub installation tokens are scoped to the installed repositories. +- User GitHub credentials are not used for worker pushes. +- Familiar memory is opt-in and scoped by installation/repository policy. +- Task history records metadata, status, evidence, links, and summaries. +- Raw repository workspaces are temporary and destroyed after the task. +- Secrets are redacted from logs and task output. + +## Beta Gate + +Hosted beta should wait for: + +1. Persistent task store. +2. Durable queue. +3. Tenant-scoped task API authentication. +4. Worker isolation with cleanup and timeouts. +5. Usage metering by installation, repo, familiar, and task. +6. Cave oversight dashboard for task history and human intervention. + +## Landing Page Copy + +Headline: + +> Assign it like a teammate. Get a PR back. + +Support copy: + +> OpenCoven lets your team deploy a trusted familiar to GitHub. It knows your repo context, follows your skills and review norms, drafts PRs under Cave oversight, and gets better as it works with your team. 
+ +Primary CTA: + +- Join hosted beta + +Secondary CTA: + +- Self-host the adapter diff --git a/README.md b/README.md index 397db50..2e75a4a 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,10 @@ Every existing GitHub coding agent is a black box: GitHub's model, GitHub's cont `coven-github` flips that. Your familiar is yours: your model, your skills, your memory, your voice in the PR body. The GitHub App is just the ingress layer. +That is the product wedge: assign it like a teammate, get a PR back, and keep Cave oversight in the loop. A familiar should know the difference between "technically works" and "good enough for this repo, this team, and this moment." + +See [Design](DESIGN.md), [Hosted OpenCoven](HOSTED.md), [Familiar Contract](FAMILIAR-CONTRACT.md), [Roadmap](ROADMAP.md), and [Hosted vs self-hosted](docs/hosted-vs-self-hosted.md) for the operational plan. + --- ## Architecture @@ -72,7 +76,22 @@ CovenCave oversight UI ← watch session live, intervene, steer ## Status -🚧 **In development.** See [COVEN-GITHUB.md](COVEN-GITHUB.md) for the full product spec. +🚧 **In development.** The repo has the first GitHub App adapter path wired, but hosted production readiness is still being built. See [COVEN-GITHUB.md](COVEN-GITHUB.md) for the roadmap-level product spec. + +| Capability | Status | Notes | +|---|---|---| +| Webhook HMAC validation | Implemented | Rejects unsigned or invalid GitHub webhook payloads. | +| Issue assignment trigger | Implemented | Routes matching bot assignees to configured familiars. | +| Label trigger | Implemented | Routes configured `trigger_labels` such as `coven:fix`. | +| Issue / PR mention trigger | Implemented | Ignores familiar bot self-comments to avoid loops. | +| GitHub App installation tokens | Implemented | Mints installation access tokens from the App private key. | +| Check Run creation and completion | Partial | Creates and updates Check Runs; branch/SHA resolution still needs production hardening. 
| +| `coven-code --headless` execution | Partial | Worker spawns headless sessions and enforces task timeouts; result quality depends on the runtime. | +| Pull request creation | Partial | Opens draft PRs from session results; base branch is still hardcoded to `main`. | +| CovenCave task polling | Partial | In-memory task API exists for local oversight; hosted control-plane auth and persistence are planned. | +| Durable queue / task store | Planned | Required for hosted reliability and restarts. | +| Hosted tier | Planned | See [Hosted vs self-hosted](docs/hosted-vs-self-hosted.md). | +| Familiar trust contract | Planned | See [Familiar Contract](FAMILIAR-CONTRACT.md). | --- @@ -91,7 +110,7 @@ cp config/example.toml config/local.toml\n# Set: github_app_id, private_key_path ./target/release/coven-github serve --config config/local.toml ``` -See [docs/self-hosting.md](docs/self-hosting.md) for full setup including GitHub App registration. +See [docs/self-hosting.md](docs/self-hosting.md) for full setup including GitHub App registration. For a minimal familiar route, start from [`examples/familiar-github-starter`](examples/familiar-github-starter/). --- @@ -99,7 +118,7 @@ See [docs/self-hosting.md](docs/self-hosting.md) for full setup including GitHub `coven-github` is open source and self-hostable. OpenCoven offers a **hosted tier** for organizations that want managed infra, cloud familiar memory, and multi-familiar routing without running their own workers. -See [opencoven.ai/github](https://opencoven.ai/github) for hosted tier details. +See [Hosted OpenCoven](HOSTED.md) and [Hosted vs self-hosted](docs/hosted-vs-self-hosted.md) for the service shape, security boundaries, and buyer packaging. 
--- diff --git a/ROADMAP.md b/ROADMAP.md new file mode 100644 index 0000000..842193f --- /dev/null +++ b/ROADMAP.md @@ -0,0 +1,74 @@ +# coven-github Roadmap + +This roadmap operationalizes `coven-github` as a hosted service funded by teams that want trusted familiar-driven GitHub work. + +## Strategic Thesis + +The market already has generic coding agents. OpenCoven's advantage is the familiar: + +- A known teammate, not a disposable bot. +- Context-aware across repos, issues, reviews, and team norms. +- Governed by skills, memory, and a visible trust contract. +- Watched and steered through Cave instead of hidden in a black box. + +The hosted product should monetize managed reliability and trust continuity: durable task infrastructure, worker isolation, auditability, familiar memory, and multi-familiar routing. + +## Milestone 1: Honest Self-Hosted Adapter + +Goal: a motivated user can run the adapter and understand exactly what works. + +- Implement issue assignment, label, issue mention, and PR review comment triggers. +- Enforce webhook HMAC validation and bot self-comment suppression. +- Enforce worker timeout behavior. +- Keep README status honest about implemented, partial, and planned capabilities. +- Publish security, isolation, self-hosting, hosted-vs-self-hosted, and familiar contract docs. + +## Milestone 2: Hosted Control Plane + +Goal: support real hosted installations without losing task state or leaking tenant context. + +- Persistent task store. +- Durable queue. +- GitHub delivery idempotency. +- Installation-scoped familiar routing. +- Tenant-scoped task API auth for Cave. +- Task audit log and terminal states. + +## Milestone 3: GitHub Correctness + +Goal: make the GitHub App reliable across normal repositories. + +- Resolve repository default branch through the GitHub API. +- Resolve Check Run head SHA instead of using placeholders. +- Use the repo default branch for PR base and session brief. +- Capture review-comment diff hunk context. 
+- Add transient GitHub API retry classification. +- Add webhook fixture tests for all supported triggers. + +## Milestone 4: Hosted Worker Fleet + +Goal: make familiar execution safe enough to charge for. + +- Containerized worker backend. +- CPU, memory, disk, network, and timeout limits. +- Workspace cleanup guarantees. +- Token redaction and secret handling tests. +- Usage metering by installation, repo, familiar, and task runtime. +- Tier limits and concurrency controls. + +## Milestone 5: Monetization Surface + +Goal: make the value legible and buyable. + +- `opencoven.ai/github` landing page. +- Hosted beta waitlist. +- Pricing: Community, Hosted Starter, Hosted Team, Hosted Dedicated. +- Cave dashboard for task history, familiar routing, usage, and audit events. +- Demo assets: issue assignment to Check Run, draft PR back to issue, Cave oversight intervention. + +## Current Focus + +1. Land the hosted MVP hardening branch. +2. Build persistent task state and idempotency. +3. Move familiar routing from global TOML toward installation-scoped config. +4. Make Cave oversight central in the public story and product loop. 
diff --git a/crates/github/src/lib.rs b/crates/github/src/lib.rs index 31e396d..b4c2605 100644 --- a/crates/github/src/lib.rs +++ b/crates/github/src/lib.rs @@ -58,6 +58,7 @@ async fn send_json( #[serde(tag = "event_type", rename_all = "snake_case")] pub enum GitHubEvent { IssueAssigned(IssueAssignedEvent), + IssueLabeled(IssueLabeledEvent), IssueComment(IssueCommentEvent), PullRequestReviewComment(PrReviewCommentEvent), Unsupported { name: String }, @@ -74,6 +75,17 @@ pub struct IssueAssignedEvent { pub assignee_login: String, } +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct IssueLabeledEvent { + pub installation_id: u64, + pub repo_owner: String, + pub repo_name: String, + pub issue_number: u64, + pub issue_title: String, + pub issue_body: String, + pub label_name: String, +} + #[derive(Debug, Clone, Deserialize, Serialize)] pub struct IssueCommentEvent { pub installation_id: u64, diff --git a/crates/webhook/src/events.rs b/crates/webhook/src/events.rs index e1a13c2..d8294a6 100644 --- a/crates/webhook/src/events.rs +++ b/crates/webhook/src/events.rs @@ -1,9 +1,9 @@ //! Webhook event parsing: GitHub payload → typed events. -use serde::Deserialize; use coven_github_api::{ - GitHubEvent, IssueAssignedEvent, IssueCommentEvent, PrReviewCommentEvent, + GitHubEvent, IssueAssignedEvent, IssueCommentEvent, IssueLabeledEvent, PrReviewCommentEvent, }; +use serde::Deserialize; /// Raw GitHub webhook payload (partial — we only pull what we need). #[derive(Debug, Deserialize)] @@ -13,6 +13,7 @@ pub struct WebhookPayload { pub repository: Option, pub issue: Option, pub comment: Option, + pub label: Option