From f2973287d1654e758173ec78c95cf0e1ec2207b2 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Mon, 16 Feb 2026 15:16:20 -0800 Subject: [PATCH 1/2] status updated --- docs/better-sdk-opportunity-proposal.md | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/docs/better-sdk-opportunity-proposal.md b/docs/better-sdk-opportunity-proposal.md index de1a54c..adec58d 100644 --- a/docs/better-sdk-opportunity-proposal.md +++ b/docs/better-sdk-opportunity-proposal.md @@ -22,7 +22,7 @@ Status snapshot date: 2026-02-16 | Phase | Status | ETA | Owner | | --- | --- | --- | --- | | Phase 0: Architecture and Spec Lock | Partially complete | 1-2 weeks total (remaining: sign-off + schema/process formalization) | SDK + Platform + Security | -| Phase 1: Local SDK Guard (MVP) | In progress | 3-5 weeks total (remaining: `sdk-python` hooks + OTel export + examples + CI publish flow) | SDK | +| Phase 1: Local SDK Guard (MVP) | In progress (implementation-complete) | 3-5 weeks total (remaining: first production package release in dependency order) | SDK | | Phase 2: Sidecar + Identity Bridge | Not started (design only) | 4-6 weeks | Platform + Identity | | Phase 3: Hosted Governance Control Plane | Not started (design only) | 6-8 weeks | Platform + Product | | Phase 4: Enterprise Hardening and Scale | Not started (design only) | Ongoing (first 4-6 weeks) | Platform + Security + GTM | @@ -523,17 +523,20 @@ async def web_search_tool(query: str): - Basic policy DSL. - Trace/proof event emission to existing tracer. -Status (as of 2026-02-16): **in progress (MVP scaffold implemented in this `predicate-authority` repository)** +Status (as of 2026-02-16): **in progress (MVP implemented; release execution pending)** - Completed in repo: - `predicate-contracts` package scaffold with typed contracts and protocols. - `predicate-authority` local `ActionGuard.authorize(...)` + `enforce(...)`. - Signed local mandates with TTL + verification. 
- Local policy evaluation and normalized deny reasons. - - In-memory proof ledger with optional trace emitter interface. - - pytest coverage for policy, mandate signing, and proof emission paths. + - In-memory proof ledger with optional OpenTelemetry trace emitter interface. + - typed `sdk-python` integration adapter path for pre-action authorization. + - pytest coverage for policy, mandate signing, integration, and proof/telemetry paths. + - package-level `pyproject.toml` for `predicate-contracts` and `predicate-authority`. + - CI workflows for tests and dependency-ordered publish. + - release runbook for PyPI publishing. - Remaining to close full Phase 1 exit: - - connect CI publish jobs to real package build/publish steps and credentials, - publish first `predicate-contracts` and `predicate-authority` versions in dependency order. ## Phase 2: Sidecar and IdP bridge (4-8 weeks) @@ -665,7 +668,7 @@ Exit criteria: - developer quickstart validated end-to-end on local-only mode. - CI release pipeline can publish and verify `predicate-contracts` and `predicate-authority` in dependency order. -Current status: **in progress** +Current status: **in progress (implementation-complete)** - [x] local `ActionGuard.authorize(...)`. - [x] signed local mandates. @@ -675,7 +678,10 @@ Current status: **in progress** - [x] `sdk-python` runtime integration hooks (typed adapter path). - [x] OpenTelemetry-native authority event export. - [x] quickstart/examples for browser/MCP/outbound HTTP. -- [x] dependency-ordered package publish pipeline in CI (workflow scaffold). +- [x] dependency-ordered package publish pipeline in CI. +- [x] package-level `pyproject.toml` for both publishable packages. +- [x] dedicated GitHub Actions test workflow. +- [ ] first production releases published to PyPI in dependency order. 
## Phase 2: Sidecar + Identity Bridge (4-6 weeks) From c4788c56604ae079a374620c9024c47ef71dbe92 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Mon, 16 Feb 2026 15:52:10 -0800 Subject: [PATCH 2/2] phase 2 --- docs/better-sdk-opportunity-proposal.md | 766 ----------- docs/caracal-analysis.md | 1178 ----------------- docs/caracal-codebase-overview.md | 187 --- docs/northstar.md | 103 -- docs/predicate-sdk-verification-analysis.md | 1235 ------------------ docs/pypi-release-guide.md | 74 -- docs/roadmap.md | 370 ------ examples/README.md | 25 +- examples/authorityd/daemon_endpoint_check.py | 46 + examples/authorityd/policy.json | 20 + predicate_authority/__init__.py | 38 +- predicate_authority/bridge.py | 98 +- predicate_authority/daemon.py | 228 ++++ predicate_authority/policy.py | 3 + predicate_authority/policy_source.py | 45 + predicate_authority/pyproject.toml | 3 + predicate_authority/revocation.py | 35 + predicate_authority/sidecar.py | 151 +++ predicate_authority/sidecar_store.py | 76 ++ tests/test_daemon_phase2.py | 130 ++ tests/test_identity_bridge_phase2.py | 46 + tests/test_sidecar_phase2.py | 140 ++ 22 files changed, 1077 insertions(+), 3920 deletions(-) delete mode 100644 docs/better-sdk-opportunity-proposal.md delete mode 100644 docs/caracal-analysis.md delete mode 100644 docs/caracal-codebase-overview.md delete mode 100644 docs/northstar.md delete mode 100644 docs/predicate-sdk-verification-analysis.md delete mode 100644 docs/pypi-release-guide.md delete mode 100644 docs/roadmap.md create mode 100644 examples/authorityd/daemon_endpoint_check.py create mode 100644 examples/authorityd/policy.json create mode 100644 predicate_authority/daemon.py create mode 100644 predicate_authority/policy_source.py create mode 100644 predicate_authority/revocation.py create mode 100644 predicate_authority/sidecar.py create mode 100644 predicate_authority/sidecar_store.py create mode 100644 tests/test_daemon_phase2.py create mode 100644 
tests/test_identity_bridge_phase2.py create mode 100644 tests/test_sidecar_phase2.py diff --git a/docs/better-sdk-opportunity-proposal.md b/docs/better-sdk-opportunity-proposal.md deleted file mode 100644 index adec58d..0000000 --- a/docs/better-sdk-opportunity-proposal.md +++ /dev/null @@ -1,766 +0,0 @@ -# Better SDK Opportunity Proposal - -## Goal - -Design a new SDK that gives Predicate a **production-grade pre-execution authority enforcement layer** for AI agents, while integrating with existing enterprise identity systems and preserving the deterministic guarantees already provided by `sdk-python`. - -Important scope note: - -- `sdk-python` is web-agent specific because it depends on a Playwright/page snapshot engine. -- Non-web agents (backend/code/infra agents) cannot directly use `sdk-python` snapshot verification and must use a runtime-agnostic authority path in `predicate-authority` (policy, mandate, intent, and proof checks without page snapshots). - -This proposal answers the "Better SDK Opportunity" in `northstar.md` by combining: - -- Predicate's deterministic verification runtime (`assert_`, `check().eventually()`, `assert_done`, traces, artifacts), -- Caracal's strongest ideas (short-lived mandates, scope checks, fail-closed gateway-style enforcement, immutable authority ledger), -- A bridge-first strategy (works with Azure AD/Okta/Auth0 and existing agent stacks). 
- -## Progress Dashboard - -Status snapshot date: 2026-02-16 - -| Phase | Status | ETA | Owner | -| --- | --- | --- | --- | -| Phase 0: Architecture and Spec Lock | Partially complete | 1-2 weeks total (remaining: sign-off + schema/process formalization) | SDK + Platform + Security | -| Phase 1: Local SDK Guard (MVP) | In progress (implementation-complete) | 3-5 weeks total (remaining: first production package release in dependency order) | SDK | -| Phase 2: Sidecar + Identity Bridge | Not started (design only) | 4-6 weeks | Platform + Identity | -| Phase 3: Hosted Governance Control Plane | Not started (design only) | 6-8 weeks | Platform + Product | -| Phase 4: Enterprise Hardening and Scale | Not started (design only) | Ongoing (first 4-6 weeks) | Platform + Security + GTM | - - -## TL;DR Design - -Build a new package family centered on: - -- `predicate-authority` (Python) -- optional sidecar `predicate-authorityd` (local authority broker) - -Core behavior: - -1. Agent asks for authority **before** sensitive action. -2. SDK binds request to deterministic state (snapshot hash, step id, run id, assertion outcomes). -3. Authority is issued as a short-lived capability/mandate token with strict scope. -4. Enforcement wrappers verify token + scope + state constraints before executing action. -5. Every decision is logged with cryptographic proofs for audit and replay. -6. Any uncertainty/failure => fail closed (no action). - - -## Why This Is Better (vs Caracal-only or verification-only) - -- **Vs verification-only (`sdk-python`)**: adds true "can/cannot execute" authorization, not just "did it work?" checks. -- **Vs Caracal-only**: keeps deterministic runtime deeply coupled to authorization and integrates with enterprise IdPs instead of requiring a parallel identity silo. -- **Enterprise fit**: supports existing identity/SSO and logging systems (Splunk/Datadog/SIEM) plus cryptographic proof records. 
- - -## Product Surface - -## 1) Python SDK (`predicate_authority`) - -Main components: - -- `AuthorityClient`: obtains/revokes short-lived mandates. -- `ActionGuard`: pre-execution check for tool/API/browser actions. -- `IdentityBridge`: adapter for IdPs (Entra/Okta/Auth0/custom OIDC). -- `ProofLedger`: signed decision/event writer. -- `PolicyEngine`: evaluates local+remote policy constraints. - -## 2) Sidecar (`predicate-authorityd`) - recommended for production - -A local daemon (`localhost`) that: - -- stores and rotates refresh credentials safely, -- mints/refreshes short-lived mandates, -- enforces local policy and deterministic invariants before refresh/issue, -- allows token revocation/kill-switch without app restart. - -This follows the "cached credentials service" operational model and is ideal for long-running agents. - -## 3) Optional control plane - -Central policy/authority service for: - -- policy management, -- mandate issuance delegation, -- revocation propagation, -- audit aggregation. - -The SDK can run in local-only mode or connected mode. - -## 4) Business model plane (hosted governance) - -Keep the SDK open source (MIT/Apache-2.0), monetize the connected governance plane: - -- hosted audit and retention, -- centralized policy orchestration, -- fleet-wide revocation and incident controls, -- enterprise identity/compliance operations. - - -## Integration with Existing `sdk-python` - -The new SDK should be additive and compose with `AgentRuntime` rather than replacing it. - -Constraint: - -- this integration applies to web agents where `AgentRuntime` and page snapshots are available. -- non-web agents integrate through `predicate-authority` directly (SDK guard + sidecar + IdentityBridge) without `sdk-python` dependencies. - -## Hook points in current runtime flow - -At each sensitive step: - -1. `runtime.begin_step(...)` -2. `await runtime.snapshot()` -3. deterministic checks (`assert_`/`check`) -4. 
**new: `guard.authorize(...)` before action** -5. execute action -6. post-condition verify (`assert_`) -7. emit traces and ledger proofs - -This creates two gates: - -- **Pre-execution authority gate**: "Are you allowed to try this?" -- **Post-execution deterministic gate**: "Did the intended effect occur?" - -Both are required for production trust. - -## Suggested Python usage - -```python -from predicate import AgentRuntime -from predicate_authority import AuthorityClient, ActionGuard, ActionSpec - -runtime = AgentRuntime.from_playwright_page(page, tracer=tracer) -auth = AuthorityClient.from_env() # bridge to sidecar or control plane -guard = ActionGuard(runtime=runtime, authority_client=auth) - -runtime.begin_step("checkout_submit") -await runtime.snapshot() - -# Deterministic precondition from existing sdk-python -runtime.assert_(exists("role=button text~'Submit'"), "submit_present", required=True) - -# New authority pre-execution gate -decision = await guard.authorize( - ActionSpec( - action="web.click", - resource="https://shop.example.com/checkout#submit", - tool="playwright.click", - risk_level="high", - ), - required_predicates=["submit_present"], -) - -if not decision.allowed: - raise RuntimeError(f"Blocked by authority: {decision.reason}") - -await page.click("role=button text~'Submit'") -await runtime.snapshot() - -# Existing deterministic postcondition -runtime.assert_(exists("text~'Order Confirmed'"), "order_confirmed", required=True) -await runtime.emit_step_end() -``` - - -## Authorization Model - -Use short-lived **Execution Mandates** (capability tokens) with explicit constraints: - -- `sub`: agent/principal identity -- `iss`: authority issuer (sidecar/control plane) -- `aud`: target system/tool class -- `exp`: very short TTL (e.g., 30-120s) -- `action_scope`: verbs (`web.click`, `mcp.execute`, `http.post`, etc.) 
-- `resource_scope`: URL/tool/resource patterns -- `budget_scope`: optional spend/quota constraints -- `delegation_depth`: bounded chain -- `intent_hash`: hash of requested action intent -- `state_hash`: hash of relevant deterministic state snapshot -- `run_id`, `step_id`, `correlation_id` - -Mandates are cryptographically signed (ES256/EdDSA) and locally verifiable. - - -## Determinism + Authority Binding (Key Innovation) - -Caracal validates mandate + scope. Predicate validates effect deterministically. -The new SDK should cryptographically bind those two worlds: - -- **State-bound mandate issuance**: include `state_hash` from latest snapshot/assertion context. -- **Predicate-bound authority**: require specified verification labels to pass before issuance. -- **Intent-bound mandate**: include canonical action payload hash (`intent_hash`) to prevent replay/mutation. - -This ensures "authorized" means: - -1. right identity, -2. right scope, -3. right time, -4. right state context, -5. right exact action. - -## Interface Decision: State Hash and Verification Signals - -Based on review of `sdk-python/predicate/snapshot.py` and `sdk-python/predicate/backends/snapshot.py`: - -- those modules provide snapshot collection (extension/API/backend-agnostic paths), -- they do **not** currently expose a first-class interface contract for `state_hash`, -- current runtime digesting patterns should be treated as implementation detail, not a cross-SDK contract. - -Design decision: - -- introduce an explicit authority-facing interface layer in a tiny shared package: `predicate-contracts`, -- have `sdk-python` and `predicate-authority` both depend on `predicate-contracts`, -- provide non-web adapters with equivalent contracts that do not depend on browser snapshots. 
- -### Package dependency graph (one-way) - -To avoid import cycles, use this package layout: - -- `predicate-contracts` (tiny shared package) - - only Protocols + data contracts (`StateEvidence`, `VerificationSignal`, etc.) - - no runtime dependencies on Playwright, `sdk-python`, or authority services -- `predicate-authority` - - depends on `predicate-contracts` - - authority logic, policy engine, sidecar client, mandate handling -- `sdk-python` - - depends on `predicate-contracts` - - implements web-specific evidence adapters against existing `AgentRuntime` -- optional integration package: `predicate-authority-sdk-python-adapter` - - depends on both `predicate-authority` and `sdk-python` - - keeps cross-package glue isolated - -Graph: - -```text -predicate-contracts - ├──> predicate-authority - ├──> sdk-python - └──> predicate-authority-sdk-python-adapter (optional, also depends on the two above) -``` - -### Release pipeline impact (PyPI) - -Because `predicate-authority` depends on `predicate-contracts`, release automation must publish both packages with ordering control. - -Required pipeline behavior: - -1. Build/test/publish `predicate-contracts` first. -2. Wait for package availability on PyPI. -3. Build/test/publish `predicate-authority` pinned to the new compatible `predicate-contracts` version range. -4. (Optional) Build/test/publish `predicate-authority-sdk-python-adapter`. - -Recommended controls: - -- semantic versioning for `predicate-contracts` with strict compatibility rules, -- CI guard that blocks `predicate-authority` publish if required `predicate-contracts` version is unavailable, -- contract-compat test matrix (`sdk-python` x `predicate-authority` against targeted `predicate-contracts` versions), -- synchronized release notes for contract changes. - -### 1) `StateEvidenceProvider` interface (new) - -Purpose: normalize state capture into a signed/hashable evidence object across web and non-web agents. 
- -Proposed contract: - -```python -class StateEvidenceProvider(Protocol): - async def capture_state_evidence(self) -> "StateEvidence": - ... -``` - -```python -@dataclass -class StateEvidence: - source_type: Literal["web_snapshot", "task_context", "tool_context", "infra_context"] - state_hash: str | None - state_schema_version: str - captured_at_ms: int - confidence: float | None - refs: dict[str, str] # trace_id, step_id, snapshot_id, run_id - attributes: dict[str, Any] # optional lightweight canonical fields -``` - -Notes: - -- `state_hash` is optional at interface level (for backward compatibility), but strongly recommended in production policy. -- for web agents, compute `state_hash` from canonicalized snapshot fields (stable sort + selected fields), not raw timestamps. -- for non-web agents, compute `state_hash` from canonical task/tool context (intent payload hash, target resource, dependency versions, execution environment fingerprint). - -### 2) `VerificationEvidenceProvider` interface (new) - -Purpose: formalize what "verification labels passed" means beyond boolean shortcuts. - -Proposed contract: - -```python -class VerificationEvidenceProvider(Protocol): - def get_verification_evidence(self) -> "VerificationEvidence": - ... -``` - -```python -@dataclass -class VerificationSignal: - label: str - status: Literal["passed", "failed", "error", "unknown"] - required: bool - reason: str - details: dict[str, Any] - observed_at_ms: int - -@dataclass -class VerificationEvidence: - signals: list[VerificationSignal] - summary: dict[str, Any] # required_passed, failed_labels, signal_count -``` - -Key clarification: - -- verification labels are **not** just `true/false`; they are named assertions with status + reason + details. 
-- authorization policies should evaluate predicates over this richer signal set, for example: - - required labels must be `passed`, - - specific labels may allow `passed` or `unknown` in dry-run mode, - - any `error` on critical labels causes fail-closed deny. - -### Web adapter mapping (from `sdk-python`) - -- map existing runtime assertion records (`label`, `passed`, `required`, `reason`, `details`) into `VerificationSignal`. -- derive web `StateEvidence` from latest snapshot/assertion context. -- keep this mapping in an adapter module (do not hardwire authority logic into snapshot modules). - -### Non-web adapter mapping - -- generate `VerificationSignal` from deterministic checks available in that runtime (API response contracts, schema validators, policy checks, build/deploy preconditions). -- generate `StateEvidence` from canonical execution context (no Playwright dependency). - -### Policy implications - -- make `state_hash` policy-configurable: - - `required_for_actions`: list of high-risk actions requiring non-null `state_hash`. - - `accepted_source_types`: permitted evidence source types per action class. -- keep fail-closed default: - - if policy requires evidence and provider cannot produce it, deny. - - -## Identity Bridge Strategy (Enterprise-Friendly) - -Do not force a new IdP. Bridge existing enterprise identity. - -## `IdentityBridge` adapters - -- `EntraBridge` (MSAL/OIDC), -- `OktaBridge`, -- generic `OIDCBridge`. - -Flow: - -1. Get enterprise token (OIDC/OAuth standard). -2. Exchange for Predicate mandate (or embed signed context assertion). -3. Use mandate for pre-exec enforcement. -4. On invariant violation, revoke local capability and optionally signal upstream revocation. - -This mirrors familiar OBO/delegation patterns while keeping Predicate-specific context enforcement. - - -## Enforcement Planes - -Support three enforcement planes from day one: - -1. **In-process SDK guard** - - easiest adoption; wraps tool/API/browser actions directly. 
- -2. **Gateway mode** (HTTP/tool proxy) - - policy enforcement at network boundary (Caracal-style PEP). - -3. **Sidecar authority broker** - - secure token lifecycle + local kill switch + cached credentials handling. - -All three should share the same policy and decision schema. - - -## Policy Model (v1) - -Policy should be explicit and composable: - -- principal policy (who), -- action/resource policy (what), -- temporal policy (when), -- budget policy (how much), -- state invariant policy (in which deterministic state), -- delegation policy (how deep / from whom). - -Example policy snippet: - -```yaml -principal: agent:checkout-bot -allow: - - action: web.click - resource: https://shop.example.com/checkout* - requires: - predicates_passed: [submit_present, cart_nonempty] - max_age_seconds: 15 - risk_level: [medium, high] -limits: - max_delegation_depth: 1 - mandate_ttl_seconds: 60 - budget_usd_per_run: 100 -deny_by_default: true -``` - - -## Audit and Proof Ledger - -For each decision, emit: - -- normalized decision event, -- canonical payload hash, -- signature/proof envelope, -- links to trace/snapshot/artifacts from `sdk-python`. - -Store in: - -- customer SIEM/log infra (Splunk/Datadog/OpenTelemetry), and optionally -- a tamper-evident append-only proof store. - -This avoids "must use our DB" while preserving cryptographic accountability. - -## Monetization Design (Open Core, Hosted Governance) - -Revenue model principle: - -- **Do not charge for the SDK lock.** -- **Charge for the cloud governance system of record.** - -This works especially well for non-web agents that cannot use Snapshot Engine but still need high-volume authority checks. - -## Unified Credit Model - -Rebrand "Snapshot Credits" to **Verification Credits** so one credit system covers hybrid fleets. 
- -| Action Type | Product Used | Cost per Action | Why | -| --- | --- | --- | --- | -| Visual Verification | Snapshot Engine (web agents) | 10 credits | Higher GPU/CV compute and higher per-step diagnostic value | -| Authority Check | Identity/Authority SDK (non-web + web) | 1 credit | Lower crypto/policy compute, much higher request volume | - -Implication: one subscription can serve mixed fleets (web, backend, code, infra agents) without separate billing products. - -## Monetization Hooks - -## 1) Audit Vault (Compliance as a Service) - -- Problem: local sidecar logs are not sufficient for regulated audit retention. -- Product: SDK/sidecar pushes signed proof events to Predicate Cloud Audit Vault. -- Monetization: by event volume + retention tier (30 days, 1 year, 7 years/WORM). -- Value proposition: "SDK enforces policy; cloud proves compliance." - -## 2) Command Center (Fleet Management) - -- Problem: policy changes across hundreds of sidecars are operationally expensive. -- Product: centralized policy dashboard + near-real-time policy sync. -- Monetization: Teams/Enterprise feature gating, with seat or org-based base + volume overage. - -## 3) Global Kill-Switch (Security) - -- Problem: compromised key/principal/intent requires immediate coordinated revocation. -- Product: revoke by `principal_id`, `intent_hash`, `mandate lineage`, or policy tag globally. -- Runtime behavior: sidecars poll/stream revocation updates (for example every 30 seconds or push channel). -- Monetization: Enterprise security control. 
- -## Suggested Pricing Shape (Web + Identity) - -| Tier | Pricing | Identity / Authority Features | Snapshot Features | -| --- | --- | --- | --- | -| Hobby | Free | Local mode only: local sidecar, local YAML policy, no cloud audit log | 500 credits/month | -| Pro | $XX/month | Connected mode: 30-day cloud audit log, 50,000 authority checks (50,000 credits), basic policy sync | 5,000 credits/month | -| Teams | $XXX/month | Fleet management: centralized policy dashboard, global kill-switch, unlimited active agents (volume billed) | 20,000 credits/month | -| Enterprise | Custom | Compliance + SSO: 7-year Audit Vault (WORM), managed Entra/Okta bridge, SLA/support | Volume discounts | - -Notes: - -- Keep credit burn transparent in SDK telemetry (`credits_spent`, `credits_estimate`) so customers can forecast. -- Offer pooled org credits so hybrid agent teams can allocate credits dynamically. -- Add overage pricing bands to avoid hard stops for production workloads. - -## Why this monetizes non-web agents well - -- **Backend agents (high call volume):** burn low-cost authority credits at scale; strong usage revenue. -- **Infra/ops agents (lower volume, high risk):** buy Audit Vault + kill-switch + retention for governance/compliance. -- **Code agents:** often run in CI/CD and need deterministic authority trails; high fit for policy sync and revocation. - -## Commercial positioning - -- **Free:** the lock (SDK + local enforcement). -- **Paid:** the security camera and command center (Audit Vault + policy orchestration + kill-switch). -- **Strategic message:** Predicate is the **System of Record for Agent Authority**, not just an SDK toolkit. 
- - -## SDK API Proposal (v1) - -Primary API surfaces: - -- `AuthorityClient.request_mandate(action_spec, context)` -- `ActionGuard.authorize(action_spec, required_predicates=...)` -- `ActionGuard.enforce(action_callable, action_spec, postcondition=...)` -- `ProofLedger.record(decision, trace_refs, artifact_refs)` -- `IdentityBridge.exchange_token(subject, context_assertion)` - -Optional decorator form: - -```python -@guard.protected(action="mcp.execute", resource="mcp://tools/web_search") -async def web_search_tool(query: str): - ... -``` - - -## Rollout Plan - -## Phase 1: SDK-only guard (2-4 weeks) - -- In-process pre-exec gate. -- Signed local mandates. -- Basic policy DSL. -- Trace/proof event emission to existing tracer. - -Status (as of 2026-02-16): **in progress (MVP implemented; release execution pending)** - -- Completed in repo: - - `predicate-contracts` package scaffold with typed contracts and protocols. - - `predicate-authority` local `ActionGuard.authorize(...)` + `enforce(...)`. - - Signed local mandates with TTL + verification. - - Local policy evaluation and normalized deny reasons. - - In-memory proof ledger with optional OpenTelemetry trace emitter interface. - - typed `sdk-python` integration adapter path for pre-action authorization. - - pytest coverage for policy, mandate signing, integration, and proof/telemetry paths. - - package-level `pyproject.toml` for `predicate-contracts` and `predicate-authority`. - - CI workflows for tests and dependency-ordered publish. - - release runbook for PyPI publishing. -- Remaining to close full Phase 1 exit: - - publish first `predicate-contracts` and `predicate-authority` versions in dependency order. - -## Phase 2: Sidecar and IdP bridge (4-8 weeks) - -- `predicate-authorityd`. -- Entra/OIDC bridge. -- token refresh + local revoke. -- deterministic-invariant-triggered token kill. - -## Phase 3: Gateway and enterprise controls (8-12 weeks) - -- policy-managed gateway mode. 
-- centralized revocation and distribution. -- audit export integrations and admin tooling. - - -## Success Metrics - -Track outcomes that matter to production reliability and security: - -- % sensitive actions with pre-exec authorization gate. -- denial precision (true blocks vs false blocks). -- mean time to root cause (using trace + proof links). -- replay/reproducibility success rate. -- reduction in unauthorized or policy-violating actions. -- token misuse/replay incidents prevented. - - -## Risks and Mitigations - -- **Risk: integration friction with existing agents** - - Mitigation: wrapper/decorator APIs, sidecar mode, backward-compatible defaults. - -- **Risk: latency from extra checks** - - Mitigation: short local verification paths, cached public keys, bounded policy evaluation. - -- **Risk: policy complexity** - - Mitigation: small policy core, templates by agent type, dry-run mode. - -- **Risk: brittle state hash semantics** - - Mitigation: canonical snapshot schema, stable field selection, versioned hashing. - - -## Recommended Initial Scope - -Start with a narrow but high-impact path: - -1. Browser agent critical actions (`click`, `type`, `submit`, `navigate`). -2. MCP tool execution guard. -3. HTTP outbound call guard. - -Use existing `sdk-python` assertions as required preconditions and bind authority to those outcomes. - -For non-web agents, replace snapshot/predicate preconditions with: - -- structured execution context (task id, run id, tool name, input hash), -- policy-bound intent checks (`intent_hash`), -- risk/budget/delegation constraints, -- signed proof events for every allow/deny decision. - - -## Final Positioning - -Predicate should position this as: - -**"Deterministic execution + pre-execution authority, with your existing identity stack."** - -Not "another IdP," and not "just observability." 
- -This creates a defensible platform where agents are: - -- deterministic in behavior verification, -- constrained by cryptographic authority at execution time, -- and provable in audit trails for enterprise governance. - -## Milestones and Phased Deliverables - -This implementation plan assumes a small cross-functional team (SDK + platform + security + product) and can run as a 4-phase build with explicit exit criteria. - -## Phase 0: Architecture and Spec Lock (1-2 weeks) - -Objective: freeze contracts so implementation can proceed in parallel. - -Deliverables: - -- `predicate-contracts` package scaffold and ownership model (tiny shared package). -- `predicate-authority` API spec (Python): `AuthorityClient`, `ActionGuard`, `IdentityBridge`, `ProofLedger`. -- Mandate token schema (claims, signatures, TTL, `intent_hash`, `state_hash`). -- Decision event schema (allow/deny reason codes, trace/artifact references, credit fields). -- Policy DSL v1 spec (principal/action/resource/state/budget/delegation). -- Credit accounting spec for "Verification Credits" (1-credit authority check, 10-credit visual verification). -- package dependency graph + import boundaries documented (`predicate-contracts` -> `predicate-authority` and `sdk-python`). - -Exit criteria: - -- design review sign-off from SDK, platform, and security. -- versioned schema docs published. -- compatibility mapping to existing `sdk-python` step lifecycle approved. -- release orchestration design approved for multi-package PyPI publishing (`predicate-contracts` then `predicate-authority`). - -Current status: **partially complete** - -- [x] dependency graph/import boundaries documented in this proposal. -- [x] package scaffolding started in this `predicate-authority` repository (`predicate-contracts`, `predicate-authority`). -- [ ] formal design sign-off from SDK/platform/security. -- [ ] versioned schema docs publication process. -- [ ] approved compatibility mapping with `sdk-python` lifecycle owners. 
- -## Phase 1: Local SDK Guard (MVP) (3-5 weeks) - -Objective: deliver immediate value with in-process pre-execution authority. - -Deliverables: - -- publish `predicate-contracts` v0.x with stable protocol/data contracts. -- `predicate-authority` package with: - - local `ActionGuard.authorize(...)`, - - signed local mandates, - - local policy evaluation, - - fail-closed behavior and normalized deny reasons. -- `sdk-python` integration hooks (pre-action authority gate + postcondition linkage). -- OpenTelemetry/trace emission for authority events. -- Developer examples for browser, MCP tool, and outbound HTTP action guards. - -Exit criteria: - -- >90% of targeted sensitive actions can be wrapped with pre-exec checks. -- deterministic regression tests pass for authorize/deny paths. -- developer quickstart validated end-to-end on local-only mode. -- CI release pipeline can publish and verify `predicate-contracts` and `predicate-authority` in dependency order. - -Current status: **in progress (implementation-complete)** - -- [x] local `ActionGuard.authorize(...)`. -- [x] signed local mandates. -- [x] local policy evaluation. -- [x] fail-closed deny path with normalized reason enums. -- [x] deterministic regression tests for authorize/deny paths. -- [x] `sdk-python` runtime integration hooks (typed adapter path). -- [x] OpenTelemetry-native authority event export. -- [x] quickstart/examples for browser/MCP/outbound HTTP. -- [x] dependency-ordered package publish pipeline in CI. -- [x] package-level `pyproject.toml` for both publishable packages. -- [x] dedicated GitHub Actions test workflow. -- [ ] first production releases published to PyPI in dependency order. - -## Phase 2: Sidecar + Identity Bridge (4-6 weeks) - -Objective: production-ready token lifecycle and enterprise identity compatibility. 
- -Deliverables: - -- `predicate-authorityd` sidecar: - - local secure credential store, - - short-lived mandate minting/refresh, - - local revocation cache, - - policy hot-reload. -- `IdentityBridge` adapters: - - OIDC generic bridge, - - Entra bridge first, - - Okta bridge second. -- optional `predicate-authority-sdk-python-adapter` package for isolated cross-package glue. -- invariant-triggered revocation path (predicate failure can invalidate local capability). -- connected mode toggles (local-only vs cloud-connected). - -Exit criteria: - -- long-running agent workloads can run without manual token intervention. -- bridge token exchange validated against at least one enterprise IdP. -- sidecar survives restart/network partition with fail-closed guarantees. - -Current status: **not started (design only)** - -## Phase 3: Hosted Governance Control Plane (6-8 weeks) - -Objective: ship monetizable cloud governance capabilities. - -Deliverables: - -- Audit Vault ingestion API for signed proof events. -- retention tiers (30-day, 1-year, 7-year/WORM-ready path). -- Command Center policy dashboard + fleet sync. -- global kill-switch and revocation fanout (`principal_id`, `intent_hash`, policy tags). -- Verification Credits metering and usage dashboards. - -Exit criteria: - -- policy updates propagate to active sidecars within target SLA. -- kill-switch propagation meets incident response target. -- billable usage pipeline reconciles authority + snapshot credits accurately. - -Current status: **not started (design only)** - -## Phase 4: Enterprise Hardening and Scale (ongoing, first 4-6 weeks) - -Objective: make it enterprise-ready for regulated production. - -Deliverables: - -- SSO/admin controls and tenant isolation. -- signed export pipelines for SIEM/SOC workflows. -- compliance evidence packs (audit queries, retention attestations, revocation reports). -- HA/SLO hardening, runbooks, and support escalation paths. 
-- pricing guardrails (overage behavior, pooled credits, forecasting alerts). - -Exit criteria: - -- reference customer security review completed. -- defined SLOs met in staging/load tests. -- enterprise onboarding playbook validated with pilot accounts. - -Current status: **not started (design only)** - -## Cross-Phase Dependencies - -- `sdk-python` runtime contract stability (snapshot schema, assertion labels, step metadata). -- `predicate-contracts` semver discipline (breaking changes require coordinated version gates). -- cryptographic key management strategy (local keys vs managed KMS). -- billing/telemetry instrumentation available by Phase 3. -- legal review for open-core licensing and hosted governance terms. - -## Proposed Milestone Gates - -- **Gate A (end Phase 1):** local SDK MVP usable in one production-like agent flow; `predicate-contracts` + `predicate-authority` publish flow proven. -- **Gate B (end Phase 2):** sidecar + IdP bridge validated for long-running agents. -- **Gate C (end Phase 3):** first monetizable connected tier (Pro) launch-ready. -- **Gate D (end Phase 4):** enterprise tier controls and compliance posture launch-ready. diff --git a/docs/caracal-analysis.md b/docs/caracal-analysis.md deleted file mode 100644 index ddd5976..0000000 --- a/docs/caracal-analysis.md +++ /dev/null @@ -1,1178 +0,0 @@ -# Caracal: Pre-Execution Authority Enforcement for AI Agents - -## Executive Summary - -**Caracal** is a pre-execution authority enforcement system designed to control AI agent actions in production environments. It implements cryptographically-verified, time-bound authorization mandates that must be validated before any irreversible action (API calls, database writes, deployments) can execute. The system treats agent identity as a first-class cryptographic primitive with explicit delegation chains and immutable audit trails. - -**Repository**: `/Users/guoliangwang/Code/Python/caracal` - ---- - -## 1. 
Core Purpose & Problem Statement - -### The Problem -AI agents operating in production environments can: -- Execute irreversible actions (payments, deployments, data deletion) -- Operate autonomously without human oversight -- Make decisions that have real-world consequences -- Be compromised or malfunction in unpredictable ways - -Traditional approaches (API keys, role-based access control) fail because they: -- Grant broad, long-lived permissions -- Lack cryptographic verification -- Don't provide granular, context-aware control -- Can't be easily delegated or revoked - -### The Caracal Solution -Caracal enforces **explicit authority**: no action executes unless there is a cryptographically verified, time-bound mandate issued under a governing policy. It sits at the boundary where decisions become actions and provides: - -- **Cryptographic proof of authorization** (ECDSA P-256 signatures) -- **Time-bound mandates** (TTL-based expiration) -- **Immutable audit trails** (Merkle-tree-verified ledger) -- **Fail-closed semantics** (deny by default) -- **Delegation chains** (parent-child authority transfer) - ---- - -## 2. How Caracal Handles Agent Identity - -### 2.1 Agent Identity Model - -Caracal treats agents as **Principals** - first-class identities that can hold and exercise authority. - -#### Principal Attributes -```python -class Principal: - id: UUID # Unique identifier (UUID v4) - name: str # Human-readable, enforced unique - owner: str # Owner contact/email - key_pair: KeyPair # ECDSA P-256 public/private keys - parent_id: Optional[UUID] # Hierarchical relationship - metadata: dict # Extensible metadata - created_at: datetime - updated_at: datetime -``` - -**Key Characteristics:** - -1. **Cryptographic Identity**: Each principal has an ECDSA P-256 key pair - - Public key: Identity attestation - - Private key: Signs delegation tokens and mandates - -2. 
**Hierarchical Relationships**: Agents can have parent-child relationships - - Enables delegation chains (parent → child → grandchild) - - Authority flows downward with scope narrowing - - Delegation depth tracking and limits - -3. **Persistent Registry**: Atomic JSON persistence with rolling backups - - 100 backup history with atomic writes - - ACID guarantees for identity operations - - Corruption detection and recovery - -4. **Name Uniqueness**: Human-readable names enforced as unique - - Prevents confusion in audit trails - - Enables natural reference in policies - -### 2.2 Identity Operations - -#### Creating an Agent Identity -```bash -# CLI -caracal agent register \ - --name "web-scraper-01" \ - --owner "team@company.com" \ - --metadata '{"purpose":"product-data-collection"}' - -# SDK -from caracal.sdk import AuthorityClient - -client = AuthorityClient() -principal = client.register_principal( - name="web-scraper-01", - owner="team@company.com", - metadata={"purpose": "product-data-collection"} -) -``` - -#### Delegation (Parent-Child Identity) -```python -# Create delegation token -delegation_token = client.create_delegation( - parent_principal_id="parent-agent-uuid", - child_principal_id="child-agent-uuid", - scope_narrowing={ - "resources": ["api:openai:gpt-3.5*"], # Child can only use GPT-3.5 - "actions": ["api_call"], - "max_depth": 2 # Can delegate 2 levels deep - }, - validity_seconds=3600 -) - -# Child uses delegation token to request mandates -mandate = client.request_mandate( - issuer_id="parent-agent-uuid", - subject_id="child-agent-uuid", - delegation_token=delegation_token, - resource_scope=["api:openai:gpt-3.5-turbo"], - action_scope=["api_call"] -) -``` - -### 2.3 Identity in Authorization Flow - -Every action in Caracal is tied to a principal's identity: - -``` -┌─────────────────────────────────────────────────┐ -│ Agent Identity (Principal) │ -│ ├─ UUID: 550e8400-e29b-41d4-a716-446655440000 │ -│ ├─ Name: "data-collector-01" │ -│ ├─ Public 
Key: 0x04a1b2c3... │ -│ └─ Parent: "orchestrator-agent" │ -└───────────────────┬─────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────┐ -│ Authority Policy (Who can do what) │ -│ ├─ Principal: data-collector-01 │ -│ ├─ Resources: ["api:serper:*", "db:products"] │ -│ ├─ Actions: ["GET", "POST", "db_read"] │ -│ └─ Conditions: {"time": "business_hours"} │ -└───────────────────┬─────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────┐ -│ Execution Mandate (Time-bound authorization) │ -│ ├─ Mandate ID: mandate-abc123 │ -│ ├─ Subject: data-collector-01 │ -│ ├─ Issued At: 2025-01-15T10:00:00Z │ -│ ├─ Expires At: 2025-01-15T11:00:00Z │ -│ ├─ Signature: ES256(issuer_private_key) │ -│ └─ Scope: matches policy │ -└───────────────────┬─────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────┐ -│ Action Execution │ -│ Request Header: X-Execution-Mandate: abc123 │ -│ ├─ AuthorityEvaluator validates mandate │ -│ ├─ Checks: signature, expiration, scope │ -│ └─ Decision: ALLOW or DENY │ -└───────────────────┬─────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────┐ -│ Authority Ledger (Immutable Audit Trail) │ -│ ├─ Principal: data-collector-01 │ -│ ├─ Action: api_call("serper.dev/search") │ -│ ├─ Decision: ALLOWED │ -│ ├─ Timestamp: 2025-01-15T10:15:23Z │ -│ └─ Merkle Root: 0xabcd1234... (verified) │ -└─────────────────────────────────────────────────┘ -``` - ---- - -## 3. 
Architecture Overview - -### 3.1 System Components - -``` -┌─────────────────────────────────────────────────────────────┐ -│ Caracal Architecture │ -├─────────────────────────────────────────────────────────────┤ -│ │ -│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ -│ │ CLI Tools │ │ Caracal Flow │ │ Web UI │ │ -│ │ (Click) │ │ (TUI) │ │ (React) │ │ -│ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │ -│ │ │ │ │ -│ └──────────────────┼──────────────────┘ │ -│ │ │ -│ ▼ │ -│ ┌────────────────────────────────────┐ │ -│ │ SDK / Authority Client │ │ -│ │ ├─ Mandate Request/Validation │ │ -│ │ ├─ Principal Management │ │ -│ │ └─ Ledger Queries │ │ -│ └────────────┬───────────────────────┘ │ -│ │ │ -│ ▼ │ -│ ┌──────────────────────────────────────────────────┐ │ -│ │ Authority Gateway (Enforcement) │ │ -│ │ ├─ Mandate Validation │ │ -│ │ ├─ Replay Protection │ │ -│ │ ├─ Rate Limiting │ │ -│ │ └─ Circuit Breaker │ │ -│ └───────────────┬──────────────────────────────────┘ │ -│ │ │ -│ ▼ │ -│ ┌──────────────────────────────────────────────────┐ │ -│ │ Core Authority Engine │ │ -│ │ ├─ Principal Identity (identity.py) │ │ -│ │ ├─ Authority Evaluation (authority.py) │ │ -│ │ ├─ Mandate Management (mandate.py) │ │ -│ │ ├─ Delegation (delegation.py) │ │ -│ │ └─ Crypto Operations (crypto.py) │ │ -│ └───────────────┬──────────────────────────────────┘ │ -│ │ │ -│ ┌────────┴─────────┐ │ -│ │ │ │ -│ ▼ ▼ │ -│ ┌──────────────┐ ┌────────────────────┐ │ -│ │ Database │ │ Authority Ledger │ │ -│ │ (PostgreSQL)│ │ ├─ Immutable Events │ │ -│ │ ├─ Principals│ │ ├─ Merkle Tree │ │ -│ │ ├─ Policies │ │ ├─ Kafka Stream │ │ -│ │ ├─ Mandates │ │ └─ Redis Cache │ │ -│ │ └─ Audit Log │ └────────────────────┘ │ -│ └──────────────┘ │ -│ │ -│ ┌────────────────────────────────────────────────────┐ │ -│ │ MCP Integration (Optional) │ │ -│ │ ├─ Tool Call Interception │ │ -│ │ ├─ Resource Read Enforcement │ │ -│ │ └─ Agentic Workflow Control │ │ -│ 
└────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────┘ -``` - -### 3.2 Key Components - -#### Core Authority Engine (`caracal/core/`) -- **identity.py**: Principal management with cryptographic key pairs -- **authority.py**: Policy evaluation and authorization decisions -- **mandate.py**: Time-bound authorization issuance and validation -- **delegation.py**: Parent-child authority delegation -- **crypto.py**: ECDSA P-256 signing and verification - -#### Database Layer (`caracal/db/`) -- SQLAlchemy models for principals, policies, mandates, ledger events -- PostgreSQL (JSONB), SQLite support -- Alembic migrations - -#### Gateway (`caracal/gateway/`) -- **authority_proxy.py**: Request interception and mandate validation -- **replay_protection.py**: Prevents mandate reuse attacks -- **metering_interceptor.py**: Resource usage tracking - -#### MCP Integration (`caracal/mcp/`) -- **adapter.py**: Intercepts MCP tool calls -- **service.py**: HTTP API for MCP request proxying -- Integrates with Model Context Protocol for agentic workflows - -#### Cryptographic Integrity (`caracal/merkle/`) -- Merkle tree construction with SHA-256 hashing -- Root signing and verification -- Batch-based snapshot scheduling -- Immutable ledger integrity - -#### SDK (`caracal/sdk/`) -- High-level Python SDK for developers -- Async/await support -- Fail-closed semantics - -#### CLI & TUI -- **CLI** (`caracal/cli/`): Command-line tools -- **Caracal Flow** (`caracal/flow/`): Interactive terminal UI with onboarding wizard - ---- - -## 4. 
Agent Identity in Practice - -### 4.1 Agent Registration Workflow - -```python -# Step 1: Register principal identity -from caracal.core.identity import PrincipalRegistry - -registry = PrincipalRegistry() -principal = registry.register_principal( - name="sentiment-analyzer", - owner="ai-team@company.com", - metadata={ - "team": "nlp", - "purpose": "sentiment-analysis", - "model": "gpt-4" - } -) - -# Principal receives: -# - UUID: 550e8400-e29b-41d4-a716-446655440000 -# - ECDSA P-256 key pair (public/private) -# - Persistent storage with atomic writes -``` - -### 4.2 Authority Policy Creation - -```python -# Step 2: Define what the agent can do -from caracal.core.authority import AuthorityPolicy - -policy = AuthorityPolicy.create( - principal_id=principal.id, - resource_scope=[ - "api:openai:gpt-4", - "db:reviews:read", - "s3:sentiment-results:write" - ], - action_scope=[ - "api_call", - "db_read", - "s3_write" - ], - conditions={ - "time_window": "business_hours", - "cost_limit_usd": 100.00 - } -) -``` - -### 4.3 Mandate Issuance - -```python -# Step 3: Issue time-bound mandate -from caracal.core.mandate import MandateManager - -mandate_mgr = MandateManager() -mandate = mandate_mgr.issue_mandate( - issuer_id="admin-principal-id", - subject_id=principal.id, - resource_scope=policy.resource_scope, - action_scope=policy.action_scope, - validity_seconds=3600 # 1 hour -) - -# Mandate contains: -# - Cryptographic signature (ES256) -# - Expiration timestamp -# - Scope constraints -# - Revocation status -``` - -### 4.4 Action Execution with Identity Verification - -```python -# Step 4: Agent executes action with mandate -from caracal.gateway import caracal_require_mandate - -@caracal_require_mandate( - required_action="api_call", - required_resource="api:openai:gpt-4" -) -def analyze_sentiment(text: str, mandate: dict): - """ - Mandate is validated BEFORE this function executes. 
- - Validation checks: - - Principal identity (signature verification) - - Mandate expiration - - Resource/action scope match - - Revocation status - - Replay protection - """ - # Call OpenAI API - result = openai.chat.completions.create(...) - return result - -# Usage -result = analyze_sentiment( - text="This product is amazing!", - mandate=mandate # Passed in request context -) -``` - -### 4.5 Delegation to Child Agents - -```python -# Step 5: Parent agent delegates to child -from caracal.core.delegation import DelegationManager - -delegation_mgr = DelegationManager() - -# Parent: sentiment-analyzer -# Child: tweet-sentiment-worker - -delegation_token = delegation_mgr.create_delegation_token( - parent_principal_id=principal.id, - child_principal_id="tweet-worker-id", - scope_narrowing={ - "resources": ["api:openai:gpt-3.5-turbo"], # More restricted - "actions": ["api_call"], - "max_cost_usd": 10.00, # Lower limit - "max_delegation_depth": 1 # Can't delegate further - }, - validity_seconds=1800 # 30 minutes -) - -# Child can now request mandates with delegation token -child_mandate = mandate_mgr.issue_mandate( - issuer_id=principal.id, - subject_id="tweet-worker-id", - delegation_token=delegation_token, - resource_scope=["api:openai:gpt-3.5-turbo"], - action_scope=["api_call"] -) -``` - -### 4.6 Audit Trail with Identity - -```python -# Step 6: Query audit trail by identity -from caracal.core.authority_ledger import AuthorityLedger - -ledger = AuthorityLedger() - -# Query all actions by principal -events = ledger.query_events( - principal_id=principal.id, - time_range="24h", - event_types=["MANDATE_VALIDATED", "ACTION_ALLOWED", "ACTION_DENIED"] -) - -# Example event -{ - "event_id": "evt-abc123", - "event_type": "ACTION_ALLOWED", - "timestamp": "2025-01-15T10:15:23Z", - "principal_id": "550e8400-e29b-41d4-a716-446655440000", - "principal_name": "sentiment-analyzer", - "mandate_id": "mandate-xyz789", - "resource": "api:openai:gpt-4", - "action": "api_call", - 
"decision": "ALLOWED", - "correlation_id": "req-456", - "merkle_root": "0xabcd1234...", - "signature_verified": true -} -``` - ---- - -## 5. Agent Identity Issues Addressed - -### 5.1 Identity Theft & Impersonation - -**Problem**: Agent credentials (API keys) can be stolen or leaked. - -**Caracal Solution**: -- Cryptographic signatures (ECDSA P-256) for every mandate -- Private keys never transmitted over network -- Mandate tied to specific principal identity -- Signature verification on every request - -```python -# Attacker cannot forge mandate without private key -def validate_mandate_signature(mandate: dict, principal: Principal) -> bool: - signature = mandate["signature"] - message = f"{mandate['id']}:{mandate['subject_id']}:{mandate['expires_at']}" - - # Verify signature with principal's public key - return verify_ecdsa_signature( - message=message, - signature=signature, - public_key=principal.key_pair.public_key - ) -``` - -### 5.2 Privilege Escalation - -**Problem**: Agent gains more permissions than intended. - -**Caracal Solution**: -- Explicit scope in every mandate (resources + actions) -- No implicit permissions or wildcard grants -- Delegation can only narrow scope, never broaden -- Policy evaluation fails closed (deny by default) - -```python -# Example: Parent can delegate to child, but only with narrower scope -parent_mandate_scope = ["api:openai:*", "db:*"] -child_mandate_scope = ["api:openai:gpt-3.5-turbo", "db:reviews:read"] - -# This is ALLOWED (narrowing) -assert is_scope_narrower(child_mandate_scope, parent_mandate_scope) - -# This would be DENIED (broadening) -child_invalid_scope = ["api:openai:*", "db:*", "s3:*"] -assert not is_scope_narrower(child_invalid_scope, parent_mandate_scope) -``` - -### 5.3 Long-Lived Credentials - -**Problem**: Static API keys live forever, can't be easily revoked. 
- -**Caracal Solution**: -- All mandates have TTL (time-to-live) -- Short-lived by default (1-24 hours typical) -- Automatic expiration enforcement -- Can be revoked at any time - -```python -# Mandate automatically expires -mandate = { - "issued_at": "2025-01-15T10:00:00Z", - "expires_at": "2025-01-15T11:00:00Z", - "revoked": False -} - -# Validation checks expiration -def is_mandate_valid(mandate: dict) -> bool: - now = datetime.utcnow() - - if mandate["revoked"]: - return False - - if now > mandate["expires_at"]: - return False # Expired - - return True -``` - -### 5.4 Lack of Audit Trail - -**Problem**: No record of what agent did, when, and with what authority. - -**Caracal Solution**: -- Immutable authority ledger -- Every decision logged (allowed/denied) -- Merkle tree integrity verification -- Optional Kafka streaming for real-time monitoring - -```python -# Every action creates ledger event -ledger_event = { - "event_id": str(uuid.uuid4()), - "event_type": "ACTION_ALLOWED", - "timestamp": datetime.utcnow().isoformat(), - "principal_id": principal.id, - "principal_name": principal.name, - "mandate_id": mandate.id, - "resource": "api:openai:gpt-4", - "action": "api_call", - "decision": "ALLOWED", - "decision_reason": "Mandate valid and scope matches", - "correlation_id": request.headers.get("X-Correlation-ID"), - "metadata": {...} -} - -# Append to ledger (immutable) -ledger.append(ledger_event) - -# Periodically create Merkle tree snapshot -merkle_root = create_merkle_tree(ledger_events_batch) -sign_merkle_root(merkle_root, issuer_private_key) -``` - -### 5.5 Replay Attacks - -**Problem**: Attacker intercepts mandate and reuses it multiple times. 
- -**Caracal Solution**: -- Nonce-based replay protection -- One-time use enforcement (optional) -- Request deduplication with Redis -- TTL on replay cache - -```python -from caracal.gateway.replay_protection import ReplayProtector - -protector = ReplayProtector(redis_client=redis) - -# Check if mandate already used -def validate_no_replay(mandate_id: str, nonce: str) -> bool: - cache_key = f"mandate:{mandate_id}:nonce:{nonce}" - - # Atomic check-and-set - if protector.is_nonce_seen(cache_key): - raise ReplayAttackDetected(f"Mandate {mandate_id} already used") - - # Mark as seen (with TTL matching mandate expiration) - protector.mark_nonce_seen(cache_key, ttl_seconds=mandate_ttl) - return True -``` - -### 5.6 Delegation Without Control - -**Problem**: Agent delegates authority without oversight or limits. - -**Caracal Solution**: -- Explicit delegation tokens (JWT-based) -- Scope narrowing enforcement (child can't have more than parent) -- Delegation depth limits -- Delegation chain verification - -```python -# Delegation token structure -delegation_token = { - "parent_principal_id": "parent-uuid", - "child_principal_id": "child-uuid", - "scope": { - "resources": ["api:openai:gpt-3.5-turbo"], # Must be subset of parent - "actions": ["api_call"], - "max_cost_usd": 10.00 - }, - "delegation_depth": 1, # Current depth in chain - "max_delegation_depth": 2, # Can delegate 1 more level - "issued_at": "2025-01-15T10:00:00Z", - "expires_at": "2025-01-15T12:00:00Z", - "signature": "ES256(parent_private_key, ...)" -} - -# Validation -def validate_delegation(token: dict, child_mandate_scope: list) -> bool: - # 1. Verify parent signature - verify_signature(token, parent_public_key) - - # 2. Check expiration - assert token["expires_at"] > now() - - # 3. Enforce scope narrowing - assert is_scope_narrower(child_mandate_scope, token["scope"]) - - # 4. 
Check delegation depth - assert token["delegation_depth"] < token["max_delegation_depth"] - - return True -``` - -### 5.7 Identity Confusion in Multi-Agent Systems - -**Problem**: Multiple agents, unclear who did what. - -**Caracal Solution**: -- Unique UUID per principal -- Human-readable names (enforced unique) -- Parent-child relationships tracked -- Correlation IDs across request chains - -```python -# Clear identity in logs -{ - "event_type": "ACTION_ALLOWED", - "principal_id": "550e8400-e29b-41d4-a716-446655440000", - "principal_name": "sentiment-analyzer", - "parent_principal_name": "orchestrator-agent", - "delegation_chain": [ - "orchestrator-agent", - "sentiment-analyzer", - "tweet-worker" - ], - "correlation_id": "req-abc123", - "resource": "api:openai:gpt-4", - "action": "api_call" -} -``` - ---- - -## 6. Technical Implementation Details - -### 6.1 Cryptographic Components - -#### Key Generation (ECDSA P-256) -```python -from cryptography.hazmat.primitives.asymmetric import ec -from cryptography.hazmat.primitives import serialization - -def generate_key_pair(): - # Generate ECDSA P-256 private key - private_key = ec.generate_private_key(ec.SECP256R1()) - - # Derive public key - public_key = private_key.public_key() - - # Serialize for storage - private_pem = private_key.private_bytes( - encoding=serialization.Encoding.PEM, - format=serialization.PrivateFormat.PKCS8, - encryption_algorithm=serialization.NoEncryption() - ) - - public_pem = public_key.public_bytes( - encoding=serialization.Encoding.PEM, - format=serialization.PublicFormat.SubjectPublicKeyInfo - ) - - return KeyPair(private_key=private_pem, public_key=public_pem) -``` - -#### Mandate Signing -```python -from cryptography.hazmat.primitives import hashes -from cryptography.hazmat.primitives.asymmetric import ec - -def sign_mandate(mandate: dict, private_key: ec.EllipticCurvePrivateKey) -> str: - # Create canonical message - message = 
f"{mandate['id']}:{mandate['subject_id']}:{mandate['expires_at']}:{mandate['resource_scope']}" - message_bytes = message.encode('utf-8') - - # Sign with ECDSA - signature = private_key.sign( - message_bytes, - ec.ECDSA(hashes.SHA256()) - ) - - # Base64 encode signature - return base64.b64encode(signature).decode('utf-8') -``` - -### 6.2 Authority Evaluation Algorithm - -```python -def evaluate_authority( - mandate: dict, - principal: Principal, - requested_action: str, - requested_resource: str -) -> AuthorityDecision: - """ - Fail-closed authority evaluation. - Any uncertainty or error results in DENY. - """ - try: - # 1. Verify mandate exists - if not mandate: - return AuthorityDecision(allowed=False, reason="No mandate provided") - - # 2. Verify signature (cryptographic identity proof) - if not verify_mandate_signature(mandate, principal): - return AuthorityDecision(allowed=False, reason="Invalid signature") - - # 3. Check expiration - if is_expired(mandate): - return AuthorityDecision(allowed=False, reason="Mandate expired") - - # 4. Check revocation - if is_revoked(mandate): - return AuthorityDecision(allowed=False, reason="Mandate revoked") - - # 5. Verify scope match - if not scope_matches(mandate, requested_action, requested_resource): - return AuthorityDecision(allowed=False, reason="Scope mismatch") - - # 6. Check delegation chain (if delegated) - if mandate.get("delegation_token"): - if not validate_delegation_chain(mandate["delegation_token"]): - return AuthorityDecision(allowed=False, reason="Invalid delegation") - - # 7. 
Check replay protection - if is_replay(mandate): - return AuthorityDecision(allowed=False, reason="Replay detected") - - # All checks passed - return AuthorityDecision(allowed=True, reason="All checks passed") - - except Exception as e: - # Fail closed on any error - log.error(f"Authority evaluation error: {e}") - return AuthorityDecision(allowed=False, reason=f"Evaluation error: {e}") -``` - -### 6.3 Merkle Tree Integrity - -```python -import hashlib -from typing import List - -def create_merkle_tree(events: List[dict]) -> str: - """ - Create Merkle tree from ledger events for cryptographic integrity. - """ - # Hash each event - leaf_hashes = [ - hashlib.sha256(json.dumps(event, sort_keys=True).encode()).hexdigest() - for event in events - ] - - # Build tree bottom-up - while len(leaf_hashes) > 1: - next_level = [] - for i in range(0, len(leaf_hashes), 2): - left = leaf_hashes[i] - right = leaf_hashes[i+1] if i+1 < len(leaf_hashes) else left - - combined = left + right - parent_hash = hashlib.sha256(combined.encode()).hexdigest() - next_level.append(parent_hash) - - leaf_hashes = next_level - - # Root hash - return leaf_hashes[0] - -def verify_merkle_proof(event: dict, merkle_proof: List[str], merkle_root: str) -> bool: - """ - Verify that an event is part of a Merkle tree with given root. - """ - current_hash = hashlib.sha256(json.dumps(event, sort_keys=True).encode()).hexdigest() - - for sibling_hash in merkle_proof: - combined = current_hash + sibling_hash - current_hash = hashlib.sha256(combined.encode()).hexdigest() - - return current_hash == merkle_root -``` - ---- - -## 7. 
Deployment & Operations - -### 7.1 Infrastructure Setup - -```bash -# Using Caracal Flow (TUI) - one-click setup -caracal-flow -# Navigate to "Infrastructure" → "Provision" → Select PostgreSQL + Kafka - -# Or manual setup -docker-compose up -d # PostgreSQL + Redis + Kafka - -# Initialize database -caracal db migrate upgrade head - -# Create admin principal -caracal agent register \ - --name "admin" \ - --owner "security@company.com" \ - --admin -``` - -### 7.2 Integration Patterns - -#### Pattern 1: SDK Integration (Python) -```python -from caracal.sdk import AuthorityClient - -# Initialize client -client = AuthorityClient(base_url="http://caracal.internal:8000") - -# Request mandate before action -mandate = client.request_mandate( - issuer_id=admin_principal_id, - subject_id=agent_principal_id, - resource_scope=["api:stripe:charges"], - action_scope=["create_charge"], - validity_seconds=300 # 5 minutes for this transaction -) - -# Validate before executing sensitive action -decision = client.validate_mandate( - mandate_id=mandate["mandate_id"], - requested_action="create_charge", - requested_resource="api:stripe:charges" -) - -if decision["allowed"]: - # Execute action with mandate in context - result = stripe.Charge.create( - amount=1000, - currency="usd", - headers={"X-Execution-Mandate": mandate["mandate_id"]} - ) -``` - -#### Pattern 2: Gateway Proxy (HTTP Middleware) -```python -from fastapi import FastAPI, Header, HTTPException -from caracal.gateway import AuthorityEvaluator - -app = FastAPI() -evaluator = AuthorityEvaluator() - -@app.middleware("http") -async def authority_enforcement(request: Request, call_next): - # Extract mandate from header - mandate_id = request.headers.get("X-Execution-Mandate") - - if not mandate_id: - raise HTTPException(status_code=401, detail="No mandate provided") - - # Validate mandate - decision = evaluator.evaluate( - mandate_id=mandate_id, - requested_action=request.method, - requested_resource=request.url.path - ) - - if 
not decision.allowed: - raise HTTPException(status_code=403, detail=decision.reason) - - # Proceed with request - response = await call_next(request) - return response -``` - -#### Pattern 3: MCP Integration (Model Context Protocol) -```python -from caracal.mcp import MCPAdapter, MCPContext - -# Initialize MCP adapter -adapter = MCPAdapter(authority_evaluator=evaluator) - -# Intercept tool calls -async def handle_mcp_request(request: MCPRequest): - # Extract agent identity and mandate from context - context = MCPContext( - principal_id=request.agent_id, - mandate_id=request.headers.get("X-Execution-Mandate") - ) - - # Validate before executing tool - result = await adapter.intercept_tool_call( - tool_name=request.tool_name, - tool_args=request.tool_args, - mcp_context=context - ) - - return result -``` - -### 7.3 Monitoring & Observability - -```python -# Prometheus metrics -from caracal.monitoring import CaracalMetrics - -metrics = CaracalMetrics() - -# Authority decisions -metrics.authority_decisions_total.labels( - principal="agent-01", - decision="allowed" -).inc() - -# Mandate issuance -metrics.mandates_issued_total.labels( - issuer="admin", - subject="agent-01" -).inc() - -# Delegation depth -metrics.delegation_depth.labels( - principal="agent-01" -).set(2) - -# Ledger events -metrics.ledger_events_total.labels( - event_type="ACTION_ALLOWED" -).inc() -``` - ---- - -## 8. Key Strengths & Limitations - -### Strengths - -1. **Cryptographic Identity**: Agents have verifiable, unforgeable identities -2. **Explicit Authority**: No action without explicit mandate -3. **Time-Bound**: All permissions expire automatically -4. **Immutable Audit**: Complete trail of all decisions -5. **Fail-Closed**: Deny by default on any error -6. **Delegation Control**: Parent-child authority with scope narrowing -7. **Replay Protection**: Prevents mandate reuse attacks -8. **MCP Integration**: Works with agentic frameworks - -### Limitations - -1. 
**Performance Overhead**: Cryptographic operations add latency (~10-50ms per request) -2. **Infrastructure Complexity**: Requires PostgreSQL, Redis, optional Kafka -3. **Key Management**: Private keys must be securely stored and rotated -4. **Learning Curve**: Developers must understand authority model -5. **Single Point of Failure**: Authority gateway must be highly available -6. **Not for Real-Time**: Validation latency unsuitable for sub-millisecond systems - ---- - -## 9. Comparison to Traditional Approaches - -| Aspect | API Keys | OAuth 2.0 | RBAC | Caracal | -|--------|----------|-----------|------|---------| -| **Identity Verification** | None | Token-based | Role-based | Cryptographic (ECDSA) | -| **Authority Granularity** | All-or-nothing | Scope-based | Role-based | Resource + Action + Conditions | -| **Time-Bound** | No (long-lived) | Yes (access tokens) | No | Yes (TTL mandates) | -| **Audit Trail** | Optional | Optional | Optional | Immutable (always) | -| **Delegation** | Not supported | Delegation tokens | Role inheritance | Cryptographic chain | -| **Revocation** | Delete key | Revoke token | Change role | Instant revocation | -| **Fail Semantics** | Fail open | Fail open | Fail open | Fail closed | -| **Replay Protection** | No | No | No | Yes (nonce-based) | -| **Agent-Specific** | No | No | No | Yes (designed for agents) | - ---- - -## 10. 
Use Cases - -### Use Case 1: Autonomous Trading Bot -```python -# Problem: Bot can access trading API 24/7 with static API key -# Solution: Time-bound mandates with cost limits - -# Register trading bot identity -trading_bot = registry.register_principal( - name="trading-bot-prod", - owner="quant-team@hedge.fund" -) - -# Create policy: can only trade during market hours -policy = AuthorityPolicy.create( - principal_id=trading_bot.id, - resource_scope=["api:alpaca:orders"], - action_scope=["place_order"], - conditions={ - "time_window": "market_hours", - "max_order_value_usd": 10000, - "max_daily_trades": 100 - } -) - -# Issue mandate (expires in 1 hour) -mandate = mandate_mgr.issue_mandate( - subject_id=trading_bot.id, - validity_seconds=3600 -) - -# Bot must renew mandate every hour -# If bot malfunctions, mandate expires and trading stops -``` - -### Use Case 2: Multi-Agent Customer Support -```python -# Problem: Parent orchestrator delegates to specialized agents -# Solution: Delegation chains with scope narrowing - -# Orchestrator (parent) -orchestrator = registry.register_principal(name="support-orchestrator") - -# Specialized agents (children) -email_agent = registry.register_principal(name="email-responder") -ticket_agent = registry.register_principal(name="ticket-creator") - -# Orchestrator delegates limited authority to email agent -delegation_token = delegation_mgr.create_delegation_token( - parent_principal_id=orchestrator.id, - child_principal_id=email_agent.id, - scope_narrowing={ - "resources": ["api:gmail:send"], # Can only send emails - "actions": ["send_email"], - "max_emails_per_hour": 50 - }, - validity_seconds=3600 -) - -# Email agent requests mandate with delegation token -email_mandate = mandate_mgr.issue_mandate( - issuer_id=orchestrator.id, - subject_id=email_agent.id, - delegation_token=delegation_token -) - -# Complete audit trail shows delegation chain -# orchestrator → email_agent → [email sent] -``` - -### Use Case 3: Data Pipeline with 
Multiple Agents -```python -# Problem: Complex pipeline with 5 agents, unclear who modified data -# Solution: Identity tracking through pipeline - -# Register pipeline agents -scraper = registry.register_principal(name="web-scraper") -cleaner = registry.register_principal(name="data-cleaner") -enricher = registry.register_principal(name="data-enricher") -validator = registry.register_principal(name="data-validator") -publisher = registry.register_principal(name="data-publisher") - -# Each agent gets narrow mandate for its step -scraper_mandate = mandate_mgr.issue_mandate( - subject_id=scraper.id, - resource_scope=["web:target-site.com"], - action_scope=["http_get"], - validity_seconds=1800 -) - -cleaner_mandate = mandate_mgr.issue_mandate( - subject_id=cleaner.id, - resource_scope=["db:raw_data:read", "db:clean_data:write"], - action_scope=["db_read", "db_write"] -) - -# Ledger shows complete lineage -# scraper → raw_data → cleaner → clean_data → enricher → ... -``` - ---- - -## 11. Summary & Key Takeaways - -### What Makes Caracal Different - -Caracal is **not** another authentication/authorization system. It's a pre-execution authority enforcement layer specifically designed for AI agents in production. - -**Key Differentiators:** - -1. **Agent-First Design**: Every concept (principals, mandates, delegation) designed for autonomous agents, not humans -2. **Cryptographic Proof**: ECDSA signatures provide unforgeable proof of authorization -3. **Fail-Closed Everywhere**: Any error or uncertainty results in denial, not execution -4. **Time-Bound Everything**: No long-lived credentials; all authority expires -5. **Immutable Audit**: Merkle-tree-verified ledger provides cryptographic proof of all decisions -6. 
**Explicit Delegation**: Parent-child authority transfer with cryptographic chain and scope narrowing - -### Agent Identity Model - -Caracal treats agent identity as a **cryptographic primitive**: - -- **Unique identity**: UUID + human-readable name -- **Cryptographic proof**: ECDSA P-256 key pair -- **Hierarchical relationships**: Parent-child delegation chains -- **Persistent registry**: Atomic writes with corruption protection -- **Complete audit**: Every action tied to principal identity - -### When to Use Caracal - -**Good fit:** -- Production AI agents executing irreversible actions -- Multi-agent systems with complex delegation -- Regulated industries requiring audit trails -- High-value transactions (financial, healthcare, legal) -- Autonomous systems requiring human oversight - -**Poor fit:** -- Prototype/demo agents -- Real-time systems requiring sub-millisecond latency -- Simple single-agent scripts -- Low-risk read-only operations - -### Getting Started - -```bash -# Install -pip install caracal-authority - -# Start Caracal Flow (interactive TUI) -caracal-flow - -# Or use CLI -caracal agent register --name "my-first-agent" --owner "me@company.com" -caracal policies create --principal-id --resources "*" --actions "*" -caracal mandates issue --principal-id --ttl 3600 - -# Integrate in code -from caracal.sdk import AuthorityClient -client = AuthorityClient() -mandate = client.request_mandate(...) 
-``` - ---- - -## References - -- **Repository**: `/Users/guoliangwang/Code/Python/caracal` -- **Documentation**: `caracal/docs/` -- **Examples**: `caracal/examples/` -- **Tests**: `caracal/tests/` - ---- - -*Document created: 2026-02-16* -*Analysis of Caracal v0.x.x* diff --git a/docs/caracal-codebase-overview.md b/docs/caracal-codebase-overview.md deleted file mode 100644 index 783d375..0000000 --- a/docs/caracal-codebase-overview.md +++ /dev/null @@ -1,187 +0,0 @@ -# Caracal Codebase Overview (Agent Identity Focus) - -## What This Project Is For - -Caracal is a **pre-execution authority enforcement system** for AI agents and automated software. -Its core goal is to ensure actions are allowed **before execution**, not just audited after the fact. - -In practical terms, Caracal sits between an agent and sensitive operations (API calls, tool use, data access) and enforces: - -- a known principal identity, -- an active authority policy, -- a valid cryptographic execution mandate, -- and fail-closed behavior on uncertainty or errors. - -The repository supports both: - -- **Developer/operator interfaces** (`caracal` CLI and Python SDK), and -- **Enforcement runtime paths** (Gateway proxy and MCP adapter service). - - -## Overall Architecture and Key Components - -Caracal is organized into several layers that map to identity, policy, enforcement, and audit. - -### 1) Identity and Authority Domain (`caracal/core`, `caracal/db/models`) - -- `core/identity.py`: - - `AgentRegistry` manages identity lifecycle (`register`, `get`, `list`) with file persistence support. - - Represents identities as `AgentIdentity` with optional parent-child hierarchy. - - Can generate key pairs for delegation when integrated with token management. -- `db/models.py` (v0.5 schema): - - `Principal`: canonical identity model (agent/user/service) with keys and optional parent. - - `AuthorityPolicy`: per-principal constraints (allowed resources/actions, TTL limits, delegation depth). 
- - `ExecutionMandate`: signed, time-bound authority grants with scopes and optional parent mandate. - - `AuthorityLedgerEvent`: immutable record of issuance, validation, denial, revocation. - -### 2) Cryptographic and Mandate Lifecycle (`caracal/core/crypto.py`, `core/mandate.py`, `core/authority.py`, `core/delegation.py`) - -- `core/crypto.py`: - - ECDSA P-256 signing and verification for mandates and Merkle roots. - - Canonicalized JSON + SHA-256 for deterministic verification behavior. -- `core/mandate.py` (`MandateManager`): - - Issues mandates after validating policy constraints and scope subset rules. - - Supports delegation through parent mandates with depth and validity checks. - - Supports revocation (with optional cascading down delegation chains). -- `core/authority.py` (`AuthorityEvaluator`): - - Validates mandates at request time (signature, expiry, revocation, action/resource scope, delegation chain). - - Implements explicit fail-closed semantics. -- `core/delegation.py` (`DelegationTokenManager`): - - Generates and validates ASE-style JWT delegation tokens (ES256), tied to parent/child identities. - -### 3) Enforcement Surfaces (`caracal/gateway`, `caracal/mcp`) - -- `gateway/proxy.py`: - - FastAPI reverse proxy for network-level enforcement. - - Request flow: authenticate -> replay-check -> mandate validate -> forward -> meter -> respond. - - Requires `X-Caracal-Mandate-ID` and `X-Caracal-Target-URL` headers for authority checks. -- `gateway/auth.py`: - - Supports authentication via mTLS, JWT, or API key. - - Maps successful auth to registered agent identity. -- `mcp/adapter.py`: - - Intercepts MCP tool calls and resource reads. - - Validates mandate against operation (`execute`/`read`) and target tool/resource. - - Emits metering, with fail-closed denial on validation errors. -- `mcp/service.py`: - - Standalone HTTP service wrapping MCP enforcement endpoints. 
- -### 4) Ledger, Integrity, and Operations (`caracal/core/authority_ledger.py`, `caracal/merkle`, `caracal/monitoring`, `caracal/db`) - -- `core/authority_ledger.py`: - - Writes immutable authority events and supports filtered querying. -- `merkle/*`: - - Cryptographic integrity pipeline (root signing, verification, snapshot/backfill/recovery paths). -- `db/connection.py` + migrations: - - SQLAlchemy-based DB setup, pooling, session lifecycle, health checks, Alembic migrations. -- `monitoring/*`: - - Health and metrics endpoints (Prometheus-compatible). - -### 5) Interfaces and Experience Layers (`caracal/cli`, `caracal/sdk`, `caracal/flow`) - -- `cli/main.py`: - - Main entrypoint for administrative workflows (agents, policies, authority, ledger, db, mcp-service, etc.). -- `sdk/client.py`: - - Programmatic interface for integration into applications/agent frameworks. -- `flow/main.py`: - - `caracal-flow` interactive TUI for onboarding and operational management. - - -## How It Relates to Agent Identity - -Agent identity is central to Caracal’s trust model. Identity is not just naming; it is a cryptographic and governance anchor. - -### Identity Model - -- Identity entities are represented as: - - `AgentIdentity` in JSON/file-backed flows (legacy/developer workflows), and - - `Principal` in DB-backed authority enforcement flows (v0.5 model). -- Principals can form parent-child hierarchies for delegated authority chains. - -### Cryptographic Binding - -- Mandates are signed by issuer private keys and verified using issuer public keys. -- Delegation tokens are also cryptographically signed (ES256), binding parent/child identity relationships. -- This means authority is both policy-constrained and cryptographically attestable. - -### Policy-Bound Identity - -- A principal’s allowed actions and resources are constrained by `AuthorityPolicy`. -- `MandateManager.issue_mandate()` enforces that issued scope/TTL/delegation depth stay within policy. 
-- Delegated mandates must be strict subsets of parent scope and validity windows. - -### Runtime Enforcement with Identity Context - -- Gateway and MCP adapter both require identity context + mandate. -- `AuthorityEvaluator.validate_mandate()` checks: - - who issued the mandate, - - who it authorizes (subject), - - whether operation/resource is in scope, - - whether chain and timing are valid. -- Any ambiguity/error defaults to denial (fail-closed). - -### Identity-Centric Auditability - -- Authority decisions are persisted to immutable authority ledger events keyed by principal/mandate. -- This creates a traceable chain of identity -> policy -> mandate -> decision. - - -## Main Entry Points and Workflows - -## Runtime and CLI Entry Points - -- Python package scripts (`pyproject.toml`): - - `caracal` -> `caracal.cli.main:cli` - - `caracal-flow` -> `caracal.flow.main:main` -- Additional service-style entry: - - `caracal mcp-service start` (wraps MCP adapter HTTP service) - - Gateway proxy is launched programmatically from `caracal.gateway.proxy.GatewayProxy`. - -### Core Administrative Workflow - -1. Register identity (`agent register` / principal creation path). -2. Define policy bounds (`policy create`). -3. Issue mandate (`authority issue`). -4. Use mandate at enforcement point (Gateway/MCP). -5. Validate or deny operation in real time (`AuthorityEvaluator`). -6. Persist authority decision to immutable authority ledger. -7. Revoke/rotate/delegate as needed (`authority revoke` / `authority delegate`). - -### Gateway Enforcement Workflow - -1. Receive request with auth credentials + Caracal headers. -2. Authenticate principal identity. -3. Check replay protection. -4. Load and validate mandate against action/resource. -5. If allowed, forward request; if denied, return fail-closed error. -6. Emit usage/metering and expose metrics/health for ops. - -### MCP Enforcement Workflow - -1. Intercept MCP call/read with context metadata. -2. 
Extract principal identity + mandate ID. -3. Validate mandate against tool or resource operation. -4. Execute/forward only if authorized. -5. Emit metering and authority outcomes. - - -## Architectural Notes and Observations - -- The codebase currently shows a transition from **agent-centric v0.1/v0.2 models** to **principal-centric v0.5 authority models**. Both concepts coexist in parts of the repo. -- Enforcement logic consistently follows **fail-closed** principles across authority, gateway, and MCP adapters. -- The repository includes open-source stubs for enterprise-only modules (`caracal/enterprise`) while keeping core authority features available in OSS. -- Caracal supports both local/file-oriented workflows and production-oriented DB + service deployments, with compatibility modes for rollout. - - -## Direct Answers to Your Four Questions - -1. **Main purpose** - Caracal prevents unauthorized agent actions by enforcing cryptographically verifiable, time-bound authority *before* execution. - -2. **Overall architecture and key components** - It is layered around identity/principals, authority policies, signed mandates, runtime enforcement surfaces (Gateway/MCP), immutable authority ledgering, and operational tooling (CLI/SDK/TUI/monitoring). - -3. **Relation to agent identity** - Identity is foundational: mandates are issued to identities, signed by identities, validated against identity-bound policies, and audited per identity in an immutable ledger. - -4. **Main entry points and workflows** - Entry points are `caracal`, `caracal-flow`, and MCP/gateway services; workflows center on register -> policy -> issue mandate -> enforce at runtime -> log -> revoke/delegate. diff --git a/docs/northstar.md b/docs/northstar.md deleted file mode 100644 index 24a7081..0000000 --- a/docs/northstar.md +++ /dev/null @@ -1,103 +0,0 @@ -### **Architecture Deep Dive: Caracal vs. 
The "Azure AD" Approach** - -Your background in Azure Active Directory (Entra ID) is the perfect lens for this. Caracal is essentially trying to build a **"Conditional Access Policy + Managed Identity"** system specifically for AI Agents, but it reinvents several wheels that you likely already know how to build better using standard protocols (OIDC, OAuth 2.0). - -Here is the technical breakdown of how Caracal works, followed by the "North Star" architecture for your **Identity Bridge SDK**. - ---- - -### **1. Deconstructing Caracal: The "Agent Identity" Stack** - -Caracal solves the "Who authorized this?" problem. Based on its architecture (Gateway, Core, Flow), here is how it maps to the concepts you know from Azure AD: - -| Caracal Component | Azure AD Equivalent | How It Works in Caracal | -| --- | --- | --- | -| **Ephemeral Mandate** | **Managed Identity / Access Token** | Instead of a static API Key, the agent requests a short-lived "Mandate" (Token) from the Caracal Core. This token contains specific **scopes** (e.g., `budget:50.00`, `zone:prod`). | -| **Gateway (PEP)** | **Conditional Access Policy** | A reverse proxy that sits in front of the tools. It intercepts the request, inspects the "Mandate," checks the **Budget** (Quota), and allows/blocks the traffic. | -| **Immutable Ledger** | **Sign-in Logs + Merkle Tree** | Every approved action is hashed and chained (likely in Postgres) to create a tamper-proof audit trail. It proves *who* delegated the action. | -| **Delegation Chain** | **On-Behalf-Of (OBO) Flow** | It tracks the "Call Stack" of agents. Agent A calls Agent B; the Mandate includes the chain `A -> B` so you know the root cause. | - -**The Weakness in Caracal:** -It appears to use a **Custom Protocol** for mandates and a **Centralized Ledger** (Postgres). This is a "Walled Garden." If an enterprise already uses Okta or Azure AD, they don't want a separate identity provider just for agents. - ---- - -### **2. 
The "Better SDK" Opportunity: "Entra for Agents"** - -Instead of building a competitor to Caracal (a full Identity Provider), you should build the **Bridge SDK** that forces standard Identity Providers (IdPs) to behave like Agent Guardians. - -**Name Idea:** `@predicatesystems/identity-bridge` - -#### **Core Feature 1: The "Context-Aware" OBO Flow** - -In Azure AD, the "On-Behalf-Of" flow is used when a User calls an API, which calls another API. -**Your Innovation:** Adapt this for **Agent-to-Agent** delegation with **State Context**. - -* **The Problem:** Standard JWTs only hold static roles (`Role: Admin`). They don't hold dynamic state (`Budget: $50`). -* **The Solution:** Your SDK wraps the standard `client_credentials` grant. -1. **Agent A** requests a token from Azure AD/Okta. -2. **Your SDK** intercepts this and appends a **"Context Assertion"** (using the `assertion` parameter in OAuth). -3. This assertion contains the **Predicate State** (e.g., `snapshot_hash`, `budget_remaining`). -4. **Result:** The downstream service receives a standard JWT, but your SDK verifies the *attached* Context Assertion against your Predicate Runtime before processing. - - - -#### **Core Feature 2: The "Sidecar" Cached Credential Service** - -You mentioned experience with **Identity Backup / Cached Credentials**. This is critical for agents because they run long jobs (hours/days) where tokens expire. - -* **Caracal's Way:** Re-issues new mandates centrally. -* **Your Way (The Azure Pattern):** A local **Sidecar Daemon**. -* The Agent talks to `localhost:8080` (Your Sidecar). -* The Sidecar manages the **Refresh Tokens** and **MFA challenges** (if needed). -* **The Moat:** The Sidecar *refuses* to refresh the token if the **Predicate Invariants** are violated. -* *Example:* Agent tries to refresh token to buy server. Sidecar checks `window.scrollY` (State). If state is "Error Page," Sidecar **kills the token**. The agent effectively loses its identity when it hallucinates. 
- - - #### **Core Feature 3: The "Verifiable Ledger" (Not just a DB)** - -Caracal uses a centralized DB ledger. You can build a **Decentralized Audit Log** using **Verifiable Credentials (VCs)**. - -* Every time an agent takes an action, your SDK generates a **Cryptographic Proof** (User Key + State Hash). -* This proof is appended to a standard log (Splunk/Datadog), but because it is signed, it is mathematically immutable even if the log is text. -* **Pitch:** "We don't force you to use *our* database. We give you *proofs* you can store in *your* Splunk." - ---- - -### **3. Strategic Roadmap** - -**Step 1: Build the `ContextMixin`** -Create a lightweight library that works with standard Python/Node Identity libraries (`msal`, `auth0`). - -```python -# The "Better" SDK Approach -from predicate.identity import EntraAgentAuth - -# Wraps standard MSAL but injects Predicate State into the token request -auth = EntraAgentAuth( - client_id="...", - predicate_policy=my_wasm_policy -) - -# If the agent drifts (State violation), this function throws an error -# The agent literally CANNOT get a token to do harm. -token = auth.get_token_for_action("buy_ticket") - -``` - -**Step 2: The "Token revocation" Hook** -Since you know cached credential services: - -* Build a mechanism where a **Predicate Block Event** (Runtime Verification failure) triggers a **Revocation Signal** to the Identity Provider (or deletes the local cached token). -* **Narrative:** "When Predicate detects a hallucination, we don't just block the click. We **revoke the passport.**" - -### **Summary** - -Caracal is building a "Parallel Identity System." -You can build the **"Identity Enforcer"** that sits *on top* of existing systems (Azure AD, AWS IAM, Okta) as well as a parallel identity system. - -* **Caracal:** "Use our tokens." -* **Predicate Identity Bridge:** "Use your existing Azure AD tokens, but we will strip them away the millisecond the agent acts weird." 
- -This leverages your Azure background to build something highly defensible that Enterprise CISOs will love (because they don't have to buy a new IdP). diff --git a/docs/predicate-sdk-verification-analysis.md b/docs/predicate-sdk-verification-analysis.md deleted file mode 100644 index c5ce30c..0000000 --- a/docs/predicate-sdk-verification-analysis.md +++ /dev/null @@ -1,1235 +0,0 @@ -# Predicate Python SDK: Adding Determinism to AI Agents Through Verification - -## Executive Summary - -The **Predicate Python SDK** (formerly Sentience SDK) is a verification-first runtime layer that transforms browser-based AI agents from probabilistic "hope-based" automation to deterministic, observable, and reproducible systems. It implements a **predicate-based verification system** that gates agent progress, ensures actions have effects, and provides complete observability through traces and artifacts. - -**Repository**: `/Users/guoliangwang/Code/Sentience/sdk-python` - -**Core Value Proposition**: *"Replace hope with proof in AI agent automation"* - ---- - -## 1. The Determinism Problem in AI Agents - -### 1.1 Traditional Agent Failures - -Without verification, AI agents exhibit non-deterministic behaviors: - -```python -# Traditional agent code (no verification) -await page.goto("https://example.com") -await page.click("button") # Did it work? Unknown. -await page.scroll(600) # Did page advance? Unknown. -# Agent continues blindly... 
-``` - -**Problems:** -- **Silent failures**: Actions fail but agent continues -- **Scroll ghosts**: `scroll()` returns but page didn't advance (overlay, focus issue) -- **Navigation failures**: Page loads but critical elements missing -- **Non-reproducible**: Same code produces different outcomes -- **Expensive**: Every step requires vision model ($$$) -- **No debugging**: When failure occurs, no evidence preserved - -### 1.2 The Core Insight - -> **Probabilistic reasoning cannot produce deterministic accountability.** - -Agents make probabilistic decisions (via LLMs), but **action effects should be deterministically verified**. - ---- - -## 2. Predicate's Verification System - -### 2.1 Core Architecture - -``` -┌─────────────────────────────────────────────────────────────┐ -│ Agent Execution Loop │ -├─────────────────────────────────────────────────────────────┤ -│ │ -│ ┌────────────────┐ │ -│ │ Agent Action │ (LLM-driven: probabilistic) │ -│ │ click/scroll │ │ -│ └───────┬────────┘ │ -│ │ │ -│ ▼ │ -│ ┌────────────────┐ │ -│ │ Snapshot │ Capture current page state │ -│ │ (DOM + Screen) │ │ -│ └───────┬────────┘ │ -│ │ │ -│ ▼ │ -│ ┌────────────────────────────────┐ │ -│ │ Predicate Verification │ (Deterministic) │ -│ │ ├─ url_contains("cart") │ │ -│ │ ├─ exists("role=button") │ │ -│ │ └─ scroll_advanced(min=50px) │ │ -│ └───────┬────────────────────────┘ │ -│ │ │ -│ ┌───┴───┐ │ -│ │ │ │ -│ PASS FAIL │ -│ │ │ │ -│ ▼ ▼ │ -│ Continue Halt + Capture Artifacts │ -│ (trace, clip, snapshot, diagnostics) │ -│ │ -└─────────────────────────────────────────────────────────────┘ -``` - -### 2.2 The Predicate Type - -At the core is a simple, composable type: - -```python -from typing import Callable -from dataclasses import dataclass - -# A predicate is a function that checks current state -type Predicate = Callable[[AssertContext], AssertOutcome] - -@dataclass -class AssertContext: - """Context passed to predicates for evaluation""" - snapshot: Snapshot | None # Current page 
snapshot - url: str | None # Current URL - step_id: str | None # For trace correlation - downloads: list[dict] # Download tracking - -@dataclass -class AssertOutcome: - """Result of predicate evaluation""" - passed: bool # Did it pass? - reason: str # Human-readable reason - details: dict # Structured details (matched elements, etc.) -``` - -**Key Design**: Predicates are **pure functions** - no side effects, just state → boolean + reason. - -### 2.3 Three Verification Methods - -```python -class AgentRuntime: - def assert_( - self, - predicate: Predicate, - label: str, - required: bool = False - ) -> bool: - """ - Evaluate predicate immediately, record outcome. - - If required=True and fails: - - Persist failure artifacts (frames, clip, snapshot) - - Emit 'verification' trace event - - Optionally halt execution - - Returns: passed (bool) - """ - - def check( - self, - predicate: Predicate, - label: str, - required: bool = False - ) -> AssertionHandle: - """ - Fluent form for retry logic. - - Usage: - result = await runtime.check( - exists("role=heading"), - label="page_stable" - ).eventually(timeout_s=10, poll_ms=250) - - Returns: AssertionHandle for .once() or .eventually() - """ - - def assert_done( - self, - predicate: Predicate, - label: str - ) -> bool: - """ - Task completion marker. - - Equivalent to assert_(predicate, label, required=True) - but also sets self._task_done = True. - - Used to mark successful task completion. - """ -``` - ---- - -## 3. Predicate Library: Deterministic Checks - -### 3.1 URL Predicates - -```python -from predicate.verification import url_contains, url_matches - -# Substring match -runtime.assert_( - url_contains("/cart"), - label="on_cart_page", - required=True -) - -# Regex match -runtime.assert_( - url_matches(r"^https://.*\.example\.com/checkout"), - label="on_checkout_subdomain" -) -``` - -**Determinism**: URL checks are 100% deterministic - no model inference needed. 
- -### 3.2 Element Existence Predicates - -```python -from predicate.verification import exists, not_exists, element_count - -# Element exists -runtime.assert_( - exists("role=button text~'Add to Cart'"), - label="add_to_cart_button_visible", - required=True -) - -# Element does NOT exist (e.g., loading spinner gone) -runtime.assert_( - not_exists("text~'Loading...'"), - label="page_loaded" -) - -# Element count bounds -runtime.assert_( - element_count("role=listitem", min=5, max=20), - label="search_results_count" -) -``` - -**Determinism**: Uses semantic selector engine (role, text, value matchers) with deterministic DOM traversal. - -### 3.3 State-Aware Predicates (Pro/Enterprise) - -Require SmartElements with state fields: - -```python -from predicate.verification import ( - is_enabled, is_disabled, is_checked, - value_equals, value_contains -) - -# Checkbox/radio state -runtime.assert_( - is_checked("role=checkbox label~'Terms'"), - label="terms_accepted", - required=True -) - -# Input value -runtime.assert_( - value_equals("role=textbox name~'email'", "user@example.com"), - label="email_entered" -) - -# Button state -runtime.assert_( - is_enabled("role=button text~'Submit'"), - label="submit_enabled" -) -``` - -**Determinism**: Direct state inspection (not model-inferred) from browser's accessibility tree. 
- -### 3.4 Combinator Predicates - -```python -from predicate.verification import all_of, any_of, custom - -# AND logic - all must pass -page_ready = runtime.assert_( - all_of( - url_contains("/dashboard"), - exists("role=heading"), - not_exists("text~'Loading'") - ), - label="dashboard_fully_loaded", - required=True -) - -# OR logic - any can pass -has_success_indicator = runtime.assert_( - any_of( - exists("text~'Success'"), - exists("text~'Completed'"), - exists("role=alert text~'Done'") - ), - label="task_succeeded" -) - -# Custom predicate -def cart_total_exceeds(min_usd: float): - def predicate(ctx: AssertContext) -> AssertOutcome: - # Parse cart total from snapshot - total = parse_cart_total(ctx.snapshot) - passed = total >= min_usd - return AssertOutcome( - passed=passed, - reason=f"Cart total ${total} >= ${min_usd}" if passed else f"Cart total ${total} < ${min_usd}", - details={"total_usd": total, "min_usd": min_usd} - ) - return predicate - -runtime.assert_( - custom(cart_total_exceeds(100.0), label="min_total"), - label="cart_minimum_met", - required=True -) -``` - ---- - -## 4. 
How Verification Adds Determinism - -### 4.1 Gated Progress (Core Principle) - -**Without Verification** (probabilistic): -```python -# Traditional agent - no gates -await page.goto("https://example.com") -await page.click("button") # Hope it worked -await page.scroll(600) # Hope page advanced -result = await extract_data() # Hope data is there -``` - -**With Verification** (deterministic): -```python -# Predicate agent - explicit gates -runtime.begin_step("Navigate and verify") - -await page.goto("https://example.com") -await runtime.snapshot() - -# GATE: Verify page loaded before proceeding -runtime.assert_( - all_of( - url_contains("example.com"), - exists("role=heading"), - not_exists("text~'Error'") - ), - label="page_loaded", - required=True # <-- HALT if fails -) - -# Only reached if assertion passed -await page.click("button") -await runtime.snapshot() - -# GATE: Verify click had effect -runtime.assert_( - exists("role=dialog"), - label="dialog_opened", - required=True -) - -# GATE: Verify scroll advanced -ok = await runtime.scroll_by(600, verify=True, min_delta_px=50) -if not ok: - raise RuntimeError("Scroll blocked - likely overlay or nested scroller") - -# Extract data only after all gates passed -result = await extract_data() -``` - -**Determinism Added**: -- Actions only proceed after verification -- Failures detected immediately (not later) -- Evidence captured at point of failure -- Reproducible: same gates trigger on same failures - -### 4.2 Deterministic Scroll Verification (Unique) - -**The Problem**: `scroll()` returns success but page didn't advance (overlay, focus issue, nested scroller). 
- -**Predicate's Solution**: -```python -ok = await runtime.scroll_by( - dy=600, - verify=True, # Check scrollTop actually changed - min_delta_px=50, # Minimum pixel movement required - label="scroll_effective", - required=True, - timeout_s=5.0 -) - -if not ok: - # Scroll was blocked - diagnose why - snapshot = await runtime.snapshot() - # Check for overlay: exists("role=dialog") - # Check for focus: exists("[aria-hidden='false']") -``` - -**Implementation** (from `agent_runtime.py` lines 778-856): -```python -async def scroll_by(self, dy: int, verify: bool = False, min_delta_px: int = 50): - # 1. Get initial scroll position - before = await self.backend.refresh_page_info() - initial_scroll_top = before.scroll_top - - # 2. Execute scroll - await self.backend.wheel(delta_y=dy) - await asyncio.sleep(0.3) # Allow scroll to complete - - # 3. Get final scroll position - after = await self.backend.refresh_page_info() - final_scroll_top = after.scroll_top - - # 4. Verify scroll advanced - delta = abs(final_scroll_top - initial_scroll_top) - advanced = delta >= min_delta_px - - # 5. Record outcome - if verify: - outcome = AssertOutcome( - passed=advanced, - reason=f"Scroll advanced {delta}px (min: {min_delta_px}px)", - details={ - "initial_scroll_top": initial_scroll_top, - "final_scroll_top": final_scroll_top, - "delta_px": delta, - "min_delta_px": min_delta_px - } - ) - self._record_outcome(outcome, label, required=False, kind="scroll_verify") - - return advanced -``` - -**Determinism**: Scroll success is **provable** (pixel delta >= threshold), not assumed. - -### 4.3 Failure Detection with Evidence - -**Without Verification**: -```python -# Agent fails silently, no evidence -await page.click("nonexistent-button") # Silently fails -# ... 50 steps later ... 
-# Task fails with "data not found" - no idea where it broke -``` - -**With Verification**: -```python -runtime.begin_step("Click submit button") -await runtime.snapshot() - -# This assertion will fail if button doesn't exist -runtime.assert_( - exists("role=button text~'Submit'"), - label="submit_button_exists", - required=True # <-- Triggers artifact capture -) - -# When assertion fails: -# 1. Last 15s of frames captured -# 2. MP4 clip generated (via ffmpeg) -# 3. Snapshot + diagnostics saved -# 4. Trace event emitted with failure reason -# 5. Run halts (if required=True) -``` - -**Artifacts Captured** (from `failure_artifacts.py`): - -| Artifact | Purpose | -|----------|---------| -| **Frame buffer** | Last 15s of viewport screenshots (JPEG @ 0.5-2 fps) | -| **Video clip** | MP4 generated from frames | -| **Snapshot** | HTML representation of DOM at failure | -| **Diagnostics** | Layout analysis, modals, CAPTCHA detection, ordinality hints | -| **Trace event** | Timestamped event with reason code, details, correlation ID | - -**Determinism**: Failures are **observable** and **reproducible** via trace replay in Studio. 
- -### 4.4 Bounded Retries (Not Infinite Loops) - -**Without Verification**: -```python -# Agent retries forever, wasting time/cost -while True: - try: - element = page.locator("button") - element.click() - break - except: - await asyncio.sleep(1) # Infinite loop -``` - -**With Verification**: -```python -# Bounded retry with .eventually() -result = await runtime.check( - exists("role=button text~'Submit'"), - label="submit_button_visible" -).eventually( - timeout_s=10, # Max 10 seconds - poll_ms=250 # Poll every 250ms -) - -if result.passed: - # Element appeared within timeout - await page.click("role=button text~'Submit'") -else: - # Timeout reached, element never appeared - # Failure artifacts captured - raise RuntimeError(f"Button not found: {result.reason}") -``` - -**Determinism**: Retries are **bounded** (timeout) and **recorded** (trace events show poll attempts). - -### 4.5 Smart Failure Intelligence - -When assertions fail, Predicate computes **why**: - -```python -# Example: Assertion fails -runtime.assert_( - exists("role=button text~'Submitt'"), # Typo: "Submitt" - label="submit_button", - required=True -) - -# Outcome includes suggestions: -outcome = AssertOutcome( - passed=False, - reason="No element matches selector", - details={ - "reason_code": "selector_not_found", - "nearest_matches": [ - { - "selector": "role=button text~'Submit'", # <-- Close match - "similarity": 0.95, - "text": "Submit" - }, - { - "selector": "role=button text~'Cancel'", - "similarity": 0.4, - "text": "Cancel" - } - ], - "suggestions": [ - "Did you mean: role=button text~'Submit'?", - "Check for typos in selector" - ] - } -) -``` - -**Determinism**: Failures are **actionable** with suggestions, not just "element not found". - ---- - -## 5. 
Step-Based Verification Flow - -### 5.1 Complete Flow Example - -```python -from predicate import AgentRuntime, get_extension_dir -from predicate.verification import exists, url_contains, all_of - -# Initialize runtime -runtime = AgentRuntime.from_playwright_page(page, tracer=tracer) - -# ======================================== -# Step 1: Navigate and verify page load -# ======================================== -runtime.begin_step("Navigate to example.com") - -await page.goto("https://example.com") -await runtime.snapshot() - -# Verify page loaded correctly -page_loaded = runtime.assert_( - all_of( - url_contains("example.com"), - exists("role=heading"), - not_exists("text~'Error'") - ), - label="page_loaded", - required=True -) - -await runtime.emit_step_end() - -# ======================================== -# Step 2: Verify interactive elements -# ======================================== -runtime.begin_step("Check interactive elements") - -await runtime.snapshot() - -# Check multiple elements -runtime.assert_(exists("role=link"), "has_links") -runtime.assert_(exists("role=heading"), "has_heading") -runtime.assert_(element_count("role=link", min=1), "link_count") - -# Composite check with bounded retry -page_ready = await runtime.check( - all_of( - exists("role=link"), - not_exists("text~'Loading'") - ), - label="page_fully_ready" -).eventually(timeout_s=10, poll_ms=250) - -await runtime.emit_step_end() - -# ======================================== -# Step 3: Task completion -# ======================================== -runtime.begin_step("Verify task completion") - -await runtime.snapshot() - -# Mark task as done -task_done = runtime.assert_done( - exists("text~'Example Domain'"), - label="reached_goal" -) - -await runtime.emit_step_end() -``` - -### 5.2 Trace Events Emitted - -For each step, these events are emitted to `Tracer`: - -1. **snapshot** - Page state captured (includes screenshot_base64, elements) -2. 
**verification** - Each assertion result (passed, reason, details) -3. **step_end** - Step complete with accumulated assertions - -**Example trace event** (verification): -```json -{ - "type": "verification", - "timestamp": "2025-01-15T10:15:23.456Z", - "step_id": "step-1", - "label": "page_loaded", - "passed": true, - "required": true, - "reason": "All predicates passed", - "details": { - "predicates": [ - { - "type": "url_contains", - "substring": "example.com", - "matched": true - }, - { - "type": "exists", - "selector": "role=heading", - "matched": true, - "element_id": "elem-42" - } - ] - }, - "correlation_id": "run-abc123" -} -``` - ---- - -## 6. Integration with Agent Frameworks - -### 6.1 Browser-Use Integration - -```python -from browser_use import BrowserSession, BrowserProfile -from predicate import get_extension_dir, AgentRuntime -from predicate.backends import BrowserUseAdapter - -# 1. Load Predicate extension in browser-use -extension_dir = get_extension_dir() -profile = BrowserProfile(args=[f"--load-extension={extension_dir}"]) -session = BrowserSession(browser_profile=profile) -await session.start() - -# 2. Create BrowserBackend from session -adapter = BrowserUseAdapter(session) -backend = await adapter.create_backend() - -# 3. Create runtime -runtime = AgentRuntime(backend=backend, tracer=tracer) - -# 4. 
browser-use drives navigation, Predicate verifies -page = await session.get_current_page() -await page.goto("https://example.com") - -runtime.begin_step("Verify page loaded") -await runtime.snapshot() -runtime.assert_(exists("role=heading"), label="has_heading", required=True) -await runtime.emit_step_end() -``` - -**Key Design**: Predicate's `BrowserBackend` protocol is minimal (10 methods): -- `refresh_page_info()` - viewport + scroll -- `eval(js)` / `call(fn, args)` - JavaScript execution -- `screenshot_png()` / `screenshot_jpeg()` -- `mouse_move()` / `mouse_click()` / `wheel()` -- `type_text()` / `press_key()` - -Any browser automation framework can implement this protocol. - -### 6.2 Playwright Integration (Direct) - -```python -from playwright.async_api import async_playwright -from predicate import AgentRuntime - -async with async_playwright() as p: - browser = await p.chromium.launch() - page = await browser.new_page() - - # Direct Playwright page → AgentRuntime - runtime = AgentRuntime.from_playwright_page(page=page, tracer=tracer) - - # Use runtime for verification - await page.goto("https://example.com") - await runtime.snapshot() - runtime.assert_(exists("role=heading"), "has_heading", required=True) -``` - -### 6.3 Sidecar Mode (Attach to Existing Agents) - -```python -from predicate import SentienceDebugger - -# Your framework (LangGraph, AutoGen, custom) drives execution -# Predicate just snapshots + verifies - -dbg = SentienceDebugger.attach(page, tracer=tracer) - -async with dbg.step("agent_step: navigate"): - # Your agent does work - await your_agent.navigate_to_page() - - # Snapshot result - await dbg.snapshot() - - # Verify with bounded retry - result = await dbg.check( - exists("role=heading"), - label="heading_present" - ).eventually(timeout_s=10) - - if not result.passed: - raise RuntimeError(f"Navigation failed: {result.reason}") -``` - -### 6.4 LangChain Integration (Tools-based) - -From `/predicate/integrations/langchain/`: - -```python 
-from langchain.agents import AgentExecutor -from predicate.integrations.langchain import get_langchain_tools - -# Get LLM-callable tools -tools = get_langchain_tools(runtime) -# tools = [ -# "sentience_snapshot", -# "sentience_click", -# "sentience_type_text", -# "sentience_scroll", -# "sentience_assert_exists", -# ... -# ] - -agent = AgentExecutor.from_llm_and_tools( - llm=llm, - tools=tools, - verbose=True -) - -result = agent.run("Navigate to example.com and verify heading exists") -``` - ---- - -## 7. Controlled Perception (Token Efficiency) - -### 7.1 The Token Problem - -**Without Predicate**: -```python -# Full DOM dump to LLM (thousands of tokens) -html = await page.content() -prompt = f"Find the submit button in: {html}" # 10,000+ tokens -response = llm.complete(prompt) # $$$ expensive -``` - -**With Predicate**: -```python -# Semantic snapshot with pruning -snapshot = await runtime.snapshot( - limit=50, # Max 50 elements - filter=SnapshotFilter( - clickable_only=True, - in_viewport_only=True, - min_importance=100 - ), - screenshot=False # No vision model needed -) - -# Snapshot contains 50 SmartElements (with state) -# vs. 1000s of raw DOM nodes -# → Lower token cost, faster LLM inference -``` - -### 7.2 SmartElements (Pro/Enterprise) - -Server-side refinement adds state fields: - -```json -{ - "id": "elem-42", - "role": "button", - "text": "Submit", - "value": null, - "state": { - "enabled": true, - "disabled": false, - "checked": false, - "expanded": false - }, - "bbox": {"x": 100, "y": 200, "width": 80, "height": 40}, - "importance": 250 -} -``` - -**Determinism**: State fields are directly inspected (not model-inferred) from browser's accessibility tree. - ---- - -## 8. 
Failure Artifacts & Debugging - -### 8.1 Automatic Artifact Capture - -When `required=True` assertion fails: - -```python -# Automatically triggered -self._persist_failure_artifacts(reason="assert_failed:selector_not_found") -``` - -**Captured Artifacts**: - -| Artifact | Purpose | Size | -|----------|---------|------| -| **Frame buffer** | Last 15s of viewport screenshots (JPEG @ 0.5-2 fps) | ~300KB/frame | -| **Video clip** | MP4 generated from frames (requires ffmpeg) | ~5MB for 15s | -| **Snapshot** | HTML representation of DOM at failure | ~50KB | -| **Diagnostics** | Layout analysis, modals, CAPTCHA detection | ~10KB | -| **Trace event** | Timestamped event with reason code, details | ~5KB | - -**Configuration**: -```python -from predicate.failure_artifacts import FailureArtifactsOptions - -options = FailureArtifactsOptions( - buffer_seconds=15.0, - persist_mode="onFail", # or "always" - frame_format="jpeg", - fps=0.5, - redact_snapshot_values=True, # PII masking -) - -runtime = AgentRuntime(..., failure_artifacts_options=options) -``` - -### 8.2 Studio Replay - -All trace events → Sentience Studio for: -- Step-by-step replay -- Snapshot inspection at each step -- Assertion timeline (passed/failed) -- Video clip playback -- Diagnostics analysis - -**Determinism**: Full run is **reproducible** via trace replay. - ---- - -## 9. Comparison: With vs. 
Without Verification - -| Aspect | Without Predicate | With Predicate | -|--------|-------------------|----------------| -| **Action Verification** | "Hope it worked" | `assert_(exists(...))` - proved | -| **Scroll Verification** | Returns but page didn't advance | `scroll_by(verify=True)` - checked | -| **Failure Detection** | Silent, discovered later | Immediate with evidence | -| **Debugging** | No artifacts, guess where it broke | Trace + clip + snapshot + diagnostics | -| **Retries** | Infinite loops or hardcoded | Bounded `.eventually(timeout_s=10)` | -| **Token Cost** | Full DOM dump (10K+ tokens) | Semantic snapshot (50 elements) | -| **Vision Models** | Required for every step | Optional (local 3B models sufficient) | -| **Reproducibility** | Non-deterministic | Trace-driven replay | -| **Privacy** | Screenshots sent to LLM | Optional redaction, local processing | -| **Observability** | Logs if lucky | Full trace with correlation IDs | - ---- - -## 10. Key Design Principles - -### 10.1 Minimal Backend Protocol - -Only 10 methods required to implement `BrowserBackend`: - -```python -class BrowserBackend(Protocol): - async def refresh_page_info(self) -> PageInfo: ... - async def eval(self, js: str) -> Any: ... - async def call(self, fn: str, args: list) -> Any: ... - async def screenshot_png(self) -> bytes: ... - async def screenshot_jpeg(self, quality: int) -> bytes: ... - async def mouse_move(self, x: int, y: int): ... - async def mouse_click(self, x: int, y: int): ... - async def wheel(self, delta_y: int): ... - async def type_text(self, text: str): ... - async def press_key(self, key: str): ... -``` - -**Result**: Works with Playwright, Puppeteer, CDP, browser-use, Selenium (via adapters). 
- -### 10.2 Composable Predicates - -Predicates are **pure functions** - easy to compose: - -```python -# Base predicates -p1 = url_contains("/cart") -p2 = exists("role=button") -p3 = not_exists("text~'Loading'") - -# Composite predicates -page_ready = all_of(p1, p2, p3) -has_success = any_of( - exists("text~'Success'"), - exists("text~'Done'") -) -``` - -### 10.3 Fail-Closed Semantics - -When `required=True`, failures **halt execution**: - -```python -runtime.assert_( - exists("role=button"), - label="button_exists", - required=True # <-- HALT if fails -) - -# This line only reached if assertion passed -await page.click("role=button") -``` - -**Result**: Failures detected immediately, not propagated silently. - -### 10.4 Trace-Driven Observability - -Every event indexed + queryable: - -```python -# Query trace events -events = tracer.query( - step_id="step-1", - event_type="verification", - passed=False # Only failures -) - -for event in events: - print(f"Failed: {event['label']} - {event['reason']}") -``` - ---- - -## 11. Real-World Example: E-Commerce Checkout - -### Without Verification (Hope-Based) - -```python -# Traditional agent - no verification -await page.goto("https://shop.example.com") -await page.click("text=Add to Cart") # Hope it worked -await page.click("text=Checkout") # Hope cart has items -await page.fill("input[name=email]", "user@example.com") -await page.click("text=Submit") # Hope form valid -# ... 
agent continues blindly -``` - -**Problems**: -- If "Add to Cart" fails (out of stock), agent continues -- If cart is empty, checkout fails silently -- If form validation fails, agent doesn't know -- No evidence of what went wrong - -### With Verification (Proof-Based) - -```python -from predicate import AgentRuntime -from predicate.verification import exists, url_contains, all_of - -runtime = AgentRuntime.from_playwright_page(page, tracer=tracer) - -# ======================================== -# Step 1: Add product to cart -# ======================================== -runtime.begin_step("Add product to cart") - -await page.goto("https://shop.example.com/products/widget") -await runtime.snapshot() - -# Verify product page loaded -runtime.assert_( - all_of( - url_contains("/products/"), - exists("role=button text~'Add to Cart'"), - not_exists("text~'Out of Stock'") - ), - label="product_page_ready", - required=True -) - -await page.click("role=button text~'Add to Cart'") -await runtime.snapshot() - -# Verify item added to cart -cart_updated = await runtime.check( - exists("text~'Item added to cart'"), - label="cart_confirmation" -).eventually(timeout_s=5) - -if not cart_updated.passed: - raise RuntimeError("Failed to add item to cart") - -await runtime.emit_step_end() - -# ======================================== -# Step 2: Proceed to checkout -# ======================================== -runtime.begin_step("Navigate to checkout") - -await page.click("text=View Cart") -await runtime.snapshot() - -# Verify cart has items -runtime.assert_( - all_of( - url_contains("/cart"), - exists("role=button text~'Checkout'"), - element_count("role=row", min=1) # At least 1 cart item - ), - label="cart_has_items", - required=True -) - -await page.click("role=button text~'Checkout'") -await runtime.snapshot() - -# Verify checkout page loaded -runtime.assert_( - url_contains("/checkout"), - label="on_checkout_page", - required=True -) - -await runtime.emit_step_end() - -# 
======================================== -# Step 3: Fill checkout form -# ======================================== -runtime.begin_step("Fill checkout form") - -await runtime.snapshot() - -# Verify form visible -runtime.assert_( - all_of( - exists("role=textbox name~'email'"), - exists("role=textbox name~'address'"), - exists("role=button text~'Submit'") - ), - label="checkout_form_visible", - required=True -) - -await page.fill("role=textbox name~'email'", "user@example.com") -await page.fill("role=textbox name~'address'", "123 Main St") -await runtime.snapshot() - -# Verify form filled -runtime.assert_( - all_of( - value_equals("role=textbox name~'email'", "user@example.com"), - value_contains("role=textbox name~'address'", "Main St") - ), - label="form_filled_correctly", - required=True -) - -await page.click("role=button text~'Submit'") -await runtime.snapshot() - -# Verify order success -order_success = await runtime.check( - any_of( - exists("text~'Order Confirmed'"), - exists("text~'Thank you for your order'") - ), - label="order_confirmed" -).eventually(timeout_s=10) - -if not order_success.passed: - raise RuntimeError("Order submission failed") - -await runtime.emit_step_end() - -# ======================================== -# Step 4: Mark task done -# ======================================== -runtime.begin_step("Task completion") - -await runtime.snapshot() - -task_done = runtime.assert_done( - exists("text~'Order #'"), - label="order_number_visible" -) - -await runtime.emit_step_end() -``` - -**Benefits**: -- Every step is gated with verification -- Failures detected immediately with evidence -- Trace shows exact step where failure occurred -- Artifacts available for debugging -- Reproducible via trace replay - ---- - -## 12.
Summary: How Verification Adds Determinism - -### Core Transformation - -``` -┌─────────────────────────────────────────────────────────────┐ -│ From → To │ -├─────────────────────────────────────────────────────────────┤ -│ │ -│ Probabilistic → Deterministic │ -│ "Hope it worked" → "Proved it worked" │ -│ Silent failures → Immediate detection + evidence │ -│ Non-reproducible → Trace-driven replay │ -│ Expensive (vision) → Cost-aware (semantic snapshots) │ -│ Flaky retries → Bounded .eventually() │ -│ No debugging → Full observability (trace/clips) │ -│ Hope-based automation → Proof-based automation │ -│ │ -└─────────────────────────────────────────────────────────────┘ -``` - -### Key Determinism Mechanisms - -1. **Gated Progress**: Actions only proceed after verification passes -2. **Proof of Effect**: Scroll, click, type all verified (not assumed) -3. **Fail-Closed**: Failures halt execution (not silent propagation) -4. **Bounded Retries**: `.eventually(timeout_s=10)` vs. infinite loops -5. **Evidence Capture**: Artifacts at point of failure (trace, clip, snapshot) -6. **Trace-Driven Replay**: Full run reproducible in Studio -7. **Smart Failures**: Reason codes + suggestions (not just "failed") -8. **State Inspection**: Direct state fields (not model-inferred) - -### When to Use Predicate - -**Good fit:** -- Production browser automation -- Multi-step workflows requiring reliability -- Cost-sensitive deployments (reduce vision model usage) -- Privacy/compliance (local processing, redaction) -- Debugging complex agent failures - -**Poor fit:** -- Prototype/demo agents (overhead not justified) -- Real-time systems (verification adds latency) -- Simple single-step scripts - ---- - -## 13. 
Getting Started - -### Installation - -```bash -pip install predicate-sdk -``` - -### Basic Usage - -```python -from playwright.async_api import async_playwright -from predicate import AgentRuntime, get_extension_dir -from predicate.verification import exists, url_contains - -async with async_playwright() as p: - # Launch browser with Predicate extension - extension_dir = get_extension_dir() - browser = await p.chromium.launch_persistent_context( - user_data_dir="./browser-data", - args=[f"--load-extension={extension_dir}"] - ) - page = await browser.new_page() - - # Create runtime - runtime = AgentRuntime.from_playwright_page(page) - - # Navigate and verify - await page.goto("https://example.com") - await runtime.snapshot() - - runtime.assert_( - url_contains("example.com"), - label="on_correct_domain", - required=True - ) - - runtime.assert_( - exists("role=heading"), - label="has_heading", - required=True - ) - - # Task completion - runtime.assert_done( - exists("text~'Example Domain'"), - label="task_complete" - ) - - await browser.close() -``` - ---- - -## References - -- **Repository**: `/Users/guoliangwang/Code/Sentience/sdk-python` -- **Documentation**: `sdk-python/docs/` -- **Examples**: `sdk-python/examples/` -- **Tests**: `sdk-python/tests/` - ---- - -*Document created: 2026-02-16* -*Analysis of Predicate SDK v2.x.x* diff --git a/docs/pypi-release-guide.md b/docs/pypi-release-guide.md deleted file mode 100644 index 4c61c87..0000000 --- a/docs/pypi-release-guide.md +++ /dev/null @@ -1,74 +0,0 @@ -# PyPI Release Guide - -This repo publishes two Python packages in strict order: - -1. `predicate-contracts` -2. 
`predicate-authority` (depends on `predicate-contracts`) - -## 1) One-time setup - -### Reserve package names on PyPI - -Ensure both package names exist under your organization: - -- `predicate-contracts` -- `predicate-authority` - -### Add GitHub repository secrets - -In GitHub repository settings -> Secrets and variables -> Actions, add: - -- `PYPI_TOKEN_PREDICATE_CONTRACTS` -- `PYPI_TOKEN_PREDICATE_AUTHORITY` - -Use PyPI API tokens scoped to each package where possible. - -## 2) Prepare a release - -1. Update versions: - - `predicate_contracts/pyproject.toml` -> `project.version` - - `predicate_authority/pyproject.toml` -> `project.version` -2. If `predicate-contracts` version changes, update dependency pin in: - - `predicate_authority/pyproject.toml` (`predicate-contracts>=X,<Y`) - -## 3) Publish via GitHub Actions - -1. Open the repository on GitHub. -2. Go to Actions -> `phase1-ci-and-release`. -3. Click **Run workflow** with input `publish=true`. -4. Workflow order is enforced: - - `publish-predicate-contracts` - - `publish-predicate-authority` (runs only after contracts publish succeeds) - -## 4) Verify published artifacts - -```bash -python -m pip install --upgrade predicate-contracts predicate-authority -python - <<'PY' -import predicate_contracts -import predicate_authority -print("ok", predicate_contracts.__name__, predicate_authority.__name__) -PY -``` - -## 5) Manual fallback publish (if needed) - -```bash -python -m pip install --upgrade build twine -python -m build predicate_contracts -twine check predicate_contracts/dist/* -TWINE_USERNAME=__token__ TWINE_PASSWORD="$PYPI_TOKEN_PREDICATE_CONTRACTS" twine upload predicate_contracts/dist/* - -python -m build predicate_authority -twine check predicate_authority/dist/* -TWINE_USERNAME=__token__ TWINE_PASSWORD="$PYPI_TOKEN_PREDICATE_AUTHORITY" twine upload predicate_authority/dist/* -``` diff --git a/docs/roadmap.md b/docs/roadmap.md deleted file mode 100644 index caa1bab..0000000 --- a/docs/roadmap.md +++ /dev/null @@ -1,370 +0,0 @@ -### Non-Web AI Agents: 2026-02-16 - -This is a billion-dollar
roadmap. - -You currently own the **DOM** (Document Object Model). To build the "North Star," you need to abstract your platform to own the **State** of any environment. - -Your core value proposition is **Deterministic Verification**. This means you reject probabilistic inputs (like computer vision/pixels) and focus on **Structured State Trees**. - -Here is the strategic breakdown of how **Predicate Systems** expands beyond the browser. - ---- - -### **The Universal Architecture** - -To scale, you need to decouple your engine from the browser. - -* **Current:** `Interceptor (Playwright)` → `State (DOM)` → `Policy (WASM)` -* **North Star:** `Interceptor (Adapter)` → `State (Universal Tree)` → `Policy (WASM)` - -The **WASM Policy Engine** remains constant. You only need to build new **Adapters** for different environments. - ---- - -### **Category 1: The "Computer Use" Agents (OS Automation)** - -*Examples: Anthropic Computer Use, Apple Intelligence, Microsoft Recall.* - -These agents interact with desktop apps (Excel, Slack, Terminal). Most competitors try to use Vision (OCR/Screenshots) to verify them, which is slow and prone to error. -**Predicate’s Edge:** You hook into the OS **Accessibility Tree (AXTree)**. - -* **The Approach:** -* **Interceptor:** Build a lightweight OS Daemon (Mac/Windows) that wraps the `Accessibility API`. -* **State Snapshot:** Instead of the HTML DOM, you serialize the **AXTree** (Accessibility Tree). This gives you a structured JSON of every window, button, and text field on the screen. -* **Invariants:** -* `IF App == "Terminal" AND Text contains "sudo rm -rf" -> BLOCK` -* `IF App == "Zoom" AND Button == "Share Screen" -> REQUIRE_APPROVAL` - - - - -* **Why You Win:** AXTree is deterministic. Pixels are not. You can verify "Click 'Pay'" without needing to "see" the button. - -### **Category 2: The "Backend" Agents (API & SaaS)** - -*Examples: LangChain Tools, Zapier AI, AutoGPT connecting to Salesforce/Stripe.* - -These agents don't have a UI.
They just chain API calls. -**Predicate’s Edge:** You become the **Firewall for Function Calling**. - -* **The Approach:** -* **Interceptor:** A Python/Node.js SDK wrapper around `requests` or `openai.chat.completions`. -* **State Snapshot:** The **JSON Payload** of the intended API request + The current **User Context** (e.g., User ID, Role). -* **Invariants:** -* `IF Tool == "Stripe_Payout" AND Amount > $500 -> BLOCK` -* `IF Tool == "Slack_Post" AND Channel == "#public" AND Content contains "password" -> BLOCK` - - -* **Why You Win:** LLMs are bad at math and strict logic. Your WASM runtime enforces business logic (e.g., "Budget Constraints") that the LLM ignores. - - - -### **Category 3: The "Data" Agents (Text-to-SQL)** - -*Examples: Enterprise RAG, Finance Chatbots querying internal DBs.* - -These agents generate SQL to answer questions. The risk is data exfiltration or destructive queries (`DROP TABLE`). -**Predicate’s Edge:** You verify the **Abstract Syntax Tree (AST)** of the query. - -* **The Approach:** -* **Interceptor:** A database driver wrapper (e.g., wrapping `SQLAlchemy` or `pg`). -* **State Snapshot:** The parsed **SQL AST** (not just the raw string). -* **Invariants:** -* `IF QueryType != "SELECT" -> BLOCK` (Read-only enforcement). -* `IF Table == "Salaries" AND UserRole != "HR" -> BLOCK` (Row/Column-level security). -* `IF Complexity_Score > 100 -> BLOCK` (Prevent Denial of Service queries). - - - - - -### **Category 4: The "Coding" Agents (Devin/GitHub Copilot)** - -*Examples: Devin, OpenDevin, Cursor.* - -These agents write and execute code. -**Predicate’s Edge:** You verify the **File System** and **Shell Command** safety. - -* **The Approach:** -* **Interceptor:** A generic "Sandboxed Shell" wrapper or LSP (Language Server Protocol) proxy. -* **State Snapshot:** The **Diff** (Code changes) and **Command Line Arguments**. 
-* **Invariants:** -* `IF FilePath starts_with "/etc/" -> BLOCK` -* `IF CodeImport contains "crypto_miner" -> BLOCK` -* `IF GitCommitMessage is EMPTY -> BLOCK` - - - - - ---- - -### **The North Star Roadmap** - -**Phase 1: Web (Q1-Q2 2026)** - -* **Target:** `browser-use`, Playwright agents. -* **Tech:** DOM Snapshot + Chrome Extension. -* **Goal:** Own the browser. - -**Phase 2: API & SQL (Q3-Q4 2026)** - -* **Target:** LangChain Backend Tools, Enterprise RAG. -* **Tech:** JSON Schema Validation + SQL AST Parser in WASM. -* **Why:** Easiest to build. High enterprise demand (Banks hate SQL injection). - -**Phase 3: The OS Layer (2027)** - -* **Target:** "Computer Use" Agents. -* **Tech:** Cross-platform Accessibility Tree (AXTree) serialization. -* **Why:** This is the endgame. When AI runs the whole computer, you are the seatbelt. - -### **Summary for Investors** - -"Predicate Systems started with the **Browser** because it's the noisiest environment. But our technology is not a 'Browser Tool.' It is a **State Verification Engine.** We can swap out the HTML DOM for a SQL AST, a JSON Payload, or a Mac OS Accessibility Tree, and the verification logic remains exactly the same. We are building the universal brake system for all autonomous compute." ---- -ran into this reddit post: https://www.reddit.com/r/aiagents/comments/1r655kk/how_are_you_enforcing_actionlevel_authorization/ - -**How are you enforcing action-level authorization in multi-agent systems?** - -post content: - -``` -For those building multi-agent or tool-using AI systems (e.g. agents that can call Git, Bash, APIs, MCP servers, deploy infra, trigger workflows, etc.): - -How are you handling permission scoping and revocation at execution time? - -Specifically: - -Are you relying purely on IAM + short-lived tokens? - -How do you prevent delegation chains from silently expanding over time? - -If one agent delegates to another (or invokes a tool), how do you trace who actually authorized the final action? 
- -Can you revoke authority mid-workflow safely? - -Is enforcement happening before execution, or are you mostly relying on logging and monitoring after the fact? - -Curious how people are solving this in production — especially as agent autonomy increases. -``` - -This seems to fall into one of the following non-web agent categories we discussed above: - -* Category 1: The "Computer Use" Agents (OS Automation) -* Category 2: The "Backend" Agents (API & SaaS) -* Category 3: The "Data" Agents (Text-to-SQL) - -#### Gemini Answer -This Reddit post is a **gold mine**. It is not just a category match; it is a **perfect problem statement** for Predicate Systems. - -The user is explicitly asking for **Runtime Enforcement** ("before execution") because they realize that "IAM + short-lived tokens" (Identity) are not enough. - -Here is the breakdown of how this post maps to your categories and how to leverage it. - -### **1. Categorization Analysis** - -You are right. This falls squarely into **Category 2 (Backend/API Agents)** with a strong overlap of **Category 4 (Coding/Infra Agents)** (which we discussed earlier but you didn't list in this specific prompt). - -* **"APIs, trigger workflows"** → **Category 2 (Backend Agents).** -* *The Pain:* Agent A (authorized) calls Agent B (unauthorized). Does Agent B inherit permissions? (The "Confused Deputy" problem). - - -* **"Git, Bash, deploy infra"** → **Category 4 (Coding/Infra Agents).** -* *The Pain:* Giving an agent AWS admin keys is terrifying. You need to scope *what* it can do with those keys (e.g., "Only deploy to Staging," not "Delete Production"). - - -* **"MCP Servers"** → **The Bridge.** -* MCP (Model Context Protocol) is the "USB port" for agents. This user is asking: *If I plug a dangerous tool (Bash) into a safe agent (Claude), how do I stop it from exploding?* - - - -### **2. The "Predicate Pitch" Hidden in the Post** - -Look at the specific questions they asked.
These are softballs for you to hit: - -| **Their Question** | **The Current Bad Solution** | **The Predicate Solution** | -| --- | --- | --- | -| *"Are you relying purely on IAM?"* | Yes, and it fails because IAM is too coarse (User-level, not Task-level). | **No. We use Runtime Invariants (Action-level).** | -| *"How do you prevent delegation chains?"* | You can't. You just trust the chain. | **State Propagation.** We pass a signed "Policy Context" with every request. | -| *"Is enforcement happening before execution?"* | Mostly no. Most use "Auditing" (Logs). | **YES.** That is literally our value prop. We block the socket *before* the byte leaves. | - -### **3. Strategic Action: The "Expert" Reply** - -This is a high-intent lead. Do not pitch them. **Educate them.** -This user is likely a Senior/Staff Engineer or Architect given the depth of the question. - -**Draft Reply for Reddit:** - -> "Great question. We ran into this exact issue with 'Confused Deputy' problems in agent chains (Agent A has permission, delegates to Agent B who shouldn't). -> Relying on IAM/Tokens is insufficient because IAM authorizes the **Identity** ('Who are you?'), but agents need **Action** authorization ('Are you allowed to do *this* specific thing *right now*?'). -> We moved away from 'Permission Scoping' (static) to **'Runtime Invariants' (dynamic)**. -> Basically, inserting a lightweight WASM interceptor at the tool execution layer (e.g., wrapping the Bash/Git call). -> It checks two things *before* execution: -> 1. **State:** Is the target resource valid? (e.g., `git push` is allowed to `feature-branch` but BLOCKED on `main`). -> 2. **Context:** logic checks that persist across the delegation chain (e.g., `budget_remaining > 0`). -> -> -> If you rely on logging/monitoring, you only find out about the unauthorized infrastructure changes after the bill hits. You have to block at the socket/runtime layer." 
- -### **Summary** - -* **Category:** It spans **Backend (API)** and **Infra (Bash/Git)**. -* **Significance:** This user perfectly articulates the gap between **Identity (AuthZ)** and **Behavior (Predicate)**. -* **Next Step:** Reply to the post. This person is a potential **Design Partner** #1. - -**Potential solution git repo:** https://github.com/Garudex-Labs/caracal (https://www.garudexlabs.com/) - -### **This is a "category-defining" find.** - -You just found the "Identity Layer" counterpart to your "Verification Layer." - -**Garudex Labs' Caracal** is not a direct competitor to **Predicate Systems**. It is your **Architecture Sibling.** - -* **Caracal** solves: *"Who authorized this?"* (Identity, Delegation, Budget). -* **Predicate** solves: *"Is this safe to execute?"* (State, Logic, Side Effects). - -Here is the deep-dive analysis of Caracal vs. Predicate to help you refine your positioning. - ---- - -### **1. The "Caracal" Approach (The Passport Control)** - -Based on the repo analysis, Caracal focuses on **Authority & FinOps**. - -* **The Problem:** "My agent has an API key. If it loops 1,000 times, it bankrupts me. If it calls a sub-agent, does the sub-agent have my permission?" -* **The Solution:** -* **Ephemeral Mandates:** Instead of static keys, it issues "temporary passports" for specific tasks. -* **Ledgering:** It records *"Agent A delegated to Agent B at 12:00"* on an immutable ledger. -* **FinOps Gates:** It blocks actions based on **Cost** (Token usage / $$$) and **Rate Limits**. - - -* **The Gap:** Caracal checks the **Budget**, but it does not check the **Payload**. -* *Caracal says:* "Agent allowed to spend $50? **YES.**" -* *Caracal misses:* "Agent is spending that $50 on a **Phishing Site** instead of Amazon." (Because it doesn't verify the DOM/State). - - - -### **2. 
The "Predicate" Approach (The Safety Officer)** - -* **The Problem:** "My agent has permission to buy a ticket, but it's about to buy the *wrong* ticket because it hallucinated the date." -* **The Solution:** **Runtime Invariants.** -* **The Comparison:** -* **Caracal** stops the agent from stealing your **Keys**. -* **Predicate** stops the agent from breaking your **Business Logic**. - - - -### **3. How to Position This (The "Defense in Depth" Stack)** - -This GitHub repo actually validates your market. It proves that "Post-Training Safety" is exploding. - -**The "Secure Agent Stack" Pitch:** -| Layer | Tool | Question Answered | -| :--- | :--- | :--- | -| **L1: Identity** | **Caracal** | *"Is this agent authorized to use the Credit Card?"* | -| **L2: Runtime** | **Predicate** | *"Is the item in the cart actually what the user asked for?"* | -| **L3: Network** | **Cloudflare** | *"Is this IP address malicious?"* | - -### **Strategic Action Items** - -1. **Don't Fight Caracal, Cite Them:** -* In your Reddit reply, you can say: *"Tools like **Caracal** are great for solving the Identity/Delegation chain. But for the actual 'Execution Safety' (preventing logic drifts like buying the wrong item), you need a Runtime Verifier like **Predicate**."* -* This makes you look like a ecosystem expert, not just a shill. - - -2. **The "AGPL" Trap (Your Enterprise Wedge):** -* Caracal is **AGPL-3.0**. This is "radioactive" for many large enterprises (Banks/Faang) because it forces them to open-source their own modifications if they link to it. -* **Predicate Opportunity:** Release your SDK as **MIT/Apache 2.0**. -* *Pitch:* "Enterprise-grade safety without the viral license risk." - - - -**Verdict:** Caracal is solving the "Confused Deputy" problem. You are solving the "Confused Model" problem. Both are needed, but yours is harder to build (requires Deep State integration vs. just Token passing). 
- ----- -### **Recommendation: Do NOT merge Identity/Delegation into your core SDK.** - -You must keep your core `@predicatesystems/sdk` focused purely on **Deterministic Verification** (State & Logic). Trying to solve the "Identity/Confused Deputy" problem inside your main SDK is a strategic error for three reasons: - -1. **The "AGPL" Poison Pill:** -* **Your License:** MIT/Apache 2.0 (Enterprise Safe). -* **Caracal's License:** **AGPL-3.0**. -* **The Risk:** AGPL is "viral." If you import Caracal's code into your core SDK, legal teams at banks and large enterprises may classify your entire SDK as "infected," forcing them to open-source their private agents if they use it. **You cannot mix these licenses.** - - -2. **The "Tar Pit" of IAM:** -* Building a robust Identity/Delegation system (handling OAuth OBO, Token Exchange, Key Rotation, Ledgers) is a massive undertaking. If you start building this, you stop building your **Verification Engine**. You will become a "Bad Okta" instead of a "Great Predicate." - - -3. **Separation of Concerns:** -* **Caracal** answers: *"Who is this agent?"* (Passport Control). -* **Predicate** answers: *"Is this action safe?"* (Customs/Security). -* Enterprises want to swap these layers independently. - - - ---- - -### **Strategy Plan: How to Leverage Caracal** - -Since Caracal is also very early (launched Jan 28, 2026) and solves the exact problem your customers (like the Reddit user) are asking about, you should treat them as a **Strategic Partner**, not a competitor or a feature to clone. - -#### **Phase 1: The "Clean" Integration (The Adapter Pattern)** - -Create a separate, optional package. This isolates the AGPL risk and keeps your core SDK lightweight. - -* **Package Name:** `@predicatesystems/adapter-caracal` (or `predicate-caracal-bridge`). -* **Function:** -* This adapter extracts the **Budget/Mandate Context** from Caracal’s token. -* It feeds that context into Predicate's **WASM Invariants**. 
- - -* **The Value Prop:** -* *Caracal* says: "Agent has $50 budget." -* *Predicate* enforces: "Block action if `price > $50`." -* **Combined:** You turn Caracal's "Policy" into Predicate's "Enforcement." - - - -#### **Phase 2: The "Confused Deputy" Fix (Context Propagation)** - -You don't need a full Identity engine to fix the "Confused Deputy" problem. You just need **Context Propagation**. - -* **Add a `context` object to your `verify()` function.** -```typescript -// Instead of building IAM, just accept the 'Chain' as a verified fact -predicate.verify({ - action: "transfer_funds", - state: domSnapshot, - // The context can come from Caracal, LangChain, or simple JSON - context: { - principal: "Alice", - delegator: "ResearchAgent", - risk_score: "high" - } -}); - -``` - - -* **Why this wins:** You remain agnostic. If the customer uses Caracal, great. If they use Okta, great. You just verify the payload against the context. - -#### **Phase 3: The "Go-to-Market" Alliance** - -Garudex Labs (Caracal) is likely desperate for validation and usage, just like you. - -* **The Pitch to Garudex:** "You handle the *Ledger* and *Identity*. We handle the *Runtime Blocking*. Together, we are the **'Zero Trust Agent Stack'**." -* **Joint Content:** Write a blog post: *"How to prevent Confused Deputy attacks in Agents using Caracal + Predicate."* -* This targets the exact Reddit/Hacker News crowd you are engaging with. -* It gives you "Security Credibility" without writing IAM code. - - - -### **Summary of Action Items** - -1. **Keep `@predicatesystems/sdk` MIT/Apache.** Do not import Caracal. -2. **Build a "Context" parameter** into your SDK to allow users to pass Identity data (from Caracal or others) into your invariants. -3. **Reach out to the Garudex founders.** Propose a "Tech Alliance." You are both fighting the "Vibes-based AI" narrative. -4. 
**Reply to the Reddit user:** -> "We are sticking to **Runtime Verification** (Logic/State) to keep our SDK lightweight and MIT-licensed. For Identity/Delegation, we recommend pairing us with tools like **Caracal** (for the ledger/mandates). We are building an adapter to enforce their budget rules inside our runtime." diff --git a/examples/README.md b/examples/README.md index 7895665..7f15422 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,4 +1,4 @@ -# Phase 1 Examples +# Examples These scripts show the intended local guard patterns for Phase 1: @@ -13,3 +13,26 @@ PYTHONPATH=. python examples/browser_guard_example.py PYTHONPATH=. python examples/mcp_tool_guard_example.py PYTHONPATH=. python examples/outbound_http_guard_example.py ``` + +## `predicate-authorityd` operations example (Phase 2) + +- `authorityd/policy.json`: sample sidecar policy file. +- `authorityd/daemon_endpoint_check.py`: checks `/health` and `/status` endpoints. + +Start daemon: + +```bash +PYTHONPATH=. predicate-authorityd \ + --host 127.0.0.1 \ + --port 8787 \ + --mode local_only \ + --policy-file examples/authorityd/policy.json \ + --policy-poll-interval-s 2.0 \ + --credential-store-file ./.predicate-authorityd/credentials.json +``` + +Check endpoints: + +```bash +PYTHONPATH=. 
def fetch_json(url: str) -> dict[str, object]:
    """Issue a GET request against *url* and return the decoded JSON object.

    Args:
        url: Absolute ``http`` or ``https`` URL, including host (and port).

    Returns:
        The response body parsed as a JSON object.

    Raises:
        RuntimeError: If the scheme is not http/https, the URL has no
            network location, the server answers with status >= 400, or the
            body is valid JSON but not an object.
    """
    parts = urlsplit(url)
    connection_types = {
        "http": http.client.HTTPConnection,
        "https": http.client.HTTPSConnection,
    }
    connection_type = connection_types.get(parts.scheme)
    if connection_type is None:
        raise RuntimeError(f"Unsupported URL scheme: {parts.scheme}")
    if not parts.netloc:
        raise RuntimeError("URL must include host:port.")

    base_path = parts.path if parts.path else "/"
    target = f"{base_path}?{parts.query}" if parts.query else base_path

    conn = connection_type(parts.netloc, timeout=2.0)
    try:
        conn.request("GET", target)
        reply = conn.getresponse()
        body = reply.read().decode("utf-8")
        if reply.status >= 400:
            raise RuntimeError(f"HTTP {reply.status}: {body}")
    finally:
        # Always release the socket, including on the error paths above.
        conn.close()

    document = json.loads(body)
    if isinstance(document, dict):
        return document
    raise RuntimeError("Expected JSON object response.")


def main() -> None:
    """Query the local daemon's /health and /status endpoints and print both."""
    base_url = "http://127.0.0.1:8787"
    # Fetch both endpoints before printing so nothing is printed if either fails.
    results = {name: fetch_json(f"{base_url}/{name}") for name in ("health", "status")}
    for name, document in results.items():
        print(f"{name}:", json.dumps(document, indent=2))


if __name__ == "__main__":
    main()
{ + "name": "deny-admin-delete", + "effect": "deny", + "principals": ["agent:*"], + "actions": ["http.delete"], + "resources": ["https://api.vendor.com/admin/*"], + "required_labels": [] + } + ] +} diff --git a/predicate_authority/__init__.py b/predicate_authority/__init__.py index 6870533..b431480 100644 --- a/predicate_authority/__init__.py +++ b/predicate_authority/__init__.py @@ -1,20 +1,56 @@ -from predicate_authority.bridge import IdentityBridge, TokenExchangeResult +from predicate_authority.bridge import ( + EntraBridgeConfig, + EntraIdentityBridge, + IdentityBridge, + IdentityProviderType, + OIDCBridgeConfig, + OIDCIdentityBridge, + TokenExchangeResult, +) +from predicate_authority.daemon import DaemonConfig, PredicateAuthorityDaemon from predicate_authority.errors import AuthorizationDeniedError from predicate_authority.guard import ActionExecutionResult, ActionGuard from predicate_authority.mandate import LocalMandateSigner from predicate_authority.policy import PolicyEngine, PolicyMatchResult +from predicate_authority.policy_source import PolicyFileSource, PolicyReloadResult from predicate_authority.proof import InMemoryProofLedger +from predicate_authority.revocation import LocalRevocationCache +from predicate_authority.sidecar import ( + AuthorityMode, + PredicateAuthoritySidecar, + SidecarConfig, + SidecarError, + SidecarStatus, +) +from predicate_authority.sidecar_store import CredentialRecord, LocalCredentialStore from predicate_authority.telemetry import OpenTelemetryTraceEmitter __all__ = [ "ActionExecutionResult", "ActionGuard", + "AuthorityMode", "AuthorizationDeniedError", + "CredentialRecord", + "DaemonConfig", + "EntraBridgeConfig", + "EntraIdentityBridge", "IdentityBridge", + "IdentityProviderType", "InMemoryProofLedger", + "LocalCredentialStore", "LocalMandateSigner", + "LocalRevocationCache", + "OIDCBridgeConfig", + "OIDCIdentityBridge", "OpenTelemetryTraceEmitter", "PolicyEngine", + "PolicyFileSource", "PolicyMatchResult", + 
"PolicyReloadResult", + "PredicateAuthorityDaemon", + "PredicateAuthoritySidecar", + "SidecarConfig", + "SidecarError", + "SidecarStatus", "TokenExchangeResult", ] diff --git a/predicate_authority/bridge.py b/predicate_authority/bridge.py index 37978c8..8460e2a 100644 --- a/predicate_authority/bridge.py +++ b/predicate_authority/bridge.py @@ -3,22 +3,44 @@ import hashlib import time from dataclasses import dataclass +from enum import Enum from predicate_contracts import PrincipalRef, StateEvidence +class IdentityProviderType(str, Enum): + LOCAL = "local" + OIDC = "oidc" + ENTRA = "entra" + OKTA = "okta" + + @dataclass(frozen=True) class TokenExchangeResult: access_token: str expires_at_epoch_s: int token_type: str = "Bearer" + provider: IdentityProviderType = IdentityProviderType.LOCAL -class IdentityBridge: - """Local placeholder bridge for Phase 1. +@dataclass(frozen=True) +class OIDCBridgeConfig: + issuer: str + client_id: str + audience: str + token_ttl_seconds: int = 300 - This keeps an explicit interface so Phase 2 can swap in a real OIDC/Entra bridge. - """ + +@dataclass(frozen=True) +class EntraBridgeConfig: + tenant_id: str + client_id: str + audience: str + token_ttl_seconds: int = 300 + + +class IdentityBridge: + """Local bridge implementation for development/local-only mode.""" def __init__(self, token_ttl_seconds: int = 300) -> None: self._token_ttl_seconds = token_ttl_seconds @@ -30,5 +52,71 @@ def exchange_token( token_seed = f"{subject.principal_id}|{state_evidence.state_hash}|{expires_at}" token_hash = hashlib.sha256(token_seed.encode("utf-8")).hexdigest() return TokenExchangeResult( - access_token=f"local.{token_hash}", expires_at_epoch_s=expires_at + access_token=f"local.{token_hash}", + expires_at_epoch_s=expires_at, + provider=IdentityProviderType.LOCAL, + ) + + +class OIDCIdentityBridge: + """Generic OIDC bridge adapter. + + Phase 2 keeps this as a deterministic local stand-in for real IdP token exchange. 
+ """ + + def __init__(self, config: OIDCBridgeConfig) -> None: + self._config = config + + def exchange_token( + self, subject: PrincipalRef, state_evidence: StateEvidence + ) -> TokenExchangeResult: + expires_at = int(time.time()) + self._config.token_ttl_seconds + token_seed = ( + f"{self._config.issuer}|{self._config.client_id}|{self._config.audience}|" + f"{subject.principal_id}|{state_evidence.state_hash}|{expires_at}" + ) + token_hash = hashlib.sha256(token_seed.encode("utf-8")).hexdigest() + return TokenExchangeResult( + access_token=f"oidc.{token_hash}", + expires_at_epoch_s=expires_at, + provider=IdentityProviderType.OIDC, + ) + + def refresh_token( + self, refresh_token: str, subject: PrincipalRef, state_evidence: StateEvidence + ) -> TokenExchangeResult: + expires_at = int(time.time()) + self._config.token_ttl_seconds + token_seed = ( + f"{refresh_token}|{self._config.issuer}|{subject.principal_id}|" + f"{state_evidence.state_hash}|{expires_at}" + ) + token_hash = hashlib.sha256(token_seed.encode("utf-8")).hexdigest() + return TokenExchangeResult( + access_token=f"oidc-refresh.{token_hash}", + expires_at_epoch_s=expires_at, + provider=IdentityProviderType.OIDC, + ) + + +class EntraIdentityBridge(OIDCIdentityBridge): + """Microsoft Entra adapter built on generic OIDC behavior.""" + + def __init__(self, config: EntraBridgeConfig) -> None: + oidc_config = OIDCBridgeConfig( + issuer=f"https://login.microsoftonline.com/{config.tenant_id}/v2.0", + client_id=config.client_id, + audience=config.audience, + token_ttl_seconds=config.token_ttl_seconds, + ) + super().__init__(oidc_config) + + def exchange_token( + self, subject: PrincipalRef, state_evidence: StateEvidence + ) -> TokenExchangeResult: + result = super().exchange_token(subject, state_evidence) + return TokenExchangeResult( + access_token=result.access_token, + expires_at_epoch_s=result.expires_at_epoch_s, + token_type=result.token_type, + provider=IdentityProviderType.ENTRA, ) diff --git 
@dataclass(frozen=True)
class DaemonConfig:
    """Listen address and policy polling interval for the daemon."""

    host: str = "127.0.0.1"
    port: int = 8787
    policy_poll_interval_s: float = 2.0


@dataclass
class DaemonRuntime:
    """Mutable runtime counters reported through the ``/status`` endpoint."""

    started_at_epoch_s: float
    is_running: bool = False
    policy_reload_count: int = 0
    policy_poll_error_count: int = 0
    last_policy_reload_epoch_s: float | None = None
    last_policy_poll_error: str | None = None


class _DaemonHTTPServer(ThreadingHTTPServer):
    """Threading HTTP server that carries a back-reference to its daemon."""

    daemon_threads = True

    def __init__(
        self,
        server_address: tuple[str, int],
        request_handler_class: type[BaseHTTPRequestHandler],
        daemon_ref: PredicateAuthorityDaemon,
    ) -> None:
        super().__init__(server_address, request_handler_class)
        self.daemon_ref = daemon_ref


class _DaemonRequestHandler(BaseHTTPRequestHandler):
    """Serves ``GET /health`` and ``GET /status`` as JSON; everything else is 404."""

    def do_GET(self) -> None:  # noqa: N802
        parsed = urlparse(self.path)
        if parsed.path == "/health":
            self._send_json(200, self.server.daemon_ref.health_payload())  # type: ignore[attr-defined]
            return
        if parsed.path == "/status":
            self._send_json(200, self.server.daemon_ref.status_payload())  # type: ignore[attr-defined]
            return
        self._send_json(404, {"error": "not_found"})

    def log_message(self, format: str, *args: Any) -> None:  # noqa: A003
        # Keep daemon output deterministic and quiet by default.
        return

    def _send_json(self, code: int, payload: dict[str, Any]) -> None:
        """Write *payload* as a JSON body with status *code*."""
        encoded = json.dumps(payload).encode("utf-8")
        self.send_response(code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        self.wfile.write(encoded)


class PredicateAuthorityDaemon:
    """Runs the sidecar's HTTP endpoints and a background policy poll loop."""

    def __init__(self, sidecar: PredicateAuthoritySidecar, config: DaemonConfig) -> None:
        self._sidecar = sidecar
        self._config = config
        self._runtime = DaemonRuntime(started_at_epoch_s=time.time())
        self._stop_event = threading.Event()
        self._http_server: _DaemonHTTPServer | None = None
        self._server_thread: threading.Thread | None = None
        self._poll_thread: threading.Thread | None = None

    @property
    def bound_port(self) -> int:
        """Port the HTTP server is bound to, or the configured port before start."""
        if self._http_server is None:
            return self._config.port
        return int(self._http_server.server_address[1])

    def start(self) -> None:
        """Bind the HTTP server and launch the serve and poll threads.

        Calling ``start`` on a running daemon is a no-op.

        Raises:
            OSError, OverflowError: Propagated from the socket bind. The
                daemon stays in the stopped state so ``start`` can be retried.
        """
        if self._runtime.is_running:
            return
        # Bind BEFORE flipping any state: the server constructor binds the
        # socket and raises if the address is unusable. Marking the daemon as
        # running first would leave it reporting "ok" with nothing listening.
        http_server = _DaemonHTTPServer(
            (self._config.host, self._config.port),
            _DaemonRequestHandler,
            self,
        )
        # A previous stop() leaves the event set; clear it so the poll loop of
        # a restarted daemon does not exit immediately.
        self._stop_event.clear()
        self._http_server = http_server
        self._server_thread = threading.Thread(target=http_server.serve_forever, daemon=True)
        self._poll_thread = threading.Thread(target=self._policy_poll_loop, daemon=True)
        self._runtime.is_running = True
        self._server_thread.start()
        self._poll_thread.start()

    def stop(self) -> None:
        """Shut down the HTTP server and wait briefly for both threads to exit."""
        if not self._runtime.is_running:
            return
        self._runtime.is_running = False
        self._stop_event.set()
        if self._http_server is not None:
            self._http_server.shutdown()
            self._http_server.server_close()
        if self._server_thread is not None:
            self._server_thread.join(timeout=3.0)
        if self._poll_thread is not None:
            self._poll_thread.join(timeout=3.0)

    def health_payload(self) -> dict[str, Any]:
        """Return the ``/health`` body: status, sidecar mode and uptime in seconds."""
        uptime_s = int(max(0, time.time() - self._runtime.started_at_epoch_s))
        return {
            "status": "ok" if self._runtime.is_running else "stopped",
            "mode": self._sidecar.status().mode.value,
            "uptime_s": uptime_s,
        }

    def status_payload(self) -> dict[str, Any]:
        """Return the ``/status`` body: sidecar status fields plus daemon counters."""
        sidecar_status = self._sidecar.status()
        payload = asdict(sidecar_status)
        # Replace the enum member with its plain string value for JSON output.
        payload["mode"] = sidecar_status.mode.value
        payload.update(
            {
                "daemon_running": self._runtime.is_running,
                "policy_reload_count": self._runtime.policy_reload_count,
                "policy_poll_error_count": self._runtime.policy_poll_error_count,
                "last_policy_reload_epoch_s": self._runtime.last_policy_reload_epoch_s,
                "last_policy_poll_error": self._runtime.last_policy_poll_error,
            }
        )
        return payload

    def _policy_poll_loop(self) -> None:
        """Hot-reload policy every ``policy_poll_interval_s`` until stopped."""
        while not self._stop_event.is_set():
            try:
                changed = self._sidecar.hot_reload_policy()
                if changed:
                    self._runtime.policy_reload_count += 1
                    self._runtime.last_policy_reload_epoch_s = time.time()
            except Exception as exc:  # noqa: BLE001
                # Deliberately broad: a bad policy file must not kill the poll
                # thread. The failure is surfaced through /status instead.
                self._runtime.policy_poll_error_count += 1
                self._runtime.last_policy_poll_error = str(exc)
            self._stop_event.wait(timeout=self._config.policy_poll_interval_s)


def _build_default_sidecar(
    mode: AuthorityMode, policy_file: str | None, credential_store_file: str
) -> PredicateAuthoritySidecar:
    """Assemble a sidecar from local components for the CLI entry point.

    Rules are loaded from *policy_file* when it exists; otherwise the engine
    starts with no rules. The mandate signer gets a fresh random secret.
    """
    policy_rules: tuple[PolicyRule, ...] = ()
    if policy_file is not None and Path(policy_file).exists():
        policy_rules = PolicyFileSource(policy_file).load_rules()
    policy_engine = PolicyEngine(rules=policy_rules)
    proof_ledger = InMemoryProofLedger()
    guard = ActionGuard(
        policy_engine=policy_engine,
        mandate_signer=LocalMandateSigner(secret_key=secrets.token_hex(32)),
        proof_ledger=proof_ledger,
    )
    return PredicateAuthoritySidecar(
        config=SidecarConfig(mode=mode, policy_file_path=policy_file),
        action_guard=guard,
        proof_ledger=proof_ledger,
        identity_bridge=IdentityBridge(),
        credential_store=LocalCredentialStore(credential_store_file),
        revocation_cache=LocalRevocationCache(),
        policy_engine=policy_engine,
    )


def main() -> None:
    """CLI entry point: parse arguments, start the daemon, run until Ctrl-C."""
    parser = argparse.ArgumentParser(description="predicate-authorityd sidecar daemon")
    parser.add_argument("--host", default="127.0.0.1")
    parser.add_argument("--port", type=int, default=8787)
    parser.add_argument(
        "--mode",
        choices=[AuthorityMode.LOCAL_ONLY.value, AuthorityMode.CLOUD_CONNECTED.value],
        default=AuthorityMode.LOCAL_ONLY.value,
    )
    parser.add_argument("--policy-file", default=None)
    parser.add_argument("--policy-poll-interval-s", type=float, default=2.0)
    parser.add_argument(
        "--credential-store-file",
        default=str(Path.home() / ".predicate-authorityd" / "credentials.json"),
    )
    args = parser.parse_args()

    mode = AuthorityMode(args.mode)
    sidecar = _build_default_sidecar(
        mode=mode,
        policy_file=args.policy_file,
        credential_store_file=args.credential_store_file,
    )
    daemon = PredicateAuthorityDaemon(
        sidecar=sidecar,
        config=DaemonConfig(
            host=args.host,
            port=args.port,
            policy_poll_interval_s=args.policy_poll_interval_s,
        ),
    )
    daemon.start()
    print(
        f"predicate-authorityd listening on http://{args.host}:{daemon.bound_port} "
        f"(mode={mode.value})"
    )
    try:
        while True:
            time.sleep(1.0)
    except KeyboardInterrupt:
        daemon.stop()
        print("predicate-authorityd stopped")


if __name__ == "__main__":
    main()
== "__main__": + main() diff --git a/predicate_authority/policy.py b/predicate_authority/policy.py index 99b9835..41ce33e 100644 --- a/predicate_authority/policy.py +++ b/predicate_authority/policy.py @@ -18,6 +18,9 @@ class PolicyEngine: def __init__(self, rules: tuple[PolicyRule, ...]) -> None: self._rules = rules + def replace_rules(self, rules: tuple[PolicyRule, ...]) -> None: + self._rules = rules + def evaluate(self, request: ActionRequest) -> PolicyMatchResult: matching_rules = [rule for rule in self._rules if self._matches_rule(rule, request)] if not matching_rules: diff --git a/predicate_authority/policy_source.py b/predicate_authority/policy_source.py new file mode 100644 index 0000000..69d1447 --- /dev/null +++ b/predicate_authority/policy_source.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import json +from dataclasses import dataclass +from pathlib import Path + +from predicate_contracts import PolicyEffect, PolicyRule + + +@dataclass(frozen=True) +class PolicyReloadResult: + changed: bool + rules: tuple[PolicyRule, ...] 
+ + +class PolicyFileSource: + def __init__(self, policy_path: str) -> None: + self._policy_path = Path(policy_path) + self._last_mtime_ns: int | None = None + + def load_rules(self) -> tuple[PolicyRule, ...]: + payload = json.loads(self._policy_path.read_text(encoding="utf-8")) + rules_payload = payload.get("rules", []) + rules: list[PolicyRule] = [] + for item in rules_payload: + rules.append( + PolicyRule( + name=item["name"], + effect=PolicyEffect(item["effect"]), + principals=tuple(item["principals"]), + actions=tuple(item["actions"]), + resources=tuple(item["resources"]), + required_labels=tuple(item.get("required_labels", [])), + ) + ) + stat = self._policy_path.stat() + self._last_mtime_ns = stat.st_mtime_ns + return tuple(rules) + + def reload_if_changed(self) -> PolicyReloadResult: + stat = self._policy_path.stat() + if self._last_mtime_ns is None or stat.st_mtime_ns != self._last_mtime_ns: + rules = self.load_rules() + return PolicyReloadResult(changed=True, rules=rules) + return PolicyReloadResult(changed=False, rules=()) diff --git a/predicate_authority/pyproject.toml b/predicate_authority/pyproject.toml index 915e2f7..5726f95 100644 --- a/predicate_authority/pyproject.toml +++ b/predicate_authority/pyproject.toml @@ -19,6 +19,9 @@ dependencies = [ "predicate-contracts>=0.1.0,<0.2.0", ] +[project.scripts] +predicate-authorityd = "predicate_authority.daemon:main" + [project.optional-dependencies] telemetry = ["opentelemetry-api>=1.24.0"] diff --git a/predicate_authority/revocation.py b/predicate_authority/revocation.py new file mode 100644 index 0000000..de25f16 --- /dev/null +++ b/predicate_authority/revocation.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +import hashlib +from dataclasses import dataclass, field + +from predicate_contracts import ActionRequest, SignedMandate + + +@dataclass +class LocalRevocationCache: + revoked_principal_ids: set[str] = field(default_factory=set) + revoked_intent_hashes: set[str] = 
@dataclass
class LocalRevocationCache:
    """In-memory sets of revoked principals, intent hashes and mandate ids."""

    revoked_principal_ids: set[str] = field(default_factory=set)
    revoked_intent_hashes: set[str] = field(default_factory=set)
    revoked_mandate_ids: set[str] = field(default_factory=set)

    def revoke_principal(self, principal_id: str) -> None:
        """Mark *principal_id* as revoked."""
        self.revoked_principal_ids.add(principal_id)

    def revoke_intent_hash(self, intent_hash: str) -> None:
        """Mark *intent_hash* as revoked."""
        self.revoked_intent_hashes.add(intent_hash)

    def revoke_mandate_id(self, mandate_id: str) -> None:
        """Mark *mandate_id* as revoked."""
        self.revoked_mandate_ids.add(mandate_id)

    def is_request_revoked(self, request: ActionRequest) -> bool:
        """True if the request's principal, or the SHA-256 of its intent, is revoked."""
        return (
            request.principal.principal_id in self.revoked_principal_ids
            or hashlib.sha256(request.action_spec.intent.encode("utf-8")).hexdigest()
            in self.revoked_intent_hashes
        )

    def is_mandate_revoked(self, mandate: SignedMandate) -> bool:
        """True if the mandate's principal, intent hash or mandate id is revoked."""
        return (
            mandate.claims.principal_id in self.revoked_principal_ids
            or mandate.claims.intent_hash in self.revoked_intent_hashes
            or mandate.claims.mandate_id in self.revoked_mandate_ids
        )


class AuthorityMode(str, Enum):
    """Operating mode of the sidecar (string-valued enum)."""

    LOCAL_ONLY = "local_only"
    CLOUD_CONNECTED = "cloud_connected"


@dataclass(frozen=True)
class SidecarConfig:
    """Sidecar settings; hot reload is only available when a policy file is set."""

    mode: AuthorityMode = AuthorityMode.LOCAL_ONLY
    policy_file_path: str | None = None


@dataclass(frozen=True)
class SidecarStatus:
    """Snapshot of sidecar mode and counters."""

    mode: AuthorityMode
    policy_hot_reload_enabled: bool
    revoked_principal_count: int
    revoked_intent_count: int
    revoked_mandate_count: int
    proof_event_count: int


class SidecarError(RuntimeError):
    """Raised when the sidecar cannot satisfy a token request."""


class ExchangeTokenBridge(Protocol):
    """Structural type for bridges that can exchange a token."""

    def exchange_token(
        self, subject: PrincipalRef, state_evidence: StateEvidence
    ) -> TokenExchangeResult: ...


class RefreshTokenBridge(ExchangeTokenBridge, Protocol):
    """Structural type for bridges that can also refresh a token."""

    def refresh_token(
        self, refresh_token: str, subject: PrincipalRef, state_evidence: StateEvidence
    ) -> TokenExchangeResult: ...


class PredicateAuthoritySidecar:
    """Combines guard, revocation cache, credential store and identity bridge."""

    def __init__(
        self,
        config: SidecarConfig,
        action_guard: ActionGuard,
        proof_ledger: InMemoryProofLedger,
        identity_bridge: ExchangeTokenBridge,
        credential_store: LocalCredentialStore,
        revocation_cache: LocalRevocationCache,
        policy_engine: PolicyEngine,
    ) -> None:
        self._config = config
        self._action_guard = action_guard
        self._proof_ledger = proof_ledger
        self._identity_bridge = identity_bridge
        self._credential_store = credential_store
        self._revocation_cache = revocation_cache
        self._policy_engine = policy_engine
        policy_path = config.policy_file_path
        self._policy_source = None if policy_path is None else PolicyFileSource(policy_path)

    def _deny_as_revoked(self, request: ActionRequest) -> AuthorizationDecision:
        """Build, record and return the denial used for revoked requests/mandates."""
        denial = AuthorizationDecision(
            allowed=False,
            reason=AuthorizationReason.INVALID_MANDATE,
            violated_rule="revocation_cache",
        )
        self._proof_ledger.record(denial, request)
        return denial

    def issue_mandate(self, request: ActionRequest) -> AuthorizationDecision:
        """Authorize *request* unless it, or the mandate issued for it, is revoked."""
        if self._revocation_cache.is_request_revoked(request):
            return self._deny_as_revoked(request)
        outcome = self._action_guard.authorize(request)
        if not outcome.allowed or outcome.mandate is None:
            return outcome
        if self._revocation_cache.is_mandate_revoked(outcome.mandate):
            return self._deny_as_revoked(request)
        return outcome

    def store_refresh_token(
        self, principal_id: str, refresh_token: str, expires_at_epoch_s: int
    ) -> None:
        """Persist a refresh token for *principal_id* in the credential store."""
        entry = CredentialRecord(
            principal_id=principal_id,
            refresh_token=refresh_token,
            expires_at_epoch_s=expires_at_epoch_s,
        )
        self._credential_store.save(entry)

    def exchange_access_token(
        self, subject: PrincipalRef, state_evidence: StateEvidence
    ) -> TokenExchangeResult:
        """Obtain an access token for *subject*.

        In local-only mode the bridge's ``exchange_token`` is used directly.
        Otherwise the stored refresh token is passed to the bridge's
        ``refresh_token``.

        Raises:
            SidecarError: If no refresh token is stored for the principal, or
                the bridge has no ``refresh_token`` attribute.
        """
        bridge = self._identity_bridge
        if self._config.mode == AuthorityMode.LOCAL_ONLY:
            return bridge.exchange_token(subject, state_evidence)

        stored = self._credential_store.get(subject.principal_id)
        if stored is None:
            raise SidecarError("Missing refresh token for connected mode principal.")
        if not hasattr(bridge, "refresh_token"):
            raise SidecarError(
                "Connected mode requires an identity bridge with refresh_token support."
            )
        return cast(RefreshTokenBridge, bridge).refresh_token(
            stored.refresh_token, subject, state_evidence
        )

    def revoke_by_invariant(self, principal_id: str) -> None:
        """Revoke *principal_id* in the revocation cache."""
        self._revocation_cache.revoke_principal(principal_id)

    def hot_reload_policy(self) -> bool:
        """Swap in freshly loaded rules if the policy file changed; report whether it did."""
        source = self._policy_source
        if source is None:
            return False
        outcome = source.reload_if_changed()
        if not outcome.changed:
            return False
        self._policy_engine.replace_rules(outcome.rules)
        return True

    def status(self) -> SidecarStatus:
        """Return the current mode, hot-reload flag and revocation/proof counts."""
        cache = self._revocation_cache
        return SidecarStatus(
            mode=self._config.mode,
            policy_hot_reload_enabled=self._policy_source is not None,
            revoked_principal_count=len(cache.revoked_principal_ids),
            revoked_intent_count=len(cache.revoked_intent_hashes),
            revoked_mandate_count=len(cache.revoked_mandate_ids),
            proof_event_count=len(self._proof_ledger.events),
        )
a/predicate_authority/sidecar_store.py b/predicate_authority/sidecar_store.py new file mode 100644 index 0000000..745b18f --- /dev/null +++ b/predicate_authority/sidecar_store.py @@ -0,0 +1,76 @@ +from __future__ import annotations + +import json +import os +from dataclasses import asdict, dataclass +from pathlib import Path +from typing import Any + + +@dataclass(frozen=True) +class CredentialRecord: + principal_id: str + refresh_token: str + expires_at_epoch_s: int + + +class LocalCredentialStore: + """Local credential persistence for sidecar mode. + + This MVP store uses file permissions for local hardening and avoids logging secrets. + """ + + def __init__(self, file_path: str) -> None: + self._file_path = Path(file_path) + self._ensure_store_path() + + def save(self, record: CredentialRecord) -> None: + payload = self._read_all() + payload[record.principal_id] = asdict(record) + self._file_path.write_text(json.dumps(payload, indent=2), encoding="utf-8") + self._chmod_file_safe() + + def get(self, principal_id: str) -> CredentialRecord | None: + payload = self._read_all() + item = payload.get(principal_id) + if not isinstance(item, dict): + return None + item_principal = item.get("principal_id") + item_refresh = item.get("refresh_token") + item_expires = item.get("expires_at_epoch_s") + if not isinstance(item_principal, str) or not isinstance(item_refresh, str): + return None + if not isinstance(item_expires, (int, str)): + return None + return CredentialRecord( + principal_id=item_principal, + refresh_token=item_refresh, + expires_at_epoch_s=int(item_expires), + ) + + def _read_all(self) -> dict[str, Any]: + if not self._file_path.exists(): + return {} + content = self._file_path.read_text(encoding="utf-8").strip() + if content == "": + return {} + loaded = json.loads(content) + if isinstance(loaded, dict): + return loaded + return {} + + def _ensure_store_path(self) -> None: + self._file_path.parent.mkdir(parents=True, exist_ok=True) + try: + 
os.chmod(self._file_path.parent, 0o700) + except OSError: + pass + if not self._file_path.exists(): + self._file_path.write_text("{}", encoding="utf-8") + self._chmod_file_safe() + + def _chmod_file_safe(self) -> None: + try: + os.chmod(self._file_path, 0o600) + except OSError: + pass diff --git a/tests/test_daemon_phase2.py b/tests/test_daemon_phase2.py new file mode 100644 index 0000000..2d4100f --- /dev/null +++ b/tests/test_daemon_phase2.py @@ -0,0 +1,130 @@ +from __future__ import annotations + +import json +import time +from pathlib import Path +from urllib.request import urlopen + +from predicate_authority import ( + ActionGuard, + AuthorityMode, + DaemonConfig, + IdentityBridge, + InMemoryProofLedger, + LocalCredentialStore, + LocalMandateSigner, + LocalRevocationCache, + PolicyEngine, + PredicateAuthorityDaemon, + PredicateAuthoritySidecar, + SidecarConfig, +) +from predicate_contracts import PolicyEffect, PolicyRule + +# pylint: disable=import-error + + +def _build_sidecar(tmp_path: Path, policy_file: Path) -> PredicateAuthoritySidecar: + policy_engine = PolicyEngine( + rules=( + PolicyRule( + name="allow-any-http", + effect=PolicyEffect.ALLOW, + principals=("agent:*",), + actions=("http.*",), + resources=("https://*/*",), + ), + ) + ) + proof_ledger = InMemoryProofLedger() + guard = ActionGuard( + policy_engine=policy_engine, + mandate_signer=LocalMandateSigner(secret_key="test-secret"), + proof_ledger=proof_ledger, + ) + return PredicateAuthoritySidecar( + config=SidecarConfig( + mode=AuthorityMode.LOCAL_ONLY, + policy_file_path=str(policy_file), + ), + action_guard=guard, + proof_ledger=proof_ledger, + identity_bridge=IdentityBridge(), + credential_store=LocalCredentialStore(str(tmp_path / "credentials.json")), + revocation_cache=LocalRevocationCache(), + policy_engine=policy_engine, + ) + + +def _fetch_json(url: str) -> dict[str, object]: + with urlopen(url, timeout=2.0) as response: # noqa: S310 + payload = response.read().decode("utf-8") + loaded = 
json.loads(payload) + assert isinstance(loaded, dict) + return loaded + + +def test_daemon_exposes_health_and_status_endpoints(tmp_path: Path) -> None: + policy_file = tmp_path / "policy.json" + policy_file.write_text(json.dumps({"rules": []}), encoding="utf-8") + sidecar = _build_sidecar(tmp_path, policy_file) + daemon = PredicateAuthorityDaemon( + sidecar=sidecar, + config=DaemonConfig(host="127.0.0.1", port=0, policy_poll_interval_s=0.05), + ) + daemon.start() + try: + base_url = f"http://127.0.0.1:{daemon.bound_port}" + health = _fetch_json(f"{base_url}/health") + status = _fetch_json(f"{base_url}/status") + assert health["status"] == "ok" + assert health["mode"] == "local_only" + assert status["daemon_running"] is True + assert status["policy_hot_reload_enabled"] is True + finally: + daemon.stop() + + +def test_daemon_policy_polling_tracks_reload_count(tmp_path: Path) -> None: + policy_file = tmp_path / "policy.json" + policy_file.write_text(json.dumps({"rules": []}), encoding="utf-8") + sidecar = _build_sidecar(tmp_path, policy_file) + daemon = PredicateAuthorityDaemon( + sidecar=sidecar, + config=DaemonConfig(host="127.0.0.1", port=0, policy_poll_interval_s=0.05), + ) + daemon.start() + try: + base_url = f"http://127.0.0.1:{daemon.bound_port}" + initial = _fetch_json(f"{base_url}/status") + initial_reload_count = int(initial["policy_reload_count"]) + + time.sleep(0.1) + policy_file.write_text( + json.dumps( + { + "rules": [ + { + "name": "allow-policy-updated", + "effect": "allow", + "principals": ["agent:*"], + "actions": ["http.*"], + "resources": ["https://*/*"], + } + ] + } + ), + encoding="utf-8", + ) + + deadline = time.time() + 2.0 + while time.time() < deadline: + status = _fetch_json(f"{base_url}/status") + if int(status["policy_reload_count"]) > initial_reload_count: + break + time.sleep(0.05) + else: + raise AssertionError("Policy reload count did not increase after policy file update.") + + finally: + daemon.stop() diff --git 
a/tests/test_identity_bridge_phase2.py b/tests/test_identity_bridge_phase2.py new file mode 100644 index 0000000..5192ada --- /dev/null +++ b/tests/test_identity_bridge_phase2.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from predicate_authority import ( + EntraBridgeConfig, + EntraIdentityBridge, + OIDCBridgeConfig, + OIDCIdentityBridge, +) +from predicate_contracts import PrincipalRef, StateEvidence + + +def test_oidc_bridge_exchange_and_refresh() -> None: + bridge = OIDCIdentityBridge( + OIDCBridgeConfig( + issuer="https://issuer.example.com", + client_id="client-a", + audience="api://service", + token_ttl_seconds=120, + ) + ) + subject = PrincipalRef(principal_id="agent:web") + state = StateEvidence(source="sdk-python", state_hash="state-1") + + access = bridge.exchange_token(subject, state) + refreshed = bridge.refresh_token("refresh-token-xyz", subject, state) + + assert access.provider.value == "oidc" + assert access.access_token.startswith("oidc.") + assert refreshed.access_token.startswith("oidc-refresh.") + + +def test_entra_bridge_marks_provider() -> None: + bridge = EntraIdentityBridge( + EntraBridgeConfig( + tenant_id="tenant-123", + client_id="client-entra", + audience="api://predicate", + token_ttl_seconds=120, + ) + ) + subject = PrincipalRef(principal_id="agent:backend") + state = StateEvidence(source="backend", state_hash="state-2") + + result = bridge.exchange_token(subject, state) + + assert result.provider.value == "entra" diff --git a/tests/test_sidecar_phase2.py b/tests/test_sidecar_phase2.py new file mode 100644 index 0000000..6b922e6 --- /dev/null +++ b/tests/test_sidecar_phase2.py @@ -0,0 +1,140 @@ +from __future__ import annotations + +import json +import time +from pathlib import Path + +from predicate_authority import ( + ActionGuard, + AuthorityMode, + IdentityBridge, + InMemoryProofLedger, + LocalCredentialStore, + LocalMandateSigner, + LocalRevocationCache, + OIDCBridgeConfig, + OIDCIdentityBridge, + PolicyEngine, + 
PredicateAuthoritySidecar, + SidecarConfig, +) +from predicate_contracts import ( + ActionRequest, + ActionSpec, + AuthorizationReason, + PolicyEffect, + PolicyRule, + PrincipalRef, + StateEvidence, + VerificationEvidence, +) + + +def _request() -> ActionRequest: + return ActionRequest( + principal=PrincipalRef(principal_id="agent:ops"), + action_spec=ActionSpec( + action="http.post", + resource="https://api.vendor.com/orders", + intent="create order", + ), + state_evidence=StateEvidence(source="backend", state_hash="state-abc"), + verification_evidence=VerificationEvidence(), + ) + + +def _guard(policy_engine: PolicyEngine, proof_ledger: InMemoryProofLedger) -> ActionGuard: + return ActionGuard( + policy_engine=policy_engine, + mandate_signer=LocalMandateSigner(secret_key="test-secret", ttl_seconds=60), + proof_ledger=proof_ledger, + ) + + +def test_sidecar_connected_mode_requires_refresh_and_uses_bridge(tmp_path: Path) -> None: + policy_engine = PolicyEngine( + rules=( + PolicyRule( + name="allow-orders", + effect=PolicyEffect.ALLOW, + principals=("agent:*",), + actions=("http.*",), + resources=("https://api.vendor.com/*",), + ), + ) + ) + proof_ledger = InMemoryProofLedger() + bridge = OIDCIdentityBridge( + OIDCBridgeConfig( + issuer="https://issuer.example.com", + client_id="client-id", + audience="api://orders", + token_ttl_seconds=120, + ) + ) + store = LocalCredentialStore(str(tmp_path / "credentials.json")) + revocation = LocalRevocationCache() + sidecar = PredicateAuthoritySidecar( + config=SidecarConfig(mode=AuthorityMode.CLOUD_CONNECTED), + action_guard=_guard(policy_engine, proof_ledger), + proof_ledger=proof_ledger, + identity_bridge=bridge, + credential_store=store, + revocation_cache=revocation, + policy_engine=policy_engine, + ) + sidecar.store_refresh_token( + principal_id="agent:ops", + refresh_token="refresh-123", + expires_at_epoch_s=int(time.time()) + 300, + ) + + decision = sidecar.issue_mandate(_request()) + exchanged = 
sidecar.exchange_access_token( + PrincipalRef(principal_id="agent:ops"), + StateEvidence(source="backend", state_hash="state-abc"), + ) + + assert decision.allowed is True + assert exchanged.access_token.startswith("oidc-refresh.") + + +def test_sidecar_revocation_and_policy_hot_reload(tmp_path: Path) -> None: + policy_file = tmp_path / "policy.json" + policy_file.write_text( + json.dumps( + { + "rules": [ + { + "name": "allow-orders", + "effect": "allow", + "principals": ["agent:*"], + "actions": ["http.*"], + "resources": ["https://api.vendor.com/*"], + } + ] + } + ), + encoding="utf-8", + ) + policy_engine = PolicyEngine(rules=()) + proof_ledger = InMemoryProofLedger() + sidecar = PredicateAuthoritySidecar( + config=SidecarConfig(mode=AuthorityMode.LOCAL_ONLY, policy_file_path=str(policy_file)), + action_guard=_guard(policy_engine, proof_ledger), + proof_ledger=proof_ledger, + identity_bridge=IdentityBridge(), + credential_store=LocalCredentialStore(str(tmp_path / "credentials.json")), + revocation_cache=LocalRevocationCache(), + policy_engine=policy_engine, + ) + + changed = sidecar.hot_reload_policy() + allowed = sidecar.issue_mandate(_request()) + sidecar.revoke_by_invariant("agent:ops") + revoked = sidecar.issue_mandate(_request()) + + assert changed is True + assert allowed.allowed is True + assert revoked.allowed is False + assert revoked.reason == AuthorizationReason.INVALID_MANDATE