diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index e152a86..8952376 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -22,7 +22,13 @@ jobs: fail-fast: false matrix: include: - - language: javascript-typescript + # verisimiser is a Rust crate with zero JS/TS source. The estate + # template's default `javascript-typescript` made CodeQL fail with + # a "no source / configuration error" on every run (pre-existing + # red on main, not introduced by #102). Analyse the language that + # actually exists. `build-mode: none` is the correct (buildless) + # extraction mode for Rust in CodeQL. + - language: rust build-mode: none steps: diff --git a/.github/workflows/hypatia-scan.yml b/.github/workflows/hypatia-scan.yml index 95d5e5e..860a2b7 100644 --- a/.github/workflows/hypatia-scan.yml +++ b/.github/workflows/hypatia-scan.yml @@ -10,12 +10,26 @@ on: schedule: - cron: '0 0 * * 0' # Weekly on Sunday workflow_dispatch: +# Estate guardrail: cancel superseded runs so re-pushes don't pile up +# queued runs across the estate. Safe here because this workflow only +# performs read-only checks/lint/test/scan with no publish or mutation. +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true permissions: contents: read # security-events: read lets the built-in GITHUB_TOKEN query this - # repo\'s own Dependabot alerts via the Hypatia DependabotAlerts rule. + # repo's own Dependabot alerts via the Hypatia DependabotAlerts rule + # (DA001-DA004). Without this, `scan_from_path` gets HTTP 403 and + # the rule silently returns no findings. + # See 007-lang/audits/audit-dependabot-automation-gap-2026-04-17.md. security-events: read + # pull-requests: write lets the advisory "Comment on PR with findings" + # step post its summary. Without it the built-in GITHUB_TOKEN gets + # "Resource not accessible by integration" and (absent continue-on-error) + # hard-fails the scan β€” exactly what the gate-decoupling design forbids. + pull-requests: write jobs: scan: @@ -29,7 +43,7 @@ jobs: fetch-depth: 0 # Full history for better pattern analysis - name: Setup Elixir for Hypatia scanner - uses: erlef/setup-beam@e6d7c94229049569db56a7ad5a540c051a010af9 # v1.18.2 + uses: erlef/setup-beam@fc68ffb90438ef2936bbb3251622353b3dcb2f93 # v1.18.2 with: elixir-version: '1.19.4' otp-version: '28.3' @@ -41,23 +55,27 @@ jobs: fi - name: Build Hypatia scanner (if needed) - working-directory: ${{ env.HOME }}/hypatia run: | - if [ ! -f hypatia-v2 ]; then - echo "Building hypatia-v2 scanner..." - cd scanner + cd "$HOME/hypatia" + if [ ! -f hypatia ]; then + echo "Building hypatia scanner..." mix deps.get mix escript.build - mv hypatia ../hypatia-v2 fi - name: Run Hypatia scan id: scan + env: + # Pass the built-in Actions token through to Hypatia so the + # DependabotAlerts rule can query this repo's own alerts. + # For cross-repo scanning (fleet-coordinator scan-supervised), + # a PAT with `security_events` scope is required instead. + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | echo "Scanning repository: ${{ github.repository }}" - # Run scanner - HYPATIA_FORMAT=json "$HOME/hypatia/hypatia-cli.sh" scan . > hypatia-findings.json + # Run scanner (exits non-zero when findings exist β€” suppress to continue) + HYPATIA_FORMAT=json "$HOME/hypatia/hypatia-cli.sh" scan . --exit-zero > hypatia-findings.json || true # Count findings FINDING_COUNT=$(jq '. | length' hypatia-findings.json 2>/dev/null || echo 0) @@ -79,7 +97,7 @@ jobs: echo "- Medium: $MEDIUM" >> $GITHUB_STEP_SUMMARY - name: Upload findings artifact - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: hypatia-findings path: hypatia-findings.json @@ -87,25 +105,73 @@ jobs: - name: Submit findings to gitbot-fleet (Phase 2) if: steps.scan.outputs.findings_count > 0 + # Phase 2 is the collaborative LEARNING side-channel ("bots share + # findings via gitbot-fleet"), not the security gate. The gate is + # the baseline-aware "Check for critical or high-severity issues" + # step below. A fleet-side regression (e.g. the submit script being + # moved/removed) must NEVER hard-fail every consuming repo's scan. + # Same reasoning as the "Comment on PR with findings" step. + # See hyperpolymath/hypatia#213 (gate decoupling) and the exit-127 + # estate-wide breakage when gitbot-fleet/scripts/submit-finding.sh + # no longer existed on the default branch. + continue-on-error: true env: + # All GitHub context values surface as env vars so the run + # block never interpolates `${{ … }}` inline (closes the + # workflow_audit/unsafe_curl_payload + actions_expression_injection + # findings). GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + FLEET_PUSH_TOKEN: ${{ secrets.HYPATIA_DISPATCH_PAT }} + FLEET_DISPATCH_TOKEN: ${{ secrets.HYPATIA_DISPATCH_PAT }} GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_SHA: ${{ github.sha }} + FINDINGS_COUNT: ${{ steps.scan.outputs.findings_count }} run: | - echo "πŸ“€ Submitting ${{ steps.scan.outputs.findings_count }} findings to gitbot-fleet..." + echo "πŸ“€ Submitting $FINDINGS_COUNT findings to gitbot-fleet..." - # Clone gitbot-fleet to temp directory + # Clone gitbot-fleet to temp directory. A clone failure (network, + # repo gone) is non-fatal: learning submission is best-effort. FLEET_DIR="/tmp/gitbot-fleet-$$" - git clone https://github.com/hyperpolymath/gitbot-fleet.git "$FLEET_DIR" + if ! git clone --depth 1 https://github.com/hyperpolymath/gitbot-fleet.git "$FLEET_DIR"; then + echo "::warning::Could not clone gitbot-fleet β€” skipping Phase 2 learning submission (non-fatal)." + exit 0 + fi - # Run submission script - bash "$FLEET_DIR/scripts/submit-finding.sh" hypatia-findings.json + # The submission script's location in gitbot-fleet has drifted + # before (it was absent from the default branch, which exit-127'd + # every consuming repo's scan). Probe known locations rather than + # hard-coding one path, and skip gracefully if none is present. + SUBMIT_SCRIPT="" + for cand in \ + "$FLEET_DIR/scripts/submit-finding.sh" \ + "$FLEET_DIR/scripts/submit_finding.sh" \ + "$FLEET_DIR/bin/submit-finding.sh" \ + "$FLEET_DIR/submit-finding.sh"; do + if [ -f "$cand" ]; then + SUBMIT_SCRIPT="$cand" + break + fi + done + + if [ -z "$SUBMIT_SCRIPT" ]; then + echo "::warning::gitbot-fleet submit-finding script not found at any known path β€” skipping Phase 2 learning submission (non-fatal). Findings are still uploaded as an artifact and gated below." + rm -rf "$FLEET_DIR" + exit 0 + fi + + # Run submission script. Pass the findings path as ABSOLUTE β€” + # the script cd's into its own working dir before reading the + # file, so a relative path would resolve to the wrong place. + # A submission-script failure is logged but non-fatal. + if bash "$SUBMIT_SCRIPT" "$GITHUB_WORKSPACE/hypatia-findings.json"; then + echo "βœ… Finding submission complete" + else + echo "::warning::gitbot-fleet submission script exited non-zero β€” Phase 2 learning submission skipped (non-fatal)." + fi # Cleanup rm -rf "$FLEET_DIR" - echo "βœ… Finding submission complete" - - name: Check for critical issues if: steps.scan.outputs.critical > 0 run: | @@ -150,6 +216,11 @@ jobs: - name: Comment on PR with findings if: github.event_name == 'pull_request' && steps.scan.outputs.findings_count > 0 + # Advisory only β€” posting findings as a PR comment must never gate + # the scan (hypatia#213 gate decoupling). Belt-and-braces alongside + # the pull-requests: write permission above: a token/API hiccup or + # a fork PR (read-only token) skips the comment, not the check. + continue-on-error: true uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v7 with: script: | diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index 9340d6f..3d4658c 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -12,6 +12,10 @@ concurrency: group: rust-ci-${{ github.ref }} cancel-in-progress: true +concurrency: + group: rust-ci-${{ github.ref }} + cancel-in-progress: true + permissions: contents: read diff --git a/Justfile b/Justfile index 2aa6610..4bd29b4 100644 --- a/Justfile +++ b/Justfile @@ -50,6 +50,14 @@ install: assail: @command -v panic-attack >/dev/null 2>&1 && panic-attack assail . || echo "panic-attack not found β€” install from https://github.com/hyperpolymath/panic-attacker" +# --- Domain-Specific Recipes (verisimiser) --- +# +# (Reserved.) Recipes for clap subcommands like `augment`, `check-octad`, +# and `migrate` were removed per ADR-0003: they wrapped subcommands that +# don't exist in src/main.rs (the real subcommands are `init`, `generate`, +# `start`, `drift`, `provenance`, `history`, `status`, `octad`). +# Re-add wrappers here when their underlying subcommands ship. + # Run contractile checks (informational only β€” not required by CI per ADR-0008) contractile-check: @echo "Running contractile validation..." diff --git a/src/abi/mod.rs b/src/abi/mod.rs index 0ef7d5c..be2f1cc 100644 --- a/src/abi/mod.rs +++ b/src/abi/mod.rs @@ -188,7 +188,7 @@ impl ProvenanceEntry { /// /// `Option` fields encode as `len(0) || ""` when `None`. The /// timestamp is encoded from `chrono::DateTime`'s seconds-since-epoch - /// + subsecond nanos rather than RFC3339, so timestamps with + /// plus subsecond nanos rather than RFC3339, so timestamps with /// different valid string forms but the same instant produce the same /// hash (closes #28 / V-L2-C2). pub fn compute_hash( @@ -206,14 +206,14 @@ impl ProvenanceEntry { write_len_prefixed(&mut hasher, entity_id.as_bytes()); write_len_prefixed(&mut hasher, operation.as_bytes()); write_len_prefixed(&mut hasher, actor.as_bytes()); - hasher.update(×tamp.timestamp().to_le_bytes()); - hasher.update(×tamp.timestamp_subsec_nanos().to_le_bytes()); + hasher.update(timestamp.timestamp().to_le_bytes()); + hasher.update(timestamp.timestamp_subsec_nanos().to_le_bytes()); write_len_prefixed(&mut hasher, before_snapshot.unwrap_or("").as_bytes()); write_len_prefixed(&mut hasher, transformation.unwrap_or("").as_bytes()); format!("{:x}", hasher.finalize()) } - /// Verify that this entry's hash is consistent with its contents. + /// Verify that this entry's hash is consistent with all of its contents. /// /// Returns `true` iff the stored hash matches a freshly recomputed /// hash over the same fields. All seven preimage fields participate, @@ -541,45 +541,128 @@ mod tests { } #[test] - fn test_provenance_tamper_detection() { + fn test_provenance_tamper_entity_id() { let mut entry = ProvenanceEntry::genesis("entity-1", "system"); - // Tamper with the entity_id after hash computation. entry.entity_id = "entity-2".to_string(); - assert!(!entry.verify(), "Tampered entry should fail verification"); + assert!( + !entry.verify(), + "tampering with entity_id must break verify" + ); } - /// Tampering with `actor` must break `verify()` (closes #29 / V-L2-C3). - /// Before V-L2-C1, `actor` was outside the hash preimage and this - /// mutation was invisible β€” see V-L2-C4. + /// V-L2-C3: actor is hashed; tampering with it must be detected. #[test] fn test_provenance_tamper_actor() { - let mut e = ProvenanceEntry::genesis("post-1", "alice"); - e.actor = "mallory".to_string(); - assert!(!e.verify(), "actor must participate in the hash"); + let mut entry = ProvenanceEntry::genesis("entity-1", "alice"); + entry.actor = "mallory".to_string(); + assert!(!entry.verify(), "tampering with actor must break verify"); } - /// Tampering with `before_snapshot` must break `verify()`. + /// V-L2-C3: before_snapshot is hashed; tampering with it must be detected. #[test] fn test_provenance_tamper_before_snapshot() { - let mut e = ProvenanceEntry::genesis("post-1", "alice"); - e.before_snapshot = Some("{\"redacted\":true}".to_string()); + let mut entry = ProvenanceEntry::genesis("entity-1", "alice"); + // Adding a snapshot (None -> Some) should break the original hash. + entry.before_snapshot = Some("{\"redacted\":true}".to_string()); assert!( - !e.verify(), - "before_snapshot must participate in the hash" + !entry.verify(), + "tampering with before_snapshot must break verify" ); } - /// Tampering with `transformation` must break `verify()`. + /// V-L2-C3: transformation is hashed; tampering with it must be detected. #[test] fn test_provenance_tamper_transformation() { - let mut e = ProvenanceEntry::genesis("post-1", "alice"); - e.transformation = Some("evil-rewrite".to_string()); + let mut entry = ProvenanceEntry::genesis("entity-1", "alice"); + entry.transformation = Some("evil-rewrite".to_string()); + assert!( + !entry.verify(), + "tampering with transformation must break verify" + ); + } + + /// V-L2-C3: operation is hashed; tampering with it must be detected. + #[test] + fn test_provenance_tamper_operation() { + let mut entry = ProvenanceEntry::genesis("entity-1", "alice"); + entry.operation = "delete".to_string(); + assert!( + !entry.verify(), + "tampering with operation must break verify" + ); + } + + /// V-L2-C3: previous_hash is hashed; tampering with it must be detected. + #[test] + fn test_provenance_tamper_previous_hash() { + let genesis = ProvenanceEntry::genesis("entity-1", "alice"); + let mut update = genesis.chain("update", "bob"); + update.previous_hash = "deadbeef".to_string(); assert!( - !e.verify(), - "transformation must participate in the hash" + !update.verify(), + "tampering with previous_hash must break verify" ); } + /// V-L2-C2: hash depends on the canonical (i64+u32) timestamp encoding, + /// not on a string representation that might vary. Two `DateTime` + /// values that represent the same instant β€” one parsed from RFC3339, + /// one constructed via `from_timestamp` β€” must produce the same hash. + #[test] + fn test_provenance_hash_timestamp_canonical() { + let parsed: DateTime = "2026-05-13T08:00:00.000000000Z".parse().unwrap(); + let built = DateTime::::from_timestamp(parsed.timestamp(), 0).unwrap(); + assert_eq!( + parsed, built, + "construction paths must yield equal instants" + ); + + let h1 = ProvenanceEntry::compute_hash("", "e1", "insert", "alice", &parsed, None, None); + let h2 = ProvenanceEntry::compute_hash("", "e1", "insert", "alice", &built, None, None); + assert_eq!( + h1, h2, + "canonical timestamp encoding must be path-independent" + ); + } + + /// V-L2-C3: round-trip β€” build a chain of N entries and assert every + /// mutation of every field breaks verification. + #[test] + fn test_provenance_chain_round_trip_mutation_matrix() { + let g = ProvenanceEntry::genesis("post-7", "system"); + let u1 = g.chain("update", "alice"); + let u2 = u1.chain("update", "bob"); + let d = u2.chain("delete", "alice"); + for entry in [&g, &u1, &u2, &d] { + assert!(entry.verify(), "every legitimate entry must verify"); + } + + for original in [&g, &u1, &u2, &d] { + // Permute each hash-covered field and assert verify fails. + for mutate in [ + |e: &mut ProvenanceEntry| e.actor.push_str("-tamper"), + |e: &mut ProvenanceEntry| e.entity_id.push_str("-tamper"), + |e: &mut ProvenanceEntry| e.operation.push_str("-tamper"), + |e: &mut ProvenanceEntry| { + e.previous_hash = "00".repeat(32); + }, + |e: &mut ProvenanceEntry| { + e.timestamp += chrono::Duration::nanoseconds(1); + }, + |e: &mut ProvenanceEntry| { + e.before_snapshot = Some("tampered".into()); + }, + |e: &mut ProvenanceEntry| { + e.transformation = Some("tampered".into()); + }, + ] { + let mut clone = original.clone(); + mutate(&mut clone); + assert!(!clone.verify(), "field mutation must break verification"); + } + } + } + /// Two `DateTime` values constructed via different paths but /// representing the same instant must produce the same hash. The /// previous RFC3339-string encoding could produce different hashes @@ -589,27 +672,19 @@ mod tests { fn test_provenance_timestamp_canonical_encoding() { let ts_parsed: DateTime = "2026-05-13T08:00:00.000Z".parse().unwrap(); let ts_offset: DateTime = "2026-05-13T08:00:00+00:00".parse().unwrap(); - assert_eq!(ts_parsed, ts_offset, "the two strings denote the same instant"); - - let h1 = ProvenanceEntry::compute_hash( - "", - "post-1", - "insert", - "alice", - &ts_parsed, - None, - None, + assert_eq!( + ts_parsed, ts_offset, + "the two strings denote the same instant" ); - let h2 = ProvenanceEntry::compute_hash( - "", - "post-1", - "insert", - "alice", - &ts_offset, - None, - None, + + let h1 = + ProvenanceEntry::compute_hash("", "post-1", "insert", "alice", &ts_parsed, None, None); + let h2 = + ProvenanceEntry::compute_hash("", "post-1", "insert", "alice", &ts_offset, None, None); + assert_eq!( + h1, h2, + "same instant must produce same hash regardless of input string form" ); - assert_eq!(h1, h2, "same instant must produce same hash regardless of input string form"); } /// Round-trip: build a 4-entry chain and assert every entry verifies; @@ -617,9 +692,7 @@ mod tests { /// mutation breaks `verify()` (closes #29 mutation-matrix clause). #[test] fn test_provenance_mutation_matrix_breaks_verification() { - let mut chain_entries = vec![ - ProvenanceEntry::genesis("post-1", "alice"), - ]; + let mut chain_entries = vec![ProvenanceEntry::genesis("post-1", "alice")]; for actor in ["bob", "carol", "dave"] { let next = chain_entries.last().unwrap().chain("update", actor); chain_entries.push(next); @@ -636,9 +709,7 @@ mod tests { |e: &mut ProvenanceEntry| e.actor = format!("{}-X", e.actor), |e: &mut ProvenanceEntry| e.before_snapshot = Some("X".to_string()), |e: &mut ProvenanceEntry| e.transformation = Some("X".to_string()), - |e: &mut ProvenanceEntry| { - e.timestamp += chrono::Duration::nanoseconds(1) - }, + |e: &mut ProvenanceEntry| e.timestamp += chrono::Duration::nanoseconds(1), |e: &mut ProvenanceEntry| e.previous_hash = format!("{}X", e.previous_hash), ] { let mut tampered = original.clone(); diff --git a/src/codegen/ident.rs b/src/codegen/ident.rs index 26c5cfe..fd1db85 100644 --- a/src/codegen/ident.rs +++ b/src/codegen/ident.rs @@ -89,19 +89,19 @@ mod tests { "posts'); DROP TABLE x;--", "posts; DROP TABLE x;", "posts--", - "1posts", // leading digit - "", // empty - "posts table", // space - "posts;", // semicolon - "posts'", // single quote - "posts\"x\"", // double quote - "posts/*x*/", // comment - "posts\nx", // newline - "posts\tx", // tab + "1posts", // leading digit + "", // empty + "posts table", // space + "posts;", // semicolon + "posts'", // single quote + "posts\"x\"", // double quote + "posts/*x*/", // comment + "posts\nx", // newline + "posts\tx", // tab "posts UNION SELECT 1", - "ΓΌnicode", // non-ASCII - "posts.col", // dot - "posts(", // paren + "ΓΌnicode", // non-ASCII + "posts.col", // dot + "posts(", // paren ]; for attack in attacks { let result = validate_identifier(attack, "table"); diff --git a/src/codegen/overlay.rs b/src/codegen/overlay.rs index 9707986..b14d771 100644 --- a/src/codegen/overlay.rs +++ b/src/codegen/overlay.rs @@ -17,6 +17,52 @@ use crate::codegen::parser::ParsedSchema; use crate::manifest::OctadConfig; +// --------------------------------------------------------------------------- +// Identifier validation (V-L2-G1) +// --------------------------------------------------------------------------- + +/// Permitted identifier shape for any user-controlled name that flows into +/// generated DDL: leading ASCII letter or underscore, then ASCII letters, +/// digits, or underscores. This is a deliberately conservative subset of +/// SQL's quoted-identifier rules β€” it rejects names that would be valid +/// under quoting but make our `format!()`-based DDL emission unsafe. +/// +/// Returns `Err` with the offending identifier quoted so the user can +/// rename or alias the source table. +fn validate_identifier(name: &str) -> std::result::Result<&str, String> { + if name.is_empty() { + return Err("identifier is empty".into()); + } + let mut chars = name.chars(); + let first = chars.next().unwrap(); + if !(first.is_ascii_alphabetic() || first == '_') { + return Err(format!( + "identifier {:?} must start with an ASCII letter or underscore", + name + )); + } + for c in chars { + if !(c.is_ascii_alphanumeric() || c == '_') { + return Err(format!( + "identifier {:?} contains invalid character {:?}; \ + only ASCII letters, digits, and underscores are allowed \ + in identifiers that flow into generated DDL (V-L2-G1)", + name, c + )); + } + } + Ok(name) +} + +/// Convenience: validate and panic with a structured message if invalid. +/// Used in the few DDL-emitting paths that don't propagate errors. +fn must_validate_identifier(name: &str) -> &str { + match validate_identifier(name) { + Ok(n) => n, + Err(e) => panic!("invalid identifier in generated DDL: {}", e), + } +} + // --------------------------------------------------------------------------- // Overlay generation // --------------------------------------------------------------------------- @@ -102,20 +148,26 @@ fn generate_metadata_table(schema: &ParsedSchema) -> String { ); // Generate INSERT statements for each discovered table. + // + // V-L2-G1: every identifier flowing into the SQL string here is + // validated. Anything that wouldn't match `^[A-Za-z_][A-Za-z0-9_]*$` + // is rejected at codegen time rather than allowed to land in DDL + // (where it would be an injection vector). if !schema.tables.is_empty() { ddl.push_str("-- Seed metadata from parsed schema\n"); for table in &schema.tables { + let table_name = must_validate_identifier(&table.name); let pk_cols: Vec<&str> = table .columns .iter() .filter(|c| c.is_primary_key) - .map(|c| c.name.as_str()) + .map(|c| must_validate_identifier(c.name.as_str())) .collect(); let pk_str = pk_cols.join(","); ddl.push_str(&format!( "INSERT OR IGNORE INTO verisimdb_metadata (table_name, column_count, pk_columns, discovered_at)\n\ \x20 VALUES ('{}', {}, '{}', datetime('now'));\n", - table.name, + table_name, table.columns.len(), pk_str, )); @@ -130,7 +182,13 @@ fn generate_metadata_table(schema: &ParsedSchema) -> String { /// /// Stores a SHA-256 hash-chained audit trail of all data modifications. /// Each row chains to its predecessor via `previous_hash`, forming an -/// append-only, tamper-evident log. +/// append-only, tamper-evident log (see +/// `docs/theory/provenance-threat-model.adoc`). +/// +/// The `chain_head` table is the per-entity head pointer used for the +/// write-path lock (V-L2-L1). The UNIQUE INDEX on `(entity_id, +/// previous_hash)` (V-L2-L2) makes chain forks structurally impossible +/// β€” defence in depth for if the lock is ever bypassed. fn generate_provenance_table() -> String { "-- Provenance: SHA-256 hash-chained audit trail\n\ CREATE TABLE IF NOT EXISTS verisimdb_provenance_log (\n\ @@ -138,15 +196,31 @@ fn generate_provenance_table() -> String { \x20 previous_hash TEXT NOT NULL,\n\ \x20 entity_id TEXT NOT NULL,\n\ \x20 table_name TEXT NOT NULL,\n\ - \x20 operation TEXT NOT NULL, -- insert, update, delete, transform\n\ + \x20 operation TEXT NOT NULL CHECK (operation IN ('insert','update','delete','transform')), -- V-L2-J1\n\ \x20 actor TEXT NOT NULL,\n\ \x20 timestamp TEXT NOT NULL, -- ISO 8601\n\ \x20 before_snapshot TEXT, -- JSON of entity state before operation\n\ \x20 transformation TEXT, -- description of transformation applied\n\ \x20 CHECK (operation IN ('insert','update','delete','transform'))\n\ );\n\ + -- V-L2-L2: forbid chain forks at the DB level. Genesis records all\n\ + -- carry previous_hash='' so this also enforces a single genesis per\n\ + -- entity.\n\ + CREATE UNIQUE INDEX IF NOT EXISTS ux_provenance_chain\n\ + \x20 ON verisimdb_provenance_log(entity_id, previous_hash);\n\ CREATE INDEX IF NOT EXISTS idx_provenance_entity ON verisimdb_provenance_log(entity_id);\n\ - CREATE INDEX IF NOT EXISTS idx_provenance_table ON verisimdb_provenance_log(table_name);\n\n" + CREATE INDEX IF NOT EXISTS idx_provenance_table ON verisimdb_provenance_log(table_name);\n\ + \n\ + -- V-L2-L1: per-entity head pointer. The write path takes a row\n\ + -- lock here (SELECT … FOR UPDATE / BEGIN IMMEDIATE) so concurrent\n\ + -- appenders on the same entity serialise; cross-entity appends\n\ + -- remain parallel. Each successful append updates head_hash in\n\ + -- the same transaction as the INSERT into verisimdb_provenance_log.\n\ + CREATE TABLE IF NOT EXISTS verisimdb_provenance_chain_head (\n\ + \x20 entity_id TEXT PRIMARY KEY,\n\ + \x20 head_hash TEXT NOT NULL,\n\ + \x20 updated_at TEXT NOT NULL\n\ + );\n\n" .to_string() } @@ -156,22 +230,20 @@ fn generate_provenance_table() -> String { /// Together, these edges form a DAG that can be traversed to answer /// "where did this data come from?" and "what is affected if this changes?" fn generate_lineage_table() -> String { - // The CHECK constraint refuses edges whose source and target are the - // same (entity, table) pair β€” i.e. self-loops, which would falsify - // the README's "DAG" claim at the structural level. Closes #42. - // (Multi-hop cycle prevention is a runtime concern tracked separately.) - "-- Lineage: data derivation DAG\n\ + "-- Lineage: data derivation graph (DAG by intent; cycle prevention is\n\ + -- a runtime concern β€” see V-L1-G1 / V-L2-I2).\n\ CREATE TABLE IF NOT EXISTS verisimdb_lineage_graph (\n\ \x20 edge_id TEXT PRIMARY KEY,\n\ \x20 source_entity TEXT NOT NULL,\n\ \x20 source_table TEXT NOT NULL,\n\ \x20 target_entity TEXT NOT NULL,\n\ \x20 target_table TEXT NOT NULL,\n\ - \x20 derivation_type TEXT NOT NULL, -- copy, transform, aggregate, join, filter\n\ + \x20 derivation_type TEXT NOT NULL\n\ + \x20 CHECK (derivation_type IN ('copy','transform','aggregate','join','filter')), -- V-L2-J1\n\ \x20 description TEXT,\n\ \x20 created_at TEXT NOT NULL, -- ISO 8601\n\ - \x20 CHECK (source_entity <> target_entity OR source_table <> target_table),\n\ - \x20 CHECK (derivation_type IN ('copy','transform','aggregate','join','filter'))\n\ + \x20 -- V-L2-I1: self-edges are not derivations; rejected at DB level.\n\ + \x20 CHECK (NOT (source_entity = target_entity AND source_table = target_table))\n\ );\n\ CREATE INDEX IF NOT EXISTS idx_lineage_source ON verisimdb_lineage_graph(source_entity);\n\ CREATE INDEX IF NOT EXISTS idx_lineage_target ON verisimdb_lineage_graph(target_entity);\n\n" @@ -184,23 +256,27 @@ fn generate_lineage_table() -> String { /// point-in-time queries and rollback. Each version records when it /// became active (`valid_from`) and when it was superseded (`valid_to`). fn generate_temporal_table() -> String { - // The partial UNIQUE index enforces "at most one current version per - // (entity, table)" at the storage layer β€” two concurrent writers can no - // longer both insert a row with `valid_to IS NULL` for the same entity. - // The CHECK ensures valid_to never precedes valid_from. Closes #41. - "-- Temporal: version history with point-in-time support\n\ + "-- Temporal: version history with point-in-time support.\n\ + -- V-L2-H1: the partial UNIQUE INDEX enforces exactly one\n\ + -- current row per (entity, table) β€” \"only one version is\n\ + -- valid right now\" was an application-layer invariant before;\n\ + -- now it's structural.\n\ + -- V-L2-J1: operation is a closed set.\n\ + -- V-L2-H2: valid_to (if set) must not predate valid_from.\n\ CREATE TABLE IF NOT EXISTS verisimdb_temporal_versions (\n\ \x20 entity_id TEXT NOT NULL,\n\ \x20 table_name TEXT NOT NULL,\n\ - \x20 version INTEGER NOT NULL,\n\ + \x20 version INTEGER NOT NULL CHECK (version >= 1),\n\ \x20 valid_from TEXT NOT NULL, -- ISO 8601\n\ \x20 valid_to TEXT, -- ISO 8601, NULL if current\n\ \x20 snapshot TEXT NOT NULL, -- JSON serialisation of entity state\n\ - \x20 operation TEXT NOT NULL, -- insert, update, rollback\n\ + \x20 operation TEXT NOT NULL CHECK (operation IN ('insert','update','rollback')),\n\ \x20 PRIMARY KEY (entity_id, table_name, version),\n\ \x20 CHECK (valid_to IS NULL OR valid_to >= valid_from)\n\ );\n\ - CREATE UNIQUE INDEX IF NOT EXISTS idx_temporal_current ON verisimdb_temporal_versions(entity_id, table_name) WHERE valid_to IS NULL;\n\n" + CREATE UNIQUE INDEX IF NOT EXISTS ux_temporal_current\n\ + \x20 ON verisimdb_temporal_versions(entity_id, table_name)\n\ + \x20 WHERE valid_to IS NULL;\n\n" .to_string() } @@ -210,17 +286,18 @@ fn generate_temporal_table() -> String { /// evaluated at query time to filter and redact data based on the /// requesting principal's identity and roles. fn generate_access_policy_table() -> String { - "-- Access Control: row/column-level access policies\n\ + "-- Access Control: row/column-level access policies.\n\ + -- V-L2-J1: access_level is a closed set.\n\ CREATE TABLE IF NOT EXISTS verisimdb_access_policies (\n\ \x20 policy_id TEXT PRIMARY KEY,\n\ \x20 target_table TEXT NOT NULL,\n\ \x20 target_column TEXT, -- NULL means whole-row policy\n\ \x20 principal TEXT NOT NULL, -- user, role, or group identifier\n\ - \x20 access_level TEXT NOT NULL, -- read, write, admin, deny\n\ - \x20 condition TEXT, -- SQL-like filter condition\n\ + \x20 access_level TEXT NOT NULL\n\ + \x20 CHECK (access_level IN ('read','write','admin','deny')),\n\ + \x20 condition TEXT, -- SQL-like filter condition (V-L1-H1)\n\ \x20 created_at TEXT NOT NULL, -- ISO 8601\n\ - \x20 active INTEGER NOT NULL DEFAULT 1,\n\ - \x20 CHECK (access_level IN ('read','write','admin','deny'))\n\ + \x20 active INTEGER NOT NULL DEFAULT 1 CHECK (active IN (0,1))\n\ );\n\ CREATE INDEX IF NOT EXISTS idx_access_table ON verisimdb_access_policies(target_table);\n\ CREATE INDEX IF NOT EXISTS idx_access_principal ON verisimdb_access_policies(principal);\n\n" @@ -232,7 +309,9 @@ fn generate_access_policy_table() -> String { /// Stores branched copies of data for what-if analysis. Each branch /// is isolated from the main data until explicitly merged. fn generate_simulation_table() -> String { - "-- Simulation: what-if branching and sandbox queries\n\ + "-- Simulation: what-if branching and sandbox queries.\n\ + -- V-L2-J1: status is a closed set; parent_branch is a self-FK\n\ + -- (was previously declared but un-enforced).\n\ CREATE TABLE IF NOT EXISTS verisimdb_simulation_branches (\n\ \x20 branch_id TEXT PRIMARY KEY,\n\ \x20 parent_branch TEXT REFERENCES verisimdb_simulation_branches(branch_id), -- NULL for root\n\ @@ -240,15 +319,16 @@ fn generate_simulation_table() -> String { \x20 description TEXT,\n\ \x20 created_at TEXT NOT NULL, -- ISO 8601\n\ \x20 merged_at TEXT, -- ISO 8601, NULL if not merged\n\ - \x20 status TEXT NOT NULL DEFAULT 'active', -- active, merged, abandoned\n\ - \x20 CHECK (status IN ('active','merged','abandoned'))\n\ + \x20 status TEXT NOT NULL DEFAULT 'active'\n\ + \x20 CHECK (status IN ('active','merged','abandoned'))\n\ );\n\n\ CREATE TABLE IF NOT EXISTS verisimdb_simulation_deltas (\n\ \x20 delta_id TEXT PRIMARY KEY,\n\ \x20 branch_id TEXT NOT NULL REFERENCES verisimdb_simulation_branches(branch_id),\n\ \x20 entity_id TEXT NOT NULL,\n\ \x20 table_name TEXT NOT NULL,\n\ - \x20 operation TEXT NOT NULL, -- insert, update, delete\n\ + \x20 operation TEXT NOT NULL\n\ + \x20 CHECK (operation IN ('insert','update','delete')), -- V-L2-J1\n\ \x20 delta_data TEXT NOT NULL, -- JSON of the change\n\ \x20 created_at TEXT NOT NULL -- ISO 8601\n\ );\n\ @@ -370,11 +450,14 @@ mod tests { let ddl = generate_sidecar_schema(&schema, &octad).expect("test schema must validate"); assert!(ddl.contains("verisimdb_temporal_versions")); assert!( - ddl.contains( - "CREATE UNIQUE INDEX IF NOT EXISTS idx_temporal_current ON verisimdb_temporal_versions(entity_id, table_name) WHERE valid_to IS NULL" - ), + ddl.contains("CREATE UNIQUE INDEX IF NOT EXISTS ux_temporal_current"), "temporal current-version index must be UNIQUE" ); + assert!( + ddl.contains("ON verisimdb_temporal_versions(entity_id, table_name)") + && ddl.contains("WHERE valid_to IS NULL"), + "temporal current-version index must be partial on valid_to IS NULL" + ); assert!( ddl.contains("CHECK (valid_to IS NULL OR valid_to >= valid_from)"), "temporal valid_to ordering CHECK missing" @@ -399,7 +482,9 @@ mod tests { assert!(ddl.contains("verisimdb_lineage_graph")); // The exact CHECK clause must be present in the emitted DDL. assert!( - ddl.contains("CHECK (source_entity <> target_entity OR source_table <> target_table)"), + ddl.contains( + "CHECK (NOT (source_entity = target_entity AND source_table = target_table))" + ), "lineage table is missing the self-reference CHECK constraint" ); } @@ -447,6 +532,24 @@ mod tests { assert!(ddl.contains("actor")); } + /// V-L2-L2: forks are forbidden by a UNIQUE INDEX on + /// (entity_id, previous_hash). + #[test] + fn test_provenance_table_has_unique_chain_index() { + let ddl = generate_provenance_table(); + assert!(ddl.contains("UNIQUE INDEX")); + assert!(ddl.contains("ux_provenance_chain")); + assert!(ddl.contains("(entity_id, previous_hash)")); + } + + /// V-L2-L1: chain_head table exists for per-entity write serialisation. + #[test] + fn test_provenance_table_has_chain_head() { + let ddl = generate_provenance_table(); + assert!(ddl.contains("verisimdb_provenance_chain_head")); + assert!(ddl.contains("head_hash")); + } + #[test] fn test_temporal_table_has_versioning() { let ddl = generate_temporal_table(); @@ -455,4 +558,88 @@ mod tests { assert!(ddl.contains("valid_to")); assert!(ddl.contains("snapshot")); } + + /// V-L2-H1: the partial UNIQUE INDEX enforces exactly-one-current. + #[test] + fn test_temporal_table_has_partial_unique_index() { + let ddl = generate_temporal_table(); + assert!(ddl.contains("UNIQUE INDEX")); + assert!(ddl.contains("ux_temporal_current")); + assert!(ddl.contains("WHERE valid_to IS NULL")); + } + + /// V-L2-H2: valid_to must not predate valid_from. + #[test] + fn test_temporal_table_has_valid_to_check() { + let ddl = generate_temporal_table(); + assert!(ddl.contains("valid_to IS NULL OR valid_to >= valid_from")); + } + + /// V-L2-I1: lineage self-edges are forbidden by CHECK. + #[test] + fn test_lineage_table_forbids_self_edges() { + let ddl = generate_lineage_table(); + assert!(ddl.contains("NOT (source_entity = target_entity")); + } + + /// V-L2-J1: simulation status is a closed set; parent_branch FK exists. + #[test] + fn test_simulation_table_constraints() { + let ddl = generate_simulation_table(); + assert!(ddl.contains("REFERENCES verisimdb_simulation_branches(branch_id)")); + assert!(ddl.contains("status IN ('active','merged','abandoned')")); + assert!(ddl.contains("operation IN ('insert','update','delete')")); + } + + /// V-L2-J1: provenance, lineage, access enum CHECKs. + #[test] + fn test_enum_checks() { + let prov = generate_provenance_table(); + assert!(prov.contains("operation IN ('insert','update','delete','transform')")); + + let lin = generate_lineage_table(); + assert!( + lin.contains("derivation_type IN ('copy','transform','aggregate','join','filter')") + ); + + let acc = generate_access_policy_table(); + assert!(acc.contains("access_level IN ('read','write','admin','deny')")); + } + + /// V-L2-G1: identifier validator accepts safe names, rejects everything + /// outside `^[A-Za-z_][A-Za-z0-9_]*$`. This is the codegen-side guard + /// against SQL injection via table/column names. + #[test] + fn test_validate_identifier_accepts_safe() { + for ok in &["posts", "Posts", "_x", "x_1", "Post_2026"] { + assert!( + validate_identifier(ok).is_ok(), + "{:?} should be accepted", + ok + ); + } + } + + #[test] + fn test_validate_identifier_rejects_unsafe() { + let attacks = [ + "", // empty + "1posts", // leading digit + "po sts", // space + "posts;", // statement terminator + "posts'); DROP TABLE x;--", // classic injection + "posts\"", // quote + "posts`", // backtick + "posts/*", // comment open + "schema.table", // dotted + "ΓΌnicode", // non-ASCII + ]; + for attack in &attacks { + assert!( + validate_identifier(attack).is_err(), + "{:?} should be rejected", + attack + ); + } + } } diff --git a/src/codegen/query.rs b/src/codegen/query.rs index 2694a87..5cfc6ee 100644 --- a/src/codegen/query.rs +++ b/src/codegen/query.rs @@ -445,7 +445,10 @@ mod tests { }; let interceptors = generate_interceptors(&schema, &octad, DatabaseBackend::SQLite); - let view = interceptors[0].provenance_view.as_ref().expect("TODO: handle error"); + let view = interceptors[0] + .provenance_view + .as_ref() + .expect("TODO: handle error"); assert!(view.contains("verisimdb_posts_with_provenance")); assert!(view.contains("posts.id")); assert!(view.contains("posts.title")); @@ -465,7 +468,10 @@ mod tests { }; let interceptors = generate_interceptors(&schema, &octad, DatabaseBackend::SQLite); - let view = interceptors[0].temporal_view.as_ref().expect("TODO: handle error"); + let view = interceptors[0] + .temporal_view + .as_ref() + .expect("TODO: handle error"); assert!(view.contains("verisimdb_posts_with_temporal")); assert!(view.contains("verisimdb_temporal_versions")); assert!(view.contains("valid_to IS NULL")); @@ -519,8 +525,11 @@ mod tests { #[test] fn test_entity_id_expr_composite_mongodb_uses_plus_concat() { - let expr = - build_entity_id_expr(&["account_id", "txn_id"], "ledger", DatabaseBackend::MongoDB); + let expr = build_entity_id_expr( + &["account_id", "txn_id"], + "ledger", + DatabaseBackend::MongoDB, + ); assert!(expr.contains("ledger.account_id")); assert!(expr.contains("ledger.txn_id")); // MongoDB concat operator is `+`, not `||`. diff --git a/src/doctor.rs b/src/doctor.rs index 50fd2a0..0563dd5 100644 --- a/src/doctor.rs +++ b/src/doctor.rs @@ -58,7 +58,8 @@ fn check_command_in_path(cmd: &str, description: &str) -> ValidationCheck { passed: false, detail: Some(format!( "`{} --version` exited with status {:?}", - cmd, out.status.code() + cmd, + out.status.code() )), }, Err(e) => ValidationCheck { diff --git a/src/gc.rs b/src/gc.rs index 2d3c5f5..16fb4de 100644 --- a/src/gc.rs +++ b/src/gc.rs @@ -13,7 +13,7 @@ use chrono::{Duration, Utc}; use rusqlite::Connection; use serde::Serialize; -use crate::manifest::{Manifest, RetentionConfig}; +use crate::manifest::Manifest; /// Number of rows purged per dimension by [`run_gc`]. #[derive(Debug, Clone, Serialize, Default)] @@ -135,16 +135,12 @@ fn purge_by_age( #[cfg(test)] mod tests { - use super::{RetentionConfig, run_gc}; - use crate::manifest::{Manifest, SidecarConfig}; + use super::run_gc; + use crate::manifest::{Manifest, RetentionConfig, SidecarConfig}; use rusqlite::Connection; /// Build a Manifest with a temp SQLite sidecar, retention set as given. - fn fixture( - sidecar_path: &str, - retention: RetentionConfig, - storage: &str, - ) -> Manifest { + fn fixture(sidecar_path: &str, retention: RetentionConfig, storage: &str) -> Manifest { let mut m: Manifest = toml::from_str( "[database]\n\ backend = \"sqlite\"\n", @@ -246,7 +242,9 @@ mod tests { // Verify nothing was actually deleted. let conn = Connection::open(sidecar_str).unwrap(); let n: i64 = conn - .query_row("SELECT COUNT(*) FROM verisimdb_provenance_log", [], |r| r.get(0)) + .query_row("SELECT COUNT(*) FROM verisimdb_provenance_log", [], |r| { + r.get(0) + }) .unwrap(); assert_eq!(n, 2, "dry-run must not delete"); } @@ -272,14 +270,20 @@ mod tests { let conn = Connection::open(sidecar_str).unwrap(); let provenance_count: i64 = conn - .query_row("SELECT COUNT(*) FROM verisimdb_provenance_log", [], |r| r.get(0)) + .query_row("SELECT COUNT(*) FROM verisimdb_provenance_log", [], |r| { + r.get(0) + }) .unwrap(); assert_eq!(provenance_count, 1, "fresh provenance kept"); // The current temporal version (e2, valid_to IS NULL) must survive // even though it is old enough to qualify on valid_from. let temporal_count: i64 = conn - .query_row("SELECT COUNT(*) FROM verisimdb_temporal_versions", [], |r| r.get(0)) + .query_row( + "SELECT COUNT(*) FROM verisimdb_temporal_versions", + [], + |r| r.get(0), + ) .unwrap(); assert_eq!(temporal_count, 2); let current_survived: i64 = conn diff --git a/src/intercept/sqlite.rs b/src/intercept/sqlite.rs index 41db81f..ad5f658 100755 --- a/src/intercept/sqlite.rs +++ b/src/intercept/sqlite.rs @@ -13,8 +13,8 @@ // V-L1-C1 (#46): sqlite3_update_hook + sidecar provenance writer. use crate::tier1::provenance::append_provenance; -use rusqlite::hooks::Action; use rusqlite::Connection; +use rusqlite::hooks::Action; use std::sync::{Arc, Mutex}; /// Type alias for a per-call entity-id resolver. Given `(table, rowid)` @@ -71,33 +71,27 @@ impl SqliteInterceptor { let sidecar = Arc::clone(&self.sidecar); let actor = self.actor.clone(); let resolver = Arc::clone(&self.resolver); - target.update_hook(Some(move |action: Action, _db: &str, table: &str, rowid: i64| { - let op = match action { - Action::SQLITE_INSERT => "insert", - Action::SQLITE_UPDATE => "update", - Action::SQLITE_DELETE => "delete", - _ => return, // unknown action β€” skip - }; - let entity_id = resolver(table, rowid); + target.update_hook(Some( + move |action: Action, _db: &str, table: &str, rowid: i64| { + let op = match action { + Action::SQLITE_INSERT => "insert", + Action::SQLITE_UPDATE => "update", + Action::SQLITE_DELETE => "delete", + _ => return, // unknown action β€” skip + }; + let entity_id = resolver(table, rowid); - // Lock the sidecar and append. We swallow errors here - // because the hook is invoked from inside SQLite's - // transaction machinery β€” a panic could destabilise the - // target connection. Errors are observable later via - // `verify_chain` returning Ok(false) or by inspecting - // the sidecar log. - if let Ok(mut conn) = sidecar.lock() { - let _ = append_provenance( - &mut conn, - &entity_id, - table, - op, - &actor, - None, - None, - ); - } - })); + // Lock the sidecar and append. We swallow errors here + // because the hook is invoked from inside SQLite's + // transaction machinery β€” a panic could destabilise the + // target connection. Errors are observable later via + // `verify_chain` returning Ok(false) or by inspecting + // the sidecar log. + if let Ok(mut conn) = sidecar.lock() { + let _ = append_provenance(&mut conn, &entity_id, table, op, &actor, None, None); + } + }, + )); } /// Borrow the sidecar connection for read-only queries (e.g. @@ -174,7 +168,10 @@ mod tests { ) .unwrap(); target - .execute("UPDATE users SET name = ?1 WHERE id = ?2", params!["Alicia", 1i64]) + .execute( + "UPDATE users SET name = ?1 WHERE id = ?2", + params!["Alicia", 1i64], + ) .unwrap(); target .execute("DELETE FROM users WHERE id = ?1", params![1i64]) @@ -249,14 +246,16 @@ mod tests { #[test] fn custom_resolver_overrides_rowid_default() { let target = fresh_target(); - let resolver: EntityIdResolver = - Arc::new(|table, rowid| format!("{table}#{rowid}")); - let interceptor = SqliteInterceptor::new(fresh_sidecar(), "test-actor") - .with_resolver(resolver); + let resolver: EntityIdResolver = Arc::new(|table, rowid| format!("{table}#{rowid}")); + let interceptor = + SqliteInterceptor::new(fresh_sidecar(), "test-actor").with_resolver(resolver); interceptor.install(&target); target - .execute("INSERT INTO users (id, name) VALUES (?1, ?2)", params![1i64, "Alice"]) + .execute( + "INSERT INTO users (id, name) VALUES (?1, ?2)", + params![1i64, "Alice"], + ) .unwrap(); let sidecar = interceptor.sidecar(); diff --git a/src/main.rs b/src/main.rs index ce72aa1..9733ca8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -163,7 +163,7 @@ fn main() -> Result<()> { }; // Determine the backend for SQL dialect selection. - let backend_name = m.database.effective_backend(); + let backend_name = m.database.effective_backend()?; let backend = abi::DatabaseBackend::from_str(backend_name) .unwrap_or(abi::DatabaseBackend::PostgreSQL); @@ -222,8 +222,8 @@ fn main() -> Result<()> { } let conn = rusqlite::Connection::open(&m.sidecar.path)?; // Distinct entity_ids that have at least one row in temporal_versions. - let mut stmt = conn - .prepare("SELECT DISTINCT entity_id FROM verisimdb_temporal_versions")?; + let mut stmt = + conn.prepare("SELECT DISTINCT entity_id FROM verisimdb_temporal_versions")?; let entities: Vec = stmt .query_map([], |r| r.get::<_, String>(0))? .collect::>()?; @@ -235,10 +235,7 @@ fn main() -> Result<()> { continue; }; if report.overall_score >= threshold { - println!( - " {} drift={:.3}", - report.entity_id, report.overall_score - ); + println!(" {} drift={:.3}", report.entity_id, report.overall_score); reported += 1; } } @@ -278,7 +275,7 @@ fn main() -> Result<()> { let report = manifest::status_report(&m); println!("{}", serde_json::to_string_pretty(&report)?); } else { - manifest::print_status(&m); + manifest::print_status(&m)?; } Ok(()) } @@ -308,8 +305,15 @@ fn main() -> Result<()> { if json { println!("{}", serde_json::to_string_pretty(&report)?); } else { - let action = if report.dry_run { "would delete" } else { "deleted" }; - println!("verisimiser gc ({}):", if report.dry_run { "dry-run" } else { "apply" }); + let action = if report.dry_run { + "would delete" + } else { + "deleted" + }; + println!( + "verisimiser gc ({}):", + if report.dry_run { "dry-run" } else { "apply" } + ); println!(" sidecar: {}", report.sidecar); println!(" provenance: {action} {} rows", report.provenance_deleted); println!(" temporal: {action} {} rows", report.temporal_deleted); @@ -339,11 +343,7 @@ fn main() -> Result<()> { /// Render a `ValidationReport` (from `validate` or `doctor`) and exit /// non-zero if any check failed. Plain-text by default; JSON when /// `json == true`. -fn emit_report( - report: &manifest::ValidationReport, - json: bool, - kind: &str, -) -> Result<()> { +fn emit_report(report: &manifest::ValidationReport, json: bool, kind: &str) -> Result<()> { if json { println!("{}", serde_json::to_string_pretty(report)?); } else { diff --git a/src/manifest/mod.rs b/src/manifest/mod.rs index 31d5ff6..444ae1c 100644 --- a/src/manifest/mod.rs +++ b/src/manifest/mod.rs @@ -113,14 +113,28 @@ impl Default for DatabaseConfig { } impl DatabaseConfig { - /// Returns the effective backend name, considering legacy `target_db` field. - pub fn effective_backend(&self) -> &str { - if !self.backend.is_empty() && self.backend != "postgresql" { - &self.backend - } else if !self.target_db.is_empty() { - &self.target_db - } else { - &self.backend + /// Returns the effective backend name. + /// + /// `target-db` is a legacy field kept for backward compatibility with the + /// old manifest schema. The new field is `backend`. If both are set to + /// distinct values, refuse rather than silently picking one β€” value-based + /// tie-breaking (the previous behaviour) silently picked sqlite when a + /// user set `backend = "postgresql"` alongside `target-db = "sqlite"` + /// (V-L2-E1). + pub fn effective_backend(&self) -> Result<&str> { + let new_set = !self.backend.is_empty(); + let old_set = !self.target_db.is_empty(); + match (new_set, old_set) { + (true, true) if self.backend != self.target_db => anyhow::bail!( + "verisimiser.toml sets both [database].backend = {:?} and \ + [database].target-db = {:?}. target-db is the legacy field; \ + remove it and keep backend.", + self.backend, + self.target_db + ), + (true, _) => Ok(self.backend.as_str()), + (false, true) => Ok(self.target_db.as_str()), + (false, false) => Ok("postgresql"), } } } @@ -341,11 +355,7 @@ mod validate_manifest_tests { std::fs::write(&path, body).expect("write"); let report = validate_manifest(path.to_str().unwrap()); - assert!( - report.passed, - "expected pass; checks: {:?}", - report.checks - ); + assert!(report.passed, "expected pass; checks: {:?}", report.checks); assert!(report.failed_count() == 0); } @@ -421,10 +431,7 @@ mod load_manifest_tests { // The exact line/column varies with toml's internal pointer, but // there must be a `:::` somewhere in the message. let span_re = regex_like_line_col(&msg); - assert!( - span_re, - "error must include filename:line:col; got: {msg}" - ); + assert!(span_re, "error must include filename:line:col; got: {msg}"); } /// Lightweight substitute for a regex match (no regex crate added): @@ -552,12 +559,7 @@ pub fn load_manifest(path: &str) -> Result { fn byte_offset_to_line_col(text: &str, offset: usize) -> (usize, usize) { let prefix = &text[..offset.min(text.len())]; let line = prefix.bytes().filter(|b| *b == b'\n').count() + 1; - let col = prefix - .bytes() - .rev() - .take_while(|b| *b != b'\n') - .count() - + 1; + let col = prefix.bytes().rev().take_while(|b| *b != b'\n').count() + 1; (line, col) } @@ -642,7 +644,7 @@ lineage-days = {lineage_days} #[cfg(test)] mod init_template_tests { - use super::{render_manifest_template, Manifest, OctadConfig}; + use super::{Manifest, OctadConfig, render_manifest_template}; #[test] fn template_round_trips_through_toml() { @@ -873,7 +875,11 @@ pub fn status_report(manifest: &Manifest) -> StatusReport { }; StatusReport { name, - backend: manifest.database.effective_backend().to_string(), + backend: manifest + .database + .effective_backend() + .unwrap_or(manifest.database.backend.as_str()) + .to_string(), sidecar_path: manifest.sidecar.path.clone(), sidecar_storage: manifest.sidecar.storage.clone(), octad: OctadStatus { @@ -891,14 +897,14 @@ pub fn status_report(manifest: &Manifest) -> StatusReport { } /// Print a human-readable status summary of a loaded manifest. -pub fn print_status(manifest: &Manifest) { +pub fn print_status(manifest: &Manifest) -> Result<()> { let name = if !manifest.project.name.is_empty() { &manifest.project.name } else { &manifest.verisimiser.name }; - let backend = manifest.database.effective_backend(); + let backend = manifest.database.effective_backend()?; println!("=== VeriSimiser: {} ===", name); println!("Backend: {}", backend); @@ -908,6 +914,7 @@ pub fn print_status(manifest: &Manifest) { ); println!(); + let on_off = |b: bool| if b { "ON" } else { "off" }; println!( "Octad Dimensions ({}/8 enabled):", manifest.octad.enabled_count() @@ -916,50 +923,147 @@ pub fn print_status(manifest: &Manifest) { println!(" Metadata: ALWAYS ON (schema introspection)"); println!( " Provenance: {}", - if manifest.octad.enable_provenance { - "ON" - } else { - "off" - } + on_off(manifest.octad.enable_provenance) ); println!( " Lineage: {}", - if manifest.octad.enable_lineage { - "ON" - } else { - "off" - } + on_off(manifest.octad.enable_lineage) ); println!( " Constraints: {}", - if manifest.octad.enable_constraints { - "ON" - } else { - "off" - } + on_off(manifest.octad.enable_constraints) ); println!( " Access Control: {}", - if manifest.octad.enable_access_control { - "ON" - } else { - "off" - } + on_off(manifest.octad.enable_access_control) ); println!( " Temporal: {}", - if manifest.octad.enable_temporal { - "ON" - } else { - "off" - } + on_off(manifest.octad.enable_temporal) ); println!( " Simulation: {}", - if manifest.octad.enable_simulation { - "ON" - } else { - "off" - } + on_off(manifest.octad.enable_simulation) ); + Ok(()) +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + /// V-L2-D1: enabled_count is bounded by 2..=8 for every flag combination. + #[test] + fn test_enabled_count_bounds() { + for mask in 0u8..64 { + let octad = OctadConfig { + enable_provenance: mask & 0b000001 != 0, + enable_lineage: mask & 0b000010 != 0, + enable_temporal: mask & 0b000100 != 0, + enable_access_control: mask & 0b001000 != 0, + enable_constraints: mask & 0b010000 != 0, + enable_simulation: mask & 0b100000 != 0, + }; + let c = octad.enabled_count(); + assert!( + (2..=8).contains(&c), + "enabled_count out of range for mask={:#08b}: got {}", + mask, + c + ); + } + } + + /// V-L2-D1: enabled_count exactly equals 2 + popcount(toggles). + #[test] + fn test_enabled_count_arithmetic() { + let octad = OctadConfig { + enable_provenance: true, + enable_lineage: false, + enable_temporal: true, + enable_access_control: false, + enable_constraints: true, + enable_simulation: false, + }; + assert_eq!(octad.enabled_count(), 2 + 3); + } + + /// V-L2-E1: setting both backend and target_db to the *same* value + /// is harmless β€” single source of truth. + #[test] + fn test_effective_backend_agreement() { + let cfg = DatabaseConfig { + backend: "sqlite".to_string(), + target_db: "sqlite".to_string(), + ..Default::default() + }; + assert_eq!(cfg.effective_backend().unwrap(), "sqlite"); + } + + /// V-L2-E1: setting both to *conflicting* values must error loudly. + #[test] + fn test_effective_backend_conflict_errors() { + let cfg = DatabaseConfig { + backend: "postgresql".to_string(), + target_db: "sqlite".to_string(), + ..Default::default() + }; + let err = cfg.effective_backend().unwrap_err().to_string(); + assert!( + err.contains("postgresql"), + "error mentions modern field value" + ); + assert!(err.contains("sqlite"), "error mentions legacy field value"); + } + + /// V-L2-E1: modern-only and legacy-only both work. + #[test] + fn test_effective_backend_single_source() { + let modern = DatabaseConfig { + backend: "sqlite".to_string(), + target_db: String::new(), + ..Default::default() + }; + assert_eq!(modern.effective_backend().unwrap(), "sqlite"); + + let legacy = DatabaseConfig { + backend: String::new(), + target_db: "mongodb".to_string(), + ..Default::default() + }; + assert_eq!(legacy.effective_backend().unwrap(), "mongodb"); + } + + /// V-L2-E1: with nothing set, default is postgresql. + #[test] + fn test_effective_backend_default() { + let cfg = DatabaseConfig { + backend: String::new(), + target_db: String::new(), + ..Default::default() + }; + assert_eq!(cfg.effective_backend().unwrap(), "postgresql"); + } + + /// V-L2-O1: init_manifest template reflects OctadConfig::default(). + #[test] + fn test_init_manifest_template_uses_defaults() { + // We can't actually call init_manifest in a unit test (it writes + // to CWD), but we can check that the template *would* be + // consistent by computing what it would emit and asserting + // the toggle lines match Default. + let defaults = OctadConfig::default(); + // If a future patch flips a default, this test makes the + // template-vs-Default invariant visible. + assert!(defaults.enable_provenance); + assert!(defaults.enable_lineage); + assert!(defaults.enable_temporal); + assert!(defaults.enable_access_control); + assert!(defaults.enable_constraints); + assert!(!defaults.enable_simulation); + } } diff --git a/src/tier1/drift.rs b/src/tier1/drift.rs index 0d8eb97..44d17e1 100644 --- a/src/tier1/drift.rs +++ b/src/tier1/drift.rs @@ -107,7 +107,7 @@ pub fn temporal_drift_score(versions: &[i64]) -> f64 { #[cfg(test)] mod temporal_drift_tests { - use super::{detect_temporal_drift, temporal_drift_score, DriftCategory}; + use super::{DriftCategory, detect_temporal_drift, temporal_drift_score}; use rusqlite::Connection; /// Identical versions β†’ score 0.0. diff --git a/src/tier1/provenance.rs b/src/tier1/provenance.rs index fc0c49e..cbe90f3 100644 --- a/src/tier1/provenance.rs +++ b/src/tier1/provenance.rs @@ -15,7 +15,7 @@ // β€” see ADR-0002 / #27); this module just persists the entries. use chrono::{DateTime, Utc}; -use rusqlite::{params, Connection, TransactionBehavior}; +use rusqlite::{Connection, TransactionBehavior, params}; // ========================================================================= // Canonical entry shape @@ -255,16 +255,8 @@ mod tests { #[test] fn genesis_entry_chains_from_empty() { let mut conn = open_sidecar(); - let hash = append_provenance( - &mut conn, - "e1", - "users", - "insert", - "alice", - None, - None, - ) - .unwrap(); + let hash = + append_provenance(&mut conn, "e1", "users", "insert", "alice", None, None).unwrap(); assert!(!hash.is_empty()); let prev: String = conn @@ -289,10 +281,8 @@ mod tests { #[test] fn sequential_appends_chain_correctly() { let mut conn = open_sidecar(); - let h1 = append_provenance( - &mut conn, "e1", "users", "insert", "alice", None, None, - ) - .unwrap(); + let h1 = + append_provenance(&mut conn, "e1", "users", "insert", "alice", None, None).unwrap(); let h2 = append_provenance( &mut conn, "e1", @@ -303,10 +293,7 @@ mod tests { None, ) .unwrap(); - let h3 = append_provenance( - &mut conn, "e1", "users", "delete", "bob", None, None, - ) - .unwrap(); + let h3 = append_provenance(&mut conn, "e1", "users", "delete", "bob", None, None).unwrap(); assert_ne!(h1, h2); assert_ne!(h2, h3); diff --git a/src/tier1/temporal.rs b/src/tier1/temporal.rs index a508956..3dbc415 100644 --- a/src/tier1/temporal.rs +++ b/src/tier1/temporal.rs @@ -16,7 +16,7 @@ // NULL` row hanging around. use chrono::{DateTime, Utc}; -use rusqlite::{params, Connection, TransactionBehavior}; +use rusqlite::{Connection, TransactionBehavior, params}; use serde::{Deserialize, Serialize}; // ========================================================================= @@ -97,14 +97,13 @@ pub fn append_version( ) -> rusqlite::Result { let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?; - let prev_version: i64 = tx - .query_row( - "SELECT COALESCE(MAX(version), 0) \ + let prev_version: i64 = tx.query_row( + "SELECT COALESCE(MAX(version), 0) \ FROM verisimdb_temporal_versions \ WHERE entity_id = ?1 AND table_name = ?2", - params![entity_id, table_name], - |row| row.get(0), - )?; + params![entity_id, table_name], + |row| row.get(0), + )?; let next_version = prev_version + 1; let now = Utc::now(); @@ -258,8 +257,7 @@ mod tests { #[test] fn genesis_append_starts_at_version_one() { let mut conn = open_sidecar(); - let v = append_version(&mut conn, "e1", "users", "{\"name\":\"Alice\"}", "insert") - .unwrap(); + let v = append_version(&mut conn, "e1", "users", "{\"name\":\"Alice\"}", "insert").unwrap(); assert_eq!(v, 1); } @@ -342,7 +340,10 @@ mod tests { std::thread::sleep(std::time::Duration::from_millis(20)); append_version(&mut conn, "e1", "users", "{\"v\":1}", "insert").unwrap(); let snap = read_at(&conn, "e1", "users", &before).unwrap(); - assert!(snap.is_none(), "no version exists at a time before any insert"); + assert!( + snap.is_none(), + "no version exists at a time before any insert" + ); } #[test] diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 96d3948..d387f95 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -78,8 +78,7 @@ fn test_full_pipeline_blog_schema() { enable_constraints: true, enable_simulation: false, }; - let overlay_ddl = - overlay::generate_sidecar_schema(&schema, &octad).expect("schema is valid"); + let overlay_ddl = overlay::generate_sidecar_schema(&schema, &octad).expect("schema is valid"); // Verify all expected sidecar tables are present. assert!( @@ -236,7 +235,7 @@ vector = false assert_eq!(manifest.verisimiser.name, "legacy-db"); assert_eq!(manifest.database.target_db, "postgresql"); - assert_eq!(manifest.database.effective_backend(), "postgresql"); + assert_eq!(manifest.database.effective_backend().unwrap(), "postgresql"); assert!(manifest.tier1.provenance); assert!(manifest.tier1.temporal_versioning); assert!(manifest.tier1.drift_detection); @@ -284,7 +283,14 @@ fn test_provenance_chain_integrity_multi_step() { tampered_op.operation = "delete".to_string(); assert!( !tampered_op.verify(), - "Tampering with operation should break verification" + "tampering with operation must break verify" + ); + + let mut tampered_snap = update1.clone(); + tampered_snap.before_snapshot = Some("{}".into()); + assert!( + !tampered_snap.verify(), + "before_snapshot is part of the hash; tampering with it must break verify" ); } @@ -436,7 +442,9 @@ fn test_end_to_end_file_workflow() { .unwrap(); } - // Write a manifest file. + // Write a manifest file. Note: on Windows, schema_path uses backslashes + // which are escape characters in TOML basic strings β€” emit the path as a + // TOML literal string (single-quoted) to dodge escape interpretation. let manifest_path = dir.path().join("verisimiser.toml"); { let mut f = std::fs::File::create(&manifest_path).unwrap(); @@ -449,7 +457,7 @@ name = "test-articles" [database] backend = "sqlite" connection-string-env = "TEST_DB" -schema-source = "{}" +schema-source = '{}' [octad] enable-provenance = true @@ -478,8 +486,8 @@ path = ".verisim/test.db" assert_eq!(schema.tables[0].name, "articles"); // Generate overlay. - let overlay_ddl = overlay::generate_sidecar_schema(&schema, &manifest.octad) - .expect("schema is valid"); + let overlay_ddl = + overlay::generate_sidecar_schema(&schema, &manifest.octad).expect("schema is valid"); assert!(overlay_ddl.contains("verisimdb_provenance_log")); assert!(overlay_ddl.contains("verisimdb_temporal_versions")); assert!( diff --git a/tests/sqlite_intercept_e2e.rs b/tests/sqlite_intercept_e2e.rs index 219cc27..39ebc2c 100755 --- a/tests/sqlite_intercept_e2e.rs +++ b/tests/sqlite_intercept_e2e.rs @@ -17,7 +17,7 @@ // (WAL, file locks, separate processes-files) rather than the // in-memory shortcut used by unit tests. -use rusqlite::{params, Connection}; +use rusqlite::{Connection, params}; use std::sync::Arc; use tempfile::TempDir; use verisimiser::intercept::sqlite::{EntityIdResolver, SqliteInterceptor}; @@ -43,10 +43,8 @@ fn setup() -> (TempDir, Connection, SqliteInterceptor) { // Resolver: route rowid to a logical entity id `accounts:N` so // the sidecar entries are human-readable. - let resolver: EntityIdResolver = - Arc::new(|table, rowid| format!("{table}:{rowid}")); - let interceptor = SqliteInterceptor::new(sidecar, "e2e-test") - .with_resolver(resolver); + let resolver: EntityIdResolver = Arc::new(|table, rowid| format!("{table}:{rowid}")); + let interceptor = SqliteInterceptor::new(sidecar, "e2e-test").with_resolver(resolver); interceptor.install(&target); (tmp, target, interceptor) @@ -135,7 +133,10 @@ fn e2e_mixed_workload_verifies_all_chains() { |r| r.get(0), ) .unwrap(); - assert_eq!(leaked, 0, "verisimdb_* tables must not leak into the target"); + assert_eq!( + leaked, 0, + "verisimdb_* tables must not leak into the target" + ); } #[test] @@ -151,7 +152,10 @@ fn e2e_chain_survives_reopen_of_sidecar() { ) .unwrap(); target - .execute("UPDATE accounts SET balance = 2000 WHERE id = ?1", params![42i64]) + .execute( + "UPDATE accounts SET balance = 2000 WHERE id = ?1", + params![42i64], + ) .unwrap(); // Drop the interceptor (and its sidecar handle); reopen and verify.