diff --git a/.machine_readable/6a2/STATE.a2ml b/.machine_readable/6a2/STATE.a2ml index 3a5106c..9a97dbe 100644 --- a/.machine_readable/6a2/STATE.a2ml +++ b/.machine_readable/6a2/STATE.a2ml @@ -26,8 +26,8 @@ milestones = [ { name = "State machine (claim/promote/reject/requeue/invalidate) + events.jsonl", completion = 100 }, { name = "`check` command (Agda typecheck + lint verdict)", completion = 100 }, { name = "Multi-root reachability (union of discovered CI roots)", completion = 100 }, + { name = "Content-hash invalidation of proven (file-level; +imports deferred)", completion = 75 }, { name = "Remaining lint rules (unpinned-headline, unused-import)", completion = 0 }, - { name = "Content-hash invalidation of proven", completion = 0 }, { name = "Groove service manifest (/.well-known/groove)", completion = 0 }, { name = "RSR retrofit (.machine_readable, Justfile, .well-known)", completion = 60 }, ] @@ -38,8 +38,8 @@ milestones = [ [critical-next-actions] actions = [ - "Add unpinned-headline + unused-import lint rules", - "Content-hash proven files (SHA-256 of file + imports) for auto-invalidation", + "Add unpinned-headline (needs config + headline regex) + unused-import (agda-unused) lint rules", + "Extend content-hash to cover transitive imports (needs include-root tracking)", "Serve /.well-known/groove for PanLL discovery", "Scaffold arghda-studio (AffineScript visual layer consuming the dag JSON)", ] diff --git a/CHANGELOG.md b/CHANGELOG.md index 5698912..c63d409 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,11 @@ All notable changes to arghda-core are documented here. The format follows (warn). - RSR scaffolding: `.machine_readable/6a2/` artefacts, `0-AI-MANIFEST.a2ml`, `Justfile`, `.well-known/`, and community-health files. +- Content-hash invalidation of `proven`: promotion records a SHA-256 of the + file in `.arghda/hashes.json`; the `stale` command reports proven files + edited since promotion, and `stale --invalidate` moves them back to inbox + (the `proven -> inbox` invalidation). Dependency-free SHA-256, pinned + against the NIST test vectors. ### Notes diff --git a/src/hash.rs b/src/hash.rs new file mode 100644 index 0000000..eaea825 --- /dev/null +++ b/src/hash.rs @@ -0,0 +1,123 @@ +//! Dependency-free SHA-256 (FIPS 180-4). +//! +//! Used for `proven`-state content hashing — detecting when a file changes +//! after it was promoted, so the `proven -> inbox` invalidation in +//! `arghda-spec.adoc` can fire. This is change-detection, not a security +//! boundary, and the crate's ethos is lightweight, so we keep it hermetic +//! (no `sha2` dependency tree) and pin correctness against the canonical +//! NIST vectors in the tests below. + +const H0: [u32; 8] = [ + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, +]; + +const K: [u32; 64] = [ + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, +]; + +/// Lowercase hex SHA-256 digest of `data`. +pub fn sha256_hex(data: &[u8]) -> String { + // Pre-processing: append 0x80, pad with zeros to 56 mod 64, append the + // 64-bit big-endian bit length. + let mut msg = data.to_vec(); + let bit_len = (data.len() as u64).wrapping_mul(8); + msg.push(0x80); + while msg.len() % 64 != 56 { + msg.push(0); + } + msg.extend_from_slice(&bit_len.to_be_bytes()); + + let mut h = H0; + for chunk in msg.chunks_exact(64) { + let mut w = [0u32; 64]; + for (i, word) in chunk.chunks_exact(4).take(16).enumerate() { + w[i] = u32::from_be_bytes([word[0], word[1], word[2], word[3]]); + } + for i in 16..64 { + let s0 = w[i - 15].rotate_right(7) ^ w[i - 15].rotate_right(18) ^ (w[i - 15] >> 3); + let s1 = w[i - 2].rotate_right(17) ^ w[i - 2].rotate_right(19) ^ (w[i - 2] >> 10); + w[i] = w[i - 16] + .wrapping_add(s0) + .wrapping_add(w[i - 7]) + .wrapping_add(s1); + } + + let [mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut hh] = h; + for (&ki, &wi) in K.iter().zip(w.iter()) { + let s1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25); + let ch = (e & f) ^ ((!e) & g); + let t1 = hh + .wrapping_add(s1) + .wrapping_add(ch) + .wrapping_add(ki) + .wrapping_add(wi); + let s0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22); + let maj = (a & b) ^ (a & c) ^ (b & c); + let t2 = s0.wrapping_add(maj); + hh = g; + g = f; + f = e; + e = d.wrapping_add(t1); + d = c; + c = b; + b = a; + a = t1.wrapping_add(t2); + } + for (slot, v) in h.iter_mut().zip([a, b, c, d, e, f, g, hh]) { + *slot = slot.wrapping_add(v); + } + } + + let mut out = String::with_capacity(64); + for word in h { + out.push_str(&format!("{word:08x}")); + } + out +} + +#[cfg(test)] +mod tests { + use super::*; + + // Canonical FIPS 180-4 / NIST test vectors. + #[test] + fn empty_string() { + assert_eq!( + sha256_hex(b""), + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + ); + } + + #[test] + fn abc() { + assert_eq!( + sha256_hex(b"abc"), + "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" + ); + } + + #[test] + fn hello_world() { + assert_eq!( + sha256_hex(b"hello world"), + "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9" + ); + } + + #[test] + fn multi_block() { + // 56 bytes forces a second padding block (length straddles the boundary). + let input = "a".repeat(1000); + assert_eq!( + sha256_hex(input.as_bytes()), + "41edece42d63e8d9bf515a9ba6932e1c20cbc9f5a5d134645adb5db1b9737ea3" + ); + } +} diff --git a/src/lib.rs b/src/lib.rs index 5087e00..29dac5a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,7 +8,9 @@ pub mod dag; pub mod diagnostic; pub mod event; pub mod graph; +pub mod hash; pub mod lint; +pub mod proven; pub mod timestamp; pub mod watcher; pub mod workspace; @@ -19,4 +21,4 @@ pub use diagnostic::{Diagnostic, LintReport, Severity}; pub use event::{Event, EventKind}; pub use graph::{build as build_graph, ImportGraph}; pub use lint::{default_rules, run_lints, LintRule}; -pub use workspace::{State, Workspace}; +pub use workspace::{StaleEntry, State, Workspace}; diff --git a/src/main.rs b/src/main.rs index a6cd7cc..6b9d7fd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -71,6 +71,13 @@ enum Cmd { Invalidate { workspace: PathBuf, file: String }, /// Print the workspace event log. Events { workspace: PathBuf }, + /// List proven files whose content changed since promotion (stale). + Stale { + workspace: PathBuf, + /// Move stale files back to inbox (proven -> inbox). + #[arg(long)] + invalidate: bool, + }, /// Watch `inbox/` and `working/` in a workspace; print events. Watch { workspace: PathBuf }, } @@ -105,6 +112,10 @@ fn main() -> Result<()> { transition(&workspace, &file, State::Proven, State::Inbox)? } Cmd::Events { workspace } => events(&workspace)?, + Cmd::Stale { + workspace, + invalidate, + } => stale(&workspace, invalidate)?, Cmd::Watch { workspace } => watch(&workspace)?, } Ok(()) @@ -290,6 +301,30 @@ fn events(workspace: &Path) -> Result<()> { Ok(()) } +fn stale(workspace: &Path, invalidate: bool) -> Result<()> { + let ws = Workspace::open(workspace)?; + let stale = ws.stale_proven()?; + if stale.is_empty() { + println!("(no stale proven files)"); + return Ok(()); + } + for s in &stale { + println!("stale: {} ({})", s.file, s.reason); + } + if invalidate { + for s in &stale { + ws.transition( + &s.file, + State::Proven, + State::Inbox, + Some(format!("auto-invalidated: {}", s.reason)), + )?; + println!("invalidated: {} (proven -> inbox)", s.file); + } + } + Ok(()) +} + fn watch(workspace_path: &Path) -> Result<()> { let ws = Workspace::open(workspace_path)?; let inbox = ws.state_dir(State::Inbox); diff --git a/src/proven.rs b/src/proven.rs new file mode 100644 index 0000000..b98cfc3 --- /dev/null +++ b/src/proven.rs @@ -0,0 +1,66 @@ +//! The `proven`-state content-hash manifest (`.arghda/hashes.json`). +//! +//! When a file is promoted to `proven`, its content hash is recorded here. +//! [`crate::Workspace::stale_proven`] recomputes and compares, so a `proven` +//! file that was edited (or never recorded) after promotion is surfaced and +//! can be sent back to `inbox` — the `proven -> inbox` invalidation from +//! `docs/arghda-spec.adoc`. +//! +//! v1 hashes the file content only. Hashing a file *plus its transitive +//! imports* (the spec's full form) needs the workspace to know the source +//! tree's include root, which the flat triage layout does not yet carry; +//! that is a documented follow-on. + +use crate::hash::sha256_hex; +use anyhow::{Context, Result}; +use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; +use std::fs; +use std::path::{Path, PathBuf}; + +pub const HASHES_FILE: &str = "hashes.json"; + +/// What was recorded when a file entered `proven`. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct ProvenRecord { + pub sha256: String, + pub promoted_at: String, +} + +/// The whole manifest: basename -> record. +#[derive(Clone, Debug, Default, Serialize, Deserialize)] +pub struct ProvenManifest { + #[serde(default)] + pub entries: BTreeMap, +} + +fn manifest_path(ws_root: &Path) -> PathBuf { + ws_root.join(".arghda").join(HASHES_FILE) +} + +/// Load the manifest, or an empty one if it does not exist yet. +pub fn load(ws_root: &Path) -> Result { + let path = manifest_path(ws_root); + if !path.is_file() { + return Ok(ProvenManifest::default()); + } + let contents = + fs::read_to_string(&path).with_context(|| format!("reading {}", path.display()))?; + serde_json::from_str(&contents).with_context(|| format!("parsing {}", path.display())) +} + +/// Write the manifest, creating `.arghda/` if needed. +pub fn save(ws_root: &Path, manifest: &ProvenManifest) -> Result<()> { + let meta = ws_root.join(".arghda"); + fs::create_dir_all(&meta).with_context(|| format!("creating {}", meta.display()))?; + let path = manifest_path(ws_root); + let mut json = serde_json::to_string_pretty(manifest).context("serialising manifest")?; + json.push('\n'); + fs::write(&path, json).with_context(|| format!("writing {}", path.display())) +} + +/// SHA-256 of a file's bytes, lowercase hex. +pub fn hash_file(path: &Path) -> Result { + let bytes = fs::read(path).with_context(|| format!("reading {}", path.display()))?; + Ok(sha256_hex(&bytes)) +} diff --git a/src/workspace.rs b/src/workspace.rs index 0785306..41c02f5 100644 --- a/src/workspace.rs +++ b/src/workspace.rs @@ -1,4 +1,5 @@ use crate::event::{self, Event, EventKind}; +use crate::proven::{self, ProvenRecord}; use anyhow::{Context, Result}; use serde::{Deserialize, Serialize}; use std::fs; @@ -121,10 +122,65 @@ impl Workspace { .with_from(from) .with_to(to) .with_note(note); + + // Maintain the proven content-hash manifest: record on entry to + // `proven`, drop on exit. This is what lets `stale_proven` detect a + // file edited after promotion. + if to == State::Proven { + let mut manifest = proven::load(&self.root)?; + manifest.entries.insert( + file_name.to_string(), + ProvenRecord { + sha256: proven::hash_file(&dst)?, + promoted_at: ev.ts.clone(), + }, + ); + proven::save(&self.root, &manifest)?; + } else if from == State::Proven { + let mut manifest = proven::load(&self.root)?; + if manifest.entries.remove(file_name).is_some() { + proven::save(&self.root, &manifest)?; + } + } + event::append(&self.root, &ev) .with_context(|| format!("logging {:?} for {}", kind, file_name))?; Ok(dst) } + + /// Proven files whose current content no longer matches the hash recorded + /// at promotion (or that were never recorded). These are the candidates + /// for `proven -> inbox` invalidation. + pub fn stale_proven(&self) -> Result> { + let manifest = proven::load(&self.root)?; + let mut out = Vec::new(); + for path in self.list(State::Proven)? { + let Some(name) = path.file_name().and_then(|s| s.to_str()) else { + continue; + }; + let current = proven::hash_file(&path)?; + let reason = match manifest.entries.get(name) { + None => Some("no recorded hash"), + Some(rec) if rec.sha256 != current => Some("content changed since promotion"), + Some(_) => None, + }; + if let Some(reason) = reason { + out.push(StaleEntry { + file: name.to_string(), + reason, + }); + } + } + out.sort_by(|a, b| a.file.cmp(&b.file)); + Ok(out) + } +} + +/// A `proven` file flagged stale by [`Workspace::stale_proven`]. +#[derive(Clone, Debug)] +pub struct StaleEntry { + pub file: String, + pub reason: &'static str, } /// The event kind for a transition, or `None` if the pair is not a legal diff --git a/tests/proven.rs b/tests/proven.rs new file mode 100644 index 0000000..72191f3 --- /dev/null +++ b/tests/proven.rs @@ -0,0 +1,77 @@ +//! Proven-state content hashing and staleness detection. + +use arghda_core::{proven, State, Workspace}; +use std::fs; + +#[test] +fn promote_records_hash_and_stale_detects_a_later_edit() { + let tmp = tempfile::tempdir().unwrap(); + let ws = Workspace::init(tmp.path()).unwrap(); + fs::write( + ws.state_dir(State::Working).join("Foo.agda"), + "module Foo where\n", + ) + .unwrap(); + + ws.transition("Foo.agda", State::Working, State::Proven, None) + .unwrap(); + + // The promotion recorded a hash, and nothing is stale yet. + let manifest = proven::load(tmp.path()).unwrap(); + assert!(manifest.entries.contains_key("Foo.agda")); + assert_eq!(manifest.entries["Foo.agda"].sha256.len(), 64); + assert!(ws.stale_proven().unwrap().is_empty()); + + // Edit the proven file in place -> it is now stale. + fs::write( + ws.state_dir(State::Proven).join("Foo.agda"), + "module Foo where\nx = Set\n", + ) + .unwrap(); + let stale = ws.stale_proven().unwrap(); + assert_eq!(stale.len(), 1); + assert_eq!(stale[0].file, "Foo.agda"); + assert_eq!(stale[0].reason, "content changed since promotion"); +} + +#[test] +fn invalidate_returns_to_inbox_and_drops_the_hash() { + let tmp = tempfile::tempdir().unwrap(); + let ws = Workspace::init(tmp.path()).unwrap(); + fs::write( + ws.state_dir(State::Working).join("Bar.agda"), + "module Bar where\n", + ) + .unwrap(); + ws.transition("Bar.agda", State::Working, State::Proven, None) + .unwrap(); + + // proven -> inbox invalidation. + ws.transition( + "Bar.agda", + State::Proven, + State::Inbox, + Some("upstream changed".into()), + ) + .unwrap(); + + assert_eq!(ws.state_of("Bar.agda"), Some(State::Inbox)); + let manifest = proven::load(tmp.path()).unwrap(); + assert!(!manifest.entries.contains_key("Bar.agda")); +} + +#[test] +fn proven_file_without_a_record_is_stale() { + let tmp = tempfile::tempdir().unwrap(); + let ws = Workspace::init(tmp.path()).unwrap(); + // Drop a file straight into proven/ without going through promote, so no + // hash was recorded — the audit case of "how did this get here?". + fs::write( + ws.state_dir(State::Proven).join("Loose.agda"), + "module Loose where\n", + ) + .unwrap(); + let stale = ws.stale_proven().unwrap(); + assert_eq!(stale.len(), 1); + assert_eq!(stale[0].reason, "no recorded hash"); +}