Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .machine_readable/6a2/STATE.a2ml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ milestones = [
{ name = "State machine (claim/promote/reject/requeue/invalidate) + events.jsonl", completion = 100 },
{ name = "`check` command (Agda typecheck + lint verdict)", completion = 100 },
{ name = "Multi-root reachability (union of discovered CI roots)", completion = 100 },
{ name = "Content-hash invalidation of proven (file-level; +imports deferred)", completion = 75 },
{ name = "Remaining lint rules (unpinned-headline, unused-import)", completion = 0 },
{ name = "Content-hash invalidation of proven", completion = 0 },
{ name = "Groove service manifest (/.well-known/groove)", completion = 0 },
{ name = "RSR retrofit (.machine_readable, Justfile, .well-known)", completion = 60 },
]
Expand All @@ -38,8 +38,8 @@ milestones = [

[critical-next-actions]
actions = [
"Add unpinned-headline + unused-import lint rules",
"Content-hash proven files (SHA-256 of file + imports) for auto-invalidation",
"Add unpinned-headline (needs config + headline regex) + unused-import (agda-unused) lint rules",
"Extend content-hash to cover transitive imports (needs include-root tracking)",
"Serve /.well-known/groove for PanLL discovery",
"Scaffold arghda-studio (AffineScript visual layer consuming the dag JSON)",
]
Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ All notable changes to arghda-core are documented here. The format follows
(warn).
- RSR scaffolding: `.machine_readable/6a2/` artefacts, `0-AI-MANIFEST.a2ml`,
`Justfile`, `.well-known/`, and community-health files.
- Content-hash invalidation of `proven`: promotion records a SHA-256 of the
file in `.arghda/hashes.json`; the `stale` command reports proven files
edited since promotion, and `stale --invalidate` moves them back to inbox
(the `proven -> inbox` invalidation). Dependency-free SHA-256, pinned
against the NIST test vectors.

### Notes

Expand Down
123 changes: 123 additions & 0 deletions src/hash.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
//! Dependency-free SHA-256 (FIPS 180-4).
//!
//! Used for `proven`-state content hashing — detecting when a file changes
//! after it was promoted, so the `proven -> inbox` invalidation in
//! `arghda-spec.adoc` can fire. This is change-detection, not a security
//! boundary, and the crate's ethos is lightweight, so we keep it hermetic
//! (no `sha2` dependency tree) and pin correctness against the canonical
//! NIST vectors in the tests below.

const H0: [u32; 8] = [
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
];

const K: [u32; 64] = [
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
];

/// Lowercase hex SHA-256 digest of `data`.
pub fn sha256_hex(data: &[u8]) -> String {
// Pre-processing: append 0x80, pad with zeros to 56 mod 64, append the
// 64-bit big-endian bit length.
let mut msg = data.to_vec();
let bit_len = (data.len() as u64).wrapping_mul(8);
msg.push(0x80);
while msg.len() % 64 != 56 {
msg.push(0);
}
msg.extend_from_slice(&bit_len.to_be_bytes());

let mut h = H0;
for chunk in msg.chunks_exact(64) {
let mut w = [0u32; 64];
for (i, word) in chunk.chunks_exact(4).take(16).enumerate() {
w[i] = u32::from_be_bytes([word[0], word[1], word[2], word[3]]);
}
for i in 16..64 {
let s0 = w[i - 15].rotate_right(7) ^ w[i - 15].rotate_right(18) ^ (w[i - 15] >> 3);
let s1 = w[i - 2].rotate_right(17) ^ w[i - 2].rotate_right(19) ^ (w[i - 2] >> 10);
w[i] = w[i - 16]
.wrapping_add(s0)
.wrapping_add(w[i - 7])
.wrapping_add(s1);
}

let [mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut hh] = h;
for (&ki, &wi) in K.iter().zip(w.iter()) {
let s1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25);
let ch = (e & f) ^ ((!e) & g);
let t1 = hh
.wrapping_add(s1)
.wrapping_add(ch)
.wrapping_add(ki)
.wrapping_add(wi);
let s0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22);
let maj = (a & b) ^ (a & c) ^ (b & c);
let t2 = s0.wrapping_add(maj);
hh = g;
g = f;
f = e;
e = d.wrapping_add(t1);
d = c;
c = b;
b = a;
a = t1.wrapping_add(t2);
}
for (slot, v) in h.iter_mut().zip([a, b, c, d, e, f, g, hh]) {
*slot = slot.wrapping_add(v);
}
}

let mut out = String::with_capacity(64);
for word in h {
out.push_str(&format!("{word:08x}"));
}
out
}

#[cfg(test)]
mod tests {
use super::*;

// Canonical FIPS 180-4 / NIST test vectors.
#[test]
fn empty_string() {
assert_eq!(
sha256_hex(b""),
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
);
}

#[test]
fn abc() {
assert_eq!(
sha256_hex(b"abc"),
"ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
);
}

#[test]
fn hello_world() {
assert_eq!(
sha256_hex(b"hello world"),
"b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
);
}

#[test]
fn multi_block() {
// 56 bytes forces a second padding block (length straddles the boundary).
let input = "a".repeat(1000);
assert_eq!(
sha256_hex(input.as_bytes()),
"41edece42d63e8d9bf515a9ba6932e1c20cbc9f5a5d134645adb5db1b9737ea3"
);
}
}
4 changes: 3 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ pub mod dag;
pub mod diagnostic;
pub mod event;
pub mod graph;
pub mod hash;
pub mod lint;
pub mod proven;
pub mod timestamp;
pub mod watcher;
pub mod workspace;
Expand All @@ -19,4 +21,4 @@ pub use diagnostic::{Diagnostic, LintReport, Severity};
pub use event::{Event, EventKind};
pub use graph::{build as build_graph, ImportGraph};
pub use lint::{default_rules, run_lints, LintRule};
pub use workspace::{State, Workspace};
pub use workspace::{StaleEntry, State, Workspace};
35 changes: 35 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,13 @@ enum Cmd {
Invalidate { workspace: PathBuf, file: String },
/// Print the workspace event log.
Events { workspace: PathBuf },
/// List proven files whose content changed since promotion (stale).
Stale {
workspace: PathBuf,
/// Move stale files back to inbox (proven -> inbox).
#[arg(long)]
invalidate: bool,
},
/// Watch `inbox/` and `working/` in a workspace; print events.
Watch { workspace: PathBuf },
}
Expand Down Expand Up @@ -105,6 +112,10 @@ fn main() -> Result<()> {
transition(&workspace, &file, State::Proven, State::Inbox)?
}
Cmd::Events { workspace } => events(&workspace)?,
Cmd::Stale {
workspace,
invalidate,
} => stale(&workspace, invalidate)?,
Cmd::Watch { workspace } => watch(&workspace)?,
}
Ok(())
Expand Down Expand Up @@ -290,6 +301,30 @@ fn events(workspace: &Path) -> Result<()> {
Ok(())
}

fn stale(workspace: &Path, invalidate: bool) -> Result<()> {
let ws = Workspace::open(workspace)?;
let stale = ws.stale_proven()?;
if stale.is_empty() {
println!("(no stale proven files)");
return Ok(());
}
for s in &stale {
println!("stale: {} ({})", s.file, s.reason);
}
if invalidate {
for s in &stale {
ws.transition(
&s.file,
State::Proven,
State::Inbox,
Some(format!("auto-invalidated: {}", s.reason)),
)?;
println!("invalidated: {} (proven -> inbox)", s.file);
}
}
Ok(())
}

fn watch(workspace_path: &Path) -> Result<()> {
let ws = Workspace::open(workspace_path)?;
let inbox = ws.state_dir(State::Inbox);
Expand Down
66 changes: 66 additions & 0 deletions src/proven.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
//! The `proven`-state content-hash manifest (`.arghda/hashes.json`).
//!
//! When a file is promoted to `proven`, its content hash is recorded here.
//! [`crate::Workspace::stale_proven`] recomputes and compares, so a `proven`
//! file that was edited (or never recorded) after promotion is surfaced and
//! can be sent back to `inbox` — the `proven -> inbox` invalidation from
//! `docs/arghda-spec.adoc`.
//!
//! v1 hashes the file content only. Hashing a file *plus its transitive
//! imports* (the spec's full form) needs the workspace to know the source
//! tree's include root, which the flat triage layout does not yet carry;
//! that is a documented follow-on.

use crate::hash::sha256_hex;
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use std::fs;
use std::path::{Path, PathBuf};

pub const HASHES_FILE: &str = "hashes.json";

/// What was recorded when a file entered `proven`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ProvenRecord {
pub sha256: String,
pub promoted_at: String,
}

/// The whole manifest: basename -> record.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct ProvenManifest {
#[serde(default)]
pub entries: BTreeMap<String, ProvenRecord>,
}

fn manifest_path(ws_root: &Path) -> PathBuf {
ws_root.join(".arghda").join(HASHES_FILE)
}

/// Load the manifest, or an empty one if it does not exist yet.
pub fn load(ws_root: &Path) -> Result<ProvenManifest> {
let path = manifest_path(ws_root);
if !path.is_file() {
return Ok(ProvenManifest::default());
}
let contents =
fs::read_to_string(&path).with_context(|| format!("reading {}", path.display()))?;
serde_json::from_str(&contents).with_context(|| format!("parsing {}", path.display()))
}

/// Write the manifest, creating `.arghda/` if needed.
pub fn save(ws_root: &Path, manifest: &ProvenManifest) -> Result<()> {
let meta = ws_root.join(".arghda");
fs::create_dir_all(&meta).with_context(|| format!("creating {}", meta.display()))?;
let path = manifest_path(ws_root);
let mut json = serde_json::to_string_pretty(manifest).context("serialising manifest")?;
json.push('\n');
fs::write(&path, json).with_context(|| format!("writing {}", path.display()))
}

/// SHA-256 of a file's bytes, lowercase hex.
pub fn hash_file(path: &Path) -> Result<String> {
let bytes = fs::read(path).with_context(|| format!("reading {}", path.display()))?;
Ok(sha256_hex(&bytes))
}
56 changes: 56 additions & 0 deletions src/workspace.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::event::{self, Event, EventKind};
use crate::proven::{self, ProvenRecord};
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::fs;
Expand Down Expand Up @@ -121,10 +122,65 @@ impl Workspace {
.with_from(from)
.with_to(to)
.with_note(note);

// Maintain the proven content-hash manifest: record on entry to
// `proven`, drop on exit. This is what lets `stale_proven` detect a
// file edited after promotion.
if to == State::Proven {
let mut manifest = proven::load(&self.root)?;
manifest.entries.insert(
file_name.to_string(),
ProvenRecord {
sha256: proven::hash_file(&dst)?,
promoted_at: ev.ts.clone(),
},
);
proven::save(&self.root, &manifest)?;
} else if from == State::Proven {
let mut manifest = proven::load(&self.root)?;
if manifest.entries.remove(file_name).is_some() {
proven::save(&self.root, &manifest)?;
}
}

event::append(&self.root, &ev)
.with_context(|| format!("logging {:?} for {}", kind, file_name))?;
Ok(dst)
}

/// Proven files whose current content no longer matches the hash recorded
/// at promotion (or that were never recorded). These are the candidates
/// for `proven -> inbox` invalidation.
pub fn stale_proven(&self) -> Result<Vec<StaleEntry>> {
let manifest = proven::load(&self.root)?;
let mut out = Vec::new();
for path in self.list(State::Proven)? {
let Some(name) = path.file_name().and_then(|s| s.to_str()) else {
continue;
};
let current = proven::hash_file(&path)?;
let reason = match manifest.entries.get(name) {
None => Some("no recorded hash"),
Some(rec) if rec.sha256 != current => Some("content changed since promotion"),
Some(_) => None,
};
if let Some(reason) = reason {
out.push(StaleEntry {
file: name.to_string(),
reason,
});
}
}
out.sort_by(|a, b| a.file.cmp(&b.file));
Ok(out)
}
}

/// A `proven` file flagged stale by [`Workspace::stale_proven`].
#[derive(Clone, Debug)]
pub struct StaleEntry {
pub file: String,
pub reason: &'static str,
}

/// The event kind for a transition, or `None` if the pair is not a legal
Expand Down
Loading
Loading