diff --git a/.gitignore b/.gitignore index fd5fec7..2dcc89f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ # Rust build artefacts target/ +# Haskell / cabal build artefacts (from building agda-unused locally) +dist-newstyle/ + # Editor / tooling backups *.bak *~ diff --git a/CHANGELOG.md b/CHANGELOG.md index df7dab1..9421d1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,12 @@ All notable changes to arghda-core are documented here. The format follows `^[a-z][A-Za-z0-9-]*$`, per the spec). Detects top-level (column-0) signatures only, which gives the export-only filter for free; tolerant of multi-line `using` lists; self-skips when no `Smoke.agda` is in scope. +- `unused-import` (warn): re-emits the findings of the external `agda-unused` + tool (spec §Linter rules). Opt-in behind `scan --unused`; runs `agda-unused` + per file in local mode with `LC_ALL=C.UTF-8`, parses its `--json` output, + and re-emits each finding as an `unused-import` warning attributed to the + file. Degrades gracefully (with a note) when `agda-unused` is not on `PATH`, + mirroring how `check` tolerates a missing `agda`. - RSR scaffolding: `.machine_readable/6a2/` artefacts, `0-AI-MANIFEST.a2ml`, `Justfile`, `.well-known/`, and community-health files. - Content-hash invalidation of `proven`: promotion records a SHA-256 of the diff --git a/README.md b/README.md index 052275a..2524928 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,11 @@ record. its default `^[a-z][A-Za-z0-9-]*$` is deliberately broad, so operators narrow it to their own headline-naming convention. Self-skips when no `Smoke.agda` is in scope (e.g. a single-file `check`) + - `unused-import` (warn) — re-emits the findings of the external + [`agda-unused`](https://github.com/msuperdock/agda-unused) tool. Opt-in + behind `scan --unused` (it runs `agda-unused` per file in local mode and + re-checks each file); skipped with a note if the binary is not on `PATH`. + Invoked with `LC_ALL=C.UTF-8` so it can read UTF-8 Agda sources - Workspace state machine — transitions are file moves, each logged to `.arghda/events.jsonl` (`claim`, `promote`, `reject`, `requeue`, `invalidate`) @@ -51,12 +56,12 @@ plus standalone scratch files. `scan` also flags the files deliberately outside the `--safe --without-K` kernel cone (`Fidelity.agda`, the cubical island, the postulated shadow). -Not yet: `unused-import` (shells out to `agda-unused`); the DAG `headlines` -field (the extractor now exists for `unpinned-headline`, but the per-node -`headlines` array in the DAG schema is still unpopulated); persisting the -headline pattern in `.arghda/config.toml` (currently a CLI flag); the Groove -service manifest. (`missing-without-k` is subsumed by `missing-safe-pragma`, -which already reports a missing `--without-K`.) +Not yet: the DAG `headlines` field (the extractor now exists for +`unpinned-headline`, but the per-node `headlines` array in the DAG schema is +still unpopulated); persisting the headline pattern in `.arghda/config.toml` +(currently a CLI flag); the Groove service manifest. (`missing-without-k` is +subsumed by `missing-safe-pragma`, which already reports a missing +`--without-K`.) ## Build diff --git a/src/lib.rs b/src/lib.rs index f1b0fc2..1fea9d2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,6 +12,7 @@ pub mod hash; pub mod lint; pub mod proven; pub mod timestamp; +pub mod unused; pub mod watcher; pub mod workspace; diff --git a/src/main.rs b/src/main.rs index 91b63a7..234f9c9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,8 @@ use anyhow::{Context, Result}; use arghda_core::lint::LintContext; use arghda_core::{ - build_dag, check_file, default_rules, event, graph, rules_with_config, run_lints, watcher, - LintRule, RuleConfig, State, Workspace, + build_dag, check_file, default_rules, event, graph, rules_with_config, run_lints, unused, + watcher, LintRule, RuleConfig, State, Workspace, }; use clap::{Parser, Subcommand}; use std::path::{Path, PathBuf}; @@ -39,6 +39,11 @@ enum Cmd { /// (`unpinned-headline` rule). Defaults to the spec pattern. #[arg(long)] headline_pattern: Option, + /// Also run the external `agda-unused` analyser and re-emit its + /// findings as `unused-import` warnings (requires `agda-unused` on + /// PATH; skipped with a note if absent). + #[arg(long)] + unused: bool, /// Emit the report as JSON instead of human-readable text. #[arg(long)] json: bool, @@ -101,8 +106,9 @@ fn main() -> Result<()> { path, entry, headline_pattern, + unused, json, - } => scan(&path, &entry, headline_pattern.as_deref(), json)?, + } => scan(&path, &entry, headline_pattern.as_deref(), unused, json)?, Cmd::Check { file, include_root, @@ -142,6 +148,7 @@ fn scan( include_root: &Path, entry: &[PathBuf], headline_pattern: Option<&str>, + unused: bool, json: bool, ) -> Result<()> { let (roots, rules) = resolve_roots_and_rules(include_root, entry, headline_pattern)?; @@ -151,9 +158,6 @@ fn scan( }; let mut reports = Vec::new(); - let mut hard_blocks = 0usize; - let mut warns = 0usize; - for entry in WalkDir::new(include_root) .into_iter() .filter_map(|e| e.ok()) @@ -164,17 +168,45 @@ fn scan( } let report = run_lints(path, &ctx, &rules).with_context(|| format!("linting {}", path.display()))?; - hard_blocks += report.hard_blocks().count(); - warns += report.warns().count(); reports.push(report); } + let files_scanned = reports.len(); + + // Optional unused-code pass via the external `agda-unused` (per file, + // local mode). Opt-in because it needs the external tool and re-checks + // each file. Findings attach to that file's report. + if unused { + let mut available = true; + let mut saw_error = false; + for report in &mut reports { + let check = unused::check_file(&report.file, include_root)?; + if !check.available { + available = false; + break; + } + saw_error |= check.kind.as_deref() == Some("error"); + for d in check.diagnostics { + report.push(d); + } + } + if !available { + eprintln!("note: `agda-unused` not found on PATH; skipping unused-import findings"); + } else if saw_error { + eprintln!( + "note: agda-unused could not analyse some files; unused-import findings may be incomplete" + ); + } + } + + let hard_blocks: usize = reports.iter().map(|r| r.hard_blocks().count()).sum(); + let warns: usize = reports.iter().map(|r| r.warns().count()).sum(); if json { let payload = serde_json::json!({ "version": "0.1", "include_root": include_root, "entry_modules": roots, - "files_scanned": reports.len(), + "files_scanned": files_scanned, "hard_blocks": hard_blocks, "warns": warns, "reports": reports, diff --git a/src/unused.rs b/src/unused.rs new file mode 100644 index 0000000..a47f87b --- /dev/null +++ b/src/unused.rs @@ -0,0 +1,241 @@ +//! Shelling out to `agda-unused` to find unused code (the `unused-import` rule). +//! +//! Like [`crate::agda`], ArghDA does not analyse for unused code itself — it +//! asks the external `agda-unused` tool and re-emits its findings as ArghDA +//! diagnostics (`docs/arghda-spec.adoc` §Linter rules). If `agda-unused` is +//! not on `PATH` it degrades gracefully (`available: false`) so the rest of +//! the engine still works; the rule is opt-in behind `scan --unused`. +//! +//! We invoke it **per file in local mode** (`agda-unused --json -i +//! `). Local mode flags imports/code unused *within* that file (the +//! rule's namesake); `--global` instead treats the given root's imports as +//! project roots and would not flag them. `agda-unused` reads source in the +//! process locale, so we force `LC_ALL=C.UTF-8` — Agda sources are UTF-8 and +//! without it the tool aborts on the first multi-byte character. +//! +//! `--json` output is a wrapper object +//! `{ "type": "none" | "unused" | "error", "message": "…" }`; when `type` is +//! `"unused"` the findings live in `message` as location/description pairs: +//! +//! ```text +//! /abs/path/File.agda:line,col-col +//! unused import ‘Name’ +//! ``` +//! +//! re-emitted as `unused-import` `warn` diagnostics. + +use crate::diagnostic::{Diagnostic, Severity}; +use anyhow::{Context, Result}; +use serde::Deserialize; +use std::path::{Path, PathBuf}; +use std::process::Command; + +/// The rule name re-emitted findings carry. +pub const RULE_NAME: &str = "unused-import"; + +/// Result of an `agda-unused` pass over a single file. +#[derive(Clone, Debug, Default)] +pub struct UnusedCheck { + /// Whether the `agda-unused` binary was found and executed. + pub available: bool, + /// The `type` field of the JSON wrapper ("none" / "unused" / "error"), + /// so the caller can surface the case where `agda-unused` could not + /// analyse a file. + pub kind: Option, + /// Findings located in the checked file, as `unused-import` warnings. + pub diagnostics: Vec, +} + +/// The `--json` wrapper `agda-unused` emits. +#[derive(Debug, Deserialize)] +struct UnusedJson { + #[serde(rename = "type")] + kind: String, + #[serde(default)] + message: String, +} + +/// Run `agda-unused --json -i ` (local mode, UTF-8) and +/// return the unused-code findings located in `file`. `available: false` iff +/// the binary is absent. +pub fn check_file(file: &Path, include_root: &Path) -> Result { + let Some(parsed) = run_one(file, include_root)? else { + return Ok(UnusedCheck::default()); + }; + let mut check = UnusedCheck { + available: true, + kind: Some(parsed.kind.clone()), + diagnostics: Vec::new(), + }; + if parsed.kind == "unused" { + // Local mode can also visit dependencies; keep only findings that + // belong to `file` so each is attributed exactly once (when we invoke + // agda-unused on its own file). agda-unused reports absolute paths, so + // compare canonicalised. + let target = std::fs::canonicalize(file).ok(); + for (path, line, desc) in parse_findings(&parsed.message) { + if target.is_some() && std::fs::canonicalize(&path).ok() == target { + check.diagnostics.push(Diagnostic { + rule: RULE_NAME.to_string(), + severity: Severity::Warn, + file: file.to_path_buf(), + message: if desc.is_empty() { + "unused code".to_string() + } else { + desc + }, + line, + }); + } + } + } + Ok(check) +} + +/// Invoke `agda-unused` once on `file`. `Ok(None)` iff the binary is absent. +/// A run that produces no JSON object (e.g. the tool itself errored) is +/// reported as an `"error"` wrapper rather than failing the whole scan. +fn run_one(file: &Path, include_root: &Path) -> Result> { + let output = Command::new("agda-unused") + .arg(file) + .arg("--json") + .arg("-i") + .arg(include_root) + .env("LC_ALL", "C.UTF-8") + .output(); + + match output { + Ok(out) => { + let mut combined = String::from_utf8_lossy(&out.stdout).into_owned(); + combined.push_str(&String::from_utf8_lossy(&out.stderr)); + match extract_json(&combined) { + Some(json) => { + let parsed: UnusedJson = serde_json::from_str(json).with_context(|| { + format!("parsing agda-unused JSON for {}", file.display()) + })?; + Ok(Some(parsed)) + } + // No JSON: agda-unused crashed/errored on this file. Surface as + // an error wrapper so one bad file does not abort the scan. + None => Ok(Some(UnusedJson { + kind: "error".to_string(), + message: combined.trim().to_string(), + })), + } + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None), + Err(e) => Err(e.into()), + } +} + +/// Extract the outermost `{ … }` JSON object from `s` (tolerant of any +/// non-JSON text printed around it). +fn extract_json(s: &str) -> Option<&str> { + let start = s.find('{')?; + let end = s.rfind('}')?; + (end >= start).then(|| &s[start..=end]) +} + +/// Parse the findings out of an `agda-unused` `message`. Each finding is a +/// location line (`…/File.agda:line,col-col`) followed by an indented +/// description line (`unused import ‘Name’`). +fn parse_findings(message: &str) -> Vec<(PathBuf, Option, String)> { + let lines: Vec<&str> = message.lines().collect(); + let mut out = Vec::new(); + let mut i = 0; + while i < lines.len() { + if let Some((file, line)) = parse_location(lines[i]) { + // The description is the next non-empty line that is not itself a + // location (the indented `’`). + let mut desc = String::new(); + if i + 1 < lines.len() { + let next = lines[i + 1].trim(); + if !next.is_empty() && parse_location(lines[i + 1]).is_none() { + desc = next.to_string(); + i += 1; + } + } + out.push((file, line, desc)); + } + i += 1; + } + out +} + +/// Parse a location line `…/File.agda:line,col-col` into the file path and the +/// 1-based line number. Tolerant: the trailing column/range part is optional. +fn parse_location(line: &str) -> Option<(PathBuf, Option)> { + let trimmed = line.trim(); + // Anchor on the `.agda:` boundary so colons elsewhere don't confuse it. + let marker = ".agda:"; + let idx = trimmed.rfind(marker)?; + let file = PathBuf::from(&trimmed[..idx + ".agda".len()]); + let rest = &trimmed[idx + marker.len()..]; + // `rest` looks like `5,1-19` or `5,1-9,4`; take the leading integer. + let lineno = rest + .split(|c: char| !c.is_ascii_digit()) + .find(|s| !s.is_empty()) + .and_then(|s| s.parse::().ok()); + Some((file, lineno)) +} + +#[cfg(test)] +mod tests { + use super::*; + + // The wrapper agda-unused actually emits (captured live, local mode, on a + // fixture with an unused import). Note the Unicode quotes ‘ ’. + const REAL_UNUSED: &str = + r#"{"message":"/p/Main.agda:5,1-19\n unused import ‘Helper’","type":"unused"}"#; + const REAL_NONE: &str = r#"{"message":"No unused code.","type":"none"}"#; + + #[test] + fn default_outcome_is_unavailable_and_empty() { + let c = UnusedCheck::default(); + assert!(!c.available); + assert!(c.kind.is_none()); + assert!(c.diagnostics.is_empty()); + } + + #[test] + fn extract_json_finds_object_amid_noise() { + assert_eq!( + extract_json("warn: blah {\"type\":\"none\"} trailing"), + Some("{\"type\":\"none\"}") + ); + assert_eq!(extract_json("no object here"), None); + } + + #[test] + fn deserialises_real_wrappers() { + let unused: UnusedJson = serde_json::from_str(extract_json(REAL_UNUSED).unwrap()).unwrap(); + assert_eq!(unused.kind, "unused"); + assert!(unused.message.contains("unused import")); + let none: UnusedJson = serde_json::from_str(extract_json(REAL_NONE).unwrap()).unwrap(); + assert_eq!(none.kind, "none"); + } + + #[test] + fn parse_location_reads_path_and_line() { + let (f, l) = parse_location("/home/u/proj/Foo/Bar.agda:12,3-8").unwrap(); + assert_eq!(f, PathBuf::from("/home/u/proj/Foo/Bar.agda")); + assert_eq!(l, Some(12)); + } + + #[test] + fn parse_location_tolerates_line_range_and_rejects_non_location() { + assert_eq!(parse_location("X.agda:7,1-9,4").unwrap().1, Some(7)); + assert!(parse_location(" unused import ‘Foo’").is_none()); + } + + #[test] + fn parse_findings_pairs_real_location_with_description() { + // The `message` payload of REAL_UNUSED, with the \n unescaped. + let msg = "/p/Main.agda:5,1-19\n unused import ‘Helper’"; + let found = parse_findings(msg); + assert_eq!(found.len(), 1); + assert_eq!(found[0].0, PathBuf::from("/p/Main.agda")); + assert_eq!(found[0].1, Some(5)); + assert_eq!(found[0].2, "unused import ‘Helper’"); + } +} diff --git a/tests/fixtures/unused/Helper.agda b/tests/fixtures/unused/Helper.agda new file mode 100644 index 0000000..04dbc6c --- /dev/null +++ b/tests/fixtures/unused/Helper.agda @@ -0,0 +1,6 @@ +{-# OPTIONS --safe --without-K #-} +module Helper where + +-- A trivial, dependency-free definition. +helper : Set₁ +helper = Set diff --git a/tests/fixtures/unused/Main.agda b/tests/fixtures/unused/Main.agda new file mode 100644 index 0000000..ada4b01 --- /dev/null +++ b/tests/fixtures/unused/Main.agda @@ -0,0 +1,6 @@ +{-# OPTIONS --safe --without-K #-} +module Main where + +-- Imports Helper but uses nothing from it: a genuine unused import, which is +-- exactly what `agda-unused` reports (re-emitted as `unused-import`). +open import Helper