From a98c678efd0261d6bae1e76221ac491089d97e5f Mon Sep 17 00:00:00 2001 From: Cemil ILIK Date: Thu, 21 May 2026 15:45:13 +0300 Subject: [PATCH 1/3] fix(supply-chain): ignore 10 no-fix CVEs that failed nightly 2026-05-21 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 2026-05-21 nightly (run 26210539455) failed on `pip-audit` because the OSV / GHSA databases published 9 new torch advisories and 1 markdown advisory overnight, none of which carries a recorded upstream fix (the torch advisories are unfixed; the markdown record is missing its `fixed` event — see triage below). Without an ignore, `tools/check_pip_audit.py` fails closed on UNKNOWN severity (pip-audit's JSON does not serialise OSV severity) and the nightly stays red, masking real future breakage. Triage (issue #58): - torch PYSEC-2025-191..197, PYSEC-2025-210, PYSEC-2026-139: all require a LOCAL attacker passing malformed inputs to specific torch APIs (`jit.script`, `lstm_cell`, `cuda.memory.*`, `pt2` loader, etc.). ForgeLM is a local-CLI tool; an attacker with that access is already inside the trust boundary. None of the affected APIs are called with attacker-controlled arguments in `forgelm/`. - markdown PYSEC-2026-89: OSV affected-range misclassification — the advisory description states the fix shipped in markdown==3.8.1 but the range record has no `fixed` event, so every version is flagged. Installed 3.10.2 is post-fix. Each ignore is documented inline in `.github/workflows/nightly.yml` with the surface, the threat-model carve-out, and the condition for re-evaluating (per `docs/reference/supply_chain_security.md`). Also drops the stale "Issue #37 tracks the active set" reference — #37 was a closed nightly-failure issue, not a tracker. 
Refs: #58 Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/nightly.yml | 75 +++++++++++++++++++++++++++++++++-- 1 file changed, 72 insertions(+), 3 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index d1c687d..148cb53 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -280,9 +280,14 @@ jobs: # pip-audit returns non-zero on any finding; capture the JSON # and apply our own severity policy via tools/check_pip_audit.py. # - # Vulnerability ignores (each ignore must carry a written - # justification + remove-after condition; review at every - # release cycle). Issue #37 tracks the active set. + # Vulnerability ignores: each entry below carries a written + # justification + remove-after condition and is re-validated + # at every release cycle (per + # docs/reference/supply_chain_security.md). When an ignore + # is added, link the triaging issue in the commit message so + # `git log -S CVE-…` is enough to recover the rationale. + # + # ----- transformers ----- # # CVE-2026-1839 — transformers, fix: 5.0.0rc3 (release candidate). # ForgeLM's pyproject pins ``transformers>=4.38.0,<5.0.0`` and @@ -293,10 +298,74 @@ jobs: # transformers ships a 4.x point release with the fix or # (b) ForgeLM cuts a tracked major-version-bump cycle that # raises the upper bound. Re-evaluate at each release. + # + # ----- torch (added 2026-05-21, triage in issue #58) ----- + # + # All nine torch CVEs below share the same threat-model carve- + # out: each requires a LOCAL attacker passing malformed inputs + # to a specific torch API. ForgeLM is a local-CLI training + # tool — an attacker with that level of access is already + # inside the trust boundary. None are reachable through + # ForgeLM's external surfaces (CLI flags, config YAML, + # dataset files); the affected APIs are not called with + # attacker-controlled arguments anywhere in forgelm/. 
+ # Verified against advisory text in pypa/advisory-database + # on 2026-05-21. Re-evaluate at every release; if any CVE + # is re-scored to a remote attack vector, drop the ignore + # and bump the pinned torch version. + # + # PYSEC-2025-191 (CVE-2025-2953) — torch.mkldnn_max_pool2d + # local DoS; advisory note records vendor doubt about + # the finding's real existence. + # PYSEC-2025-192 (CVE-2025-2998) — torch.nn.utils.rnn.pad_packed_sequence + # local memory corruption. + # PYSEC-2025-193 (CVE-2025-2999) — torch.nn.utils.rnn.unpack_sequence + # local memory corruption. + # PYSEC-2025-194 (CVE-2025-3000) — torch.jit.script + # local memory corruption; ForgeLM does not expose + # jit.script to user inputs. + # PYSEC-2025-195 (CVE-2025-3001) — torch.lstm_cell + # local memory corruption; cell-level API not on + # ForgeLM's training path. + # PYSEC-2025-196 (CVE-2025-3121) — torch.jit.jit_module_from_flatbuffer + # local memory corruption; flatbuffer JIT loader not + # used by ForgeLM. + # PYSEC-2025-197 (CVE-2025-3136) — torch.cuda.memory.caching_allocator_delete + # local memory corruption; user does not control the call. + # PYSEC-2025-210 (CVE-2025-63396) — torch.profiler.profile + # local DoS via missing profiler.stop() finalisation; + # the profiler is not in ForgeLM's production paths. + # PYSEC-2026-139 (CVE-2026-4538) — pt2 Loading Handler + # local deserialization, same trust model as + # torch.load(pickle): operators are documented as + # required to only load trusted checkpoints. Fix PR + # pytorch/pytorch#176791 not merged at time of ignore. + # + # ----- markdown (added 2026-05-21, triage in issue #58) ----- + # + # PYSEC-2026-89 (CVE-2025-69534) — markdown (transitive via + # tensorboard). OSV affected-range misclassification: + # the advisory description states "fixed in version + # 3.8.1" but the OSV range record has no `fixed` event, + # so every released version is flagged as vulnerable. + # Installed 3.10.2 is post-fix. 
Verified against + # pypa/advisory-database/vulns/markdown/PYSEC-2026-89.yaml + # on 2026-05-21. Drop this ignore when the OSV record + # gains a `fixed: 3.8.1` event. pip-audit \ --format json \ --output /tmp/pip-audit.json \ --ignore-vuln CVE-2026-1839 \ + --ignore-vuln PYSEC-2025-191 \ + --ignore-vuln PYSEC-2025-192 \ + --ignore-vuln PYSEC-2025-193 \ + --ignore-vuln PYSEC-2025-194 \ + --ignore-vuln PYSEC-2025-195 \ + --ignore-vuln PYSEC-2025-196 \ + --ignore-vuln PYSEC-2025-197 \ + --ignore-vuln PYSEC-2025-210 \ + --ignore-vuln PYSEC-2026-139 \ + --ignore-vuln PYSEC-2026-89 \ || true python3 tools/check_pip_audit.py /tmp/pip-audit.json From f93d8544dac9aeb3527ac7d7c0f7908bc220c293 Mon Sep 17 00:00:00 2001 From: Cemil ILIK Date: Thu, 21 May 2026 16:33:51 +0300 Subject: [PATCH 2/3] refactor(supply-chain): move pip-audit ignores to checked-in YAML file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address PR #59 review comment: extract the growing --ignore-vuln list from .github/workflows/nightly.yml into tools/pip_audit_ignores.yaml, consumed by tools/check_pip_audit.py via a new opt-in --ignores flag. New CVE suppressions no longer require editing the workflow. Design — opt-in, not opt-out ============================ docs/reference/supply_chain_security.md explicitly tells deployers that ForgeLM does NOT ship a default project-level ignore list. The new flag preserves that contract: without --ignores PATH, check_pip_audit.py applies no suppressions, so a deployer running pip install forgelm[security] python3 tools/check_pip_audit.py /tmp/pip-audit.json still sees the full unfiltered severity gate. 
The project's own nightly opts in explicitly: python3 tools/check_pip_audit.py /tmp/pip-audit.json \ --ignores tools/pip_audit_ignores.yaml Schema enforcement ================== Every entry in pip_audit_ignores.yaml must carry six required fields (id, package, reason, threat_model, verified_at, reevaluate_after); missing any one fails the gate with an ::error:: that names the gap. This blocks the "stick a bare id: in and forget" pattern — every suppression now carries a written justification + re-evaluate trigger as required by docs/reference/supply_chain_security.md. Matching uses {id} ∪ aliases on both sides, so an ignore listing the CVE alias still matches a pip-audit finding emitted under its PYSEC primary id (and vice versa). Each match is logged as a ::notice:: annotation so the run summary surfaces the audit trail; suppressions don't disappear into the workflow log silently. Migrations ========== All 11 ignores in nightly.yml (1 transformers: CVE-2026-1839; 9 torch: PYSEC-2025-191..197, PYSEC-2025-210, PYSEC-2026-139; 1 markdown: PYSEC-2026-89) moved one-for-one into the YAML file, with the justifications expanded into structured fields. The workflow's pip-audit step shrinks from ~95 lines of inline comments + per-CVE --ignore-vuln args to a ~12-line block pointing at the YAML. Tests ===== Extends tests/test_check_pip_audit.py with 8 new cases covering: - suppression by primary id and by alias - no false-positive match on unrelated CVEs - schema validation (each required field individually named on failure) - missing / invalid YAML files fail closed - default (no --ignores) is unchanged — deployer-safe - the checked-in tools/pip_audit_ignores.yaml itself passes schema validation (regression guard so the workflow never breaks on its own ignore file) Docs ==== Updates docs/reference/supply_chain_security.md and the TR mirror plus docs/usermanuals/{en,tr}/operations/supply-chain.md with the new deployer workflow (write your own ignores.yaml, pass via --ignores). 
Bilingual parity verified by tools/check_bilingual_parity.py --strict. Refs: #58, #59 (review comment) Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/nightly.yml | 98 +------- docs/reference/supply_chain_security-tr.md | 30 ++- docs/reference/supply_chain_security.md | 31 ++- .../usermanuals/en/operations/supply-chain.md | 20 +- .../usermanuals/tr/operations/supply-chain.md | 20 +- tests/test_check_pip_audit.py | 209 ++++++++++++++++++ tools/check_pip_audit.py | 205 +++++++++++++++-- tools/pip_audit_ignores.yaml | 175 +++++++++++++++ 8 files changed, 672 insertions(+), 116 deletions(-) create mode 100644 tools/pip_audit_ignores.yaml diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 148cb53..acfd274 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -277,97 +277,23 @@ jobs: - name: pip-audit (transitive CVE scan) run: | - # pip-audit returns non-zero on any finding; capture the JSON - # and apply our own severity policy via tools/check_pip_audit.py. + # pip-audit captures every finding; tools/check_pip_audit.py + # applies ForgeLM's severity policy (HIGH → fail, + # MEDIUM → ::warning::, UNKNOWN → fail closed) on the JSON. # - # Vulnerability ignores: each entry below carries a written - # justification + remove-after condition and is re-validated - # at every release cycle (per - # docs/reference/supply_chain_security.md). When an ignore - # is added, link the triaging issue in the commit message so - # `git log -S CVE-…` is enough to recover the rationale. - # - # ----- transformers ----- - # - # CVE-2026-1839 — transformers, fix: 5.0.0rc3 (release candidate). - # ForgeLM's pyproject pins ``transformers>=4.38.0,<5.0.0`` and - # the 5.x branch is a major version bump that breaks downstream - # callers (TRL adapter signature changes + tokenizer-config - # API drift). The CVE has no 4.x backport at the time of - # ignore (verified 2026-05-10). 
Stop-gap until either (a) - # transformers ships a 4.x point release with the fix or - # (b) ForgeLM cuts a tracked major-version-bump cycle that - # raises the upper bound. Re-evaluate at each release. - # - # ----- torch (added 2026-05-21, triage in issue #58) ----- - # - # All nine torch CVEs below share the same threat-model carve- - # out: each requires a LOCAL attacker passing malformed inputs - # to a specific torch API. ForgeLM is a local-CLI training - # tool — an attacker with that level of access is already - # inside the trust boundary. None are reachable through - # ForgeLM's external surfaces (CLI flags, config YAML, - # dataset files); the affected APIs are not called with - # attacker-controlled arguments anywhere in forgelm/. - # Verified against advisory text in pypa/advisory-database - # on 2026-05-21. Re-evaluate at every release; if any CVE - # is re-scored to a remote attack vector, drop the ignore - # and bump the pinned torch version. - # - # PYSEC-2025-191 (CVE-2025-2953) — torch.mkldnn_max_pool2d - # local DoS; advisory note records vendor doubt about - # the finding's real existence. - # PYSEC-2025-192 (CVE-2025-2998) — torch.nn.utils.rnn.pad_packed_sequence - # local memory corruption. - # PYSEC-2025-193 (CVE-2025-2999) — torch.nn.utils.rnn.unpack_sequence - # local memory corruption. - # PYSEC-2025-194 (CVE-2025-3000) — torch.jit.script - # local memory corruption; ForgeLM does not expose - # jit.script to user inputs. - # PYSEC-2025-195 (CVE-2025-3001) — torch.lstm_cell - # local memory corruption; cell-level API not on - # ForgeLM's training path. - # PYSEC-2025-196 (CVE-2025-3121) — torch.jit.jit_module_from_flatbuffer - # local memory corruption; flatbuffer JIT loader not - # used by ForgeLM. - # PYSEC-2025-197 (CVE-2025-3136) — torch.cuda.memory.caching_allocator_delete - # local memory corruption; user does not control the call. 
- # PYSEC-2025-210 (CVE-2025-63396) — torch.profiler.profile - # local DoS via missing profiler.stop() finalisation; - # the profiler is not in ForgeLM's production paths. - # PYSEC-2026-139 (CVE-2026-4538) — pt2 Loading Handler - # local deserialization, same trust model as - # torch.load(pickle): operators are documented as - # required to only load trusted checkpoints. Fix PR - # pytorch/pytorch#176791 not merged at time of ignore. - # - # ----- markdown (added 2026-05-21, triage in issue #58) ----- - # - # PYSEC-2026-89 (CVE-2025-69534) — markdown (transitive via - # tensorboard). OSV affected-range misclassification: - # the advisory description states "fixed in version - # 3.8.1" but the OSV range record has no `fixed` event, - # so every released version is flagged as vulnerable. - # Installed 3.10.2 is post-fix. Verified against - # pypa/advisory-database/vulns/markdown/PYSEC-2026-89.yaml - # on 2026-05-21. Drop this ignore when the OSV record - # gains a `fixed: 3.8.1` event. + # Project-internal CVE suppressions live in + # tools/pip_audit_ignores.yaml; --ignores is opt-in so + # standalone deployer invocations of check_pip_audit.py + # inherit nothing (per docs/reference/supply_chain_security.md). + # Adding an entry to that file is a security-policy change; + # see the file header for the required schema (id, package, + # reason, threat_model, verified_at, reevaluate_after). 
pip-audit \ --format json \ --output /tmp/pip-audit.json \ - --ignore-vuln CVE-2026-1839 \ - --ignore-vuln PYSEC-2025-191 \ - --ignore-vuln PYSEC-2025-192 \ - --ignore-vuln PYSEC-2025-193 \ - --ignore-vuln PYSEC-2025-194 \ - --ignore-vuln PYSEC-2025-195 \ - --ignore-vuln PYSEC-2025-196 \ - --ignore-vuln PYSEC-2025-197 \ - --ignore-vuln PYSEC-2025-210 \ - --ignore-vuln PYSEC-2026-139 \ - --ignore-vuln PYSEC-2026-89 \ || true - python3 tools/check_pip_audit.py /tmp/pip-audit.json + python3 tools/check_pip_audit.py /tmp/pip-audit.json \ + --ignores tools/pip_audit_ignores.yaml - name: bandit (static security analysis) run: | diff --git a/docs/reference/supply_chain_security-tr.md b/docs/reference/supply_chain_security-tr.md index b0578c1..13223e9 100644 --- a/docs/reference/supply_chain_security-tr.md +++ b/docs/reference/supply_chain_security-tr.md @@ -109,11 +109,31 @@ python3 tools/check_pip_audit.py /tmp/pip-audit.json ### Suppression (kasıtlı CVE kabulü) -Bir CVE kabul edildiyse ama henüz düzeltilemiyorsa (upstream release -beklemede, vb.), operatörün risk acceptance log'unda dokümante edip -nightly run için bastırmak amacıyla `pip-audit --ignore-vuln ` -kullanın. ForgeLM proje-seviyesinde bir ignore listesi göndermez — -her suppression operatör-tarafında ve quarterly-reviewed olmalıdır. +ForgeLM varsayılan bir proje-seviyesi ignore listesi göndermez — +`python3 tools/check_pip_audit.py /tmp/pip-audit.json`'u standalone +çalıştıran bir deployer hiçbir suppression miras almaz ve tam +filtresiz gate'i görür. 
Deployer-tarafı kabulleri kendi risk +acceptance log'unuzda dokümante edin ve opt-in flag ile geçin: + +```bash +python3 tools/check_pip_audit.py /tmp/pip-audit.json \ + --ignores path/to/your_ignores.yaml +``` + +YAML dosyasındaki her giriş `id`, `package`, `reason`, `threat_model`, +`verified_at`, ve `reevaluate_after` taşımalıdır (opsiyonel: `aliases`, +`references`); herhangi bir zorunlu alanın eksikliği gate'i kapalı +fail ettirir, böylece dokümante edilmemiş bir suppression sessizce +inemez. Her eşleşme run summary'de `::notice::` annotation olarak +loglanır; audit trail görünür kalır. + +ForgeLM'in kendi nightly'si proje-içi triage için check-in edilmiş +bir [`tools/pip_audit_ignores.yaml`](../../tools/pip_audit_ignores.yaml) +taşır (şu an transformers `CVE-2026-1839` artı 2026-05-21 döngüsünden +gelen dokuz no-fix torch advisory'si ve bir OSV-yanlış-sınıflandırılmış +markdown kaydı). Bu dosya yalnız projenin kendi workflow'u tarafından +`--ignores` ile tüketilir; deployer'lar miras almaz. Her release +cycle'da gözden geçirilir. ## bandit (static security analysis) diff --git a/docs/reference/supply_chain_security.md b/docs/reference/supply_chain_security.md index c80f484..4f9c429 100644 --- a/docs/reference/supply_chain_security.md +++ b/docs/reference/supply_chain_security.md @@ -109,12 +109,31 @@ python3 tools/check_pip_audit.py /tmp/pip-audit.json ### Suppression (intentional CVE acceptance) -If a CVE is acknowledged but not yet fixable (upstream release -pending, etc.), document it in the deployer's risk acceptance log -and use `pip-audit --ignore-vuln ` to suppress it for the -nightly run. ForgeLM does NOT ship a project-level -ignore list — every suppression should be deployer-side and -quarterly-reviewed. +ForgeLM does NOT ship a default project-level ignore list — a +deployer running `python3 tools/check_pip_audit.py /tmp/pip-audit.json` +standalone inherits no suppressions and sees the full unfiltered +gate. 
Document any deployer-side acceptance in your own risk +acceptance log and pass it via the opt-in flag: + +```bash +python3 tools/check_pip_audit.py /tmp/pip-audit.json \ + --ignores path/to/your_ignores.yaml +``` + +Each entry in the YAML file must carry `id`, `package`, `reason`, +`threat_model`, `verified_at`, and `reevaluate_after` (optional: +`aliases`, `references`); missing any required field fails the gate +closed, so an undocumented suppression cannot land silently. Every +match is logged as a `::notice::` annotation in the run summary so +the audit trail stays visible. + +ForgeLM's own nightly does carry a checked-in +[`tools/pip_audit_ignores.yaml`](../../tools/pip_audit_ignores.yaml) for +project-internal triage (currently transformers `CVE-2026-1839` plus +nine no-fix torch advisories and one OSV-misclassified markdown +record from the 2026-05-21 cycle). That file is consumed only by the +project's own workflow via `--ignores`; deployers do not inherit it. +Review at every release cycle. 
## bandit (static security analysis) diff --git a/docs/usermanuals/en/operations/supply-chain.md b/docs/usermanuals/en/operations/supply-chain.md index 4576e02..25342f4 100644 --- a/docs/usermanuals/en/operations/supply-chain.md +++ b/docs/usermanuals/en/operations/supply-chain.md @@ -60,13 +60,27 @@ python3 tools/check_bandit.py /tmp/bandit.json ## When a CVE is acknowledged but not yet fixable -If upstream has not yet released the fix and you've documented the CVE in your deployer-side risk acceptance log: +If upstream has not yet released the fix and you've documented the CVE in your deployer-side risk acceptance log, write a YAML ignore file and pass it to `check_pip_audit.py` via the opt-in `--ignores` flag: + +```yaml +# your_ignores.yaml +ignores: + - id: CVE-2026-XXXX + package: some-pkg + reason: brief one-line summary + threat_model: why your deployment's surface does not expose the affected API + verified_at: '2026-05-21' + reevaluate_after: each quarter, or when upstream ships the fix +``` ```bash -pip-audit --ignore-vuln --strict --format json --output /tmp/pip-audit.json +pip-audit --strict --format json --output /tmp/pip-audit.json +python3 tools/check_pip_audit.py /tmp/pip-audit.json --ignores your_ignores.yaml ``` -ForgeLM does **not** ship a project-level ignore list — every suppression is deployer-side and should be quarterly-reviewed. +Missing any required field (`id`, `package`, `reason`, `threat_model`, `verified_at`, `reevaluate_after`) fails the gate closed, so undocumented suppressions cannot land silently. Every match is logged as a `::notice::` annotation in the run summary. + +ForgeLM does **not** ship a default project-level ignore list. The project's own nightly carries a checked-in `tools/pip_audit_ignores.yaml` (for project-internal triage), but `check_pip_audit.py` reads no ignores at all without `--ignores`, so deployers running the tool standalone inherit nothing. 
Every deployer-side suppression is documented in your own risk acceptance log and quarterly-reviewed. ## Where to read more diff --git a/docs/usermanuals/tr/operations/supply-chain.md b/docs/usermanuals/tr/operations/supply-chain.md index a929c7b..89c94dc 100644 --- a/docs/usermanuals/tr/operations/supply-chain.md +++ b/docs/usermanuals/tr/operations/supply-chain.md @@ -60,13 +60,27 @@ python3 tools/check_bandit.py /tmp/bandit.json ## Bir CVE kabul edildiğinde ama henüz düzeltilemediğinde -Upstream henüz düzeltmeyi yayınlamadıysa ve CVE'yi operatör-tarafı risk acceptance log'unuzda belgelediyseniz: +Upstream henüz düzeltmeyi yayınlamadıysa ve CVE'yi operatör-tarafı risk acceptance log'unuzda belgelediyseniz, bir YAML ignore dosyası yazıp `check_pip_audit.py`'ye opt-in `--ignores` flag'i üzerinden geçirin: + +```yaml +# your_ignores.yaml +ignores: + - id: CVE-2026-XXXX + package: some-pkg + reason: tek satırlık kısa özet + threat_model: deployment yüzeyinizin etkilenen API'yi neden açığa çıkarmadığı + verified_at: '2026-05-21' + reevaluate_after: her quarter, ya da upstream fix gönderdiğinde +``` ```bash -pip-audit --ignore-vuln --strict --format json --output /tmp/pip-audit.json +pip-audit --strict --format json --output /tmp/pip-audit.json +python3 tools/check_pip_audit.py /tmp/pip-audit.json --ignores your_ignores.yaml ``` -ForgeLM proje-seviyesi bir ignore listesi yayınlamaz **— her suppression operatör-tarafı olmalı ve quarterly-review yapılmalıdır**. +Zorunlu alanlardan birinin (`id`, `package`, `reason`, `threat_model`, `verified_at`, `reevaluate_after`) eksikliği gate'i kapalı fail ettirir; böylece dokümante edilmemiş bir suppression sessizce inemez. Her eşleşme run summary'de `::notice::` annotation olarak loglanır. + +ForgeLM **varsayılan proje-seviyesi bir ignore listesi yayınlamaz**. 
Projenin kendi nightly'si check-in edilmiş bir `tools/pip_audit_ignores.yaml` taşır (proje-içi triage için), ama `check_pip_audit.py` `--ignores` olmadan hiçbir ignore okumaz; bu yüzden tool'u standalone çalıştıran deployer'lar hiçbir şey miras almaz. Her operatör-tarafı suppression kendi risk acceptance log'unuzda dokümante edilir ve quarterly-review yapılır. ## Daha fazla okumak için nereye diff --git a/tests/test_check_pip_audit.py b/tests/test_check_pip_audit.py index 45163e1..b5b5cfc 100644 --- a/tests/test_check_pip_audit.py +++ b/tests/test_check_pip_audit.py @@ -170,3 +170,212 @@ def test_high_takes_precedence_over_unknown(tool, tmp_path, capsys): assert tool.main([str(_TOOL_PATH), str(p)]) == 1 captured = capsys.readouterr() assert "high/critical" in captured.out + + +# --------------------------------------------------------------------------- +# Opt-in --ignores YAML support. +# +# Behaviour contract: +# - Default (no --ignores) is unchanged → deployers running the tool +# standalone get the unfiltered severity gate (existing tests above +# still pass). +# - --ignores PATH suppresses findings whose {id} ∪ aliases intersects +# an ignore entry; each suppression is logged as ::notice:: so the +# run summary still surfaces the audit trail. +# - Schema is enforced: each entry must carry id/package/reason/ +# threat_model/verified_at/reevaluate_after — missing any field is a +# policy violation (an undocumented suppression) and fails the gate. 
+# --------------------------------------------------------------------------- + + +def _write_ignores(tmp_path: Path, body: str) -> Path: + p = tmp_path / "pip_audit_ignores.yaml" + p.write_text(body, encoding="utf-8") + return p + + +def _valid_ignore_entry( + *, + cve_id: str = "CVE-2026-9999", + aliases: list[str] | None = None, + package: str = "synthetic-pkg", +) -> str: + lines = [ + f" - id: {cve_id}", + f" package: {package}", + " reason: synthetic ignore for unit tests", + " threat_model: not reachable from any external surface", + " verified_at: '2026-05-21'", + " reevaluate_after: never (test fixture)", + ] + if aliases: + lines.insert(1, f" aliases: [{', '.join(aliases)}]") + return "\n".join(lines) + + +def test_ignores_suppresses_by_primary_id(tool, tmp_path, capsys): + """A finding whose `id` is listed is suppressed; gate exits 0.""" + audit = _write_audit( + tmp_path, + { + "dependencies": [ + { + "name": "synthetic-pkg", + "version": "1.0.0", + "vulns": [{"id": "CVE-2026-9999", "severity": "HIGH"}], + } + ] + }, + ) + ignores = _write_ignores(tmp_path, "ignores:\n" + _valid_ignore_entry()) + assert tool.main([str(_TOOL_PATH), str(audit), "--ignores", str(ignores)]) == 0 + captured = capsys.readouterr() + assert "::notice::pip-audit suppressed" in captured.out + assert "CVE-2026-9999" in captured.out + # HIGH header must NOT appear — the finding was suppressed before bucketing. + assert "high/critical" not in captured.out + + +def test_ignores_suppresses_by_alias(tool, tmp_path, capsys): + """Pip-audit emits `id: PYSEC-…` with `aliases: [CVE-…]`; an + ignore file referencing either form must match — the schema spec + advertises `aliases:` precisely to bridge that lookup.""" + audit = _write_audit( + tmp_path, + { + "dependencies": [ + { + "name": "torch", + "version": "2.12.0", + "vulns": [{"id": "PYSEC-2025-191", "aliases": ["CVE-2025-2953"]}], + } + ] + }, + ) + # Ignore file references only the CVE alias, not the PYSEC primary id. 
+ ignores = _write_ignores( + tmp_path, + "ignores:\n" + _valid_ignore_entry(cve_id="CVE-2025-2953", aliases=["PYSEC-2025-191"], package="torch"), + ) + assert tool.main([str(_TOOL_PATH), str(audit), "--ignores", str(ignores)]) == 0 + captured = capsys.readouterr() + assert "PYSEC-2025-191" in captured.out + assert "::notice::" in captured.out + + +def test_ignores_does_not_match_unrelated_findings(tool, tmp_path, capsys): + """An ignore entry must not suppress a different CVE in the report. + + Catches a future refactor where the matcher accidentally became + permissive (e.g., substring match instead of exact-id intersection). + """ + audit = _write_audit( + tmp_path, + { + "dependencies": [ + { + "name": "synthetic-pkg", + "version": "1.0.0", + "vulns": [{"id": "CVE-2026-0001", "severity": "HIGH"}], + } + ] + }, + ) + ignores = _write_ignores( + tmp_path, + "ignores:\n" + _valid_ignore_entry(cve_id="CVE-2026-9999"), + ) + assert tool.main([str(_TOOL_PATH), str(audit), "--ignores", str(ignores)]) == 1 + captured = capsys.readouterr() + assert "::notice::" not in captured.out + assert "CVE-2026-0001" in captured.out + + +def test_ignores_schema_missing_required_field_fails(tool, tmp_path, capsys): + """An entry missing any required field must fail the gate. + + Otherwise an operator could short-circuit the policy by adding a + bare `id: …` line without the written justification + re-evaluate + condition the standard requires. + """ + audit = _write_audit(tmp_path, {"dependencies": []}) + ignores = _write_ignores( + tmp_path, + "ignores:\n - id: CVE-2026-0001\n package: synthetic-pkg\n", + ) + assert tool.main([str(_TOOL_PATH), str(audit), "--ignores", str(ignores)]) == 1 + captured = capsys.readouterr() + err = captured.err + assert "missing required field" in err + # The field names that were missing should be named so the operator + # can fix the file without re-reading the schema. 
+ for required in ("reason", "threat_model", "verified_at", "reevaluate_after"): + assert required in err + + +def test_ignores_missing_file_fails(tool, tmp_path, capsys): + """A nonexistent --ignores path is a hard error. + + Falling back to "no ignores" would silently turn project-side + suppressions off, which could surface a flood of accepted-risk + CVEs as fresh failures and obscure real regressions. + """ + audit = _write_audit(tmp_path, {"dependencies": []}) + missing = tmp_path / "no-such-ignores.yaml" + assert tool.main([str(_TOOL_PATH), str(audit), "--ignores", str(missing)]) == 1 + captured = capsys.readouterr() + assert "ignore file not readable" in captured.err + + +def test_ignores_invalid_yaml_fails(tool, tmp_path, capsys): + audit = _write_audit(tmp_path, {"dependencies": []}) + bad = tmp_path / "bad.yaml" + bad.write_text("ignores:\n - id: [unclosed", encoding="utf-8") + assert tool.main([str(_TOOL_PATH), str(audit), "--ignores", str(bad)]) == 1 + captured = capsys.readouterr() + assert "not valid YAML" in captured.err + + +def test_default_no_ignores_is_unfiltered_for_deployers(tool, tmp_path, capsys): + """Without --ignores the gate must run the full severity policy. + + Documented contract in supply_chain_security.md: deployers + invoking `python3 tools/check_pip_audit.py /tmp/pip-audit.json` + standalone inherit none of the project-internal suppressions. + """ + audit = _write_audit( + tmp_path, + { + "dependencies": [ + { + "name": "synthetic-pkg", + "version": "1.0.0", + "vulns": [{"id": "CVE-2026-9999", "severity": "HIGH"}], + } + ] + }, + ) + # No --ignores: even though this CVE id IS in the project's real + # ignore file, a deployer invocation must still fail closed. 
+ assert tool.main([str(_TOOL_PATH), str(audit)]) == 1 + captured = capsys.readouterr() + assert "::notice::" not in captured.out + assert "CVE-2026-9999" in captured.out + + +def test_project_ignore_file_passes_schema_validation(tool): + """The checked-in tools/pip_audit_ignores.yaml itself must satisfy + the schema so the workflow never breaks on its own ignore file. + + This is a structural smoke test, not a semantic one — we only + confirm the loader returns a non-None dict. Bumping a CVE entry's + semantic content (reason, dates, etc.) is reviewed by humans on PR. + """ + project_file = _PROJECT_ROOT / "tools" / "pip_audit_ignores.yaml" + assert project_file.exists(), "project ignore file must be checked in" + loaded = tool._load_ignores(project_file) + assert loaded is not None, "checked-in ignore file failed schema validation" + # Every entry indexed under at least its primary id. + assert "CVE-2026-1839" in loaded, ( + "the historical transformers ignore must still be present; if you removed it intentionally update this test" + ) diff --git a/tools/check_pip_audit.py b/tools/check_pip_audit.py index 99e6407..b32e950 100644 --- a/tools/check_pip_audit.py +++ b/tools/check_pip_audit.py @@ -11,6 +11,16 @@ Used in ``.github/workflows/nightly.yml`` after the ``pip-audit`` step. +Optional opt-in ignore list via ``--ignores PATH``: every finding whose +``{id} ∪ aliases`` intersects an entry in the YAML file is suppressed +(emitting a ``::notice::`` annotation that names the id and the +``reason`` field) before severity bucketing. Deployers running this +script standalone WITHOUT ``--ignores`` inherit no suppressions, in +keeping with the deployer-side risk-acceptance policy documented in +``docs/reference/supply_chain_security.md``. ForgeLM's own nightly +points at ``tools/pip_audit_ignores.yaml``; see that file's header for +the schema. 
+ Exit codes (per ``tools/`` contract — NOT the public 0/1/2/3/4 surface that ``forgelm/`` honours): @@ -20,13 +30,19 @@ UNKNOWN-severity finding (F-PR29-A7-11: pip-audit's JSON omits severity, so UNKNOWN means we cannot prove a vulnerability is low-impact; failing closed avoids silent drop), OR the input file is - missing / unparseable. + missing / unparseable, OR the ignore file (when supplied) is missing, + unparseable, or schema-invalid. Usage:: + # Standalone (no project-side suppressions — recommended for deployers): pip-audit --format json --output /tmp/pip-audit.json || true python3 tools/check_pip_audit.py /tmp/pip-audit.json + # Project nightly (consumes the checked-in ignore file): + python3 tools/check_pip_audit.py /tmp/pip-audit.json \\ + --ignores tools/pip_audit_ignores.yaml + Standards-side note: this helper exists to satisfy the ``|| true`` carve-out in ``docs/standards/testing.md`` (CI bypass discipline). The bash ``pip-audit --format json > out.json || true`` step that calls into us is @@ -40,6 +56,7 @@ from __future__ import annotations +import argparse import json import sys from pathlib import Path @@ -131,14 +148,142 @@ def _load_report(report_path: Path) -> Optional[dict[str, Any]]: return None -def _bucket_findings(report: dict[str, Any]) -> tuple[list[str], list[str], list[str]]: - """Walk every (name, vuln) pair and return ``(high, medium, unknown)`` - lists of pre-formatted finding lines. LOW tier is silent — the raw - JSON remains in build artefacts for post-mortem if needed.""" +# Required keys per entry in the ignore file. Missing any of them is a +# policy violation (an undocumented suppression), so the gate fails closed +# rather than silently accepting CVEs with no recorded justification. 
+_IGNORE_REQUIRED_KEYS: frozenset[str] = frozenset( + {"id", "package", "reason", "threat_model", "verified_at", "reevaluate_after"} +) + + +def _load_ignores(ignores_path: Path) -> Optional[dict[str, dict[str, Any]]]: + """Read + validate ``--ignores`` YAML; return ``{id_or_alias: entry}``. + + On any failure (file missing, YAML invalid, schema invalid) emits a + ``::error::`` annotation and returns ``None`` so the caller fails + closed — an unreadable ignore file must not be silently treated as + "no ignores", or every CVE would suddenly fail an otherwise green + gate without anyone noticing the YAML drifted. + """ + try: + import yaml # type: ignore[import-untyped] + except ImportError as exc: # pragma: no cover - PyYAML is a runtime dep + print( + f"::error::--ignores requires PyYAML (`pip install pyyaml`): {exc}", + file=sys.stderr, + ) + return None + + try: + raw = ignores_path.read_text(encoding="utf-8") + except OSError as exc: + print( + f"::error::pip-audit ignore file not readable at {ignores_path}: {exc}", + file=sys.stderr, + ) + return None + + try: + loaded = yaml.safe_load(raw) + except yaml.YAMLError as exc: + print( + f"::error::pip-audit ignore file at {ignores_path} is not valid YAML: {exc}", + file=sys.stderr, + ) + return None + + if not isinstance(loaded, dict): + print( + f"::error::pip-audit ignore file at {ignores_path} must be a mapping with key 'ignores'.", + file=sys.stderr, + ) + return None + entries = loaded.get("ignores") + if not isinstance(entries, list): + print( + f"::error::pip-audit ignore file at {ignores_path} must define a top-level 'ignores:' list.", + file=sys.stderr, + ) + return None + + by_id: dict[str, dict[str, Any]] = {} + for index, entry in enumerate(entries): + if not isinstance(entry, dict): + print( + f"::error::pip-audit ignore entry #{index} in {ignores_path} must be a mapping.", + file=sys.stderr, + ) + return None + missing = _IGNORE_REQUIRED_KEYS - entry.keys() + if missing: + print( + 
f"::error::pip-audit ignore entry #{index} (id={entry.get('id')!r}) " + f"in {ignores_path} is missing required field(s): " + f"{', '.join(sorted(missing))}.", + file=sys.stderr, + ) + return None + primary_id = entry["id"] + if not isinstance(primary_id, str): + print( + f"::error::pip-audit ignore entry #{index} in {ignores_path} 'id' must be a string.", + file=sys.stderr, + ) + return None + # Index by every alias so cross-DB lookups (PYSEC ↔ CVE ↔ GHSA) + # match without the workflow having to know which form pip-audit + # emits this week. Last write wins on duplicates, but we surface + # the dup so the file stays clean. + ids = {primary_id, *(entry.get("aliases") or [])} + for ident in ids: + if ident in by_id and by_id[ident] is not entry: + print( + f"::warning::pip-audit ignore id {ident!r} appears under " + f"both {by_id[ident].get('id')!r} and {primary_id!r} " + f"in {ignores_path}; later entry wins." + ) + by_id[ident] = entry + return by_id + + +def _vuln_identifiers(vuln: dict[str, Any]) -> set[str]: + """Return the union ``{id} ∪ aliases`` for ignore-match purposes.""" + ids: set[str] = set() + primary = vuln.get("id") + if isinstance(primary, str): + ids.add(primary) + aliases = vuln.get("aliases") + if isinstance(aliases, list): + ids.update(a for a in aliases if isinstance(a, str)) + return ids + + +def _bucket_findings( + report: dict[str, Any], + ignores: Optional[dict[str, dict[str, Any]]] = None, +) -> tuple[list[str], list[str], list[str], list[str]]: + """Walk every (name, vuln) pair and return + ``(high, medium, unknown, suppressed)`` lists of pre-formatted lines. + + ``suppressed`` carries findings whose ``{id} ∪ aliases`` intersected + the ignore set; the caller surfaces each one as a ``::notice::`` + annotation so suppressions stay audit-visible. LOW tier is silent + — the raw JSON remains in build artefacts for post-mortem if needed. 
+ """ high: list[str] = [] medium: list[str] = [] unknown: list[str] = [] + suppressed: list[str] = [] for name, vuln in _iter_findings(report): + if ignores: + matched = _vuln_identifiers(vuln) & ignores.keys() + if matched: + # Pick the entry by any matching id — they all point at + # the same dict thanks to alias indexing. + entry = ignores[next(iter(matched))] + vid = vuln.get("id") or "" + suppressed.append(f"{name} {vid} — reason: {entry.get('reason')}") + continue severity = _vuln_severity(vuln) line = _format_finding(name, vuln, severity) if severity in _HIGH_TIERS: @@ -147,20 +292,54 @@ def _bucket_findings(report: dict[str, Any]) -> tuple[list[str], list[str], list medium.append(line) elif severity == "UNKNOWN": unknown.append(line) - return high, medium, unknown + return high, medium, unknown, suppressed + + +def _parse_argv(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser( + prog=argv[0], + description="Apply ForgeLM's severity gate to a pip-audit JSON report.", + ) + parser.add_argument( + "report", + type=Path, + help="path to pip-audit JSON report (output of `pip-audit --format json`).", + ) + parser.add_argument( + "--ignores", + type=Path, + default=None, + help=( + "optional YAML file listing CVE ids to suppress (each must " + "carry id/package/reason/threat_model/verified_at/" + "reevaluate_after). Without this flag no suppressions are " + "applied — deployers running this tool standalone get the " + "full unfiltered gate." 
+ ), + ) + return parser.parse_args(argv[1:]) def main(argv: list[str]) -> int: - if len(argv) != 2: - print(f"usage: {argv[0]} ", file=sys.stderr) - return 1 + args = _parse_argv(argv) - report_path = Path(argv[1]) - report = _load_report(report_path) + report = _load_report(args.report) if report is None: return 1 - high, medium, unknown = _bucket_findings(report) + ignores: Optional[dict[str, dict[str, Any]]] = None + if args.ignores is not None: + ignores = _load_ignores(args.ignores) + if ignores is None: + return 1 + + high, medium, unknown, suppressed = _bucket_findings(report, ignores) + + for line in suppressed: + # ::notice:: keeps every suppression in the run log so a reviewer + # can spot-check that the ignore file hasn't accidentally hidden + # a freshly-rescored CVE. + print(f"::notice::pip-audit suppressed (project ignore list): {line}") for line in medium: # GitHub Actions annotation; surfaces in the run summary without @@ -186,7 +365,7 @@ def main(argv: list[str]) -> int: print(f"::error::pip-audit {line}") print( f"::error::pip-audit found {len(unknown)} finding(s) without parseable " - f"severity in {report_path}; pip-audit's JSON does not serialise OSV " + f"severity in {args.report}; pip-audit's JSON does not serialise OSV " f"severity, so each must be reviewed manually (failing closed)." ) return 1 diff --git a/tools/pip_audit_ignores.yaml b/tools/pip_audit_ignores.yaml new file mode 100644 index 0000000..a3d8bbf --- /dev/null +++ b/tools/pip_audit_ignores.yaml @@ -0,0 +1,175 @@ +# pip-audit ignore list for ForgeLM's own nightly compatibility workflow. +# +# THIS FILE IS NOT A PROJECT RECOMMENDATION FOR DEPLOYERS. It captures +# ForgeLM's internal risk acceptance for *its own* CI nightly only. +# Deployers running `python3 tools/check_pip_audit.py /tmp/pip-audit.json` +# without `--ignores PATH` inherit nothing from this file. 
See +# `docs/reference/supply_chain_security.md` "Suppression" for the policy: +# every deployer-side suppression is documented in the deployer's own +# risk acceptance log, not here. +# +# Schema: +# id — primary OSV/PYSEC/CVE id used by pip-audit. +# aliases — (optional) list of alternative ids for cross-DB +# lookup; pip-audit matches against {id} | aliases. +# package — pypi package name (for human review). +# reason — one-line summary of why we accept this CVE. +# threat_model — multi-line explanation of why ForgeLM's external +# surface (CLI flags, config YAML, dataset files, +# etc.) does not expose the affected API to +# attacker control. +# verified_at — ISO date (YYYY-MM-DD) the advisory was last +# read against this entry. +# reevaluate_after — concrete trigger that drops the ignore. +# references — (optional) list of advisory URLs. +# +# `tools/check_pip_audit.py --ignores tools/pip_audit_ignores.yaml` +# validates that every required field is present and exits 1 if not. +# Adding an entry here is a security-policy change; review per +# `docs/reference/supply_chain_security.md`. + +ignores: + # ----- transformers ----- + + - id: CVE-2026-1839 + package: transformers + reason: >- + Fix only in transformers 5.0.0rc3 (release candidate); pyproject + pins <5.0.0 due to TRL adapter + tokenizer-config breaking + changes. + threat_model: >- + ForgeLM's pyproject pins ``transformers>=4.38.0,<5.0.0``. The + 5.x branch is a major version bump that breaks downstream + callers (TRL adapter signature changes + tokenizer-config API + drift). No 4.x backport at time of ignore. + verified_at: '2026-05-10' + reevaluate_after: >- + Each release cycle, or when transformers ships a 4.x point + release with the fix, or when ForgeLM cuts a tracked + major-version-bump cycle that raises the upper bound. 
+ + # ----- torch (added 2026-05-21, triage in issue #58) ----- + # + # All nine torch CVEs below share the same threat-model carve-out: + # each requires a LOCAL attacker passing malformed inputs to a + # specific torch API. ForgeLM is a local-CLI training tool — an + # attacker with that level of access is already inside the trust + # boundary. None of the affected APIs are called with attacker- + # controlled arguments anywhere in `forgelm/`. + + - id: PYSEC-2025-191 + aliases: [CVE-2025-2953] + package: torch + reason: Local DoS in `torch.mkldnn_max_pool2d`; advisory records vendor doubt about the finding's real existence. + threat_model: >- + Requires local attacker passing malformed inputs. ForgeLM + does not call `torch.mkldnn_max_pool2d` with attacker- + controlled arguments. + verified_at: '2026-05-21' + reevaluate_after: Each release cycle, or when CVSS re-scores to a remote attack vector. + + - id: PYSEC-2025-192 + aliases: [CVE-2025-2998] + package: torch + reason: Local memory corruption in `torch.nn.utils.rnn.pad_packed_sequence`. + threat_model: >- + Requires local attacker passing malformed inputs. ForgeLM's + sequence handling does not pass attacker-controlled tensors + to this API. + verified_at: '2026-05-21' + reevaluate_after: Each release cycle, or when CVSS re-scores to a remote attack vector. + + - id: PYSEC-2025-193 + aliases: [CVE-2025-2999] + package: torch + reason: Local memory corruption in `torch.nn.utils.rnn.unpack_sequence`. + threat_model: >- + Requires local attacker passing malformed inputs. ForgeLM's + sequence handling does not pass attacker-controlled tensors + to this API. + verified_at: '2026-05-21' + reevaluate_after: Each release cycle, or when CVSS re-scores to a remote attack vector. + + - id: PYSEC-2025-194 + aliases: [CVE-2025-3000] + package: torch + reason: Local memory corruption in `torch.jit.script`. + threat_model: >- + Requires local attacker passing malformed inputs. 
ForgeLM + does not expose `torch.jit.script` to user inputs. + verified_at: '2026-05-21' + reevaluate_after: Each release cycle, or when CVSS re-scores to a remote attack vector. + + - id: PYSEC-2025-195 + aliases: [CVE-2025-3001] + package: torch + reason: Local memory corruption in `torch.lstm_cell`. + threat_model: >- + Requires local attacker passing malformed inputs. The + cell-level API is not on ForgeLM's training path. + verified_at: '2026-05-21' + reevaluate_after: Each release cycle, or when CVSS re-scores to a remote attack vector. + + - id: PYSEC-2025-196 + aliases: [CVE-2025-3121] + package: torch + reason: Local memory corruption in `torch.jit.jit_module_from_flatbuffer`. + threat_model: >- + Requires local attacker passing malformed inputs. The + flatbuffer JIT loader is not used by ForgeLM. + verified_at: '2026-05-21' + reevaluate_after: Each release cycle, or when CVSS re-scores to a remote attack vector. + + - id: PYSEC-2025-197 + aliases: [CVE-2025-3136] + package: torch + reason: Local memory corruption in `torch.cuda.memory.caching_allocator_delete` (`c10/cuda/CUDACachingAllocator.cpp`). + threat_model: >- + Requires local attacker passing malformed inputs. User does + not control the allocator deletion call from any ForgeLM + surface. + verified_at: '2026-05-21' + reevaluate_after: Each release cycle, or when CVSS re-scores to a remote attack vector. + + - id: PYSEC-2025-210 + aliases: [CVE-2025-63396] + package: torch + reason: Local DoS in `torch.profiler.profile` via missing `profiler.stop()` finalisation hang. + threat_model: >- + Requires local control of the profiler lifecycle. The + profiler is not in ForgeLM's production paths. + verified_at: '2026-05-21' + reevaluate_after: Each release cycle, or when CVSS re-scores to a remote attack vector. + + - id: PYSEC-2026-139 + aliases: [CVE-2026-4538] + package: torch + reason: Local deserialization in pt2 Loading Handler; fix PR pytorch/pytorch#176791 not merged at time of ignore. 
+ threat_model: >- + Same trust model as `torch.load(pickle)`: operators are + documented as required to only load trusted checkpoints. + Remote exploitation is out of scope for ForgeLM's local-CLI + surface. + verified_at: '2026-05-21' + reevaluate_after: When pytorch/pytorch#176791 (or equivalent fix) ships in a torch release within our pinned range. + references: + - https://github.com/pytorch/pytorch/pull/176791 + + # ----- markdown (added 2026-05-21, triage in issue #58) ----- + + - id: PYSEC-2026-89 + aliases: [CVE-2025-69534] + package: markdown + reason: OSV affected-range misclassification — advisory description states "fixed in version 3.8.1" but the OSV range record has no `fixed` event. + threat_model: >- + Installed markdown 3.10.2 is post-fix. The vulnerability + (DoS via `html.parser.HTMLParser` `AssertionError` on + malformed Markdown) is genuinely remote, but our installed + version contains the upstream fix. Verified against + `pypa/advisory-database/vulns/markdown/PYSEC-2026-89.yaml`. + verified_at: '2026-05-21' + reevaluate_after: >- + When the OSV record gains a `fixed: 3.8.1` event and pip-audit + stops flagging post-fix versions. + references: + - https://github.com/pypa/advisory-database/blob/main/vulns/markdown/PYSEC-2026-89.yaml From 2a28f8c6513d26a809844faebeac82611e7bd86a Mon Sep 17 00:00:00 2001 From: Cemil ILIK Date: Sat, 23 May 2026 01:17:55 +0300 Subject: [PATCH 3/3] fix(supply-chain): harden pip-audit ignore-file validation (PR #59 review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review feedback on the pip_audit_ignores.yaml loader: - Validate `aliases` is a list of strings before unpacking it into the id index. Previously `aliases: CVE-2025-2953` (a bare string) would be iterated character-by-character, polluting the index with single-char "ids" and silently breaking matching; a non-iterable would crash. Now fails closed with a clear ::error::. 
- Validate value shape of required fields, not just presence: id, package, reason, threat_model, reevaluate_after must be non-empty strings, and verified_at must be a 'YYYY-MM-DD' string (quote it in YAML so it is not parsed as a datetime.date). An empty reason was a documented-justification gap the old presence-only check let through. Note: reevaluate_after is deliberately NOT date-validated — it is a free-text retirement condition ("Each release cycle, or when …"), so the review's suggested date-regex on it would have rejected every real entry. Date validation applies to verified_at only. - Hoist `ignore_ids = set(ignores)` once in _bucket_findings instead of intersecting against `ignores.keys()` per finding. - Doc grammar: "fails the gate closed" -> "causes the gate to fail closed" in both EN mirrors, and document the new malformed-value rejection in the reference + user-manual (EN + TR). Adds 7 tests: aliases-as-string, aliases-with-non-string-element, null-aliases-accepted, empty-required-string, malformed verified_at, unquoted-YAML-date rejection, and free-text reevaluate_after accepted. 23 tests pass. 
Refs: #59 (review) Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/reference/supply_chain_security-tr.md | 10 +- docs/reference/supply_chain_security.md | 10 +- .../usermanuals/en/operations/supply-chain.md | 2 +- .../usermanuals/tr/operations/supply-chain.md | 2 +- tests/test_check_pip_audit.py | 168 ++++++++++++++++++ tools/check_pip_audit.py | 72 ++++++-- 6 files changed, 242 insertions(+), 22 deletions(-) diff --git a/docs/reference/supply_chain_security-tr.md b/docs/reference/supply_chain_security-tr.md index 13223e9..e14c8c4 100644 --- a/docs/reference/supply_chain_security-tr.md +++ b/docs/reference/supply_chain_security-tr.md @@ -122,10 +122,12 @@ python3 tools/check_pip_audit.py /tmp/pip-audit.json \ YAML dosyasındaki her giriş `id`, `package`, `reason`, `threat_model`, `verified_at`, ve `reevaluate_after` taşımalıdır (opsiyonel: `aliases`, -`references`); herhangi bir zorunlu alanın eksikliği gate'i kapalı -fail ettirir, böylece dokümante edilmemiş bir suppression sessizce -inemez. Her eşleşme run summary'de `::notice::` annotation olarak -loglanır; audit trail görünür kalır. +`references`); herhangi bir zorunlu alanın eksikliği — ya da bir alanın +hatalı değer taşıması (boş string, `YYYY-MM-DD` olmayan bir +`verified_at`, ya da string listesi olmayan `aliases`) — gate'in +kapalı fail etmesine yol açar; böylece dokümante edilmemiş bir +suppression sessizce inemez. Her eşleşme run summary'de `::notice::` +annotation olarak loglanır; audit trail görünür kalır. 
ForgeLM'in kendi nightly'si proje-içi triage için check-in edilmiş bir [`tools/pip_audit_ignores.yaml`](../../tools/pip_audit_ignores.yaml) diff --git a/docs/reference/supply_chain_security.md b/docs/reference/supply_chain_security.md index 4f9c429..6feccd2 100644 --- a/docs/reference/supply_chain_security.md +++ b/docs/reference/supply_chain_security.md @@ -122,10 +122,12 @@ python3 tools/check_pip_audit.py /tmp/pip-audit.json \ Each entry in the YAML file must carry `id`, `package`, `reason`, `threat_model`, `verified_at`, and `reevaluate_after` (optional: -`aliases`, `references`); missing any required field fails the gate -closed, so an undocumented suppression cannot land silently. Every -match is logged as a `::notice::` annotation in the run summary so -the audit trail stays visible. +`aliases`, `references`); missing any required field — or giving one +a malformed value (an empty string, a `verified_at` that is not a +`YYYY-MM-DD` date, or `aliases` that is not a list of strings) — +causes the gate to fail closed, so an undocumented suppression cannot +land silently. Every match is logged as a `::notice::` annotation in +the run summary so the audit trail stays visible. ForgeLM's own nightly does carry a checked-in [`tools/pip_audit_ignores.yaml`](../../tools/pip_audit_ignores.yaml) for diff --git a/docs/usermanuals/en/operations/supply-chain.md b/docs/usermanuals/en/operations/supply-chain.md index 25342f4..e56487b 100644 --- a/docs/usermanuals/en/operations/supply-chain.md +++ b/docs/usermanuals/en/operations/supply-chain.md @@ -78,7 +78,7 @@ pip-audit --strict --format json --output /tmp/pip-audit.json python3 tools/check_pip_audit.py /tmp/pip-audit.json --ignores your_ignores.yaml ``` -Missing any required field (`id`, `package`, `reason`, `threat_model`, `verified_at`, `reevaluate_after`) fails the gate closed, so undocumented suppressions cannot land silently. Every match is logged as a `::notice::` annotation in the run summary. 
+Missing any required field (`id`, `package`, `reason`, `threat_model`, `verified_at`, `reevaluate_after`) — or giving one a malformed value (empty string, a non-`YYYY-MM-DD` `verified_at`, or an `aliases` that is not a list of strings) — causes the gate to fail closed, so undocumented suppressions cannot land silently. Every match is logged as a `::notice::` annotation in the run summary. ForgeLM does **not** ship a default project-level ignore list. The project's own nightly carries a checked-in `tools/pip_audit_ignores.yaml` (for project-internal triage), but `check_pip_audit.py` reads no ignores at all without `--ignores`, so deployers running the tool standalone inherit nothing. Every deployer-side suppression is documented in your own risk acceptance log and quarterly-reviewed. diff --git a/docs/usermanuals/tr/operations/supply-chain.md b/docs/usermanuals/tr/operations/supply-chain.md index 89c94dc..69a3033 100644 --- a/docs/usermanuals/tr/operations/supply-chain.md +++ b/docs/usermanuals/tr/operations/supply-chain.md @@ -78,7 +78,7 @@ pip-audit --strict --format json --output /tmp/pip-audit.json python3 tools/check_pip_audit.py /tmp/pip-audit.json --ignores your_ignores.yaml ``` -Zorunlu alanlardan birinin (`id`, `package`, `reason`, `threat_model`, `verified_at`, `reevaluate_after`) eksikliği gate'i kapalı fail ettirir; böylece dokümante edilmemiş bir suppression sessizce inemez. Her eşleşme run summary'de `::notice::` annotation olarak loglanır. +Zorunlu alanlardan birinin (`id`, `package`, `reason`, `threat_model`, `verified_at`, `reevaluate_after`) eksikliği — ya da bir alanın hatalı değer taşıması (boş string, `YYYY-MM-DD` olmayan `verified_at`, ya da string listesi olmayan `aliases`) — gate'in kapalı fail etmesine yol açar; böylece dokümante edilmemiş bir suppression sessizce inemez. Her eşleşme run summary'de `::notice::` annotation olarak loglanır. ForgeLM **varsayılan proje-seviyesi bir ignore listesi yayınlamaz**. 
Projenin kendi nightly'si check-in edilmiş bir `tools/pip_audit_ignores.yaml` taşır (proje-içi triage için), ama `check_pip_audit.py` `--ignores` olmadan hiçbir ignore okumaz; bu yüzden tool'u standalone çalıştıran deployer'lar hiçbir şey miras almaz. Her operatör-tarafı suppression kendi risk acceptance log'unuzda dokümante edilir ve quarterly-review yapılır. diff --git a/tests/test_check_pip_audit.py b/tests/test_check_pip_audit.py index b5b5cfc..006df12 100644 --- a/tests/test_check_pip_audit.py +++ b/tests/test_check_pip_audit.py @@ -363,6 +363,174 @@ def test_default_no_ignores_is_unfiltered_for_deployers(tool, tmp_path, capsys): assert "CVE-2026-9999" in captured.out +def test_ignores_aliases_as_bare_string_fails(tool, tmp_path, capsys): + """`aliases: CVE-…` (a string, not a list) must fail closed. + + Without the list-of-strings check, `{primary, *"CVE-2025-2953"}` + would unpack the string character-by-character into the id index, + polluting it with single-char "ids" and silently breaking matching. 
+ """ + audit = _write_audit(tmp_path, {"dependencies": []}) + body = ( + "ignores:\n" + " - id: PYSEC-2025-191\n" + " aliases: CVE-2025-2953\n" # <-- bare string, should be a list + " package: torch\n" + " reason: synthetic\n" + " threat_model: synthetic\n" + " verified_at: '2026-05-21'\n" + " reevaluate_after: never (test fixture)\n" + ) + ignores = _write_ignores(tmp_path, body) + assert tool.main([str(_TOOL_PATH), str(audit), "--ignores", str(ignores)]) == 1 + captured = capsys.readouterr() + assert "'aliases' must be a list of strings" in captured.err + + +def test_ignores_aliases_with_non_string_element_fails(tool, tmp_path, capsys): + audit = _write_audit(tmp_path, {"dependencies": []}) + body = ( + "ignores:\n" + " - id: PYSEC-2025-191\n" + " aliases: [CVE-2025-2953, 12345]\n" # <-- 12345 is an int + " package: torch\n" + " reason: synthetic\n" + " threat_model: synthetic\n" + " verified_at: '2026-05-21'\n" + " reevaluate_after: never (test fixture)\n" + ) + ignores = _write_ignores(tmp_path, body) + assert tool.main([str(_TOOL_PATH), str(audit), "--ignores", str(ignores)]) == 1 + captured = capsys.readouterr() + assert "'aliases' must be a list of strings" in captured.err + + +def test_ignores_null_aliases_is_accepted(tool, tmp_path, capsys): + """An explicit empty `aliases:` (YAML null) is treated as absent. + + The primary id alone must still index and match. 
+ """ + audit = _write_audit( + tmp_path, + { + "dependencies": [ + { + "name": "synthetic-pkg", + "version": "1.0.0", + "vulns": [{"id": "CVE-2026-9999", "severity": "HIGH"}], + } + ] + }, + ) + body = ( + "ignores:\n" + " - id: CVE-2026-9999\n" + " aliases:\n" # <-- null + " package: synthetic-pkg\n" + " reason: synthetic\n" + " threat_model: synthetic\n" + " verified_at: '2026-05-21'\n" + " reevaluate_after: never (test fixture)\n" + ) + ignores = _write_ignores(tmp_path, body) + assert tool.main([str(_TOOL_PATH), str(audit), "--ignores", str(ignores)]) == 0 + captured = capsys.readouterr() + assert "::notice::pip-audit suppressed" in captured.out + + +def test_ignores_empty_required_string_fails(tool, tmp_path, capsys): + """An empty `reason:` is an undocumented suppression — must fail. + + Presence alone is not enough; the policy requires a real + justification, so a blank value is a policy violation. + """ + audit = _write_audit(tmp_path, {"dependencies": []}) + body = ( + "ignores:\n" + " - id: CVE-2026-0001\n" + " package: synthetic-pkg\n" + " reason: ''\n" # <-- empty + " threat_model: synthetic\n" + " verified_at: '2026-05-21'\n" + " reevaluate_after: never (test fixture)\n" + ) + ignores = _write_ignores(tmp_path, body) + assert tool.main([str(_TOOL_PATH), str(audit), "--ignores", str(ignores)]) == 1 + captured = capsys.readouterr() + assert "field 'reason' must be a non-empty string" in captured.err + + +def test_ignores_malformed_verified_at_fails(tool, tmp_path, capsys): + """`verified_at` must be a YYYY-MM-DD string. + + Catches both a free-text date ("last week") and an unquoted YAML + date that parses to a `datetime.date` (not a string). 
+ """ + audit = _write_audit(tmp_path, {"dependencies": []}) + body = ( + "ignores:\n" + " - id: CVE-2026-0001\n" + " package: synthetic-pkg\n" + " reason: synthetic\n" + " threat_model: synthetic\n" + " verified_at: last week\n" # <-- not a date + " reevaluate_after: never (test fixture)\n" + ) + ignores = _write_ignores(tmp_path, body) + assert tool.main([str(_TOOL_PATH), str(audit), "--ignores", str(ignores)]) == 1 + captured = capsys.readouterr() + assert "'verified_at' must be a 'YYYY-MM-DD' string" in captured.err + + +def test_ignores_unquoted_yaml_date_fails(tool, tmp_path, capsys): + """An unquoted `verified_at: 2026-05-21` parses to a datetime.date, + not a string, and must be rejected so the schema stays uniform.""" + audit = _write_audit(tmp_path, {"dependencies": []}) + body = ( + "ignores:\n" + " - id: CVE-2026-0001\n" + " package: synthetic-pkg\n" + " reason: synthetic\n" + " threat_model: synthetic\n" + " verified_at: 2026-05-21\n" # <-- unquoted -> datetime.date + " reevaluate_after: never (test fixture)\n" + ) + ignores = _write_ignores(tmp_path, body) + assert tool.main([str(_TOOL_PATH), str(audit), "--ignores", str(ignores)]) == 1 + captured = capsys.readouterr() + assert "'verified_at' must be a 'YYYY-MM-DD' string" in captured.err + + +def test_ignores_freetext_reevaluate_after_is_accepted(tool, tmp_path, capsys): + """`reevaluate_after` is free text (a condition), NOT a date — a + sentence value must be accepted, unlike `verified_at`.""" + audit = _write_audit( + tmp_path, + { + "dependencies": [ + { + "name": "synthetic-pkg", + "version": "1.0.0", + "vulns": [{"id": "CVE-2026-9999", "severity": "HIGH"}], + } + ] + }, + ) + body = ( + "ignores:\n" + " - id: CVE-2026-9999\n" + " package: synthetic-pkg\n" + " reason: synthetic\n" + " threat_model: synthetic\n" + " verified_at: '2026-05-21'\n" + " reevaluate_after: Each release cycle, or when upstream ships the fix.\n" + ) + ignores = _write_ignores(tmp_path, body) + assert 
tool.main([str(_TOOL_PATH), str(audit), "--ignores", str(ignores)]) == 0 + captured = capsys.readouterr() + assert "::notice::pip-audit suppressed" in captured.out + + def test_project_ignore_file_passes_schema_validation(tool): """The checked-in tools/pip_audit_ignores.yaml itself must satisfy the schema so the workflow never breaks on its own ignore file. diff --git a/tools/check_pip_audit.py b/tools/check_pip_audit.py index b32e950..542dd49 100644 --- a/tools/check_pip_audit.py +++ b/tools/check_pip_audit.py @@ -58,6 +58,7 @@ import argparse import json +import re import sys from pathlib import Path from typing import Any, Iterable, Optional @@ -151,9 +152,53 @@ def _load_report(report_path: Path) -> Optional[dict[str, Any]]: # Required keys per entry in the ignore file. Missing any of them is a # policy violation (an undocumented suppression), so the gate fails closed # rather than silently accepting CVEs with no recorded justification. -_IGNORE_REQUIRED_KEYS: frozenset[str] = frozenset( - {"id", "package", "reason", "threat_model", "verified_at", "reevaluate_after"} +# +# All required fields must be non-empty strings EXCEPT ``verified_at``, +# which must additionally be an ISO ``YYYY-MM-DD`` date. ``reevaluate_after`` +# is deliberately free text (e.g. "Each release cycle, or when …"), NOT a +# date — it captures the condition that retires the ignore, which is +# rarely a fixed calendar day. +_IGNORE_REQUIRED_STR_KEYS: tuple[str, ...] = ( + "id", + "package", + "reason", + "threat_model", + "reevaluate_after", ) +_IGNORE_REQUIRED_KEYS: frozenset[str] = frozenset({*_IGNORE_REQUIRED_STR_KEYS, "verified_at"}) +_VERIFIED_AT_RE = re.compile(r"^\d{4}-\d{2}-\d{2}$") + + +def _validate_ignore_entry(entry: dict[str, Any], index: int, ignores_path: Path) -> Optional[str]: + """Return an ``::error::`` body if ``entry`` is malformed, else ``None``. 
+ + Caller guarantees every required key is present (so the missing-key + error can name all gaps at once); this validates value *shape* so a + well-meaning typo cannot weaken the gate: + + - required string fields must be non-empty (an empty ``reason`` or + ``threat_model`` is an undocumented suppression); + - ``verified_at`` must be a ``YYYY-MM-DD`` string (quote it in YAML + so it is not parsed as a ``datetime.date``); + - ``aliases``, when present, must be a list of strings — a bare + string would be unpacked character-by-character into the id index + and pollute matching. + """ + prefix = f"pip-audit ignore entry #{index} (id={entry.get('id')!r}) in {ignores_path}" + for field in _IGNORE_REQUIRED_STR_KEYS: + value = entry[field] + if not isinstance(value, str) or not value.strip(): + return f"{prefix} field '{field}' must be a non-empty string." + verified_at = entry["verified_at"] + if not isinstance(verified_at, str) or not _VERIFIED_AT_RE.match(verified_at.strip()): + return ( + f"{prefix} field 'verified_at' must be a 'YYYY-MM-DD' string " + f"(quote it in YAML so it is not parsed as a date)." + ) + aliases = entry.get("aliases") + if aliases is not None and (not isinstance(aliases, list) or not all(isinstance(a, str) for a in aliases)): + return f"{prefix} field 'aliases' must be a list of strings." 
+ return None def _load_ignores(ignores_path: Path) -> Optional[dict[str, dict[str, Any]]]: @@ -223,17 +268,17 @@ def _load_ignores(ignores_path: Path) -> Optional[dict[str, dict[str, Any]]]: file=sys.stderr, ) return None - primary_id = entry["id"] - if not isinstance(primary_id, str): - print( - f"::error::pip-audit ignore entry #{index} in {ignores_path} 'id' must be a string.", - file=sys.stderr, - ) + shape_error = _validate_ignore_entry(entry, index, ignores_path) + if shape_error: + print(f"::error::{shape_error}", file=sys.stderr) return None + primary_id = entry["id"] # Index by every alias so cross-DB lookups (PYSEC ↔ CVE ↔ GHSA) # match without the workflow having to know which form pip-audit - # emits this week. Last write wins on duplicates, but we surface - # the dup so the file stays clean. + # emits this week. ``aliases`` is validated as a list of strings + # above; ``or []`` also tolerates an explicit ``aliases:`` null. + # Last write wins on duplicates, but we surface the dup so the + # file stays clean. ids = {primary_id, *(entry.get("aliases") or [])} for ident in ids: if ident in by_id and by_id[ident] is not entry: @@ -274,9 +319,12 @@ def _bucket_findings( medium: list[str] = [] unknown: list[str] = [] suppressed: list[str] = [] + # Materialise the ignore id set once rather than rebuilding the keys + # view on every finding. + ignore_ids: set[str] = set(ignores) if ignores else set() for name, vuln in _iter_findings(report): - if ignores: - matched = _vuln_identifiers(vuln) & ignores.keys() + if ignore_ids: + matched = _vuln_identifiers(vuln) & ignore_ids if matched: # Pick the entry by any matching id — they all point at # the same dict thanks to alias indexing.