diff --git a/apps/backend/api/v1/sbom.py b/apps/backend/api/v1/sbom.py
index 0c6a08cb..c2e7fd58 100644
--- a/apps/backend/api/v1/sbom.py
+++ b/apps/backend/api/v1/sbom.py
@@ -687,7 +687,7 @@ async def download_sbom_signature_bundle_endpoint(
"/projects/{project_id}/sbom-ingest",
response_model=ScanPublic,
status_code=status.HTTP_202_ACCEPTED,
- summary="Ingest an external CycloneDX SBOM (queues a Celery task; returns 202 Accepted)",
+ summary="Ingest an external CycloneDX or SPDX SBOM (queues a Celery task; returns 202)",
responses={
202: {
"description": "SBOM accepted; a queued scan row is returned.",
@@ -717,14 +717,14 @@ async def download_sbom_signature_bundle_endpoint(
"content": {"application/problem+json": {}},
},
415: {
- "description": "Upload is not a CycloneDX JSON media type. RFC 7807.",
+ "description": "Upload is not a CycloneDX or SPDX media type. RFC 7807.",
"content": {"application/problem+json": {}},
},
422: {
"description": (
- "Upload is not a valid / supported CycloneDX document (not JSON, "
- "wrong bomFormat, unsupported specVersion, too many components). "
- "RFC 7807."
+ "Upload is not a valid / supported SBOM document — not CycloneDX-JSON "
+ "or SPDX (JSON / Tag-Value), wrong bomFormat, unsupported specVersion, "
+ "too many components/packages, or too deeply nested. RFC 7807."
),
"content": {"application/problem+json": {}},
},
@@ -754,7 +754,11 @@ async def ingest_sbom_endpoint(
project_id: uuid.UUID,
sbom: UploadFile = File(
...,
- description="A CycloneDX JSON SBOM document (.json / .cdx.json).",
+ description=(
+ "A CycloneDX-JSON (.json / .cdx.json) or SPDX (.spdx / .spdx.json / "
+ ".tag) SBOM document. Trivy auto-detects the format for CVE matching; "
+ "SPDX is mapped to CycloneDX for component persistence."
+ ),
),
ref: str | None = Form(
default=None,
diff --git a/apps/backend/services/sbom_ingest_service.py b/apps/backend/services/sbom_ingest_service.py
index a3d5219f..840b510d 100644
--- a/apps/backend/services/sbom_ingest_service.py
+++ b/apps/backend/services/sbom_ingest_service.py
@@ -2,10 +2,12 @@
External CycloneDX SBOM ingest — synchronous validation + scan-row creation.
This is the synchronous *front half* of the SBOM-ingest feature: it accepts an
-uploaded CycloneDX JSON document, validates it adversarially, persists a
-``kind="sbom"`` :class:`~models.scan.Scan` row, writes the validated SBOM to a
-durable on-disk location, and enqueues the Celery task that does the heavy work
-(``tasks.ingest_sbom.ingest_sbom_task``). The endpoint returns ``202 Accepted``
+uploaded CycloneDX-JSON or SPDX (JSON / Tag-Value) document, validates it
+adversarially, persists a ``kind="sbom"`` :class:`~models.scan.Scan` row, writes
+the original SBOM bytes to a durable on-disk location, and enqueues the Celery
+task that does the heavy work (``tasks.ingest_sbom.ingest_sbom_task`` — which
+maps SPDX → CycloneDX for component persistence and hands the original file to
+Trivy, which auto-detects the format). The endpoint returns ``202 Accepted``
with the queued scan row — never the result (CLAUDE.md core rule #3).
This endpoint is NOT the Dependency-Track ``/api/v1/bom`` + ``X-Api-Key`` BOM
@@ -21,18 +23,20 @@
fast-fails on a declared ``Content-Length`` over the cap before reading a
single body byte (mirrors the source-archive endpoint).
- - **Content-Type / filename allow-list.** Only JSON-ish media types and a
- ``.json`` / ``.cdx.json`` filename are accepted (415 otherwise). The header
- is advisory; the JSON parse + CycloneDX structure check are authoritative.
-
- - **Structural whitelist, NO deep traversal.** We parse the JSON and check
- only the TOP-LEVEL keys: ``bomFormat == "CycloneDX"``, ``specVersion`` in a
- known set, and (when present) ``components`` is a list whose ``len`` is
- within ``sbom_ingest_max_components()`` (default 50,000). We deliberately do
- NOT recurse into the component elements here — a deeply-nested hostile
- document cannot drive our validation into a recursion / CPU blow-up. The
- authoritative deep parse happens later, inside the Celery worker
- (``persist_sbom_components``), off the request path.
+ - **Content-Type / filename allow-list.** Only JSON-ish / SPDX media types and
+ a ``.json`` / ``.cdx.json`` / ``.spdx`` / ``.tag`` filename are accepted (415
+ otherwise). The header is advisory; the content sniff + structure check are
+ authoritative.
+
+ - **Structural whitelist, NO deep traversal.** ``validate_uploaded_sbom``
+ detects the format (CycloneDX-JSON or SPDX JSON/Tag-Value) and checks only
+ TOP-LEVEL shape: for CycloneDX, ``bomFormat``, ``specVersion`` in a known
+ set, and a bounded ``components`` array; for SPDX-JSON, a bounded
+ ``packages`` array. A byte-level depth pre-check runs BEFORE any
+ ``json.loads`` so a deeply-nested hostile document is a clean 422, never a
+ decoder ``RecursionError`` (500). We deliberately do NOT recurse into element
+ bodies here — the authoritative deep parse / SPDX→CycloneDX mapping happens
+ later, inside the Celery worker, off the request path.
CLAUDE.md compliance:
- Core rule #11: every limit is read via ``os.getenv`` at call time (through
@@ -46,6 +50,7 @@
from __future__ import annotations
import json
+import re
import uuid
from pathlib import Path
from typing import Any
@@ -63,6 +68,12 @@
)
from core.security import CurrentUser
from models import Scan
+from services.sbom_conformance import (
+ FORMAT_CYCLONEDX,
+ FORMAT_SPDX_JSON,
+ FORMAT_SPDX_TV,
+ detect_format,
+)
from services.scan_service import (
ScanEnqueueFailed,
ScanInProgressConflict,
@@ -83,6 +94,13 @@
{
"application/json",
"application/vnd.cyclonedx+json",
+ # SPDX media types (model 3 SPDX-input support). SPDX-JSON carriers use
+ # application/spdx+json or application/json; SPDX Tag-Value uploads use
+ # text/spdx (or omit the part content-type, or carry a .spdx/.tag
+ # filename — see the filename allow-list). We deliberately do NOT add the
+ # over-broad text/plain: a .txt with text/plain stays a 415.
+ "application/spdx+json",
+ "text/spdx",
"application/octet-stream",
"", # some CLIs omit the part content-type
}
@@ -224,7 +242,12 @@ def _validate_content_type(*, content_type: str | None, filename: str | None) ->
"""
normalized_ct = (content_type or "").lower().split(";", 1)[0].strip()
name = (filename or "").strip().lower()
- name_ok = name.endswith(".json") or name.endswith(".cdx.json")
+ # CycloneDX JSON (.json / .cdx.json) or SPDX (.spdx / .spdx.json / .tag /
+ # .spdx.tag). The authoritative format gate is the content sniff in
+ # validate_uploaded_sbom; this is the advisory fast-fail.
+ name_ok = name.endswith(
+ (".json", ".cdx.json", ".spdx", ".spdx.json", ".tag", ".spdx.tag")
+ )
if normalized_ct in _ALLOWED_CONTENT_TYPES:
return
if name_ok:
@@ -236,7 +259,7 @@ def _validate_content_type(*, content_type: str | None, filename: str | None) ->
)
raise SbomIngestUnsupportedType(
f"content-type {normalized_ct!r} (filename {name!r}) is not an accepted "
- "CycloneDX JSON media type"
+ "CycloneDX or SPDX media type"
)
@@ -338,6 +361,71 @@ def validate_cyclonedx_document(raw: bytes) -> dict[str, Any]:
return parsed
+def _validate_spdx_json_packages(doc: dict[str, Any]) -> None:
+    """Enforce the package ceiling on a parsed SPDX-JSON document.
+
+    Only the top-level ``packages`` value is inspected; the package entries
+    themselves are never traversed. An absent (or ``null``) ``packages`` key is
+    accepted as-is.
+
+    Raises:
+        SbomIngestInvalid: ``packages`` is present but not a list, or it holds
+            more entries than ``sbom_ingest_max_components()`` allows.
+    """
+    declared = doc.get("packages")
+    if declared is None:
+        return
+    if not isinstance(declared, list):
+        raise SbomIngestInvalid("SPDX 'packages' must be a JSON array when present")
+    # Same limit the CycloneDX path applies to ``components``.
+    ceiling = sbom_ingest_max_components()
+    count = len(declared)
+    if count > ceiling:
+        raise SbomIngestInvalid(
+            f"SBOM declares {count} packages; the maximum is {ceiling}"
+        )
+
+
+def validate_uploaded_sbom(raw: bytes) -> str:
+    """Validate an uploaded SBOM of any supported format; return the format tag.
+
+    Accepts CycloneDX-JSON and SPDX (JSON / Tag-Value). Raises
+    :class:`SbomIngestInvalid` (422) for anything else, mirroring the per-format
+    handling of :mod:`services.sbom_conformance` / :mod:`services.sbom_convert`.
+
+    Adversarial-input contract: the O(n) byte-depth pre-check runs FIRST, before
+    any ``json.loads`` — including the one inside ``detect_format`` — so a
+    maliciously deep JSON document is rejected as a clean 422 and can never drive
+    the stdlib decoder into a ``RecursionError`` (which would escape as a 500).
+    Total size is already bounded by the read cap applied upstream; deep parsing
+    of the (now format-confirmed) document still happens later in the worker, off
+    the request path.
+
+    Args:
+        raw: The uploaded document bytes, exactly as received.
+
+    Returns:
+        The format tag reported by ``detect_format`` (one of
+        ``FORMAT_CYCLONEDX``, ``FORMAT_SPDX_JSON``, ``FORMAT_SPDX_TV``).
+
+    Raises:
+        SbomIngestInvalid: the document is nested too deeply, is not a supported
+            format, or declares more components/packages than
+            ``sbom_ingest_max_components()``.
+    """
+    depth = _max_nesting_depth(raw)
+    if depth > _MAX_NESTING_DEPTH:
+        raise SbomIngestInvalid(
+            f"SBOM nesting depth {depth} exceeds the maximum {_MAX_NESTING_DEPTH}"
+        )
+
+    fmt, doc = detect_format(raw)
+    if fmt == FORMAT_CYCLONEDX:
+        validate_cyclonedx_document(raw)
+        return fmt
+    if fmt == FORMAT_SPDX_JSON:
+        # ``doc`` is the parsed SPDX-JSON object (detect_format already decoded
+        # it under the depth guard above).
+        _validate_spdx_json_packages(doc or {})
+        return fmt
+    if fmt == FORMAT_SPDX_TV:
+        # Tag-Value is line-oriented (no recursion surface), but the read cap
+        # bounds BYTES, not package COUNT — a 32 MiB file of 14-byte
+        # ``PackageName:`` lines is ~2.4M packages, far past the JSON cap. Bound
+        # the count here so every format enforces the same component ceiling
+        # (the worker would otherwise drive millions of upserts). The anchored
+        # regex is linear (security review: no ReDoS).
+        #
+        # Count directly on the raw bytes with a lazy iterator: the tag is pure
+        # ASCII, so this yields the same count as decoding first, without
+        # allocating a second copy of the upload as ``str`` or a list holding
+        # one entry per match on the request path.
+        package_count = sum(1 for _ in re.finditer(rb"(?m)^PackageName:", raw))
+        max_packages = sbom_ingest_max_components()
+        if package_count > max_packages:
+            raise SbomIngestInvalid(
+                f"SBOM declares {package_count} packages; the maximum is {max_packages}"
+            )
+        return fmt
+    raise SbomIngestInvalid(
+        "upload is not a CycloneDX or SPDX (JSON / Tag-Value) document"
+    )
+
+
# ---------------------------------------------------------------------------
# Ingest — validate + persist scan row + write file + enqueue
# ---------------------------------------------------------------------------
@@ -386,9 +474,10 @@ async def ingest_sbom(
1. ``prepare_scan_target`` — existence/team-access (404/403), project-scoped
API-key boundary (403), archived (409), per-team concurrency cap (429).
Reuses ``trigger_scan``'s exact guard sequence + exceptions.
- 2. Request validation — Content-Type/filename (415), size cap (413), JSON +
- CycloneDX structure (422). Runs AFTER the authz/state guards so a
- non-member learns nothing about a project from a malformed body.
+ 2. Request validation — Content-Type/filename (415), size cap (413),
+ CycloneDX-JSON or SPDX (JSON/Tag-Value) structure (422). Runs AFTER the
+ authz/state guards so a non-member learns nothing about a project from a
+ malformed body.
3. INSERT the scan row, flush. The partial unique index
``ix_scans_project_active`` makes this the atomic concurrency check: a
second in-flight scan for the project raises :class:`ScanInProgressConflict`
@@ -427,8 +516,11 @@ async def ingest_sbom(
# ---- 2. request validation (untrusted input) -----------------------------
_validate_content_type(content_type=upload.content_type, filename=upload.filename)
raw = await _read_bounded(upload, max_bytes=sbom_ingest_max_bytes())
- # Structural whitelist; never deep-traverses component elements.
- validate_cyclonedx_document(raw)
+ # Format-dispatched structural whitelist (CycloneDX-JSON or SPDX JSON/TV).
+ # Depth-guarded; never deep-traverses element bodies (that runs in the
+ # worker). The original bytes are stored as-is and Trivy auto-detects the
+ # format; the worker maps SPDX → CycloneDX for component persistence.
+ validate_uploaded_sbom(raw)
original_filename = _clean_meta_text(upload.filename)
normalized_release = _clean_meta_text(release)
diff --git a/apps/backend/tasks/ingest_sbom.py b/apps/backend/tasks/ingest_sbom.py
index 00af16ba..16e31331 100644
--- a/apps/backend/tasks/ingest_sbom.py
+++ b/apps/backend/tasks/ingest_sbom.py
@@ -41,7 +41,6 @@
from __future__ import annotations
-import json
import shutil
import uuid
from pathlib import Path
@@ -63,6 +62,7 @@
)
from models import Project, SbomConformance, Scan
from services import sbom_conformance
+from services.sbom_convert import UnsupportedSbomFormat, to_cyclonedx
from services.vulnerability_matching import persist_trivy_findings
from tasks._progress import (
close_log_file,
@@ -384,12 +384,19 @@ def _persist_conformance(
def _load_uploaded_sbom(scan_metadata: dict[str, Any]) -> tuple[Path, dict[str, Any]]:
- """Resolve, containment-check, and parse the uploaded CycloneDX SBOM.
-
- Returns ``(sbom_path, parsed_dict)``. Raises :class:`_IngestAborted` on a
- missing path key, a path that resolves outside ``workspace_root()``, an
- absent file, or invalid JSON. This is a minimal defensive backstop — the
- synchronous service Pass owns the authoritative CycloneDX schema validation.
+ """Resolve, containment-check, and NORMALISE the uploaded SBOM to CycloneDX.
+
+ Returns ``(sbom_path, cyclonedx_dict)``. ``sbom_path`` is the ORIGINAL
+ uploaded file (CycloneDX **or** SPDX) — Trivy reads it directly (``trivy
+ sbom`` auto-detects both formats by content), so no converted file is ever
+ written to disk. The returned dict is always CycloneDX-shaped: an SPDX
+ upload is mapped by :func:`services.sbom_convert.to_cyclonedx` so
+ ``persist_sbom_components`` consumes one shape for every input format.
+
+ Raises :class:`_IngestAborted` on a missing path key, a path that resolves
+ outside ``workspace_root()``, an absent/unreadable file, or content that is
+ neither CycloneDX-JSON nor SPDX (JSON / Tag-Value). The synchronous service
+ Pass owns the authoritative up-front validation; this is a defensive backstop.
"""
raw_path = scan_metadata.get("sbom_path")
if not raw_path or not isinstance(raw_path, str):
@@ -412,12 +419,17 @@ def _load_uploaded_sbom(scan_metadata: dict[str, Any]) -> tuple[Path, dict[str,
raise _IngestAborted(f"SBOM file not found: {candidate}")
try:
- with candidate.open("rb") as fh:
- parsed = json.loads(fh.read())
- except (OSError, ValueError) as exc:
- raise _IngestAborted(f"SBOM file is not valid JSON: {exc}") from exc
+ raw = candidate.read_bytes()
+ except OSError as exc:
+ raise _IngestAborted(f"SBOM file could not be read: {exc}") from exc
- if not isinstance(parsed, dict):
- raise _IngestAborted("SBOM document is not a JSON object")
+ # Normalise to a CycloneDX dict for component persistence. CycloneDX passes
+ # through; SPDX (JSON / Tag-Value) is mapped. RDF/XML/junk raises
+ # UnsupportedSbomFormat → terminal abort (the sync service already rejected
+ # these up front, so reaching here means a tampered/garbled stored file).
+ try:
+ cyclonedx = to_cyclonedx(raw)
+ except UnsupportedSbomFormat as exc:
+ raise _IngestAborted(f"SBOM is not CycloneDX or SPDX: {exc}") from exc
- return candidate, parsed
+ return candidate, cyclonedx
diff --git a/apps/backend/tests/integration/scan/test_ingest_sbom_pipeline.py b/apps/backend/tests/integration/scan/test_ingest_sbom_pipeline.py
index 47786b07..e0291420 100644
--- a/apps/backend/tests/integration/scan/test_ingest_sbom_pipeline.py
+++ b/apps/backend/tests/integration/scan/test_ingest_sbom_pipeline.py
@@ -124,17 +124,18 @@ def _fake_run(
def _seed_queued_sbom_scan(
- workspace: Path, *, ref: str | None = None
+ workspace: Path, *, ref: str | None = None, sbom_src: Path | None = None
) -> tuple[uuid.UUID, uuid.UUID]:
- """Seed project + queued sbom scan and write the realistic SBOM to its
- durable on-disk ingest path. Returns (scan_id, project_id)."""
+ """Seed project + queued sbom scan and write the SBOM to its durable on-disk
+ ingest path. Defaults to the realistic CycloneDX fixture; ``sbom_src`` points
+ at a different fixture (e.g. a real SPDX document). Returns (scan_id, project_id)."""
import asyncio
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from core.config import database_url
- sbom_bytes = (FIXTURES / "realistic.cdx.json").read_bytes()
+ sbom_bytes = (sbom_src or (FIXTURES / "realistic.cdx.json")).read_bytes()
async def _build() -> tuple[uuid.UUID, uuid.UUID]:
engine = create_async_engine(database_url(), pool_pre_ping=True, future=True)
@@ -426,3 +427,59 @@ def test_ingest_rerun_replaces_conformance_verdict(
assert len(rows) == 1, "re-entry must REPLACE the verdict, not duplicate it"
assert rows[0].id != first_id, "the verdict row was re-created (delete-then-insert)"
assert rows[0].result == "warn"
+
+
+# ---------------------------------------------------------------------------
+# SPDX input — a real syft SPDX-JSON document ingests (SPDX→CycloneDX mapping)
+# ---------------------------------------------------------------------------
+
+# Real syft SPDX fixtures recorded in PR1 (tests/fixtures/sbom/).
+_SBOM_FIXTURES = BACKEND_ROOT / "tests" / "fixtures" / "sbom"
+
+
+def test_ingest_spdx_json_persists_components_and_conformance(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path, sync_session: Session
+) -> None:
+    """Run the ingest task end-to-end on the recorded SPDX-JSON fixture.
+
+    Trivy is stubbed, so what is under test is the SPDX → component path and
+    the conformance tag: the scan succeeds, ScanComponent rows exist, the
+    verdict records ``source_format == "spdx-json"``, and an artifact of kind
+    ``sbom_cyclonedx`` is registered for the scan.
+    """
+    monkeypatch.setenv("WORKSPACE_HOST_PATH", str(tmp_path))
+    _stub_trivy_from_fixture(monkeypatch)
+
+    spdx_fixture = _SBOM_FIXTURES / "real_spdx.json"
+    scan_id, _ = _seed_queued_sbom_scan(tmp_path, sbom_src=spdx_fixture)
+
+    from tasks.ingest_sbom import ingest_sbom_task
+
+    outcome = ingest_sbom_task.apply(args=[str(scan_id)])
+    assert outcome.successful(), f"task failed: {outcome.traceback}"
+
+    sync_session.expire_all()
+    scan_query = select(Scan).where(Scan.id == scan_id)
+    scan_row = sync_session.execute(scan_query).scalar_one()
+    assert scan_row.status == "succeeded"
+
+    # Components were mapped out of the SPDX packages and persisted.
+    component_query = select(ScanComponent).where(ScanComponent.scan_id == scan_id)
+    persisted = list(sync_session.execute(component_query).scalars())
+    assert persisted, "SPDX packages must map to persisted ScanComponent rows"
+
+    # The conformance verdict is tagged as SPDX-JSON (scored on the original).
+    verdict_query = select(SbomConformance).where(SbomConformance.scan_id == scan_id)
+    verdict = sync_session.execute(verdict_query).scalar_one()
+    assert verdict.source_format == "spdx-json"
+
+    # The durable original SBOM is preserved (the bytes are SPDX, but the
+    # download artifact kind is the shared 'sbom_cyclonedx' label).
+    artifact_query = select(ScanArtifact).where(ScanArtifact.scan_id == scan_id)
+    kinds = set()
+    for artifact in sync_session.execute(artifact_query).scalars():
+        kinds.add(artifact.kind)
+    assert "sbom_cyclonedx" in kinds
diff --git a/apps/backend/tests/integration/test_sbom_ingest_api.py b/apps/backend/tests/integration/test_sbom_ingest_api.py
index 5134618b..98048583 100644
--- a/apps/backend/tests/integration/test_sbom_ingest_api.py
+++ b/apps/backend/tests/integration/test_sbom_ingest_api.py
@@ -795,3 +795,66 @@ async def test_get_conformance_scan_in_other_project_is_404(client) -> None:
headers=_bearer_for(user),
)
assert resp.status_code == 404, resp.text
+
+
+# ---------------------------------------------------------------------------
+# SPDX input — the endpoint now ACCEPTS SPDX (JSON / Tag-Value), not just
+# CycloneDX. enqueue is stubbed, so these assert the front-half (202 + queued
+# sbom scan row) without running the worker.
+# ---------------------------------------------------------------------------
+
+# Minimal SPDX-2.3 JSON upload: one package (lodash 4.17.19) carrying a purl
+# external reference. Serialised to bytes so it can be posted as a file part.
+_VALID_SPDX_JSON = json.dumps(
+ {
+ "spdxVersion": "SPDX-2.3",
+ "name": "doc",
+ "creationInfo": {"created": "2026-01-01T00:00:00Z", "creators": ["Tool: syft"]},
+ "packages": [
+ {
+ "SPDXID": "SPDXRef-a",
+ "name": "lodash",
+ "versionInfo": "4.17.19",
+ "externalRefs": [
+ {
+ "referenceCategory": "PACKAGE-MANAGER",
+ "referenceType": "purl",
+ "referenceLocator": "pkg:npm/lodash@4.17.19",
+ }
+ ],
+ }
+ ],
+ }
+).encode()
+
+# The same single-package document in SPDX Tag-Value form (one ``Tag: value``
+# pair per line, newline-terminated).
+_VALID_SPDX_TV = (
+ b"SPDXVersion: SPDX-2.3\n"
+ b"Created: 2026-01-01T00:00:00Z\n"
+ b"Creator: Tool: syft\n"
+ b"PackageName: lodash\n"
+ b"SPDXID: SPDXRef-a\n"
+ b"PackageVersion: 4.17.19\n"
+ b"ExternalRef: PACKAGE-MANAGER purl pkg:npm/lodash@4.17.19\n"
+)
+
+
+async def test_ingest_spdx_json_returns_202(client, _workspace: Path) -> None:
+    """Posting an SPDX-JSON document returns 202 with a ``kind == "sbom"`` scan row."""
+    _team, user, project = await _seed(client, role="developer")
+    endpoint = f"/v1/projects/{project.id}/sbom-ingest"
+    auth_headers = _bearer_for(user)
+    upload = _sbom_part(_VALID_SPDX_JSON, name="bom.spdx.json", ctype="application/json")
+    resp = await client.post(endpoint, headers=auth_headers, files=upload)
+    assert resp.status_code == 202, resp.text
+    body = resp.json()
+    assert body["kind"] == "sbom"
+
+
+async def test_ingest_spdx_tag_value_returns_202(client, _workspace: Path) -> None:
+    """Posting an SPDX Tag-Value document returns 202 with a ``kind == "sbom"`` scan row."""
+    _team, user, project = await _seed(client, role="developer")
+    resp = await client.post(
+        f"/v1/projects/{project.id}/sbom-ingest",
+        headers=_bearer_for(user),
+        # Tag-Value is not JSON. application/octet-stream is itself on the
+        # content-type allow-list, so the advisory gate passes on the media type
+        # alone (the .spdx filename is not what admits it); the content sniff then
+        # confirms the SPDXVersion: line.
+        files=_sbom_part(_VALID_SPDX_TV, name="bom.spdx", ctype="application/octet-stream"),
+    )
+    assert resp.status_code == 202, resp.text
+    assert resp.json()["kind"] == "sbom"
diff --git a/apps/backend/tests/unit/services/test_sbom_ingest_validation.py b/apps/backend/tests/unit/services/test_sbom_ingest_validation.py
index 5638e349..a2669b82 100644
--- a/apps/backend/tests/unit/services/test_sbom_ingest_validation.py
+++ b/apps/backend/tests/unit/services/test_sbom_ingest_validation.py
@@ -29,6 +29,7 @@
from __future__ import annotations
import json
+from pathlib import Path
import pytest
@@ -41,8 +42,11 @@
_read_bounded,
_validate_content_type,
validate_cyclonedx_document,
+ validate_uploaded_sbom,
)
+_SBOM_FIXTURES = Path(__file__).resolve().parents[2] / "fixtures" / "sbom"
+
def _doc(spec_version: str = "1.5", **extra: object) -> bytes:
base: dict[str, object] = {"bomFormat": "CycloneDX", "specVersion": spec_version}
@@ -401,3 +405,106 @@ def test_clean_meta_text_caps_length() -> None:
cleaned = _clean_meta_text("x" * (_META_TEXT_MAX_LEN + 500))
assert cleaned is not None
assert len(cleaned) == _META_TEXT_MAX_LEN
+
+
+# ---------------------------------------------------------------------------
+# validate_uploaded_sbom — format dispatch (CycloneDX-JSON + SPDX JSON/TV)
+#
+# Pure (no DB / app). Real syft SPDX fixtures cover density; crafted inputs
+# cover the adversarial / unsupported edges. The byte-depth pre-check must fire
+# BEFORE any json.loads (incl. detect_format's) so a deep document is a clean
+# 422, never a RecursionError → 500.
+# ---------------------------------------------------------------------------
+
+
+def test_validate_accepts_cyclonedx_returns_format() -> None:
+    """A minimal CycloneDX document validates and is tagged ``"cyclonedx"``."""
+    detected = validate_uploaded_sbom(_doc())
+    assert detected == "cyclonedx"
+
+
+def test_validate_accepts_real_spdx_json() -> None:
+    """The recorded ``real_spdx.json`` fixture validates as ``"spdx-json"``."""
+    fixture = _SBOM_FIXTURES / "real_spdx.json"
+    assert validate_uploaded_sbom(fixture.read_bytes()) == "spdx-json"
+
+
+def test_validate_accepts_real_spdx_tag_value() -> None:
+    """The recorded ``real_spdx.tag`` fixture validates as ``"spdx-tv"``."""
+    fixture = _SBOM_FIXTURES / "real_spdx.tag"
+    assert validate_uploaded_sbom(fixture.read_bytes()) == "spdx-tv"
+
+
+def test_validate_accepts_minimal_spdx_json() -> None:
+    """A hand-written single-package SPDX-2.3 JSON document validates as ``"spdx-json"``."""
+    document = {
+        "spdxVersion": "SPDX-2.3",
+        "name": "doc",
+        "creationInfo": {"created": "2026-01-01T00:00:00Z", "creators": ["Tool: x"]},
+        "packages": [{"SPDXID": "SPDXRef-a", "name": "a", "versionInfo": "1"}],
+    }
+    payload = json.dumps(document).encode()
+    assert validate_uploaded_sbom(payload) == "spdx-json"
+
+
+def test_validate_accepts_minimal_spdx_tag_value() -> None:
+    """A three-line Tag-Value document validates as ``"spdx-tv"``."""
+    tag_lines = (b"SPDXVersion: SPDX-2.3", b"PackageName: a", b"PackageVersion: 1")
+    payload = b"\n".join(tag_lines) + b"\n"
+    assert validate_uploaded_sbom(payload) == "spdx-tv"
+
+
+@pytest.mark.parametrize(
+    ("raw", "why"),
+    [
+        (b"this is not an sbom", "plain-text"),
+        # XML carriers are unsupported: the payloads must actually be markup for
+        # the case labels to mean what they say.
+        (b"<rdf:RDF>spdx</rdf:RDF>", "spdx-rdf-unsupported"),
+        (
+            b'<?xml version="1.0"?><bom xmlns="http://cyclonedx.org/schema/bom/1.5"/>',
+            "cyclonedx-xml-unsupported",
+        ),
+        (b"[]", "json-array"),
+        (b'{"foo": 1}', "json-without-sbom-markers"),
+        # A pseudo-SPDX with bomFormat:"SPDX" (not real SPDX — real uses
+        # spdxVersion) detects as unknown → 422 (unchanged from #406).
+        (json.dumps({"bomFormat": "SPDX", "specVersion": "1.5"}).encode(), "fake-spdx"),
+    ],
+)
+def test_validate_rejects_unsupported_formats(raw: bytes, why: str) -> None:
+    """Every non-CycloneDX-JSON / non-SPDX payload raises ``SbomIngestInvalid``.
+
+    ``why`` only labels the case; it is not used in the assertion.
+    """
+    with pytest.raises(SbomIngestInvalid):
+        validate_uploaded_sbom(raw)
+
+
+def test_validate_spdx_json_packages_cap(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Three SPDX-JSON packages against a cap of two are rejected."""
+    monkeypatch.setenv("SBOM_INGEST_MAX_COMPONENTS", "2")
+    packages = [{"SPDXID": f"SPDXRef-{i}", "name": str(i)} for i in range(3)]
+    document = {"spdxVersion": "SPDX-2.3", "packages": packages}
+    oversized = json.dumps(document).encode()
+    with pytest.raises(SbomIngestInvalid):
+        validate_uploaded_sbom(oversized)
+
+
+def test_validate_spdx_tag_value_packages_cap(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Three Tag-Value ``PackageName:`` lines against a cap of two are rejected."""
+    # Tag-Value enforces the same component ceiling as the JSON paths (security
+    # review: a byte-capped .tag could otherwise smuggle millions of packages).
+    monkeypatch.setenv("SBOM_INGEST_MAX_COMPONENTS", "2")
+    header = b"SPDXVersion: SPDX-2.3\n"
+    package_lines = b"".join([b"PackageName: a\n"] * 3)
+    with pytest.raises(SbomIngestInvalid):
+        validate_uploaded_sbom(header + package_lines)
+
+
+def test_validate_spdx_tag_value_at_cap_passes(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Exactly as many ``PackageName:`` lines as the cap allows still validates."""
+    monkeypatch.setenv("SBOM_INGEST_MAX_COMPONENTS", "3")
+    header = b"SPDXVersion: SPDX-2.3\n"
+    package_lines = b"".join([b"PackageName: a\n"] * 3)
+    detected = validate_uploaded_sbom(header + package_lines)
+    assert detected == "spdx-tv"
+
+
+def test_validate_depth_guard_fires_before_json_parse() -> None:
+    """A 4000-level nested JSON object is rejected with ``SbomIngestInvalid``."""
+    # A pathologically deep JSON document must be rejected by the O(n) byte
+    # depth pre-check as a clean 422 — never reaching json.loads (which would
+    # raise RecursionError → unhandled 500). 4000 levels is far past the cap.
+    levels = 4000
+    opening = b'{"a":' * levels
+    closing = b"}" * levels
+    hostile = opening + b"1" + closing
+    with pytest.raises(SbomIngestInvalid):
+        validate_uploaded_sbom(hostile)
+
+
+def test_validate_content_type_accepts_spdx_extensions() -> None:
+    """SPDX filenames and SPDX media types each pass the advisory gate on their own."""
+    spdx_names = ("bom.spdx", "bom.spdx.json", "bom.tag", "sbom.spdx.tag")
+    for name in spdx_names:
+        # octet-stream is on the content-type allow-list, so this call passes
+        # before the filename is ever consulted.
+        _validate_content_type(content_type="application/octet-stream", filename=name)
+        # text/plain is deliberately NOT allow-listed, so here the filename
+        # extension is the only thing that can admit the upload — this is the
+        # call that actually exercises the SPDX filename allow-list.
+        _validate_content_type(content_type="text/plain", filename=name)
+    # SPDX media types are accepted regardless of the filename.
+    _validate_content_type(content_type="application/spdx+json", filename="x.unknown")
+    _validate_content_type(content_type="text/spdx", filename="x.unknown")
diff --git a/docs-site/docs/ci-integration/sbom-upload.md b/docs-site/docs/ci-integration/sbom-upload.md
index 332e31d5..5fe4642a 100644
--- a/docs-site/docs/ci-integration/sbom-upload.md
+++ b/docs-site/docs/ci-integration/sbom-upload.md
@@ -1,14 +1,14 @@
---
id: sbom-upload
title: Upload an SBOM
-description: Upload a CycloneDX SBOM that an external tool already produced — TRUSCA queues a scan that matches CVEs, classifies declared licenses, and runs the build gate.
+description: Upload a CycloneDX or SPDX SBOM that an external tool already produced — TRUSCA queues a scan that matches CVEs, classifies declared licenses, scores conformance, and runs the build gate.
sidebar_label: Upload an SBOM
sidebar_position: 5
---
# Upload an SBOM
-Already have a CycloneDX SBOM (software bill of materials) from another tool? Upload it to an existing TRUSCA project and TRUSCA matches its components against vulnerability data, classifies declared licenses, builds the dependency graph, and runs the build gate — without cloning or scanning your source.
+Already have an SBOM (software bill of materials) from another tool? Upload it to an existing TRUSCA project and TRUSCA matches its components against vulnerability data, classifies declared licenses, builds the dependency graph, scores the SBOM's conformance, and runs the build gate — without cloning or scanning your source. Both **CycloneDX-JSON** and **SPDX** (JSON or Tag-Value) are accepted.
The endpoint is `POST /v1/projects/{project_id}/sbom-ingest`. It is asynchronous: a successful request returns `202 Accepted` with a queued scan row, and you poll the scan to read the result.
@@ -25,7 +25,7 @@ TRUSCA is **not** Dependency-Track API compatible. The Dependency-Track flow —
- A TRUSCA API key in the `tos__` format. Create one at **/integrations → API keys → New API key**; see [API keys](../admin-guide/api-keys.md) for the scope model.
- The target **project already exists**. Copy its UUID from **Project Settings → CI/CD**. Uploading an SBOM does not create a project.
- The API key's scope covers that project — a `project`-scoped key bound to it, or a `team`-scoped key for a project the team owns.
-- A CycloneDX JSON document. Supported `specVersion` values are `1.2` through `1.6`. SPDX is not accepted on this endpoint.
+- A **CycloneDX-JSON** document (supported `specVersion` values are `1.2` through `1.6`) **or** an **SPDX** document in JSON or Tag-Value form. Trivy auto-detects the format for CVE matching; SPDX is mapped to CycloneDX for component persistence. SPDX RDF/XML is not accepted.
- No scan is currently queued or running for the project (one in-flight scan per project; a second returns `409`).
## Upload an SBOM
@@ -166,8 +166,8 @@ All errors are RFC 7807 (Problem Details for HTTP APIs) responses with the `appl
| `404` | The project does not exist, or it is hidden from the caller (existence-hide). |
| `409` | A scan is already queued or running for this project, or the project is archived. |
| `413` | The upload exceeds the size cap (`SBOM_INGEST_MAX_BYTES`). |
-| `415` | The upload is not a CycloneDX JSON media type — the content type and filename are both wrong. Use `application/json` or `application/vnd.cyclonedx+json`, with a `.json` or `.cdx.json` filename. |
-| `422` | The upload is not a valid CycloneDX document — not JSON, `bomFormat` is not `CycloneDX`, an unsupported `specVersion`, malformed `components`, or more components than `SBOM_INGEST_MAX_COMPONENTS`. |
+| `415` | The upload's media type and filename are both wrong. Use `application/json` / `application/vnd.cyclonedx+json` / `application/spdx+json` / `text/spdx`, or a `.json` / `.cdx.json` / `.spdx` / `.tag` filename. |
+| `422` | The upload is not a valid CycloneDX-JSON or SPDX (JSON/Tag-Value) document — wrong `bomFormat`, an unsupported CycloneDX `specVersion`, malformed `components`/`packages`, more components/packages than `SBOM_INGEST_MAX_COMPONENTS`, or too deeply nested. |
| `429` | Rate limited, or the team's concurrent-scan cap is reached. The response carries a `Retry-After` header. |
## Troubleshooting
@@ -186,11 +186,11 @@ A scan is already queued or running for this project — TRUSCA allows one in-fl
### `415 Unsupported Media Type`
-TRUSCA accepts only CycloneDX JSON. Confirm the file is JSON and the upload sets a JSON media type or a `.json` / `.cdx.json` filename. SPDX and CycloneDX XML are not accepted here.
+TRUSCA accepts CycloneDX-JSON and SPDX (JSON or Tag-Value). Confirm the upload sets an accepted media type (`application/json`, `application/vnd.cyclonedx+json`, `application/spdx+json`, `text/spdx`) or a recognised filename (`.json`, `.cdx.json`, `.spdx`, `.tag`). SPDX RDF/XML and CycloneDX XML are not accepted here.
### `422 Unprocessable Entity`
-The document is JSON but not an ingestible CycloneDX SBOM. Check that `bomFormat` is `CycloneDX`, that `specVersion` is between `1.2` and `1.6`, and that the component count is within `SBOM_INGEST_MAX_COMPONENTS`. The `detail` field names the specific reason.
+The upload is not an ingestible CycloneDX or SPDX SBOM. For CycloneDX, check that `bomFormat` is `CycloneDX` and `specVersion` is between `1.2` and `1.6`; for SPDX, that the document carries `spdxVersion` (JSON) or a `SPDXVersion:` line (Tag-Value). The component/package count must be within `SBOM_INGEST_MAX_COMPONENTS`, and the document must not be pathologically nested. The `detail` field names the specific reason.
### `429 Too Many Requests`
diff --git a/docs-site/i18n/ko/docusaurus-plugin-content-docs/current/ci-integration/sbom-upload.md b/docs-site/i18n/ko/docusaurus-plugin-content-docs/current/ci-integration/sbom-upload.md
index e4d8d3db..89decba6 100644
--- a/docs-site/i18n/ko/docusaurus-plugin-content-docs/current/ci-integration/sbom-upload.md
+++ b/docs-site/i18n/ko/docusaurus-plugin-content-docs/current/ci-integration/sbom-upload.md
@@ -1,14 +1,14 @@
---
id: sbom-upload
title: SBOM 업로드
-description: 외부 도구가 이미 생성한 CycloneDX SBOM을 업로드하면 TRUSCA가 CVE를 매칭하고 선언 라이선스를 분류하며 빌드 게이트를 실행하는 스캔을 큐에 넣습니다.
+description: 외부 도구가 이미 생성한 CycloneDX 또는 SPDX SBOM을 업로드하면 TRUSCA가 CVE를 매칭하고 선언 라이선스를 분류하며 적합성을 채점하고 빌드 게이트를 실행하는 스캔을 큐에 넣습니다.
sidebar_label: SBOM 업로드
sidebar_position: 5
---
# SBOM 업로드
-다른 도구로 만든 CycloneDX SBOM(software bill of materials, 소프트웨어 구성 명세)이 이미 있습니까? 기존 TRUSCA 프로젝트에 업로드하면 TRUSCA가 소스를 복제하거나 스캔하지 않고도 그 컴포넌트를 취약점 데이터와 매칭하고, 선언 라이선스를 분류하고, 의존성 그래프를 구성하고, 빌드 게이트를 실행합니다.
+다른 도구로 만든 SBOM(software bill of materials, 소프트웨어 구성 명세)이 이미 있습니까? 기존 TRUSCA 프로젝트에 업로드하면 TRUSCA가 소스를 복제하거나 스캔하지 않고도 그 컴포넌트를 취약점 데이터와 매칭하고, 선언 라이선스를 분류하고, 의존성 그래프를 구성하고, SBOM의 적합성을 채점하고, 빌드 게이트를 실행합니다. **CycloneDX-JSON**과 **SPDX**(JSON 또는 Tag-Value)를 모두 받습니다.
엔드포인트는 `POST /v1/projects/{project_id}/sbom-ingest` 입니다. 비동기로 동작합니다. 요청이 성공하면 큐에 들어간 스캔 행과 함께 `202 Accepted`를 반환하므로, 스캔을 폴링해 결과를 확인합니다.
@@ -25,7 +25,7 @@ TRUSCA는 Dependency-Track API 호환이 **아닙니다**. Dependency-Track 방
- `tos__` 형식의 TRUSCA API Key. **/integrations → API keys → New API key**에서 생성하며, 스코프 모델은 [API keys](../admin-guide/api-keys.md) 참고.
- 대상 **프로젝트가 이미 존재**. UUID는 **Project Settings → CI/CD**에서 복사합니다. SBOM 업로드는 프로젝트를 생성하지 않습니다.
- API Key의 스코프가 그 프로젝트를 커버 — 프로젝트에 바인딩된 `project` 스코프 키이거나, 팀이 소유한 프로젝트라면 `team` 스코프 키.
-- CycloneDX JSON 문서. 지원하는 `specVersion`은 `1.2`부터 `1.6`까지입니다. 이 엔드포인트는 SPDX를 받지 않습니다.
+- **CycloneDX-JSON** 문서(지원하는 `specVersion`은 `1.2`부터 `1.6`) **또는** JSON·Tag-Value 형식의 **SPDX** 문서. CVE 매칭에서는 Trivy가 포맷을 자동 감지하고, 컴포넌트 적재를 위해 SPDX는 CycloneDX로 변환됩니다. SPDX RDF/XML은 받지 않습니다.
- 프로젝트에 큐 대기 중이거나 실행 중인 스캔이 없음(프로젝트당 진행 스캔 1개, 두 번째는 `409` 반환).
## SBOM 업로드 방법
@@ -166,8 +166,8 @@ curl -H "Authorization: Bearer $TRUSTEDOSS_API_KEY" \
| `404` | 프로젝트가 없거나, 호출자에게 숨겨짐(존재 은닉). |
| `409` | 이 프로젝트에 스캔이 이미 큐 대기 중이거나 실행 중이거나, 프로젝트가 archived 상태. |
| `413` | 업로드가 용량 상한(`SBOM_INGEST_MAX_BYTES`)을 초과. |
-| `415` | 업로드가 CycloneDX JSON 미디어 타입이 아님 — 콘텐츠 타입과 파일명이 모두 잘못됨. `application/json` 또는 `application/vnd.cyclonedx+json`을 쓰고 `.json` 또는 `.cdx.json` 파일명을 쓰세요. |
-| `422` | 업로드가 유효한 CycloneDX 문서가 아님 — JSON이 아니거나, `bomFormat`이 `CycloneDX`가 아니거나, 지원하지 않는 `specVersion`이거나, `components`가 잘못됐거나, 컴포넌트가 `SBOM_INGEST_MAX_COMPONENTS`보다 많음. |
+| `415` | 업로드의 콘텐츠 타입과 파일명이 모두 잘못됨. `application/json` / `application/vnd.cyclonedx+json` / `application/spdx+json` / `text/spdx`를 쓰거나, `.json` / `.cdx.json` / `.spdx` / `.tag` 파일명을 쓰세요. |
+| `422` | 업로드가 유효한 CycloneDX-JSON 또는 SPDX(JSON/Tag-Value) 문서가 아님 — `bomFormat`이 잘못됐거나, 지원하지 않는 CycloneDX `specVersion`이거나, `components`/`packages`가 잘못됐거나, 컴포넌트·패키지 개수가 `SBOM_INGEST_MAX_COMPONENTS`보다 많거나, 지나치게 깊게 중첩됨. |
| `429` | 레이트 리밋에 걸렸거나 팀의 동시 스캔 상한에 도달. 응답에 `Retry-After` 헤더가 실립니다. |
## 문제 해결
@@ -186,11 +186,11 @@ API Key의 스코프가 프로젝트를 커버하지 않습니다. 그 프로젝
### `415 Unsupported Media Type`
-TRUSCA는 CycloneDX JSON만 받습니다. 파일이 JSON인지, 업로드가 JSON 미디어 타입이나 `.json` / `.cdx.json` 파일명을 설정하는지 확인하세요. SPDX와 CycloneDX XML은 여기서 받지 않습니다.
+TRUSCA는 CycloneDX-JSON과 SPDX(JSON 또는 Tag-Value)를 받습니다. 업로드가 허용된 미디어 타입(`application/json`·`application/vnd.cyclonedx+json`·`application/spdx+json`·`text/spdx`)이나 인식되는 파일명(`.json`·`.cdx.json`·`.spdx`·`.tag`)을 설정하는지 확인하세요. SPDX RDF/XML과 CycloneDX XML은 여기서 받지 않습니다.
### `422 Unprocessable Entity`
-문서는 JSON이지만 처리할 수 있는 CycloneDX SBOM이 아닙니다. `bomFormat`이 `CycloneDX`인지, `specVersion`이 `1.2`에서 `1.6` 사이인지, 컴포넌트 개수가 `SBOM_INGEST_MAX_COMPONENTS` 이내인지 확인하세요. `detail` 필드가 구체적 사유를 알려 줍니다.
+업로드가 처리할 수 있는 CycloneDX 또는 SPDX SBOM이 아닙니다. CycloneDX는 `bomFormat`이 `CycloneDX`이고 `specVersion`이 `1.2`에서 `1.6` 사이인지, SPDX는 `spdxVersion`(JSON)이나 `SPDXVersion:` 줄(Tag-Value)을 갖는지 확인하세요. 컴포넌트·패키지 개수는 `SBOM_INGEST_MAX_COMPONENTS` 이내여야 하고, 문서가 지나치게 깊게 중첩되면 안 됩니다. `detail` 필드가 구체적 사유를 알려 줍니다.
### `429 Too Many Requests`