diff --git a/src/skillspector/nodes/report.py b/src/skillspector/nodes/report.py index 6295e12c..a8d5992e 100644 --- a/src/skillspector/nodes/report.py +++ b/src/skillspector/nodes/report.py @@ -99,6 +99,25 @@ def _sanitize_finding(finding: Finding) -> Finding: return replace(finding, **{f: _clean_text(getattr(finding, f)) for f in _SANITIZED_FIELDS}) +def _build_sarif_properties(finding: Finding) -> dict[str, object] | None: + """Project selected finding metadata into a SARIF properties dictionary.""" + finding_dict = finding.to_dict() + metadata: dict[str, object] = { + "severity": finding_dict["severity"], + "category": finding_dict["category"], + "pattern": finding_dict["pattern"], + "confidence": finding_dict["confidence"], + "finding": finding_dict["finding"], + "explanation": finding_dict["explanation"], + "remediation": finding_dict["remediation"], + "code_snippet": finding_dict["code_snippet"], + "intent": finding_dict["intent"], + "tags": finding_dict["tags"], + } + cleaned = {key: value for key, value in metadata.items() if value is not None} + return cleaned or None + + def _severity_to_sarif_level(severity: str) -> Literal["error", "warning", "note"]: """Map Finding.severity to SARIF result level.""" return { @@ -209,14 +228,13 @@ def _build_sarif( for finding in findings: if not finding.rule_id or not finding.message: continue - start_line = finding.start_line - end_line = finding.end_line - region = SarifRegion(start_line=start_line, end_line=end_line) + region = SarifRegion(start_line=finding.start_line, end_line=finding.end_line) results.append( SarifResult( rule_id=finding.rule_id, message=SarifMessage(text=finding.message), level=_severity_to_sarif_level(finding.severity), + properties=_build_sarif_properties(finding), locations=[ SarifLocation( physical_location=SarifPhysicalLocation( @@ -241,6 +259,7 @@ def _build_sarif( rule_id=finding.rule_id, message=SarifMessage(text=finding.message), level=_severity_to_sarif_level(finding.severity), + properties=_build_sarif_properties(finding), locations=[ SarifLocation( physical_location=SarifPhysicalLocation( diff --git a/src/skillspector/sarif_models.py b/src/skillspector/sarif_models.py index c3256ad8..08a8e51a 100644 --- a/src/skillspector/sarif_models.py +++ b/src/skillspector/sarif_models.py @@ -84,6 +84,7 @@ class SarifResult(BaseModel): # When present, the result is suppressed; SARIF consumers (e.g. GitHub code # scanning) exclude suppressed results from counts but keep them for audit. suppressions: list[SarifSuppression] | None = None + properties: dict[str, object] | None = None class SarifReportingDescriptor(BaseModel): diff --git a/tests/nodes/test_report.py b/tests/nodes/test_report.py index 71445d65..3dfed86b 100644 --- a/tests/nodes/test_report.py +++ b/tests/nodes/test_report.py @@ -506,6 +506,37 @@ def test_report_output_format_sarif(self) -> None: assert "runs" in data assert data.get("$schema") or "runs" in data + def test_report_output_format_sarif_includes_finding_properties(self) -> None: + finding = _finding("E2", "HIGH", "env harvest", confidence=0.85, file="tool.py") + finding.category = "environment" + finding.pattern = r"os\.environ" + finding.finding = "TOKEN lookup" + finding.explanation = "Environment-derived secret access" + finding.remediation = "Drop env var usage" + finding.code_snippet = "os.environ['TOKEN']" + finding.intent = "secret_exfiltration" + finding.tags = ["env", "secret"] + state: SkillspectorState = { + "filtered_findings": [finding], + "component_metadata": [], + "has_executable_scripts": False, + "manifest": {}, + "skill_path": None, + "output_format": "sarif", + } + result = report(state) + result_row = result["sarif_report"]["runs"][0]["results"][0] + assert result_row["properties"]["severity"] == "HIGH" + assert result_row["properties"]["category"] == "environment" + assert result_row["properties"]["pattern"] == r"os\.environ" + assert result_row["properties"]["confidence"] == 0.85 + assert result_row["properties"]["finding"] == "TOKEN lookup" + assert result_row["properties"]["explanation"] == "Environment-derived secret access" + assert result_row["properties"]["remediation"] == "Drop env var usage" + assert result_row["properties"]["code_snippet"] == "os.environ['TOKEN']" + assert result_row["properties"]["intent"] == "secret_exfiltration" + assert result_row["properties"]["tags"] == ["env", "secret"] + def test_report_default_output_format_is_sarif(self) -> None: """When output_format is missing, report uses sarif.""" state: SkillspectorState = { @@ -552,8 +583,17 @@ def test_report_dedup_affects_score_only_not_report_output(self) -> None: def test_report_baseline_suppresses_finding_and_lowers_score() -> None: """A baseline-suppressed CRITICAL finding does not count toward the risk score.""" baseline = Baseline(rules=[SuppressionRule(rule_id="P5", reason="false positive")]) + suppressed_finding = _finding("P5", "CRITICAL", confidence=1.0) + suppressed_finding.category = "critical_path" + suppressed_finding.pattern = r"exec\(" + suppressed_finding.finding = "exec call" + suppressed_finding.explanation = "Dynamic execution remains reachable" + suppressed_finding.remediation = "Drop suspicious logic" + suppressed_finding.code_snippet = "exec(payload)" + suppressed_finding.intent = "command_execution" + suppressed_finding.tags = ["critical", "injection"] state: SkillspectorState = { - "filtered_findings": [_finding("P5", "CRITICAL")], + "filtered_findings": [suppressed_finding], "component_metadata": [], "has_executable_scripts": False, "manifest": {}, @@ -569,7 +609,19 @@ def test_report_baseline_suppresses_finding_and_lowers_score() -> None: # (audit trail) so consumers exclude them from counts. sarif_results = result["sarif_report"]["runs"][0]["results"] assert len(sarif_results) == 1 - assert sarif_results[0]["suppressions"][0]["kind"] == "external" + suppressed_result = sarif_results[0] + assert suppressed_result["suppressions"][0]["kind"] == "external" + assert suppressed_result["suppressions"][0]["justification"] == "false positive" + assert suppressed_result["properties"]["severity"] == "CRITICAL" + assert suppressed_result["properties"]["category"] == "critical_path" + assert suppressed_result["properties"]["pattern"] == r"exec\(" + assert suppressed_result["properties"]["confidence"] == 1.0 + assert suppressed_result["properties"]["finding"] == "exec call" + assert suppressed_result["properties"]["explanation"] == "Dynamic execution remains reachable" + assert suppressed_result["properties"]["remediation"] == "Drop suspicious logic" + assert suppressed_result["properties"]["code_snippet"] == "exec(payload)" + assert suppressed_result["properties"]["intent"] == "command_execution" + assert suppressed_result["properties"]["tags"] == ["critical", "injection"] assert len(result["suppressed_findings"]) == 1 @@ -693,3 +745,24 @@ def test_report_doc_findings_no_multiplier() -> None: # Without the multiplier: 2 HIGH = 50, not 65 assert result["risk_score"] == 50 assert result["risk_severity"] == "MEDIUM" + + +def test_report_sarif_preserves_high_vs_critical_severity() -> None: + """HIGH and CRITICAL both map to SARIF error, but properties keep the exact severity.""" + state: SkillspectorState = { + "filtered_findings": [ + _finding("R1", "HIGH", message="high finding", file="high.py"), + _finding("R2", "CRITICAL", message="critical finding", file="critical.py"), + ], + "component_metadata": [], + "has_executable_scripts": False, + "manifest": {}, + "skill_path": None, + "output_format": "sarif", + } + results = report(state)["sarif_report"]["runs"][0]["results"] + by_rule = {item["ruleId"]: item for item in results} + assert by_rule["R1"]["level"] == "error" + assert by_rule["R2"]["level"] == "error" + assert by_rule["R1"]["properties"]["severity"] == "HIGH" + assert by_rule["R2"]["properties"]["severity"] == "CRITICAL" diff --git a/tests/nodes/test_sarif_rules_and_empty_findings.py b/tests/nodes/test_sarif_rules_and_empty_findings.py index d4f9f945..02a47587 100644 --- a/tests/nodes/test_sarif_rules_and_empty_findings.py +++ b/tests/nodes/test_sarif_rules_and_empty_findings.py @@ -19,6 +19,7 @@ from skillspector.models import Finding from skillspector.nodes.report import _build_sarif +from skillspector.suppression import SuppressedFinding def _make_finding(rule_id: str = "PE3", message: str = "Credential Access", **kwargs) -> Finding: @@ -155,3 +156,62 @@ def test_sarif_schema_present(self) -> None: sarif = _build_sarif(findings) assert "$schema" in sarif assert sarif["version"] == "2.1.0" + + +class TestSarifResultProperties: + """SARIF results should preserve selected finding metadata in properties.""" + + def test_active_finding_metadata_in_properties(self) -> None: + finding = _make_finding( + category="network_security", + pattern=r"socket\.connect", + confidence=0.77, + finding="network connect", + explanation="Outbound network path remains open", + remediation="Sanitize network credentials", + code_snippet="payload", + intent="exfiltration", + tags=["llm-unconfirmed", "network"], + end_line=10, + ) + sarif = _build_sarif([finding]) + result = sarif["runs"][0]["results"][0] + assert result["properties"]["severity"] == "HIGH" + assert result["properties"]["category"] == "network_security" + assert result["properties"]["pattern"] == r"socket\.connect" + assert result["properties"]["confidence"] == 0.77 + assert result["properties"]["finding"] == "network connect" + assert result["properties"]["explanation"] == "Outbound network path remains open" + assert result["properties"]["remediation"] == "Sanitize network credentials" + assert result["properties"]["code_snippet"] == "payload" + assert result["properties"]["intent"] == "exfiltration" + assert result["properties"]["tags"] == ["llm-unconfirmed", "network"] + region = result["locations"][0]["physicalLocation"]["region"] + assert region["endLine"] == 10 + + def test_suppressed_finding_keeps_properties_and_suppression_marker(self) -> None: + finding = _make_finding( + rule_id="P5", + message="Credential leak", + category="authn_security", + pattern=r"api[_-]?key", + confidence=1.0, + finding="credential leak", + explanation="Credential material is exposed in output", + remediation="Rotate keys", + code_snippet="secret", + intent="exposed_secret", + tags=["critical", "auth"], + end_line=20, + ) + sarif = _build_sarif([], [SuppressedFinding(finding=finding, reason="false positive")]) + result = sarif["runs"][0]["results"][0] + assert result["suppressions"][0]["kind"] == "external" + assert result["suppressions"][0]["justification"] == "false positive" + assert result["properties"]["severity"] == "HIGH" + assert result["properties"]["category"] == "authn_security" + assert result["properties"]["pattern"] == r"api[_-]?key" + assert result["properties"]["confidence"] == 1.0 + assert result["properties"]["finding"] == "credential leak" + assert result["properties"]["explanation"] == "Credential material is exposed in output" + assert result["properties"]["intent"] == "exposed_secret"