Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions src/skillspector/nodes/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,25 @@ def _sanitize_finding(finding: Finding) -> Finding:
return replace(finding, **{f: _clean_text(getattr(finding, f)) for f in _SANITIZED_FIELDS})


def _build_sarif_properties(finding: Finding) -> dict[str, object] | None:
"""Project selected finding metadata into a SARIF properties dictionary."""
finding_dict = finding.to_dict()
metadata: dict[str, object] = {
Comment thread
rodboev marked this conversation as resolved.
"severity": finding_dict["severity"],
"category": finding_dict["category"],
"pattern": finding_dict["pattern"],
"confidence": finding_dict["confidence"],
"finding": finding_dict["finding"],
"explanation": finding_dict["explanation"],
"remediation": finding_dict["remediation"],
"code_snippet": finding_dict["code_snippet"],
"intent": finding_dict["intent"],
"tags": finding_dict["tags"],
}
cleaned = {key: value for key, value in metadata.items() if value is not None}
return cleaned or None


def _severity_to_sarif_level(severity: str) -> Literal["error", "warning", "note"]:
"""Map Finding.severity to SARIF result level."""
return {
Expand Down Expand Up @@ -209,14 +228,13 @@ def _build_sarif(
for finding in findings:
if not finding.rule_id or not finding.message:
continue
start_line = finding.start_line
end_line = finding.end_line
region = SarifRegion(start_line=start_line, end_line=end_line)
region = SarifRegion(start_line=finding.start_line, end_line=finding.end_line)
results.append(
SarifResult(
rule_id=finding.rule_id,
message=SarifMessage(text=finding.message),
level=_severity_to_sarif_level(finding.severity),
properties=_build_sarif_properties(finding),
locations=[
SarifLocation(
physical_location=SarifPhysicalLocation(
Expand All @@ -241,6 +259,7 @@ def _build_sarif(
rule_id=finding.rule_id,
message=SarifMessage(text=finding.message),
level=_severity_to_sarif_level(finding.severity),
properties=_build_sarif_properties(finding),
locations=[
SarifLocation(
physical_location=SarifPhysicalLocation(
Expand Down
1 change: 1 addition & 0 deletions src/skillspector/sarif_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ class SarifResult(BaseModel):
# When present, the result is suppressed; SARIF consumers (e.g. GitHub code
# scanning) exclude suppressed results from counts but keep them for audit.
suppressions: list[SarifSuppression] | None = None
properties: dict[str, object] | None = None


class SarifReportingDescriptor(BaseModel):
Expand Down
77 changes: 75 additions & 2 deletions tests/nodes/test_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,37 @@ def test_report_output_format_sarif(self) -> None:
assert "runs" in data
assert data.get("$schema") or "runs" in data

def test_report_output_format_sarif_includes_finding_properties(self) -> None:
finding = _finding("E2", "HIGH", "env harvest", confidence=0.85, file="tool.py")
finding.category = "environment"
finding.pattern = r"os\.environ"
finding.finding = "TOKEN lookup"
finding.explanation = "Environment-derived secret access"
finding.remediation = "Drop env var usage"
finding.code_snippet = "os.environ['TOKEN']"
finding.intent = "secret_exfiltration"
finding.tags = ["env", "secret"]
state: SkillspectorState = {
"filtered_findings": [finding],
"component_metadata": [],
"has_executable_scripts": False,
"manifest": {},
"skill_path": None,
"output_format": "sarif",
}
result = report(state)
result_row = result["sarif_report"]["runs"][0]["results"][0]
assert result_row["properties"]["severity"] == "HIGH"
assert result_row["properties"]["category"] == "environment"
assert result_row["properties"]["pattern"] == r"os\.environ"
assert result_row["properties"]["confidence"] == 0.85
assert result_row["properties"]["finding"] == "TOKEN lookup"
assert result_row["properties"]["explanation"] == "Environment-derived secret access"
assert result_row["properties"]["remediation"] == "Drop env var usage"
assert result_row["properties"]["code_snippet"] == "os.environ['TOKEN']"
assert result_row["properties"]["intent"] == "secret_exfiltration"
assert result_row["properties"]["tags"] == ["env", "secret"]

def test_report_default_output_format_is_sarif(self) -> None:
"""When output_format is missing, report uses sarif."""
state: SkillspectorState = {
Expand Down Expand Up @@ -552,8 +583,17 @@ def test_report_dedup_affects_score_only_not_report_output(self) -> None:
def test_report_baseline_suppresses_finding_and_lowers_score() -> None:
"""A baseline-suppressed CRITICAL finding does not count toward the risk score."""
baseline = Baseline(rules=[SuppressionRule(rule_id="P5", reason="false positive")])
suppressed_finding = _finding("P5", "CRITICAL", confidence=1.0)
suppressed_finding.category = "critical_path"
suppressed_finding.pattern = r"exec\("
suppressed_finding.finding = "exec call"
suppressed_finding.explanation = "Dynamic execution remains reachable"
suppressed_finding.remediation = "Drop suspicious logic"
suppressed_finding.code_snippet = "exec(payload)"
suppressed_finding.intent = "command_execution"
suppressed_finding.tags = ["critical", "injection"]
state: SkillspectorState = {
"filtered_findings": [_finding("P5", "CRITICAL")],
"filtered_findings": [suppressed_finding],
"component_metadata": [],
"has_executable_scripts": False,
"manifest": {},
Expand All @@ -569,7 +609,19 @@ def test_report_baseline_suppresses_finding_and_lowers_score() -> None:
# (audit trail) so consumers exclude them from counts.
sarif_results = result["sarif_report"]["runs"][0]["results"]
assert len(sarif_results) == 1
assert sarif_results[0]["suppressions"][0]["kind"] == "external"
suppressed_result = sarif_results[0]
assert suppressed_result["suppressions"][0]["kind"] == "external"
assert suppressed_result["suppressions"][0]["justification"] == "false positive"
assert suppressed_result["properties"]["severity"] == "CRITICAL"
assert suppressed_result["properties"]["category"] == "critical_path"
assert suppressed_result["properties"]["pattern"] == r"exec\("
assert suppressed_result["properties"]["confidence"] == 1.0
assert suppressed_result["properties"]["finding"] == "exec call"
assert suppressed_result["properties"]["explanation"] == "Dynamic execution remains reachable"
assert suppressed_result["properties"]["remediation"] == "Drop suspicious logic"
assert suppressed_result["properties"]["code_snippet"] == "exec(payload)"
assert suppressed_result["properties"]["intent"] == "command_execution"
assert suppressed_result["properties"]["tags"] == ["critical", "injection"]
assert len(result["suppressed_findings"]) == 1


Expand Down Expand Up @@ -693,3 +745,24 @@ def test_report_doc_findings_no_multiplier() -> None:
# Without the multiplier: 2 HIGH = 50, not 65
assert result["risk_score"] == 50
assert result["risk_severity"] == "MEDIUM"


def test_report_sarif_preserves_high_vs_critical_severity() -> None:
"""HIGH and CRITICAL both map to SARIF error, but properties keep the exact severity."""
state: SkillspectorState = {
"filtered_findings": [
_finding("R1", "HIGH", message="high finding", file="high.py"),
_finding("R2", "CRITICAL", message="critical finding", file="critical.py"),
],
"component_metadata": [],
"has_executable_scripts": False,
"manifest": {},
"skill_path": None,
"output_format": "sarif",
}
results = report(state)["sarif_report"]["runs"][0]["results"]
by_rule = {item["ruleId"]: item for item in results}
assert by_rule["R1"]["level"] == "error"
assert by_rule["R2"]["level"] == "error"
assert by_rule["R1"]["properties"]["severity"] == "HIGH"
assert by_rule["R2"]["properties"]["severity"] == "CRITICAL"
60 changes: 60 additions & 0 deletions tests/nodes/test_sarif_rules_and_empty_findings.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from skillspector.models import Finding
from skillspector.nodes.report import _build_sarif
from skillspector.suppression import SuppressedFinding


def _make_finding(rule_id: str = "PE3", message: str = "Credential Access", **kwargs) -> Finding:
Expand Down Expand Up @@ -155,3 +156,62 @@ def test_sarif_schema_present(self) -> None:
sarif = _build_sarif(findings)
assert "$schema" in sarif
assert sarif["version"] == "2.1.0"


class TestSarifResultProperties:
"""SARIF results should preserve selected finding metadata in properties."""

def test_active_finding_metadata_in_properties(self) -> None:
finding = _make_finding(
category="network_security",
pattern=r"socket\.connect",
confidence=0.77,
finding="network connect",
explanation="Outbound network path remains open",
remediation="Sanitize network credentials",
code_snippet="payload",
intent="exfiltration",
tags=["llm-unconfirmed", "network"],
end_line=10,
)
sarif = _build_sarif([finding])
result = sarif["runs"][0]["results"][0]
assert result["properties"]["severity"] == "HIGH"
assert result["properties"]["category"] == "network_security"
assert result["properties"]["pattern"] == r"socket\.connect"
assert result["properties"]["confidence"] == 0.77
assert result["properties"]["finding"] == "network connect"
assert result["properties"]["explanation"] == "Outbound network path remains open"
assert result["properties"]["remediation"] == "Sanitize network credentials"
assert result["properties"]["code_snippet"] == "payload"
assert result["properties"]["intent"] == "exfiltration"
assert result["properties"]["tags"] == ["llm-unconfirmed", "network"]
region = result["locations"][0]["physicalLocation"]["region"]
assert region["endLine"] == 10

def test_suppressed_finding_keeps_properties_and_suppression_marker(self) -> None:
finding = _make_finding(
rule_id="P5",
message="Credential leak",
category="authn_security",
pattern=r"api[_-]?key",
confidence=1.0,
finding="credential leak",
explanation="Credential material is exposed in output",
remediation="Rotate keys",
code_snippet="secret",
intent="exposed_secret",
tags=["critical", "auth"],
end_line=20,
)
sarif = _build_sarif([], [SuppressedFinding(finding=finding, reason="false positive")])
result = sarif["runs"][0]["results"][0]
assert result["suppressions"][0]["kind"] == "external"
assert result["suppressions"][0]["justification"] == "false positive"
assert result["properties"]["severity"] == "HIGH"
assert result["properties"]["category"] == "authn_security"
assert result["properties"]["pattern"] == r"api[_-]?key"
assert result["properties"]["confidence"] == 1.0
assert result["properties"]["finding"] == "credential leak"
assert result["properties"]["explanation"] == "Credential material is exposed in output"
assert result["properties"]["intent"] == "exposed_secret"