diff --git a/Dockerfile b/Dockerfile
index fa6e6d652..9dd3cdb83 100755
--- a/Dockerfile
+++ b/Dockerfile
@@ -42,6 +42,7 @@ RUN apt-get update && apt-get install -y \
libarchive-tools \
xz-utils \
libatomic1 \
+ libkrb5-dev \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* \
&& update-ca-certificates
diff --git a/pyproject.toml b/pyproject.toml
index 0d4d36bd5..c3041e8be 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,6 +35,8 @@ dependencies = [
"litellm<=1.75.8",
"csaf-tool==0.3.2",
"jsonschema>=4.0.0,<5.0.0",
+ "koji",
+ "unidiff>=0.7.5",
]
requires-python = ">=3.11,<3.13"
description = "NVIDIA AI Blueprint: Vulnerability Analysis for Container Security"
diff --git a/src/exploit_iq_commons/data/hardening_kb/__init__.py b/src/exploit_iq_commons/data/hardening_kb/__init__.py
new file mode 100644
index 000000000..cf7c586a5
--- /dev/null
+++ b/src/exploit_iq_commons/data/hardening_kb/__init__.py
@@ -0,0 +1,14 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/src/exploit_iq_commons/data/hardening_kb/hardening_kb.json b/src/exploit_iq_commons/data/hardening_kb/hardening_kb.json
new file mode 100644
index 000000000..92ab9a8f9
--- /dev/null
+++ b/src/exploit_iq_commons/data/hardening_kb/hardening_kb.json
@@ -0,0 +1,395 @@
+{
+ "kb_version": "1.1",
+ "last_updated": "2026-05-09",
+ "flag_type_definitions": {
+ "warning": "Compile-time warnings only. Does not add runtime protection. Use for 'Best Practices' audits, NOT for mitigation claims.",
+ "runtime": "Actual runtime protection or detection. Valid for 'Mitigated' status if maps to the specific CWE.",
+ "optimization": "Changes compiler optimization behavior but does not add runtime detection/prevention. Not valid for mitigation.",
+ "linker": "Linker-level hardening that affects runtime binary layout/behavior. Valid for mitigation if maps to CWE.",
+ "architecture": "Platform-specific build flag. Valid for mitigation ONLY if CVE advisory states the architecture is not affected."
+ },
+ "mappings": [
+ {
+ "flag": "-Wall -Wextra",
+ "flag_type": "warning",
+ "description": "Enable warnings for constructs often associated with defects.",
+ "vulnerability_category": "Defensive Coding",
+ "cwe_ids": [
+ "CWE-563",
+ "CWE-457",
+ "CWE-480"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-Wformat -Wformat=2",
+ "flag_type": "warning",
+ "description": "Enable additional format function warnings.",
+ "vulnerability_category": "Input Validation",
+ "cwe_ids": [
+ "CWE-134"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-Wconversion -Wsign-conversion",
+ "flag_type": "warning",
+ "description": "Enable implicit conversion warnings.",
+ "vulnerability_category": "Arithmetic Safety",
+ "cwe_ids": [
+ "CWE-190",
+ "CWE-681"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-Wtrampolines",
+ "flag_type": "warning",
+ "description": "Enable warnings about trampolines that require executable stacks.",
+ "vulnerability_category": "Control Flow Integrity",
+ "cwe_ids": [
+ "CWE-693"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-Wimplicit-fallthrough",
+ "flag_type": "warning",
+ "description": "Warn when a switch case falls through.",
+ "vulnerability_category": "Defensive Coding",
+ "cwe_ids": [
+ "CWE-484"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-Wbidi-chars=any",
+ "flag_type": "warning",
+ "description": "Enable warnings for possibly misleading Unicode bidirectional control characters.",
+ "vulnerability_category": "Code Integrity",
+ "cwe_ids": [
+ "CWE-1301"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-Werror",
+ "flag_type": "warning",
+ "description": "Treat all or selected compiler warnings as errors.",
+ "vulnerability_category": "Policy Enforcement",
+ "cwe_ids": [
+ "N/A"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-Werror=format-security",
+ "flag_type": "warning",
+ "description": "Treat format strings that are not string literals and used without arguments as errors.",
+ "vulnerability_category": "Input Validation",
+ "cwe_ids": [
+ "CWE-134"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-Werror=implicit -Werror=incompatible-pointer-types -Werror=int-conversion",
+ "flag_type": "warning",
+ "description": "Treat obsolete C constructs as errors.",
+ "vulnerability_category": "Type Safety",
+ "cwe_ids": [
+ "CWE-704",
+ "CWE-843"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-D_FORTIFY_SOURCE=3",
+ "flag_type": "runtime",
+ "description": "Fortify sources with compile- and run-time checks for unsafe libc usage and buffer overflows.",
+ "vulnerability_category": "Memory Safety",
+ "cwe_ids": [
+ "CWE-119",
+ "CWE-120",
+ "CWE-121",
+ "CWE-122"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-D_FORTIFY_SOURCE=2",
+ "flag_type": "runtime",
+ "description": "Fortify sources with compile- and run-time checks for unsafe libc usage and buffer overflows (legacy level).",
+ "vulnerability_category": "Memory Safety",
+ "cwe_ids": [
+ "CWE-119",
+ "CWE-120",
+ "CWE-121",
+ "CWE-122"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-D_GLIBCXX_ASSERTIONS",
+ "flag_type": "runtime",
+ "description": "Precondition checks for C++ standard library calls.",
+ "vulnerability_category": "Memory Safety",
+ "cwe_ids": [
+ "CWE-119",
+ "CWE-125",
+ "CWE-787"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-fstrict-flex-arrays=3",
+ "flag_type": "runtime",
+ "description": "Consider a trailing array in a struct as a flexible array if declared as [].",
+ "vulnerability_category": "Memory Safety",
+ "cwe_ids": [
+ "CWE-119",
+ "CWE-125",
+ "CWE-787"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-fstack-clash-protection",
+ "flag_type": "runtime",
+ "description": "Enable run-time checks for variable-size stack allocation validity.",
+ "vulnerability_category": "Memory Safety",
+ "cwe_ids": [
+ "CWE-785"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-fstack-protector-strong",
+ "flag_type": "runtime",
+ "description": "Enable run-time checks for stack-based buffer overflows.",
+ "vulnerability_category": "Memory Safety",
+ "cwe_ids": [
+ "CWE-121"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-fcf-protection=full",
+ "flag_type": "runtime",
+ "description": "Enable control-flow protection against return-oriented programming (ROP) and jump-oriented programming (JOP) attacks on x86_64.",
+ "vulnerability_category": "Control Flow Integrity",
+ "cwe_ids": [
+ "CWE-693"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-mbranch-protection=standard",
+ "flag_type": "runtime",
+ "description": "Enable branch protection against ROP and JOP attacks on AArch64.",
+ "vulnerability_category": "Control Flow Integrity",
+ "cwe_ids": [
+ "CWE-693"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-ftrapv",
+ "flag_type": "runtime",
+ "description": "Generate traps for signed arithmetic overflow on addition, subtraction, multiplication.",
+ "vulnerability_category": "Arithmetic Safety",
+ "cwe_ids": [
+ "CWE-190",
+ "CWE-191"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-fsanitize=signed-integer-overflow",
+ "flag_type": "runtime",
+ "description": "Enable undefined behavior sanitizer for signed integer overflow detection.",
+ "vulnerability_category": "Arithmetic Safety",
+ "cwe_ids": [
+ "CWE-190",
+ "CWE-191"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-fsanitize=unsigned-integer-overflow",
+ "flag_type": "runtime",
+ "description": "Enable undefined behavior sanitizer for unsigned integer overflow detection.",
+ "vulnerability_category": "Arithmetic Safety",
+ "cwe_ids": [
+ "CWE-190",
+ "CWE-191"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-Wl,-z,nodlopen",
+ "flag_type": "linker",
+ "description": "Restrict dlopen(3) calls to shared objects.",
+ "vulnerability_category": "Policy Enforcement",
+ "cwe_ids": [
+ "CWE-269"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-Wl,-z,noexecstack",
+ "flag_type": "linker",
+ "description": "Enable data execution prevention by marking stack memory as non-executable.",
+ "vulnerability_category": "Control Flow Integrity",
+ "cwe_ids": [
+ "CWE-693",
+ "CWE-94"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-Wl,-z,relro -Wl,-z,now",
+ "flag_type": "linker",
+ "description": "Mark relocation table entries resolved at load-time as read-only.",
+ "vulnerability_category": "Code Integrity",
+ "cwe_ids": [
+ "CWE-123"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-fPIE -pie",
+ "flag_type": "linker",
+ "description": "Build as position-independent executable.",
+ "vulnerability_category": "Control Flow Integrity",
+ "cwe_ids": [
+ "CWE-693"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-fPIC -shared",
+ "flag_type": "linker",
+ "description": "Build as position-independent code.",
+ "vulnerability_category": "Control Flow Integrity",
+ "cwe_ids": [
+ "CWE-693"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-fno-delete-null-pointer-checks",
+ "flag_type": "optimization",
+ "description": "Force retention of null pointer checks.",
+ "vulnerability_category": "Memory Safety",
+ "cwe_ids": [
+ "CWE-476"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-fno-strict-overflow",
+ "flag_type": "optimization",
+ "description": "Do not assume signed integer overflow is undefined behavior. Prevents aggressive optimizations but does NOT add runtime detection.",
+ "vulnerability_category": "Arithmetic Safety",
+ "cwe_ids": [
+ "CWE-190"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-fno-strict-aliasing",
+ "flag_type": "optimization",
+ "description": "Do not assume strict aliasing.",
+ "vulnerability_category": "Memory Safety",
+ "cwe_ids": [
+ "CWE-416"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-ftrivial-auto-var-init",
+ "flag_type": "runtime",
+ "description": "Initialize automatic variables that lack explicit initializers.",
+ "vulnerability_category": "Information Leakage",
+ "cwe_ids": [
+ "CWE-457"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-fexceptions",
+ "flag_type": "runtime",
+ "description": "Enable exception propagation to harden multi-threaded C code.",
+ "vulnerability_category": "Error Handling",
+ "cwe_ids": [
+ "CWE-391"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-fhardened",
+ "flag_type": "runtime",
+ "description": "Enable pre-determined set of hardening options in GCC.",
+ "vulnerability_category": "Full Hardening",
+ "cwe_ids": [
+ "Multi"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-Wl,--as-needed -Wl,--no-copy-dt-needed-entries",
+ "flag_type": "linker",
+ "description": "Allow linker to omit libraries specified on the command line to link against if they are not used.",
+ "vulnerability_category": "Supply Chain Safety",
+ "cwe_ids": [
+ "N/A"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-fzero-init-padding-bits=all",
+ "flag_type": "runtime",
+ "description": "Guarantee zero initialization of padding bits in all automatic variable initializers.",
+ "vulnerability_category": "Information Leakage",
+ "cwe_ids": [
+ "CWE-200"
+ ],
+ "requires": {}
+ },
+ {
+ "flag": "-m64",
+ "flag_type": "architecture",
+ "description": "Compile for 64-bit x86_64 architecture. Many integer overflow vulnerabilities only affect 32-bit systems.",
+ "vulnerability_category": "Architecture",
+ "cwe_ids": [
+ "CWE-190",
+ "CWE-680",
+ "CWE-681"
+ ],
+ "requires": {
+ "advisory_states": "Mitigation valid ONLY if CVE advisory explicitly states 64-bit systems are not affected."
+ }
+ },
+ {
+ "flag": "-m32",
+ "flag_type": "architecture",
+ "description": "Compile for 32-bit i686 architecture.",
+ "vulnerability_category": "Architecture",
+ "cwe_ids": [],
+ "requires": {
+ "advisory_states": "Check CVE advisory for 32-bit specific vulnerabilities."
+ }
+ },
+ {
+ "flag": "-march=",
+ "flag_type": "architecture",
+ "description": "Target specific CPU architecture. May affect vulnerability applicability.",
+ "vulnerability_category": "Architecture",
+ "cwe_ids": [],
+ "requires": {
+ "advisory_states": "Check CVE advisory for architecture-specific conditions."
+ }
+ }
+ ]
+}
diff --git a/src/exploit_iq_commons/data_models/checker_status.py b/src/exploit_iq_commons/data_models/checker_status.py
new file mode 100644
index 000000000..40ac620ae
--- /dev/null
+++ b/src/exploit_iq_commons/data_models/checker_status.py
@@ -0,0 +1,259 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from enum import Enum
+from enum import IntEnum
+from pathlib import Path
+from typing import Any, Literal
+
+from pydantic import BaseModel, Field
+
+
+class PackageCheckerStatus(IntEnum):
+ """Per-CVE status codes produced by the PackageIdentify phase."""
+ OK = 0
+ ERROR_PKG_IDENT_NO_INTEL = 1
+ PKG_IDENT_NOT_VUL = 2
+ ERROR_FAILED_TO_DOWNLOAD_SRPM = 3
+ PKG_IDENT_CVE_MISMATCH = 4
+ PKG_INTEL_LOW_SCORE = 5
+
+
+PACKAGE_CHECKER_STATUS_DESCRIPTIONS: dict[PackageCheckerStatus, str] = {
+ PackageCheckerStatus.OK:
+ "Package identified and in affected range -- continue investigation",
+ PackageCheckerStatus.ERROR_PKG_IDENT_NO_INTEL:
+ "No Intel found for the package",
+ PackageCheckerStatus.PKG_IDENT_NOT_VUL:
+ "Identification state concluded from intel that target package is not vulnerable",
+ PackageCheckerStatus.ERROR_FAILED_TO_DOWNLOAD_SRPM:
+ "Failed to download the patched SRPM",
+ PackageCheckerStatus.PKG_IDENT_CVE_MISMATCH:
+ "CVE does not apply to target package - RHSA does not list this package",
+ PackageCheckerStatus.PKG_INTEL_LOW_SCORE:
+ "Intel quality score below threshold - insufficient information for reliable analysis",
+}
+
+CHECKER_FAILURE_ERROR_TYPES: dict[PackageCheckerStatus, str] = {
+ PackageCheckerStatus.ERROR_PKG_IDENT_NO_INTEL: "no-intel",
+ PackageCheckerStatus.ERROR_FAILED_TO_DOWNLOAD_SRPM: "srpm-download-failed",
+ PackageCheckerStatus.PKG_IDENT_CVE_MISMATCH: "invalid-input",
+}
+
+
+class EnumIdentifyResult(str, Enum):
+ """Result of the PackageIdentify phase for a single CVE."""
+ YES = "yes"
+ NO = "no"
+ UNKNOWN = "unknown"
+
+
+class PackageIdentifyResult(BaseModel):
+ """Result of the PackageIdentify phase for a single CVE."""
+ affected_rpm_list: list[str] = []
+ fixed_rpm_list: list[str] = []
+
+ is_target_package_affected: EnumIdentifyResult = EnumIdentifyResult.UNKNOWN
+ is_target_package_fixed: EnumIdentifyResult = EnumIdentifyResult.UNKNOWN
+
+
+class AcquiredArtifacts(BaseModel):
+ """Resolved file locations populated by source_acquisition, consumed by downstream checker nodes."""
+ srpm_path: Path | None = None
+ source_dir: Path | None = None
+ build_log_path: Path | None = None
+ binary_rpm_path: Path | None = None
+ patch_source_dir: Path | None = None
+ patch_diff_path: Path | None = None
+ source_url: str | None = None
+
+
+class VulnerabilityIntel(BaseModel):
+ """Structured intelligence extracted from CVE advisories and patches.
+
+ Used to provide grep-ready patterns and context for L1 agent source searches.
+ """
+
+ affected_files: list[str] = Field(
+ default_factory=list,
+ description="Source file paths likely to contain vulnerable code"
+ )
+ vulnerable_functions: list[str] = Field(
+ default_factory=list,
+ description="Function names that contain or handle the vulnerability"
+ )
+ vulnerable_variables: list[str] = Field(
+ default_factory=list,
+ description="Variable names involved in the vulnerability"
+ )
+ vulnerable_patterns: list[str] = Field(
+ default_factory=list,
+ description="Code patterns/snippets indicating vulnerable code (from - lines)"
+ )
+ fix_patterns: list[str] = Field(
+ default_factory=list,
+ description="Code patterns/snippets indicating fixed code (from + lines)"
+ )
+ root_cause: str = Field(
+ default="",
+ description="Technical explanation of why the code is vulnerable"
+ )
+ vulnerability_type: str = Field(
+ default="",
+ description="Category: buffer_overflow, integer_overflow, use_after_free, null_deref, etc."
+ )
+ search_keywords: list[str] = Field(
+ default_factory=list,
+ description="Recommended grep patterns ordered by specificity (most specific first)"
+ )
+ affected_architectures: Literal["32-bit", "64-bit", "both"] = Field(
+ default="both",
+ description="Which CPU architectures are affected: 32-bit only, 64-bit only, or both (default)"
+ )
+ is_downstream_patch_available: bool = Field(
+ default=False,
+ description="True if a CVE-specific patch file exists in the downstream package"
+ )
+ is_patch_applied_in_build: bool = Field(
+ default=False,
+ description="True if the patch was confirmed applied in build logs"
+ )
+ patch_file_name: str = Field(
+ default="",
+ description="Name of the CVE-specific patch file (if available)"
+ )
+
+ def format_for_prompt(self) -> str:
+ """Format VulnerabilityIntel for injection into L1 agent runtime prompt.
+
+ Uses UPPERCASE labels so they can be referenced as anchors in thought prompts.
+ """
+ lines = []
+ if self.is_downstream_patch_available:
+ status = "APPLIED" if self.is_patch_applied_in_build else "AVAILABLE"
+ lines.append(f"DOWNSTREAM_PATCH_STATUS: {status}")
+ if self.patch_file_name:
+ lines.append(f"PATCH_FILE: {self.patch_file_name}")
+ if self.affected_files:
+ lines.append(f"AFFECTED_FILES: {', '.join(self.affected_files)}")
+ if self.vulnerable_functions:
+ lines.append(f"VULNERABLE_FUNCTIONS: {', '.join(self.vulnerable_functions)}")
+ if self.vulnerable_variables:
+ lines.append(f"VULNERABLE_VARIABLES: {', '.join(self.vulnerable_variables)}")
+ if self.vulnerable_patterns:
+ lines.append("VULNERABLE_PATTERNS:")
+ for p in self.vulnerable_patterns:
+ lines.append(f" - {p}")
+ if self.fix_patterns:
+ lines.append("FIX_PATTERNS:")
+ for p in self.fix_patterns:
+ lines.append(f" - {p}")
+ if self.search_keywords:
+ lines.append(f"SEARCH_KEYWORDS: {', '.join(self.search_keywords)}")
+ if self.root_cause:
+ lines.append(f"ROOT_CAUSE: {self.root_cause}")
+ if self.affected_architectures and self.affected_architectures != "both":
+ lines.append(f"AFFECTED_ARCHITECTURES: {self.affected_architectures}")
+ return "\n".join(lines)
+
+
+def format_vulnerability_intel_for_prompt(intel: "VulnerabilityIntel") -> str:
+ """Format VulnerabilityIntel for injection into L1 agent runtime prompt.
+
+ Uses UPPERCASE labels so they can be referenced as anchors in thought prompts.
+
+ Note: This is a standalone function for backward compatibility.
+ Prefer using intel.format_for_prompt() directly.
+ """
+ return intel.format_for_prompt()
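+
+# A minimal rendering sketch (field values are illustrative, not from a real advisory):
+#
+#     intel = VulnerabilityIntel(
+#         affected_files=["crypto/sm2/sm2_crypt.c"],
+#         vulnerable_functions=["sm2_plaintext_size"],
+#         is_downstream_patch_available=True,
+#         patch_file_name="CVE-2021-3711.patch",
+#     )
+#     print(intel.format_for_prompt())
+#     # DOWNSTREAM_PATCH_STATUS: AVAILABLE
+#     # PATCH_FILE: CVE-2021-3711.patch
+#     # AFFECTED_FILES: crypto/sm2/sm2_crypt.c
+#     # VULNERABLE_FUNCTIONS: sm2_plaintext_size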
+
+
+class L1InvestigationResult(BaseModel):
+ """Intermediate result from L1 investigation, input to L2 or report generation."""
+ downstream_report: dict[str, Any] | None = Field(
+ default=None,
+ description="Serialized DownstreamSearchReport from L1 investigation",
+ )
+ upstream_report: dict[str, Any] | None = Field(
+ default=None,
+ description="Serialized UpstreamSearchReport from L1 investigation",
+ )
+ l1_agent_answer: str | None = Field(
+ default=None,
+ description="Final answer from the L1 ReAct agent",
+ )
+ vulnerability_intel: VulnerabilityIntel | None = Field(
+ default=None,
+ description="Structured vulnerability intelligence extracted from CVE advisories and patches",
+ )
+ preliminary_verdict: Literal["vulnerable", "protected", "not_present", "uncertain"] = Field(
+ default="uncertain",
+ description="L1 verdict before L2 refinement",
+ )
+ confidence: float = Field(
+ default=0.0,
+ ge=0.0,
+ le=1.0,
+ description="Confidence in the preliminary verdict",
+ )
+
+
+class L2BuildResult(BaseModel):
+ """Result from L2 Build Agent (BuildCompilationCheck + HardeningCheck)."""
+ compilation_status: Literal["compiled", "not_compiled", "unknown"] = Field(
+ default="unknown",
+ description="Whether vulnerable code is compiled into the binary",
+ )
+ compilation_confidence: float = Field(
+ default=0.0,
+ ge=0.0,
+ le=1.0,
+ description="Confidence in compilation status",
+ )
+ compilation_evidence: str | None = Field(
+ default=None,
+ description="Evidence supporting compilation status",
+ )
+ hardening_relevant: bool | None = Field(
+ default=None,
+ description="Whether detected hardening flags are relevant to the CVE",
+ )
+ hardening_flags: list[str] = Field(
+ default_factory=list,
+ description="Hardening flags detected in build log or binary",
+ )
+ hardening_rationale: str | None = Field(
+ default=None,
+ description="Rationale for hardening relevance judgment",
+ )
+ l2_override_verdict: Literal["not_vulnerable", "vulnerable_mitigated", None] = Field(
+ default=None,
+ description="L2 verdict override (if any)",
+ )
+
+
+class PackageCheckerContext(BaseModel):
+ """Consolidates all checker-specific state on AgentMorpheusInfo."""
+ status: PackageCheckerStatus | None = None
+ source_key: str | None = None
+ artifacts: AcquiredArtifacts = Field(default_factory=AcquiredArtifacts)
+ identify_result: PackageIdentifyResult = Field(default_factory=PackageIdentifyResult)
+ l1_result: L1InvestigationResult | None = Field(
+ default=None,
+ description="Result from L1 Code Agent investigation",
+ )
+ l2_result: L2BuildResult | None = Field(
+ default=None,
+ description="Result from L2 Build Agent (optional)",
+ )
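+
+
+# A hedged construction sketch (paths and values are illustrative):
+#
+#     ctx = PackageCheckerContext(
+#         status=PackageCheckerStatus.OK,
+#         artifacts=AcquiredArtifacts(srpm_path=Path("/tmp/openssl-3.0.7-1.el9.src.rpm")),
+#     )
+#     CHECKER_FAILURE_ERROR_TYPES.get(ctx.status)  # None -- OK is not a failure status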
diff --git a/src/exploit_iq_commons/data_models/common.py b/src/exploit_iq_commons/data_models/common.py
index 077a98fa9..db848f4ad 100644
--- a/src/exploit_iq_commons/data_models/common.py
+++ b/src/exploit_iq_commons/data_models/common.py
@@ -28,6 +28,17 @@ class AnalysisType(str, Enum):
IMAGE = "image"
SOURCE = "source"
+
+class PipelineMode(str, Enum):
+ """
+ Controls which investigation path the pipeline takes after process_sbom.
+ Orthogonal to AnalysisType (input format) -- any combination is valid.
+ """
+ FULL_PIPELINE = "full_pipeline"
+ PACKAGE_CHECKER = "rpm_package_checker"
+
+
+
class HashableModel(BaseModel):
"""
Subclass of a Pydantic BaseModel that is hashable. Use in objects that need to be hashed for caching purposes.
@@ -50,7 +61,15 @@ def __ne__(self, other):
def __gt__(self, other):
return self.__hash__() > other.__hash__()
-
+class TargetPackage(HashableModel):
+ """
+ A package to investigate.
+ """
+ name: str
+ version: str | None = None
+ release: str | None = None # e.g. "1.el8_2.3" (needed for Brew NVR lookup)
+ arch: str = "x86_64" # e.g. "x86_64", "aarch64", "s390x", "noarch"
+
class TypedBaseModel(BaseModel, typing.Generic[_LT]):
"""
Subclass of Pydantic BaseModel that allows for specifying the object type. Use in Pydantic discriminated unions.
diff --git a/src/exploit_iq_commons/data_models/cve_intel.py b/src/exploit_iq_commons/data_models/cve_intel.py
index 8050ffe26..36495c30b 100644
--- a/src/exploit_iq_commons/data_models/cve_intel.py
+++ b/src/exploit_iq_commons/data_models/cve_intel.py
@@ -110,6 +110,7 @@ class Configuration(BaseModel):
cvss_vector: str | None = None
cvss_base_score: float | None = None
cvss_severity: str | None = None
+ cwe_id: str | None = None
cwe_name: str | None = None
cwe_description: str | None = None
cwe_extended_description: str | None = None
@@ -185,8 +186,8 @@ class CVSSV3(BaseModel):
class BaseMetricV3(BaseModel):
cvssV3: "CVSSV3"
- exploitabilityScore: float
- impactScore: float
+ exploitabilityScore: float | None = None
+ impactScore: float | None = None
class Impact(BaseModel):
baseMetricV3: "BaseMetricV3"
diff --git a/src/exploit_iq_commons/data_models/info.py b/src/exploit_iq_commons/data_models/info.py
index a01f1dda7..4f7bd1ef1 100644
--- a/src/exploit_iq_commons/data_models/info.py
+++ b/src/exploit_iq_commons/data_models/info.py
@@ -15,6 +15,7 @@
from pydantic import BaseModel
+from exploit_iq_commons.data_models.checker_status import PackageCheckerContext
from exploit_iq_commons.data_models.cve_intel import CveIntel
from exploit_iq_commons.data_models.dependencies import VulnerableDependencies
@@ -62,3 +63,4 @@ class SBOMInfo(BaseModel):
intel: list[CveIntel] | None = None
sbom: SBOMInfo | None = None
vulnerable_dependencies: list[VulnerableDependencies] | None = None
+ checker_context: PackageCheckerContext | None = None
diff --git a/src/exploit_iq_commons/data_models/input.py b/src/exploit_iq_commons/data_models/input.py
index 897c915d1..31db324e1 100644
--- a/src/exploit_iq_commons/data_models/input.py
+++ b/src/exploit_iq_commons/data_models/input.py
@@ -25,12 +25,14 @@
from pydantic import Field
from pydantic import Tag
from pydantic import field_validator
+from pydantic import model_validator
from exploit_iq_commons.utils.string_utils import is_valid_cve_id
from exploit_iq_commons.utils.string_utils import is_valid_ghsa_id
from exploit_iq_commons.utils.dep_tree import Ecosystem
from exploit_iq_commons.data_models.common import AnalysisType
from exploit_iq_commons.data_models.common import HashableModel
+from exploit_iq_commons.data_models.common import PipelineMode, TargetPackage
from exploit_iq_commons.data_models.common import TypedBaseModel
from exploit_iq_commons.data_models.info import AgentMorpheusInfo
from exploit_iq_commons.data_models.info import SBOMPackage
@@ -168,9 +170,23 @@ class ImageInfoInput(HashableModel):
- "source": Analysis of source code and commitId without SBOM data
"""
- source_info: list[SourceDocumentsInfo]
+ pipeline_mode: PipelineMode = PipelineMode.FULL_PIPELINE
+ """
+ Controls which investigation path the pipeline takes after process_sbom:
+ - "full_pipeline": Full transitive analysis (check_vuln_deps -> llm_engine)
+ - "package_checker": Focused package vulnerability checker (package_checker -> checker_output)
+ """
+ target_package: TargetPackage | None = None
+
+ source_info: list[SourceDocumentsInfo] = []
sbom_info: SBOMInfoInput | None = None
+ @model_validator(mode="after")
+ def validate_target_package(self) -> "ImageInfoInput":
+ if self.pipeline_mode == PipelineMode.PACKAGE_CHECKER and self.target_package is None:
+ raise ValueError("target_package is required when pipeline_mode is PACKAGE_CHECKER")
+ return self
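+
+ # Example request fragment exercising this validator (values are illustrative):
+ # {"pipeline_mode": "rpm_package_checker",
+ # "target_package": {"name": "openssl", "version": "3.0.7", "arch": "x86_64"}}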
+
@field_validator('source_info', mode='after')
@classmethod
def check_conflicting_refs(cls, source_info: list[SourceDocumentsInfo]) -> list[SourceDocumentsInfo]:
diff --git a/src/exploit_iq_commons/utils/hardening_kb.py b/src/exploit_iq_commons/utils/hardening_kb.py
new file mode 100644
index 000000000..16b609b18
--- /dev/null
+++ b/src/exploit_iq_commons/utils/hardening_kb.py
@@ -0,0 +1,188 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Hardening Knowledge Base utilities.
+
+Loads the hardening_kb.json file containing compiler/linker flags that mitigate
+specific CWE vulnerability categories. Provides lookup by CWE ID to retrieve
+relevant hardening flags and their descriptions for LLM context.
+"""
+
+from __future__ import annotations
+
+import json
+import threading
+from pathlib import Path
+
+from pydantic import BaseModel, Field
+
+from exploit_iq_commons.logging.loggers_factory import LoggingFactory
+
+logger = LoggingFactory.get_agent_logger(__name__)
+
+
+class HardeningEntry(BaseModel):
+ """A single hardening flag entry from the knowledge base."""
+
+ flag: str = Field(description="Compiler/linker flag(s) for hardening")
+ flag_type: str = Field(description="Type: runtime, linker, warning, optimization, architecture")
+ description: str = Field(description="Description of what the flag does")
+ vulnerability_category: str = Field(description="Category of vulnerability this mitigates")
+ cwe_ids: list[str] = Field(default_factory=list, description="CWE IDs this flag helps mitigate")
+
+
+# Flag types that provide actual runtime mitigation (not just warnings or optimization changes)
+MITIGATING_FLAG_TYPES = frozenset({"runtime", "linker"})
+
+
+class HardeningKB:
+ """In-memory cache for hardening flags knowledge base.
+
+ Implements singleton pattern to ensure single instance across the application.
+ Provides lookup by CWE ID to find relevant hardening flags.
+ """
+
+ _instance = None
+ _lock = threading.Lock()
+
+ def __new__(cls):
+ if cls._instance is None:
+ with cls._lock:
+ if cls._instance is None:
+ cls._instance = super().__new__(cls)
+ return cls._instance
+
+ def __init__(self, json_path: str | Path | None = None) -> None:
+ # Singleton: only the first construction runs; a json_path passed on later calls is ignored.
+ if not hasattr(self, '_initialized'):
+ base_path = Path(__file__).resolve().parents[1]
+ default_json = base_path / "data" / "hardening_kb" / "hardening_kb.json"
+ self.json_path = Path(json_path) if json_path else default_json
+
+ self._entries: list[HardeningEntry] = []
+ self._cwe_index: dict[str, list[HardeningEntry]] = {}
+ self._initialized = True
+ self._load()
+
+ @classmethod
+ def get_instance(cls) -> "HardeningKB":
+ """Get the singleton instance of HardeningKB."""
+ return cls()
+
+ def _load(self) -> None:
+ """Load the hardening KB JSON and build the CWE index."""
+ try:
+ data = json.loads(self.json_path.read_text(encoding="utf-8"))
+ except FileNotFoundError:
+ logger.warning("Hardening KB JSON not found at %s", self.json_path)
+ return
+ except json.JSONDecodeError as exc:
+ logger.error("Failed to parse hardening KB JSON: %s", exc)
+ return
+
+ mappings = data.get("mappings", [])
+ for mapping in mappings:
+ try:
+ entry = HardeningEntry(
+ flag=mapping.get("flag", "").strip(),
+ flag_type=mapping.get("flag_type", "unknown"),
+ description=mapping.get("description", ""),
+ vulnerability_category=mapping.get("vulnerability_category", ""),
+ cwe_ids=mapping.get("cwe_ids", []),
+ )
+ self._entries.append(entry)
+
+ for cwe_id in entry.cwe_ids:
+ normalized = self._normalize_cwe_id(cwe_id)
+ if normalized:
+ if normalized not in self._cwe_index:
+ self._cwe_index[normalized] = []
+ self._cwe_index[normalized].append(entry)
+
+ except Exception as exc:
+ logger.warning("Failed to parse hardening entry: %s - %s", mapping, exc)
+
+ logger.info(
+ "Loaded hardening KB: %d entries, %d unique CWE mappings",
+ len(self._entries),
+ len(self._cwe_index),
+ )
+
+ @staticmethod
+ def _normalize_cwe_id(cwe_id: str) -> str | None:
+ """Normalize CWE ID to uppercase format (e.g., 'CWE-121').
+
+ Returns None for special values like 'N/A' or 'Multi'.
+ """
+ if not cwe_id:
+ return None
+ cwe_id = cwe_id.strip().upper()
+ if cwe_id in ("N/A", "MULTI"):
+ return None
+ if not cwe_id.startswith("CWE-"):
+ cwe_id = f"CWE-{cwe_id}"
+ return cwe_id
+
+ def lookup_by_cwe(
+ self,
+ cwe_id: str | None,
+ include_non_mitigating: bool = False,
+ ) -> list[HardeningEntry]:
+ """Return hardening entries that match the given CWE ID.
+
+ By default, only returns flags that provide actual runtime mitigation
+ (flag_type: runtime, linker). Warning-only and optimization flags are
+ excluded since they don't mitigate vulnerabilities at runtime.
+
+ Args:
+ cwe_id: The CWE identifier (e.g., 'CWE-121' or '121')
+ include_non_mitigating: If True, include warning/optimization flags
+ that don't provide runtime mitigation (for auditing purposes)
+
+ Returns:
+ List of HardeningEntry objects that help mitigate this CWE
+ """
+ if not cwe_id:
+ return []
+
+ normalized = self._normalize_cwe_id(cwe_id)
+ if not normalized:
+ return []
+
+ entries = self._cwe_index.get(normalized, [])
+
+ if not include_non_mitigating:
+ entries = [e for e in entries if e.flag_type in MITIGATING_FLAG_TYPES]
+
+ logger.debug(
+ "HardeningKB lookup for %s: found %d entries (include_non_mitigating=%s)",
+ normalized,
+ len(entries),
+ include_non_mitigating,
+ )
+ return list(entries)
+
+ def get_all_entries(self) -> list[HardeningEntry]:
+ """Return all hardening entries in the knowledge base."""
+ return list(self._entries)
+
+ @property
+ def kb_version(self) -> str | None:
+ """Return the version of the loaded knowledge base."""
+ try:
+ data = json.loads(self.json_path.read_text(encoding="utf-8"))
+ return data.get("kb_version")
+ except Exception:
+ return None
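+
+
+# A minimal usage sketch (assumes the bundled hardening_kb.json loads cleanly):
+#
+#     kb = HardeningKB.get_instance()
+#     for entry in kb.lookup_by_cwe("121"):  # normalized to "CWE-121"
+#         print(entry.flag, entry.flag_type)
+#     # e.g. "-fstack-protector-strong runtime"; warning/optimization entries are
+#     # filtered out unless include_non_mitigating=True is passed.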
diff --git a/src/exploit_iq_commons/utils/source_rpm_downloader.py b/src/exploit_iq_commons/utils/source_rpm_downloader.py
index 09dc81078..d0549b9d4 100644
--- a/src/exploit_iq_commons/utils/source_rpm_downloader.py
+++ b/src/exploit_iq_commons/utils/source_rpm_downloader.py
@@ -462,7 +462,8 @@ def parse_sbom(self):
logger.info(f"Found {len(packages)} packages in SBOM, platform: {platform_version}")
return packages, platform_version
- def extract_src_rpm(self, rpm_path: Path, extract_dir: Path):
+ @staticmethod
+ def extract_src_rpm(rpm_path: Path, extract_dir: Path):
#logger.info(f" Extracting {rpm_path.name} to {extract_dir} ...")
extract_dir.mkdir(parents=True, exist_ok=True)
try:
diff --git a/src/vuln_analysis/configs/brew/internal-user-profile.yml b/src/vuln_analysis/configs/brew/internal-user-profile.yml
new file mode 100644
index 000000000..52b4b993d
--- /dev/null
+++ b/src/vuln_analysis/configs/brew/internal-user-profile.yml
@@ -0,0 +1,24 @@
+# Internal User Profile — Red Hat VPN-connected environment
+#
+# Assumptions:
+# - User is on the Red Hat VPN (can reach *.redhat.com internal hosts)
+# - Build logs are available via Brew task output
+
+profile:
+ name: redhat-internal
+
+hosts:
+ rpm:
+ brew_hub: https://brewhub.engineering.redhat.com/brewhub
+ brew_download: https://download-01.beak-001.prod.iad2.dc.redhat.com/brewroot
+ git:
+ dist_git: https://pkgs.devel.redhat.com/cgit
+
+default_arch: x86_64
+
+ssl_verify: false
+
+build_log:
+ auto_fetch: true
+
+download_binary_rpm: false
diff --git a/src/vuln_analysis/configs/config-http-openai.yml b/src/vuln_analysis/configs/config-http-openai.yml
index a67e18c1b..9681002fd 100644
--- a/src/vuln_analysis/configs/config-http-openai.yml
+++ b/src/vuln_analysis/configs/config-http-openai.yml
@@ -80,6 +80,11 @@ functions:
Code Keyword Search:
_type: lexical_code_search
top_k: 5
+ Source Grep:
+ _type: source_grep
+ base_checker_dir: .cache/am_cache/checker
+ max_results: 50
+ context_lines: 2
CVE Web Search:
_type: serp_wrapper
max_retries: 5
@@ -148,6 +153,35 @@ functions:
generate_intel_score: true
intel_low_score: 51
insist_analysis: false
+ cve_source_acquisition:
+ _type: cve_source_acquisition
+ base_git_dir: .cache/am_cache/git
+ base_pickle_dir: .cache/am_cache/pickle
+ base_rpm_dir: .cache/am_cache/rpms
+ cve_checker_segmentation:
+ _type: cve_checker_segmentation
+ base_checker_dir: .cache/am_cache/checker
+ base_code_index_dir: .cache/am_cache/code_index
+ cve_package_code_agent:
+ _type: cve_package_code_agent
+ llm_name: cve_agent_executor_llm
+ base_checker_dir: .cache/am_cache/checker
+ base_code_index_dir: .cache/am_cache/code_index
+ tool_names:
+ - Source Grep
+ - Code Keyword Search
+ cve_checker_report:
+ _type: cve_checker_report
+ llm_name: cve_agent_executor_llm
+ base_checker_dir: .cache/am_cache/checker
+ cve_build_agent:
+ _type: cve_build_agent
+ llm_name: cve_agent_executor_llm
+ base_checker_dir: .cache/am_cache/checker
+ max_iterations: 10
+ tool_names:
+ - Source Grep
+ - Code Keyword Search
health_check:
_type: health_check
@@ -239,6 +273,11 @@ workflow:
cve_summarize_name: cve_summarize
cve_justify_name: cve_justify
cve_output_config_name: cve_http_output
+ cve_source_acquisition_name: cve_source_acquisition
+ cve_checker_segmentation_name: cve_checker_segmentation
+ cve_package_code_agent_name: cve_package_code_agent
+ cve_checker_report_name: cve_checker_report
+ cve_build_agent_name: cve_build_agent
eval:
general:
diff --git a/src/vuln_analysis/configs/openapi/openapi.json b/src/vuln_analysis/configs/openapi/openapi.json
index 79feca4cc..f83b2a494 100644
--- a/src/vuln_analysis/configs/openapi/openapi.json
+++ b/src/vuln_analysis/configs/openapi/openapi.json
@@ -2190,7 +2190,7 @@
"python",
"javascript",
"java",
- "c",
+ "c"
],
"title": "Ecosystem"
},
@@ -2210,7 +2210,8 @@
"$ref": "#/components/schemas/SourceDocumentsInfo"
},
"type": "array",
- "title": "Source Info"
+ "title": "Source Info",
+ "default": []
},
"sbom_info": {
"oneOf": [
@@ -2229,9 +2230,7 @@
},
"type": "object",
"required": [
- "analysis_type",
- "source_info",
- "sbom_info"
+ "analysis_type"
],
"title": "ImageInfoInput",
"description": "Information about a container image, including the source information and sbom information."
@@ -2308,7 +2307,7 @@
"python",
"javascript",
"java",
- "c",
+ "c"
],
"title": "Ecosystem"
},
@@ -2328,7 +2327,8 @@
"$ref": "#/components/schemas/SourceDocumentsInfo"
},
"type": "array",
- "title": "Source Info"
+ "title": "Source Info",
+ "default": []
},
"sbom_info": {
"oneOf": [
@@ -2347,9 +2347,7 @@
},
"type": "object",
"required": [
- "analysis_type",
- "source_info",
- "sbom_info"
+ "analysis_type"
],
"title": "ImageInfoInput",
"description": "Information about a container image, including the source information and sbom information."
diff --git a/src/vuln_analysis/functions/build_agent_graph_defs.py b/src/vuln_analysis/functions/build_agent_graph_defs.py
new file mode 100644
index 000000000..0a41a5645
--- /dev/null
+++ b/src/vuln_analysis/functions/build_agent_graph_defs.py
@@ -0,0 +1,714 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Graph definitions for the L2 Build Agent (BuildCompilationCheck + HardeningCheck).
+
+Houses the LangGraph state schema, the BuildHarvestReport data model,
+structured-output verdict schemas, and the L2 agent prompt templates.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+from pathlib import Path
+from typing import Literal, NotRequired
+
+from langgraph.graph import MessagesState
+from pydantic import BaseModel, Field
+
+from exploit_iq_commons.utils.hardening_kb import HardeningEntry
+from vuln_analysis.functions.react_internals import CheckerThought, Observation
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Data Models
+# ---------------------------------------------------------------------------
+
+
+class BuildHarvestReport(BaseModel):
+ """Deterministic data harvested from build artifacts.
+
+ Extracted during the data_harvest_node before the ReAct loop begins.
+
+ Key vulnerability-relevant data:
+ - Feature disable flags that prevent vulnerable code from being compiled:
+ - OpenSSL style: no-sm2, no-ssl3, no-md5, no-asm
+ - Autoconf style: --disable-feature, --without-feature
+ - CMake style: -DENABLE_FEATURE=OFF
+ - Architecture flags to understand target platform
+ - Hardening flags relevant to the CVE's CWE class
+
+ Note: Compiled files are NOT pre-extracted. The LLM searches the build log
+ for affected files from l1_result.vulnerability_intel.affected_files during the ReAct loop.
+ """
+
+ disabled_features: list[str] = Field(
+ default_factory=list,
+ description="Feature-disabling flags from build log (e.g., '-DOPENSSL_NO_SM2', '-DNO_GZIP')",
+ )
+ spec_disabled_features: list[str] = Field(
+ default_factory=list,
+ description="Feature-disabling flags from spec %build section (e.g., 'no-sm2', '--disable-ssl3', '--without-openssl')",
+ )
+ expected_hardening: list[HardeningEntry] = Field(
+ default_factory=list,
+ description="Hardening flags relevant to the CVE's CWE, with descriptions for LLM context",
+ )
+ build_architecture: Literal["32-bit", "64-bit", "unknown"] = Field(
+ default="unknown",
+ description="Target architecture from -m64/-m32 flags or build target (x86_64/i686)",
+ )
+
+
+class L2CompileVerdictExtraction(BaseModel):
+ """LLM-extracted verdict from L2 agent final answer."""
+
+ compilation_status: Literal["compiled", "not_compiled", "unknown"] = Field(
+ description="Whether vulnerable code is compiled into the binary"
+ )
+ confidence: float = Field(ge=0.0, le=1.0, description="Confidence in the verdict")
+ reasoning: str = Field(description="Brief explanation of the verdict")
+
+
+class L2HardeningVerdictExtraction(BaseModel):
+ """LLM-extracted verdict from L2 Hardening investigation."""
+ hardening_status: Literal["mitigated", "not_mitigated", "not_applicable", "unknown"] = Field(
+ description="Whether hardening flags mitigate the vulnerability"
+ )
+ hardening_flags: list[str] = Field(
+ default_factory=list,
+ description="List of specific hardening flags found (e.g., -fstack-protector-strong, -D_FORTIFY_SOURCE=2)",
+ )
+ confidence: float = Field(ge=0.0, le=1.0, description="Confidence in the verdict")
+ reasoning: str = Field(description="Brief explanation of the verdict")
+
+
+# ---------------------------------------------------------------------------
+# Graph State
+# ---------------------------------------------------------------------------
+
+
+class BuildAgentState(MessagesState):
+ """LangGraph state for the L2 Build Agent."""
+
+ harvest_report: NotRequired[BuildHarvestReport | None]
+ vulnerability_intel_str: NotRequired[str | None]
+ l1_preliminary_verdict: NotRequired[str | None]
+ runtime_prompt: NotRequired[str | None]
+ thought: NotRequired[CheckerThought | None]
+ observation: NotRequired[Observation | None]
+ step: NotRequired[int]
+ max_steps: NotRequired[int]
+ L2CompileVerdict: NotRequired[L2CompileVerdictExtraction | None]
+ L2HardeningVerdict: NotRequired[L2HardeningVerdictExtraction | None]
+
+
+# ---------------------------------------------------------------------------
+# Spec File Parsing Helpers
+# ---------------------------------------------------------------------------
+
+
+def _extract_spec_build_section(spec_path: Path) -> str:
+ """Extract the %build section from an RPM spec file.
+
+ The %build section contains configure/Configure commands with feature flags
+ that determine what code is compiled.
+
+ Args:
+ spec_path: Path to the RPM spec file
+
+ Returns:
+ The raw %build section content, or empty string if not found
+ """
+ try:
+ content = spec_path.read_text(encoding="utf-8", errors="replace")
+ except OSError as e:
+ logger.warning("Failed to read spec file %s: %s", spec_path, e)
+ return ""
+
+ # Find %build section (ends at next %section like %install, %check, or EOF)
+ match = re.search(
+ r"^%build\s*\n(.*?)(?=^%\w+|\Z)",
+ content,
+ re.MULTILINE | re.DOTALL,
+ )
+ return match.group(1).strip() if match else ""
+
+
+def _extract_spec_disabled_features(build_section: str) -> list[str]:
+ """Extract feature-disable flags from spec %build section.
+
+ Recognizes patterns from common build systems:
+ - OpenSSL style: no-sm2, no-ssl3, no-asm
+ - Autoconf style: --disable-feature, --without-feature
+ - CMake style: -DENABLE_FEATURE=OFF
+
+ Args:
+ build_section: The raw %build section content
+
+ Returns:
+ Sorted list of disabled feature names (without prefix)
+ """
+ disabled: set[str] = set()
+
+ # OpenSSL style: no-feature (e.g., no-sm2, no-ssl3, no-asm)
+ disabled.update(re.findall(r"\bno-(\w+)", build_section))
+
+ # Autoconf style: --disable-feature (e.g., --disable-static)
+ disabled.update(re.findall(r"--disable-(\w+)", build_section))
+
+ # Autoconf style: --without-feature (e.g., --without-openssl)
+ disabled.update(re.findall(r"--without-(\w+)", build_section))
+
+ # CMake style: -DENABLE_FEATURE=OFF or =0 or =FALSE
+ disabled.update(
+ re.findall(r"-DENABLE_(\w+)=(?:OFF|0|FALSE)", build_section, re.IGNORECASE)
+ )
+
+ return sorted(disabled)
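+
+# A worked sketch on a hypothetical configure line:
+#
+#     section = "./Configure no-sm2 no-ssl3 --disable-static --without-zlib -DENABLE_LTO=OFF"
+#     _extract_spec_disabled_features(section)
+#     # -> ['LTO', 'sm2', 'ssl3', 'static', 'zlib']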
+
+
+# ---------------------------------------------------------------------------
+# Data Harvesting Functions
+# ---------------------------------------------------------------------------
+
+
+async def harvest_build_data(
+ build_log_path: Path | None,
+ spec_path: Path | None,
+ cwe_id: str | None = None,
+) -> BuildHarvestReport:
+ """Extract structured data from build log and spec file.
+
+ Parses:
+ - Feature-disabling -D defines (e.g., -DOPENSSL_NO_SM2, -DNO_GZIP)
+
+ Args:
+ build_log_path: Path to the build log file
+ spec_path: Path to the RPM spec file
+ cwe_id: CWE identifier to look up expected hardening flags (e.g., 'CWE-121')
+
+ Returns:
+ BuildHarvestReport with harvested data and expected hardening flags
+ """
+ from exploit_iq_commons.utils.hardening_kb import HardeningKB
+ from vuln_analysis.tools.source_inspector import SourceInspector
+
+ # Handle case where build_log_path is a directory instead of a file
+ if build_log_path and build_log_path.is_dir():
+ log_files = list(build_log_path.glob("*-build.log")) or list(build_log_path.glob("*.log"))
+ if log_files:
+ build_log_path = log_files[0]
+ logger.info("harvest_build_data: resolved build log directory to file: %s", build_log_path)
+ else:
+ logger.warning("harvest_build_data: build_log_path is a directory but no .log files found")
+ build_log_path = None
+
+ # Lookup expected hardening flags from KB based on CWE
+ expected_hardening = []
+ if cwe_id:
+ kb = HardeningKB.get_instance()
+ expected_hardening = kb.lookup_by_cwe(cwe_id)
+ logger.info(
+ "harvest_build_data: CWE %s maps to %d hardening flags",
+ cwe_id,
+ len(expected_hardening),
+ )
+
+ # Extract feature-disabling defines from build log
+ disabled_features: list[str] = []
+ if build_log_path:
+ inspector = SourceInspector(build_log_path.parent)
+
+ # Grep for lines containing -D defines
+ matches = inspector.grep_content(r"-D\w+", file_path=build_log_path)
+
+ # Extract unique defines from matched lines
+ all_defines: set[str] = set()
+ for match in matches:
+ defines = re.findall(r"-D(\w+)", match.line_content)
+ all_defines.update(defines)
+
+ # Filter for feature-disabling patterns:
+ # - NO_* prefix (e.g., NO_GZIP)
+ # - DISABLE_* prefix (e.g., DISABLE_SSL)
+ # - WITHOUT_* prefix (e.g., WITHOUT_FEATURE)
+ # - *_NO_* infix (e.g., OPENSSL_NO_SM2)
+ # - *_DISABLE_* infix
+ # - *_DISABLED suffix
+ disable_pattern = re.compile(
+ r"^(NO_|DISABLE_|WITHOUT_)|(_NO_|_DISABLE_)|(_DISABLED$)"
+ )
+ disabled_features = sorted(
+ d for d in all_defines if disable_pattern.search(d)
+ )
+
+ if disabled_features:
+ logger.info(
+ "harvest_build_data: found %d disabled features in build log",
+ len(disabled_features),
+ )
+
+ # Detect build architecture from build log
+ build_architecture: Literal["32-bit", "64-bit", "unknown"] = "unknown"
+ if build_log_path:
+ try:
+ build_log_content = build_log_path.read_text(encoding="utf-8", errors="replace")
+
+ # Check for explicit -m64 or -m32 flags (most reliable)
+ if re.search(r"\s-m64\b", build_log_content):
+ build_architecture = "64-bit"
+ elif re.search(r"\s-m32\b", build_log_content):
+ build_architecture = "32-bit"
+ # Check for build target patterns (e.g., "Building for target x86_64")
+ elif re.search(r"target\s+x86_64|x86_64-\w+-linux", build_log_content, re.IGNORECASE):
+ build_architecture = "64-bit"
+ elif re.search(r"target\s+i[3-6]86|i[3-6]86-\w+-linux", build_log_content, re.IGNORECASE):
+ build_architecture = "32-bit"
+
+ if build_architecture != "unknown":
+ logger.info(
+ "harvest_build_data: detected build architecture: %s",
+ build_architecture,
+ )
+ except OSError as e:
+ logger.warning("harvest_build_data: failed to read build log for arch detection: %s", e)
+
+ # Extract %build section and features from spec file
+ spec_build_section = ""
+ spec_disabled_features: list[str] = []
+ if spec_path and spec_path.exists():
+ spec_build_section = _extract_spec_build_section(spec_path)
+ spec_disabled_features = _extract_spec_disabled_features(spec_build_section)
+
+ if spec_disabled_features:
+ logger.info(
+ "harvest_build_data: found %d disabled features in spec",
+ len(spec_disabled_features),
+ )
+
+ return BuildHarvestReport(
+ disabled_features=disabled_features,
+ spec_disabled_features=spec_disabled_features,
+ expected_hardening=expected_hardening,
+ build_architecture=build_architecture,
+ )
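+
+# A hedged invocation sketch (paths are hypothetical):
+#
+#     report = await harvest_build_data(
+#         build_log_path=Path(".cache/am_cache/checker/openssl/build.log"),
+#         spec_path=Path(".cache/am_cache/checker/openssl/openssl.spec"),
+#         cwe_id="CWE-121",
+#     )
+#     # report.expected_hardening would then list e.g. -fstack-protector-strong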
+
+
+# ---------------------------------------------------------------------------
+# Investigation 1: Configuration Flags Prompts
+# ---------------------------------------------------------------------------
+
+L2_CONFIG_SYS_PROMPT = (
+ "You are an L2 Build Agent investigating whether VULNERABLE CODE is DISABLED at build time.\n\n"
+ "GOAL: Determine if the CVE-affected feature/component is compiled into the binary.\n\n"
+ "EVIDENCE SOURCES:\n"
+ "1. BUILD_HARVEST section below - disabled features ALREADY extracted (no tool call needed)\n"
+ "2. Build log (searchable with 'logs:' prefix) - verify affected source files were compiled\n\n"
+ "INVESTIGATION STEPS:\n"
+ "1. FIRST: Check BUILD_HARVEST below - disabled features are ALREADY extracted (no tool needed)\n"
+ "2. If feature IS in disabled_features or spec_disabled_features -> verdict NOT_COMPILED immediately\n"
+ "3. If feature NOT disabled (or lists empty), search build log with 'logs:' prefix to verify compilation\n\n"
+ "VERDICTS:\n"
+ "- NOT_COMPILED: Feature is disabled OR affected files not in build log\n"
+ "- COMPILED: Feature is enabled AND affected files are compiled\n"
+ "- UNKNOWN: Cannot determine from available evidence"
+)
+
+L2_CONFIG_PROMPT_TEMPLATE = """{sys_prompt}
+
+
+CVE ID: {vuln_id}
+Target Package: {target_package}
+
+
+
+{vulnerability_intel}
+
+L1 Preliminary Verdict: {l1_preliminary_verdict}
+
+
+BUILD_HARVEST:
+** CHECK THESE FIRST - No tool call needed! **
+
+Disabled Features (from build log -D defines):
+{disabled_features}
+
+Disabled Features (from spec configure flags):
+{spec_disabled_features}
+
+DECISION GUIDE:
+- If CVE-affected feature appears above -> verdict NOT_COMPILED (no tool needed)
+- If lists are empty or feature not listed -> search build log with 'logs:' prefix
+
+
+
+{tools}
+
+
+{tool_instructions}
+
+RESPONSE:
+{{"""
+
+L2_CONFIG_THOUGHT_INSTRUCTIONS = """
+You will receive KNOWLEDGE (cumulative findings) and LATEST FINDINGS (most recent tool results).
+BEFORE ACTING, you MUST:
+1. Review KNOWLEDGE to see what tools were already called (TOOL_CALL_RECORD entries)
+2. Review LATEST FINDINGS for the most recent tool output analysis
+3. NEVER repeat any action already in TOOL_CALL_RECORD
+4. Your next action MUST build on findings - progress the investigation
+
+
+RULES:
+1. You MUST select a tool ONLY from the tools listed above. Do NOT invent tool names.
+2. Output valid JSON only. thought < 100 words. final_answer < 150 words.
+3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer.
+4. BUILD_HARVEST contains pre-extracted disabled features - CHECK IT FIRST (no grep needed).
+5. If feature in BUILD_HARVEST -> finish with NOT_COMPILED verdict immediately.
+6. If feature NOT in BUILD_HARVEST, search build log using 'logs:' prefix (e.g., 'logs:filename.c').
+7. NEVER grep source code - use 'logs:' prefix to search build logs for compilation evidence.
+8. Do NOT call the same tool with the same input twice.
+
+EXAMPLES:
+
+{{"thought": "CVE affects SM2 crypto. BUILD_HARVEST shows 'sm2' in spec_disabled_features. SM2 is disabled at build time.", "mode": "finish", "actions": null, "final_answer": "NOT_COMPILED. The spec file configures with 'no-sm2' flag, which disables SM2 cryptographic functions. The vulnerable code in crypto/sm2/ is not compiled into the binary."}}
+
+
+
+{{"thought": "CVE affects zisofs. BUILD_HARVEST disabled features are empty - zisofs not disabled. Need to verify affected file was compiled.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "logs:archive_read_support_format_iso9660", "reason": "Check if affected file appears in build compilation log"}}, "final_answer": null}}
+
+
+
+{{"thought": "Feature not disabled. Now verify affected file crypto/sm2/sm2.c was compiled.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "logs:sm2.c", "reason": "Check if affected source file appears in build log"}}, "final_answer": null}}
+
+
+
+{{"thought": "Found no-sm2 in spec_disabled_features. SM2 code is not compiled.", "mode": "finish", "actions": null, "final_answer": "NOT_COMPILED. The spec file configures with 'no-sm2' flag, which disables SM2 cryptographic functions. The vulnerable code in crypto/sm2/ is not compiled into the binary."}}
+
+
+
+{{"thought": "SM2 not disabled. Found sm2.c compilation in build log.", "mode": "finish", "actions": null, "final_answer": "COMPILED. SM2 is not in disabled features. Build log shows 'gcc -c crypto/sm2/sm2_crypt.c -o sm2_crypt.o', confirming the vulnerable code is compiled into the binary."}}
+
+
+
+{{"thought": "Cannot find evidence either way. Affected files not in build log but feature not disabled.", "mode": "finish", "actions": null, "final_answer": "UNKNOWN. The feature is not explicitly disabled, but the affected files do not appear in the build log. Cannot determine compilation status."}}
+"""
+
+
+# ---------------------------------------------------------------------------
+# Investigation 2: Hardening Flags Prompts
+# ---------------------------------------------------------------------------
+
+L2_HARDENING_SYS_PROMPT = (
+ "You are an L2 Build Agent investigating COMPILER HARDENING mitigations.\n\n"
+ "GOAL: Determine if hardening flags relevant to this CVE's vulnerability class are present.\n\n"
+ "CONTEXT: Investigation 1 determined the vulnerable code IS compiled. Now check if\n"
+ "compiler/linker hardening makes exploitation significantly harder.\n\n"
+ "CRITICAL - CWE-SPECIFIC MATCHING:\n"
+ "- ONLY flags listed in EXPECTED_HARDENING can justify a MITIGATED verdict\n"
+ "- General hardening flags (stack protector, FORTIFY_SOURCE) do NOT mitigate all CWEs\n"
+ "- Example: -fstack-protector helps CWE-121 (stack overflow), NOT CWE-190 (integer overflow)\n"
+ "- You MUST match the EXACT flags from EXPECTED_HARDENING table to the build output\n\n"
+ "EVIDENCE SOURCES:\n"
+ "1. EXPECTED_HARDENING table (CWE-specific flags from knowledge base) - THIS IS YOUR CHECKLIST\n"
+ "2. Build log (searchable with 'logs:' prefix) - contains CFLAGS/CXXFLAGS/LDFLAGS definitions\n\n"
+ "EFFICIENT SEARCH STRATEGY:\n"
+ "- Search 'logs:FLAGS=' to get ALL compiler/linker flags in ONE call (matches CFLAGS=, LDFLAGS=, etc.)\n"
+ "- Grep supports regex OR: 'logs:CFLAGS\\|LDFLAGS' combines patterns\n"
+ "- Analyze the output to check for expected hardening flags - avoid multiple individual searches\n\n"
+ "IMPORTANT - RHEL/Fedora Specs Files:\n"
+ "When you see these specs files in build logs, hardening flags are IMPLICITLY enabled:\n"
+ "- '-specs=/usr/lib/rpm/redhat/redhat-hardened-cc1' => -fPIE (position-independent code for ASLR)\n"
+ "- '-specs=/usr/lib/rpm/redhat/redhat-hardened-ld' => -pie + -z now (PIE linking + BIND_NOW/Full RELRO)\n"
+ "These flags will NOT appear explicitly in the build log - the specs file injects them.\n"
+ "If you see these specs files, count the corresponding protections as PRESENT.\n\n"
+ "INVESTIGATION STEPS:\n"
+ "1. Review EXPECTED_HARDENING table - these are the ONLY flags that matter for this CWE\n"
+ "2. Search 'logs:FLAGS=' to get all compiler/linker flag definitions at once\n"
+ "3. For EACH flag in EXPECTED_HARDENING, check if present in build output\n"
+ "4. Verdict based ONLY on EXPECTED_HARDENING flags (ignore unrelated hardening)\n\n"
+ "VERDICTS:\n"
+ "- MITIGATED: One or more flags from EXPECTED_HARDENING are present in build\n"
+ "- NOT_MITIGATED: NONE of the EXPECTED_HARDENING flags are present (even if other hardening exists)\n"
+ "- UNKNOWN: Cannot determine from available evidence"
+)
+
+L2_HARDENING_PROMPT_TEMPLATE = """{sys_prompt}
+
+
+CVE ID: {vuln_id}
+Target Package: {target_package}
+CWE: {cwe_id}
+
+
+
+The following compiler/linker flags mitigate this vulnerability class:
+
+{expected_hardening_table}
+
+
+
+{tools}
+
+
+{tool_instructions}
+
+RESPONSE:
+{{"""
+
+L2_HARDENING_THOUGHT_INSTRUCTIONS = """
+You will receive KNOWLEDGE (cumulative findings) and LATEST FINDINGS (most recent tool results).
+BEFORE ACTING, you MUST:
+1. Review KNOWLEDGE to see what tools were already called (TOOL_CALL_RECORD entries)
+2. Review LATEST FINDINGS for the most recent tool output analysis
+3. NEVER repeat any action already in TOOL_CALL_RECORD
+4. Your next action MUST build on findings - progress the investigation
+
+
+
+1. You MUST select a tool ONLY from the TOOLS list above. Do NOT invent tool names.
+2. Output valid JSON only. thought < 100 words. final_answer < 150 words.
+3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer.
+4. EFFICIENT SEARCH: First search for 'logs:FLAGS=' to get ALL compiler/linker flags in one call.
+5. Grep supports regex OR: use 'logs:CFLAGS\\|LDFLAGS' to combine patterns.
+6. Analyze the FLAGS output to check for expected hardening flags - avoid multiple individual searches.
+7. Do NOT call the same tool with the same input twice.
+8. STRICT MATCHING: Only flags from EXPECTED_HARDENING justify MITIGATED verdict.
+9. IGNORE UNRELATED HARDENING: -fstack-protector, -D_FORTIFY_SOURCE do NOT mitigate all CWEs. Check the table!
+
+
+
+{{"thought": "Need to check hardening flags for this CWE. Search for all compiler/linker flag definitions at once.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "logs:FLAGS=", "reason": "Get all CFLAGS/CXXFLAGS/LDFLAGS definitions in one search"}}, "final_answer": null}}
+
+
+
+{{"thought": "CWE-121 stack overflow. EXPECTED_HARDENING lists -fstack-protector-strong. Found -fstack-protector-strong in CFLAGS. This matches EXPECTED_HARDENING.", "mode": "finish", "actions": null, "final_answer": "MITIGATED. Found -fstack-protector-strong which is listed in EXPECTED_HARDENING for CWE-121. Stack buffer overflows will be detected at runtime."}}
+
+
+
+{{"thought": "CWE-190 integer overflow. EXPECTED_HARDENING lists -ftrapv, -fsanitize=signed-integer-overflow. Found -fstack-protector-strong and -D_FORTIFY_SOURCE=2, but these are for memory safety NOT integer overflow. Checking for expected flags: -ftrapv MISSING, -fsanitize=signed-integer-overflow MISSING.", "mode": "finish", "actions": null, "final_answer": "NOT_MITIGATED. Build has general hardening (-fstack-protector-strong, -D_FORTIFY_SOURCE=2) but NONE of the CWE-190 specific flags from EXPECTED_HARDENING (-ftrapv, -fsanitize=signed-integer-overflow) are present. Integer overflow is not mitigated."}}
+
+
+
+{{"thought": "CWE-693 control flow. EXPECTED_HARDENING lists PIE. Found -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 which implies -fPIE. This matches EXPECTED_HARDENING.", "mode": "finish", "actions": null, "final_answer": "MITIGATED. Build uses RHEL hardened specs files which implicitly enable PIE (listed in EXPECTED_HARDENING for CWE-693). Control flow exploitation is significantly harder."}}
+
+
+
+{{"thought": "CWE-190 integer overflow. EXPECTED_HARDENING lists -ftrapv, -fsanitize=*-integer-overflow. None found in FLAGS output.", "mode": "finish", "actions": null, "final_answer": "NOT_MITIGATED. None of the EXPECTED_HARDENING flags for CWE-190 (-ftrapv, -fsanitize=signed-integer-overflow, -fsanitize=unsigned-integer-overflow) are present in the build."}}
+
+
+
+{{"thought": "Build log does not contain FLAGS= definitions. Cannot determine if EXPECTED_HARDENING flags are present.", "mode": "finish", "actions": null, "final_answer": "UNKNOWN. Build log does not contain CFLAGS/LDFLAGS definitions. Cannot determine if EXPECTED_HARDENING mitigations are present."}}
+"""
+
+
+# ---------------------------------------------------------------------------
+# Verdict Extraction Prompts
+# ---------------------------------------------------------------------------
+
+L2_COMPILATION_VERDICT_PROMPT = """Extract the compilation verdict from the L2 Configuration investigation.
+
+
+{final_answer}
+
+
+Extract:
+1. compilation_status: "compiled", "not_compiled", or "unknown"
+2. confidence: 0.0 to 1.0 based on evidence strength
+3. reasoning: Brief explanation (1-2 sentences)
+
+Output JSON only:
+{{"compilation_status": "...", "confidence": 0.X, "reasoning": "..."}}"""
+
+L2_HARDENING_VERDICT_PROMPT = """Extract the hardening verdict from the L2 Hardening investigation.
+
+
+{final_answer}
+
+
+Extract:
+1. hardening_status: "mitigated", "not_mitigated", "not_applicable", or "unknown"
+ - "not_applicable": This CWE class has no compiler-level mitigations available
+2. hardening_flags: List of specific compiler/linker flags that provide protection (e.g., ["-fstack-protector-strong", "-D_FORTIFY_SOURCE=2", "RELRO", "PIE"])
+ - Extract the actual flag names mentioned in the investigation
+ - Empty list if no relevant flags found
+3. confidence: 0.0 to 1.0 based on evidence strength
+4. reasoning: Brief explanation (1-2 sentences)
+
+Output JSON only:
+{{"hardening_status": "...", "hardening_flags": ["..."], "confidence": 0.X, "reasoning": "..."}}"""
+
+
+# ---------------------------------------------------------------------------
+# L2 Observation Node Prompts (Comprehension & Memory Update)
+# ---------------------------------------------------------------------------
+
+L2_COMPREHENSION_PROMPT = """Analyze the tool output for L2 build/compilation verification.
+GOAL: Determine whether {vuln_id} vulnerable code is COMPILED in {target_package}
+
+
+{vulnerability_intel}
+
+
+
+Disabled Features (build log): {disabled_features}
+Disabled Features (spec file): {spec_disabled_features}
+
+
+TOOL USED: {tool_used}
+TOOL INPUT: {tool_input}
+THOUGHT: {last_thought}
+NEW OUTPUT:
+{tool_output}
+
+BUILD ANALYSIS RULES:
+1. CHECK if tool output shows:
+ - Compilation commands for AFFECTED_FILES (e.g., gcc -c file.c -o file.o)
+ - Feature-disable flags that match the CVE-affected component
+ - Object files or compilation artifacts for VULNERABLE_FUNCTIONS
+
+2. COMPILATION EVIDENCE:
+ - COMPILED: Found gcc/compile commands for affected files
+ - NOT_COMPILED: Feature is disabled OR affected files not in build
+ - UNKNOWN: Insufficient evidence
+
+3. RECORD specific file paths, compile commands, or flag matches.
+
+TOOL-SPECIFIC RULES:
+- If NEW OUTPUT is empty or error: "FAILED: [tool] [input] - [reason]"
+- Source Grep: Check if matches show compilation or feature disabling
+- Build log search: Look for compile commands and disabled features
+
+OUTPUT:
+- findings: 2-4 key observations about compilation status
+- tool_outcome: "Source Grep [pattern] -> found in build.log:123"
+RESPONSE:
+{{"""
+
+L2_MEMORY_UPDATE_PROMPT = """Merge findings into L2 build investigation memory.
+GOAL: Determine whether {vuln_id} vulnerable code is COMPILED in {target_package}
+
+PREVIOUS MEMORY: {previous_memory}
+NEW FINDINGS: {findings}
+TOOL CALL RECORD: {tool_outcome}
+
+MEMORY RULES:
+1. Append NEW FINDINGS to PREVIOUS MEMORY. No duplicates.
+2. Add TOOL CALL RECORD verbatim.
+3. If NEW FINDINGS report a failure, add the failure to memory.
+
+COMPILATION TRACKING:
+- Affected file COMPILED: "FILE_COMPILED: [file] - evidence: [compile command]"
+- Affected file NOT_COMPILED: "FILE_NOT_COMPILED: [file] - evidence: [disabled feature]"
+- Feature DISABLED: "FEATURE_DISABLED: [feature] in [build_log/spec]"
+- Feature ENABLED: "FEATURE_ENABLED: [feature] - no disable flag found"
+
+VERDICT EVIDENCE:
+- NOT_COMPILED evidence: feature disabled OR affected files not compiled
+- COMPILED evidence: affected files appear in compile commands
+- UNKNOWN: conflicting evidence or no compilation info found
+
+- results: copy the NEW FINDINGS as-is.
+- memory: updated cumulative findings with evidence tags.
+RESPONSE:
+{{"""
+
+
+# ---------------------------------------------------------------------------
+# L2 Hardening Observation Node Prompts (Comprehension & Memory Update)
+# ---------------------------------------------------------------------------
+
+L2_HARDENING_COMPREHENSION_PROMPT = """Analyze the tool output for L2 hardening flag verification.
+GOAL: Determine whether {vuln_id} has HARDENING mitigations in {target_package}
+
+
+CWE: {cwe_id}
+Expected Hardening Flags:
+{expected_hardening}
+
+
+TOOL USED: {tool_used}
+TOOL INPUT: {tool_input}
+THOUGHT: {last_thought}
+NEW OUTPUT:
+{tool_output}
+
+HARDENING ANALYSIS RULES:
+1. FIRST CHECK Expected Hardening Flags above:
+ - If "None" or empty: This CWE has NO known compiler-level mitigations
+ - Mark findings as "NO_RELEVANT_HARDENING: {cwe_id} has no compiler mitigations"
+ - Skip searching for generic flags - they won't help this vulnerability class
+
+2. IF expected hardening flags exist, CHECK tool output for:
+ - Compiler hardening flags (e.g., -fstack-protector, -fPIE, -fstack-clash-protection)
+ - Preprocessor defines (e.g., -D_FORTIFY_SOURCE=2, -D_GLIBCXX_ASSERTIONS)
+ - Linker hardening flags (e.g., -Wl,-z,relro, -Wl,-z,now)
+
+3. HARDENING EVIDENCE:
+ - FLAG_PRESENT: Found expected hardening flag in build commands
+ - FLAG_ABSENT: Searched but did not find expected flag
+ - NOT_APPLICABLE: No compiler mitigations exist for this CWE class
+ - UNKNOWN: Insufficient evidence
+
+4. RECORD specific flags found and their context (compilation line).
+
+TOOL-SPECIFIC RULES:
+- If NEW OUTPUT is empty or error: "FAILED: [tool] [input] - [reason]"
+- Source Grep: Check if matches show hardening flags in gcc/clang commands
+- Build log search: Look for -f*, -D*, -Wl,* patterns
+
+OUTPUT:
+- findings: 2-4 key observations about hardening flags
+- tool_outcome: "Source Grep [pattern] -> found in build.log:123"
+RESPONSE:
+{{"""
+
+L2_HARDENING_MEMORY_UPDATE_PROMPT = """Merge findings into L2 hardening investigation memory.
+GOAL: Determine whether {vuln_id} has HARDENING mitigations in {target_package}
+
+CWE: {cwe_id}
+PREVIOUS MEMORY: {previous_memory}
+NEW FINDINGS: {findings}
+TOOL CALL RECORD: {tool_outcome}
+
+MEMORY RULES:
+1. Append NEW FINDINGS to PREVIOUS MEMORY. No duplicates.
+2. Add TOOL CALL RECORD verbatim.
+3. If NEW FINDINGS report a failure, add the failure to memory.
+
+HARDENING TRACKING:
+- No relevant hardening: "NO_RELEVANT_HARDENING: [CWE] has no compiler mitigations"
+- Flag FOUND: "HARDENING_PRESENT: [flag] - evidence: [build command excerpt]"
+- Flag NOT FOUND: "HARDENING_ABSENT: [flag] - searched but not found"
+- Critical mitigation: "CRITICAL_MITIGATION: [flag] for [CWE] - [present/absent]"
+
+VERDICT EVIDENCE:
+- NOT_APPLICABLE evidence: this CWE class has no compiler-level mitigations
+- MITIGATED evidence: key hardening flags present that reduce exploitability
+- NOT_MITIGATED evidence: expected hardening flags absent
+- UNKNOWN: build log incomplete or no compilation commands found
+
+- results: copy the NEW FINDINGS as-is.
+- memory: updated cumulative findings with hardening evidence tags.
+RESPONSE:
+{{"""
diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py
new file mode 100644
index 000000000..c0748ed17
--- /dev/null
+++ b/src/vuln_analysis/functions/code_agent_graph_defs.py
@@ -0,0 +1,2248 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Graph definitions for the L1 Package Code Agent.
+
+Houses the LangGraph state schema, structured-output schemas for
+DownstreamSearchReport/UpstreamSearchReport pipelines, CodeAgentReport,
+and L1 agent prompt templates.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+import shutil
+import subprocess
+import warnings
+from pathlib import Path
+from typing import Literal, NotRequired, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from vuln_analysis.tools.brew_downloader import BrewDownloader
+
+from langchain_core.messages import HumanMessage, SystemMessage
+from langgraph.graph import MessagesState
+from pydantic import BaseModel, Field
+from unidiff import PatchSet
+
+import aiohttp
+
+from exploit_iq_commons.data_models.checker_status import L2BuildResult, VulnerabilityIntel
+from vuln_analysis.functions.react_internals import CheckerThought, Observation, L1VerdictExtraction
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Graph state
+# ---------------------------------------------------------------------------
+
+
+class CodeAgentState(MessagesState):
+ """LangGraph state for the L1 Code Agent (DownstreamSearch -> UpstreamSearch)."""
+ downstream_report: NotRequired[DownstreamSearchReport | None]
+ upstream_report: NotRequired[UpstreamSearchReport | None]
+ runtime_prompt: NotRequired[str | None]
+ last_thought: NotRequired[CheckerThought | None]
+ step: NotRequired[int]
+ max_steps: NotRequired[int]
+ output: NotRequired[str]
+ thought: NotRequired[CheckerThought | None]
+ observation: NotRequired[Observation | None]
+ vulnerability_intel: NotRequired["VulnerabilityIntel | None"]
+
+
+# ---------------------------------------------------------------------------
+# Patch schemas (must be defined before reports that use them)
+# ---------------------------------------------------------------------------
+
+
+class PatchHunk(BaseModel):
+ """A single hunk from a downstream patch file."""
+ source_start: int
+ source_length: int
+ target_start: int
+ target_length: int
+ context_lines: list[str] = Field(default_factory=list, description="Unchanged lines")
+ removed_lines: list[str] = Field(default_factory=list, description="Deleted lines (- stripped)")
+ added_lines: list[str] = Field(default_factory=list, description="Added lines (+ stripped)")
+
+
+class PatchFile(BaseModel):
+ """Changes to a single file in a downstream patch."""
+ source_path: str
+ target_path: str
+ hunks: list[PatchHunk]
+ is_new_file: bool = False
+ is_deleted_file: bool = False
+
+
+class ParsedPatch(BaseModel):
+ """Structured representation of a downstream patch file."""
+ patch_filename: str
+ files: list[PatchFile]
+
+
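+# Illustrative only: a hypothetical helper (not used in this module) showing
+# how raw .patch text maps onto the schemas above via the unidiff PatchSet
+# imported at module top; the production parser may differ.
+def _parsed_patch_from_text(patch_filename: str, text: str) -> ParsedPatch:
+ """Sketch: build a ParsedPatch from unified-diff text.
+ Note that unidiff keeps the a/ and b/ prefixes on source/target paths."""
+ files: list[PatchFile] = []
+ for pf in PatchSet(text):
+ hunks = [
+ PatchHunk(
+ source_start=h.source_start,
+ source_length=h.source_length,
+ target_start=h.target_start,
+ target_length=h.target_length,
+ context_lines=[ln.value.rstrip("\n") for ln in h if ln.is_context],
+ removed_lines=[ln.value.rstrip("\n") for ln in h if ln.is_removed],
+ added_lines=[ln.value.rstrip("\n") for ln in h if ln.is_added],
+ )
+ for h in pf
+ ]
+ files.append(PatchFile(
+ source_path=pf.source_file,
+ target_path=pf.target_file,
+ hunks=hunks,
+ is_new_file=pf.is_added_file,
+ is_deleted_file=pf.is_removed_file,
+ ))
+ return ParsedPatch(patch_filename=patch_filename, files=files)
+
+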
+class OSVPatchResult(BaseModel):
+ """Result of fetching a patch from OSV/GitHub."""
+ cve_id: str
+ fixed_commit: str
+ repo_url: str
+ patch_url: str
+ patch_content: str | None = Field(default=None, description="Raw .patch text")
+ parsed_patch: "ParsedPatch | None" = Field(default=None, description="Structured patch data")
+ commit_message: str | None = None
+ commit_author: str | None = None
+ commit_date: str | None = None
+
+
+# ---------------------------------------------------------------------------
+# Reflection schemas
+# ---------------------------------------------------------------------------
+
+
+class DownstreamSearchReport(BaseModel):
+ """Result of a downstream search."""
+ is_patch_file_available: bool = Field(default=False, description="True if a patch file is available")
+ patch_file_name: str = Field(default="", description="The name of the patch file")
+ is_patch_in_spec_file: bool = Field(default=False, description="True if a patch file is in the spec file")
+ spec_file_log_change: str = Field(
+ default="",
+ description="All lines in the .spec file that match a grep for the CVE id (not changelog-only)",
+ )
+ is_patch_applied_in_build: bool = Field(default=False, description="True if a patch file is applied in the build")
+ build_log_patch_applied: str = Field(default="", description="The patch applied in the build log")
+ spec_patch_directives_for_cve: list[str] = Field(
+ default_factory=list,
+ description="Raw PatchN: lines from the spec whose patch filename token matches this CVE",
+ )
+ spec_changelog_cve_lines: str = Field(
+ default="",
+ description="Lines from the %changelog section of the .spec that mention the CVE",
+ )
+ spec_source0_line: str = Field(
+ default="",
+ description="The Source0: line from the spec file (upstream tarball reference)",
+ )
+ spec_version_line: str = Field(
+ default="",
+ description="The Version: line from the spec file",
+ )
+ parsed_patch: ParsedPatch | None = Field(default=None, description="The parsed patch file")
+
+
+class UpstreamSearchReport(BaseModel):
+ """Result of an upstream search."""
+
+ is_fixed_srpm_is_needed: bool = Field(default=False, description="True if a fixed SRPM is needed to extract downstream-style patch files")
+ fixed_srpm_file_name: str = Field(default="", description="The name of the fixed SRPM file")
+ fixed_parsed_patch: ParsedPatch | None = Field(default=None, description="The parsed fixed SRPM patch file")
+ reference_package_nvr: str = Field(
+ default="",
+ description="NVR (name-version-release) of the reference fixed package from intel",
+ )
+ reason_cve_code: str = Field(
+ default="",
+ description="Reasoning on whether the CVE description matches the vulnerable code",
+ )
+ is_code_fixed_by_rebase: Literal["yes", "no", "unknown"] = Field(
+ default="unknown",
+ description="yes if the code is fixed by rebase",
+ )
+ spec_file_log_change: str = Field(
+ default="",
+ description="The log change of patch in the spec file",
+ )
+ spec_fixed_srpm_change: str = Field(
+ default="",
+ description="The change of the fixed SRPM in the spec file",
+ )
+ reason_code_fixed_by_rebase: str = Field(
+ default="",
+ description="The reason why the code is fixed by rebase",
+ )
+ osv_result: OSVPatchResult | None = Field(default=None, description="The result of the OSV patch retrieval")
+
+
+class ReflectionBase(BaseModel):
+ """Base schema for phase reports.
+
+ Subclasses add phase-specific fields on top.
+ """
+ instructions: str = Field(
+ description="Guidance to the generator for the next iteration.")
+ is_sufficient: bool = Field(
+ description="True if results are good enough to proceed.")
+
+
+# ---------------------------------------------------------------------------
+# Code Agent Report schema
+# ---------------------------------------------------------------------------
+
+
+class CodeSnippet(BaseModel):
+ """A code snippet from the investigation."""
+ file_path: str = Field(description="Path to the source file")
+ line_number: int | None = Field(default=None, description="Starting line number")
+ code: str = Field(description="The code content")
+ snippet_type: Literal["vulnerable", "fix", "context"] = Field(
+ description="Type of snippet: vulnerable code, fix code, or context")
+ source: Literal["downstream_patch", "upstream_patch", "source_search"] = Field(
+ description="Where this snippet came from")
+
+
+class CodeAgentReport(BaseModel):
+ """Final L1 Code Agent investigation report synthesizing all phases."""
+ justification_label: str = Field(
+ description=(
+ "Justification category aligned with VEX: one of "
+ "code_not_present, protected_by_mitigating_control, vulnerable, uncertain"
+ ))
+ executive_summary: str = Field(
+ description=(
+ "3-4 sentence synthesis. Must include: 1) Final verdict, "
+ "2) Technical nature of flaw, 3) Why L2 context overrides L1 (if applicable)."
+ ))
+ evidence_chain: list[str] = Field(
+ description="Ordered list of evidence items tracing the vulnerability through phases")
+ affected_files: list[str] = Field(
+ description="Source files where vulnerable code was identified")
+ patch_analysis: str | None = Field(
+ default=None,
+ description="Analysis of downstream patches if any were found")
+ code_snippets: list[CodeSnippet] = Field(
+ default_factory=list,
+ description="Structured code snippets showing vulnerable and fix code")
+ caveats: list[str] = Field(
+ default_factory=list,
+ description="Investigation gaps or uncertainties that may need manual review")
+
+ def to_markdown(
+ self,
+ vuln_id: str = "",
+ target_package: str = "",
+ version: str = "",
+ release: str = "",
+ downstream_report: DownstreamSearchReport | None = None,
+ ) -> str:
+ """Render the report as a formatted markdown string."""
+ lines: list[str] = []
+
+ # Header with title
+ lines.append("# L1 Code Agent Investigation Report")
+ lines.append("")
+
+ # Verdict banner based on justification label
+ verdict_map = {
+ "protected_by_mitigating_control": ("NOT VULNERABLE", "Protected by downstream patch"),
+ "protected_by_compiler": ("NOT VULNERABLE", "Protected by compiler hardening"),
+ "code_not_present": ("NOT VULNERABLE", "Vulnerable code not present"),
+ "code_not_reachable": ("NOT VULNERABLE", "Vulnerable code not reachable"),
+ "requires_environment": ("NOT VULNERABLE", "Requires specific environment"),
+ "vulnerable": ("VULNERABLE", "Package requires patching"),
+ "uncertain": ("UNCERTAIN", "Requires manual review"),
+ }
+ verdict_status, verdict_desc = verdict_map.get(
+ self.justification_label,
+ ("UNKNOWN", "Unknown status")
+ )
+
+ lines.append(f"> **Verdict: {verdict_status}** - {verdict_desc}")
+ lines.append("")
+
+ # Package information table
+ lines.append("## Package Information")
+ lines.append("")
+ lines.append("| Field | Value |")
+ lines.append("|-------|-------|")
+ if vuln_id:
+ lines.append(f"| **CVE ID** | `{vuln_id}` |")
+ if target_package:
+ lines.append(f"| **Package** | `{target_package}` |")
+ if version:
+ version_str = f"{version}-{release}" if release else version
+ lines.append(f"| **Version** | `{version_str}` |")
+ lines.append(f"| **Justification** | `{self.justification_label}` |")
+ lines.append("")
+
+ # Executive Summary
+ lines.append("---")
+ lines.append("")
+ lines.append("## Executive Summary")
+ lines.append("")
+ lines.append(self.executive_summary)
+ lines.append("")
+
+ # Evidence Chain
+ lines.append("---")
+ lines.append("")
+ lines.append("## Evidence Chain")
+ lines.append("")
+ lines.extend(_format_interleaved_evidence(
+ self.evidence_chain,
+ downstream_report,
+ ))
+
+ # Affected Files
+ if self.affected_files:
+ lines.append("---")
+ lines.append("")
+ lines.append("## Affected Files")
+ lines.append("")
+ # Separate source files from test files
+ source_files = [f for f in self.affected_files if "/test/" not in f and "test_" not in f]
+ test_files = [f for f in self.affected_files if "/test/" in f or "test_" in f]
+
+ if source_files:
+ lines.append("**Source files:**")
+ for f in source_files:
+ lines.append(f"- `{f}`")
+ lines.append("")
+
+ if test_files:
+ lines.append("**Test files:**")
+ for f in test_files:
+ lines.append(f"- `{f}`")
+ lines.append("")
+
+ # Patch Analysis
+ if self.patch_analysis:
+ lines.append("---")
+ lines.append("")
+ lines.append("## Patch Analysis")
+ lines.append("")
+ lines.append(self.patch_analysis)
+ lines.append("")
+
+ # Code Snippets - separate vulnerable from fix, prioritize main source files
+ if self.code_snippets:
+ lines.append("---")
+ lines.append("")
+ lines.append("## Code Comparison")
+ lines.append("")
+
+ # Filter and organize snippets
+ vuln_snippets = [s for s in self.code_snippets if s.snippet_type == "vulnerable"]
+ fix_snippets = [s for s in self.code_snippets if s.snippet_type == "fix"]
+
+ # Prioritize main source files (not test/build files)
+ def is_main_source(path: str) -> bool:
+ return "/test/" not in path and "test_" not in path and "Makefile" not in path and "CMakeLists" not in path
+
+ main_vuln = [s for s in vuln_snippets if is_main_source(s.file_path)]
+ main_fix = [s for s in fix_snippets if is_main_source(s.file_path)]
+
+ # Show main vulnerability code
+ if main_vuln:
+ lines.append("### Vulnerable Code")
+ lines.append("")
+ for snippet in main_vuln[:2]:
+ file_name = snippet.file_path.split("/")[-1]
+ lines.append(f"**File:** `{file_name}` (Line {snippet.line_number or 'N/A'})")
+ lines.append("")
+ lines.append("```c")
+ lines.append(snippet.code.strip())
+ lines.append("```")
+ lines.append("")
+
+ # Show fix code
+ if main_fix:
+ lines.append("### Fix Code")
+ lines.append("")
+ for snippet in main_fix[:2]:
+ file_name = snippet.file_path.split("/")[-1]
+ lines.append(f"**File:** `{file_name}` (Line {snippet.line_number or 'N/A'})")
+ lines.append("")
+ lines.append("```c")
+ lines.append(snippet.code.strip())
+ lines.append("```")
+ lines.append("")
+
+ # Show other snippets (test/build files) in collapsible section if any
+ other_vuln = [s for s in vuln_snippets if not is_main_source(s.file_path)]
+ other_fix = [s for s in fix_snippets if not is_main_source(s.file_path)]
+
+ if other_vuln or other_fix:
+ lines.append("")
+ lines.append("<details>")
+ lines.append("<summary>Additional Changes (Test/Build Files)</summary>")
+ lines.append("")
+ for snippet in other_vuln + other_fix:
+ file_name = snippet.file_path.split("/")[-1]
+ lines.append(f"**{snippet.snippet_type.title()}** - `{file_name}`")
+ lines.append("")
+ lines.append("```")
+ lines.append(snippet.code.strip())
+ lines.append("```")
+ lines.append("")
+ lines.append("</details>")
+ lines.append("")
+
+ # Caveats
+ if self.caveats:
+ lines.append("---")
+ lines.append("")
+ lines.append("## Caveats")
+ lines.append("")
+ for caveat in self.caveats:
+ lines.append(f"- {caveat}")
+ lines.append("")
+
+ # Footer
+ lines.append("---")
+ lines.append("")
+ lines.append("*Report generated by L1 Code Agent*")
+
+ return "\n".join(lines)
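+
+# Hypothetical usage of the renderer above (all values invented):
+#
+# report = CodeAgentReport(
+#     justification_label="protected_by_mitigating_control",
+#     executive_summary="The target build applies a CVE-specific backport ...",
+#     evidence_chain=["Patch file present in SRPM", "Patch applied in build log"],
+#     affected_files=["crypto/sm2/sm2_crypt.c"],
+# )
+# markdown = report.to_markdown(vuln_id="CVE-2026-5121", target_package="openssl")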
+
+
+# ---------------------------------------------------------------------------
+# Prompt templates
+# ---------------------------------------------------------------------------
+
+L1_VERDICT_EXTRACTION_PROMPT = """\
+Extract the security verdict from this L1 agent investigation conclusion.
+
+CVE: {vuln_id}
+Package: {target_package}
+
+L1 Agent Final Answer:
+{final_answer}
+
+Classify the conclusion into one of these categories:
+- "protected": The package is protected (patch applied, fix backported, or mitigating control present)
+- "not_present": The vulnerable code/function is not present in this version
+- "vulnerable": The vulnerable code is confirmed present and unpatched
+- "uncertain": Insufficient evidence or conflicting findings
+
+Provide your confidence level (0.0-1.0) based on the strength of evidence in the answer.
+"""
+
+VULNERABILITY_INTEL_EXTRACTION_PROMPT = """\
+Extract structured vulnerability intelligence from the CVE data and patch content.
+Your output will be used to guide source code searches, so focus on grep-able patterns.
+
+
+CVE ID: {vuln_id}
+Package: {target_package}
+CVE Description: {cve_description}
+
+
+
+{patch_data}
+
+
+
+1. affected_files: Extract file paths from patch headers (strip a/ b/ prefixes)
+2. vulnerable_functions: Extract function names from:
+ - Removed lines (- lines) in patch
+ - Function names mentioned in CVE description
+3. vulnerable_variables: Extract variable names from removed lines that are key to the vulnerability
+4. vulnerable_patterns: Extract distinctive code snippets from removed lines (- lines)
+ - Focus on patterns that can be grepped
+ - Include enough context to be unique
+5. fix_patterns: Extract distinctive code snippets from added lines (+ lines)
+ - These indicate the fix is present
+6. root_cause: Explain WHY the code is vulnerable in 1-2 sentences
+7. vulnerability_type: Classify as one of: buffer_overflow, integer_overflow, use_after_free,
+ null_deref, format_string, race_condition, path_traversal, injection, other
+8. search_keywords: List 3-5 grep patterns ordered by specificity:
+ - Start with most specific (unique variable/function names)
+ - End with broader patterns (file names, component names)
+9. affected_architectures: Determine which CPU architectures are affected:
+ - "32-bit": Only 32-bit systems affected (look for phrases like "32-bit systems", "i386", "i686", "on 32-bit", "64-bit systems are not affected")
+ - "64-bit": Only 64-bit systems affected (rare, look for "64-bit only", "x86_64 only")
+ - "both": Both architectures affected (DEFAULT - use when not explicitly stated otherwise)
+ NOTE: Do NOT assume an architecture based on the vulnerability type. Default to "both" unless explicitly stated.
+
+
+
+- If no patch is provided, extract what you can from the CVE description
+- For search_keywords, prefer identifiers over natural language
+- Patterns should be grep-friendly (avoid regex special chars unless escaped)
+
+"""
+
+
+def format_patch_data_for_intel(
+ parsed_patch: ParsedPatch | None
+) -> str:
+ """Format patch and CVE data for intelligence extraction.
+
+ Parameters
+ ----------
+ parsed_patch:
+ Parsed patch file structure (may be None if no patch available).
+
+ Returns
+ -------
+ str
+ Formatted string suitable for the VULNERABILITY_INTEL_EXTRACTION_PROMPT.
+ """
+ if not parsed_patch:
+ return ""
+
+ lines = [f"Patch: {parsed_patch.patch_filename}", ""]
+ for pf in parsed_patch.files:
+ lines.append(f"File: {pf.target_path}")
+ for hunk in pf.hunks:
+ if hunk.removed_lines:
+ lines.append(" Removed (vulnerable):")
+ for line in hunk.removed_lines[:10]:
+ lines.append(f" - {line}")
+ if len(hunk.removed_lines) > 10:
+ lines.append(f" ... (+{len(hunk.removed_lines) - 10} more lines)")
+ if hunk.added_lines:
+ lines.append(" Added (fix):")
+ for line in hunk.added_lines[:10]:
+ lines.append(f" + {line}")
+ if len(hunk.added_lines) > 10:
+ lines.append(f" ... (+{len(hunk.added_lines) - 10} more lines)")
+ lines.append("")
+
+ return "\n".join(lines)
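+
+# For a single-hunk patch the formatter above yields roughly (illustrative,
+# invented content):
+#
+#   Patch: 0001-CVE-2026-5121-fix.patch
+#
+#   File: crypto/sm2/sm2_crypt.c
+#     Removed (vulnerable):
+#       - len = p - buf;
+#     Added (fix):
+#       + if (p < buf)
+#       + return 0;
+#       + len = p - buf;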
+
+
+CODE_AGENT_REPORT_PROMPT = """\
+
+You are a security analyst generating the final Code Agent investigation report.
+Synthesize the results from the target package analysis, additional intel (target rebase check + reference package), L1 agent analysis,
+and optionally L2 build analysis into a comprehensive, auditable report with a clear
+justification and supporting evidence.
+
+
+
+CVE: {vuln_id}
+Target Package: {target_package}
+CVE Description: {cve_description}
+
+
+{policy_context_section}
+
+## Target Package Analysis
+(Checked the target package being scanned for CVE-specific patch files)
+{downstream_section}
+
+## Additional Intel (Target Rebase + Reference Package)
+This section contains TWO distinct checks:
+- TARGET REBASE CHECK: Searched the target package's spec file for CVE mentions indicating a rebase fix
+- REFERENCE PACKAGE: Downloaded a known-fixed package version from intel to extract patch patterns
+{upstream_section}
+
+## L1 Agent Analysis
+{l1_agent_section}
+
+
+{l2_context_section}
+
+Generate a structured report following these requirements:
+
+1. JUSTIFICATION LABEL (select the most appropriate):
+ - code_not_present: Vulnerable code/function is absent from this package version
+ - code_not_reachable: Code exists but cannot be reached/executed in this context
+ - protected_by_mitigating_control: Downstream patch or backport mitigates the vulnerability
+ - protected_by_compiler: Compiler hardening flags mitigate the vulnerability
+ - requires_environment: Vulnerability requires specific environment conditions not present
+ - vulnerable: Package is actually vulnerable and needs patching
+ - uncertain: Insufficient information to determine exploitability
+
+ PRECEDENCE RULES (L2 overrides L1 when L2_BUILD_CONTEXT is present):
+ - L2 analyzes actual compiled binaries; treat L2 findings as ground truth for exploitability.
+ - If L2 verdict is "not_vulnerable" with architecture mismatch → use "requires_environment"
+ - If L2 verdict is "not_vulnerable" with compilation_status="not_compiled" → use "code_not_present"
+ - If L2 verdict is "vulnerable_mitigated" with hardening flags → use "protected_by_compiler"
+ - Do NOT state "vulnerable" if L2 evidence contradicts it. Instead explain:
+ "While source contains vulnerable patterns, the build is not affected due to [L2 reason]."
+
+ PRECEDENCE RULES (when no L2 context):
+ - If a CVE-specific patch file exists AND is applied in build, use "protected_by_mitigating_control".
+ - If L1 agent found the fix code in source, use "protected_by_mitigating_control".
+ - If L1 agent found vulnerable code pattern still present, use "vulnerable".
+ - If upstream shows rebase fixed the issue, use "protected_by_mitigating_control".
+ - Only use "uncertain" when evidence is conflicting or insufficient.
+
+2. EVIDENCE CHAIN:
+ - Start with target package patch availability
+ - Include target rebase findings (if CVE mentioned in target's spec changelog, this is from the TARGET package)
+ - Include reference package findings (if a known-fixed package from intel was used for comparison)
+ - Include code analysis findings (patch targets, vulnerable vs fix patterns)
+ - Reference specific files, line numbers, and code snippets
+ - Summarize findings; the rendered report places an "Extracted facts" section **after** the Evidence Chain, with verbatim spec Patch lines, changelog hits, and build log lines (when available)
+ - Do not invent `PatchN:` numbers or spec quotes; only state patch indices you can derive from the investigation text below, or point readers to *Extracted facts* for exact lines
+
+ PHRASING GUIDANCE for code analysis findings:
+ - GOOD: "Code analysis verified that the patch modifies `filename.c` to address the vulnerability"
+ - GOOD: "Patch targets the `function_name()` function in `filename.c`"
+ - BAD: "L1 agent found the fix code in the source" (ambiguous - implies fix already exists)
+ - BAD: "Found fix in source" (unclear what was found)
+ - Use active voice: "The patch adds validation..." not "Validation was found..."
+
+3. CODE SNIPPETS:
+ - Extract key code snippets from patches showing vulnerable and fix code
+ - Include file paths and line numbers
+ - Mark each snippet as "vulnerable", "fix", or "context"
+ - When target package or reference package investigation includes a parsed patch, code_snippets may be filled programmatically from that patch; use an empty code_snippets list if you do not have verbatim lines to copy.
+ - Always populate affected_files with CVE-relevant source paths so patch hunks can be prioritized.
+
+4. EXECUTIVE SUMMARY (3-4 sentences, scenario-aware):
+
+ When L2_BUILD_CONTEXT is present (L2 override scenario):
+ - Sentence 1 (Verdict): State final posture clearly (e.g., "NOT vulnerable due to environmental constraints")
+ - Sentence 2 (Technical Context): Describe the nature of the flaw from CVE description (e.g., "integer overflow in zisofs allocation") and why L2 negates it (e.g., "64-bit addressing prevents the overflow condition")
+ - Sentence 3 (Reconciliation): Explain why L1 found code but L2 says safe (e.g., "Vulnerable patterns exist in source but are inert on this architecture")
+
+ When NO L2 context (standard L1 scenarios):
+ - Sentence 1: State verdict (protected/vulnerable/uncertain)
+ - Sentence 2: Technical nature of flaw and how it was addressed (patch) or why it's exploitable
+ - Sentence 3: Any additional context from investigation results
+
+ Do NOT invent RHSA IDs, function names, or technical details not present in the context.
+
+5. PATCH ANALYSIS (semantic fix narrative):
+ - When target package patch or reference patch evidence exists, briefly describe **what** the fix does: name the function(s) or file(s) and the nature of the change (e.g. "adds range validation 15–17 in parse_rockridge_ZF1").
+ - Derive this from Target Package Analysis, Reference Intel, patch file names, or L1 agent code excerpts—do NOT invent code or function names absent from investigation results.
+
+6. DELIVERY MODEL:
+ - When a CVE-named patch file is present, explicitly note that the fix is carried as a separate `%patch` directive while the upstream tarball (`Source0`) version may remain unchanged.
+ - Encourage citing "Extracted facts" for exact spec `PatchN:` and `Source0`/`Version` lines when shown below.
+
+7. CAVEATS (optional):
+ - Note any missing data (no patch file, no build log, etc.)
+ - Flag low-confidence findings that may need manual review
+ - Leave empty if no significant gaps exist
+
+
+
+Provide a structured JSON response with:
+- justification_label: one of the labels above
+- executive_summary: 3-4 sentence summary (see Instruction #4 for structure)
+- evidence_chain: list of evidence items in logical order
+- affected_files: list of source files involved
+- patch_analysis: analysis of patches (or null if none)
+- code_snippets: list of code snippets with file_path, line_number, code, snippet_type, source (may be overwritten from the downstream patch when one is parsed)
+- caveats: list of investigation gaps or uncertainties (empty list if none)
+
+Ensure all code snippets and special characters within JSON string values are properly escaped
+(e.g., quotes as \", backslashes as \\, newlines as \\n) to maintain valid JSON format.
+
+"""
+
+
+# ---------------------------------------------------------------------------
+# Report formatting helpers
+# ---------------------------------------------------------------------------
+
+MAX_SNIPPET_CHARS = 500
+L1_EXTRACTED_FACTS_EXCERPT_CHARS = 2000
+
+
+def _cap_text_excerpt(text: str, max_chars: int) -> tuple[str, bool]:
+ """Return (possibly truncated) text and whether truncation occurred."""
+ t = text.strip()
+ if len(t) <= max_chars:
+ return t, False
+ return t[: max_chars] + "\n[… truncated …]", True
+
+
+def _format_interleaved_evidence(
+ evidence_chain: list[str],
+ downstream_report: DownstreamSearchReport | None,
+ *,
+ max_excerpt: int = L1_EXTRACTED_FACTS_EXCERPT_CHARS,
+) -> list[str]:
+ """Build audit-ready markdown for the Evidence Chain section.
+
+ Structure follows the 3-pillar model for TARGET package verification:
+ - Status Summary table for at-a-glance verification
+ - Target Patch Metadata (the "What")
+ - Integration Evidence (the "Plan" - spec file directives)
+ - Execution Evidence (the "Action" - build logs)
+ - Source Validation (the "Result" - L1 agent findings)
+ """
+ lines: list[str] = []
+
+ if downstream_report is None:
+ for ev in evidence_chain:
+ lines.append(f"- {ev}")
+ return lines
+
+ d = downstream_report
+
+ # Categorize evidence items by keywords
+ patch_evidence: list[str] = []
+ build_evidence: list[str] = []
+ code_evidence: list[str] = []
+ other_evidence: list[str] = []
+
+ patch_keywords = ("patch", "spec", "patchn", "directive", "target", "reference")
+ build_keywords = ("build", "applied", "log")
+ code_keywords = ("code", "function", "vulnerable", "fix", "found", "source", "l1", "agent")
+
+ for ev in evidence_chain:
+ ev_lower = ev.lower()
+ if any(kw in ev_lower for kw in patch_keywords):
+ patch_evidence.append(ev)
+ elif any(kw in ev_lower for kw in build_keywords):
+ build_evidence.append(ev)
+ elif any(kw in ev_lower for kw in code_keywords):
+ code_evidence.append(ev)
+ else:
+ other_evidence.append(ev)
+
+ # Status Summary - at-a-glance verification of TARGET package (using bullets for UI compatibility)
+ lines.append("### Status Summary (Target Package)")
+ lines.append("")
+ patch_check = "PASS" if d.is_patch_file_available else "FAIL"
+ spec_check = "PASS" if d.is_patch_in_spec_file else "FAIL"
+ build_check = "PASS" if d.is_patch_applied_in_build else "FAIL"
+ lines.append(f"- **Target patch file exists:** {patch_check}")
+ lines.append(f"- **Referenced in target spec:** {spec_check}")
+ lines.append(f"- **Applied in target build:** {build_check}")
+ lines.append("")
+
+ # Section 1: Target Patch Metadata
+ if d.patch_file_name or patch_evidence:
+ lines.append("### 1. Patch Metadata")
+ lines.append("")
+ if d.patch_file_name:
+ lines.append(f"- **Target patch file:** `{d.patch_file_name}`")
+ for ev in patch_evidence:
+ lines.append(f"- {ev}")
+ lines.append("")
+
+ # Section 2: Integration Evidence (Spec File) - the "Plan"
+ has_integration = d.spec_patch_directives_for_cve or d.spec_changelog_cve_lines.strip()
+ if has_integration:
+ lines.append("### 2. Integration Evidence (Spec File)")
+ lines.append("")
+
+ if d.spec_patch_directives_for_cve:
+ # Split directives into declaration and application
+ declarations = [line for line in d.spec_patch_directives_for_cve
+ if line.strip().startswith("Patch")]
+ applications = [line for line in d.spec_patch_directives_for_cve
+ if line.strip().startswith("%patch")]
+
+ if declarations:
+ lines.append("**Patch declaration:**")
+ lines.append("")
+ lines.append("```ini")
+ lines.append("\n".join(declarations))
+ lines.append("```")
+ lines.append("")
+
+ if applications:
+ lines.append("**Patch application directive:**")
+ lines.append("")
+ lines.append("```ini")
+ lines.append("\n".join(applications))
+ lines.append("```")
+ lines.append("")
+
+ if d.spec_changelog_cve_lines.strip():
+ ex, trunc = _cap_text_excerpt(d.spec_changelog_cve_lines, max_excerpt)
+ hdr = "**Changelog entry:**"
+ if trunc:
+ hdr += " *(truncated)*"
+ lines.append(hdr)
+ lines.append("")
+ lines.append("```ini")
+ lines.append(ex)
+ lines.append("```")
+ lines.append("")
+
+ # Section 3: Execution Evidence (Build Log) - the "Action"
+ if d.build_log_patch_applied.strip() or build_evidence:
+ lines.append("### 3. Execution Evidence (Build Log)")
+ lines.append("")
+
+ for ev in build_evidence:
+ lines.append(f"- {ev}")
+ if build_evidence:
+ lines.append("")
+
+ if d.build_log_patch_applied.strip():
+ ex, trunc = _cap_text_excerpt(d.build_log_patch_applied, max_excerpt)
+ if trunc:
+ lines.append("**Build output:** *(truncated)*")
+ else:
+ lines.append("**Build output:**")
+ lines.append("")
+ lines.append("```bash")
+ lines.append(ex)
+ lines.append("```")
+ lines.append("")
+
+ # Section 4: Source Validation - the "Result"
+ if code_evidence:
+ lines.append("### 4. Source Validation")
+ lines.append("")
+ for ev in code_evidence:
+ lines.append(f"- {ev}")
+ lines.append("")
+
+ # Section 5: Tarball Reference
+ if d.spec_version_line or d.spec_source0_line:
+ lines.append("### 5. Tarball Reference")
+ lines.append("")
+ if d.spec_version_line:
+ lines.append(f"- `{d.spec_version_line}`")
+ if d.spec_source0_line:
+ lines.append(f"- `{d.spec_source0_line}`")
+ lines.append("")
+
+ # Additional evidence (uncategorized)
+ if other_evidence:
+ lines.append("### Additional Evidence")
+ lines.append("")
+ for ev in other_evidence:
+ lines.append(f"- {ev}")
+ lines.append("")
+
+ return lines
+
+
+def _format_extracted_facts_section(
+ d: DownstreamSearchReport,
+ *,
+ max_excerpt: int = L1_EXTRACTED_FACTS_EXCERPT_CHARS,
+) -> list[str]:
+ """Build markdown lines for the deterministic *Extracted facts* block.
+
+ .. deprecated::
+ This function is deprecated. Use `_format_interleaved_evidence()` instead,
+ which merges Evidence Chain and Extracted facts into a single interleaved
+ section for better readability.
+ """
+ warnings.warn(
+ "_format_extracted_facts_section is deprecated. "
+ "Use _format_interleaved_evidence() instead.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ lines: list[str] = [
+ "## Extracted facts",
+ "",
+ "*Verbatim excerpts from spec/build grep and parsers. Narrative sections below are model-generated.*",
+ "",
+ ]
+ lines.append(f"- **Downstream patch file found:** {d.is_patch_file_available}")
+ if d.patch_file_name:
+ lines.append(f"- **Patch file name:** `{d.patch_file_name}`")
+ lines.append(f"- **Patch referenced in spec (CVE grep):** {d.is_patch_in_spec_file}")
+ lines.append(f"- **Build log shows CVE / patch application:** {d.is_patch_applied_in_build}")
+ lines.append("")
+
+ if d.spec_patch_directives_for_cve:
+ lines.append("**Spec `PatchN:` line(s) whose patch filename contains this CVE:**")
+ block = "\n".join(d.spec_patch_directives_for_cve)
+ lines.extend(["", "```", block, "```", ""])
+ else:
+ lines.extend(["**Spec `PatchN:` line(s) whose patch filename contains this CVE:** *None found*", ""])
+
+ if d.spec_changelog_cve_lines.strip():
+ ex, trunc = _cap_text_excerpt(d.spec_changelog_cve_lines, max_excerpt)
+ sub = f" (truncated to ~{max_excerpt} chars)" if trunc else ""
+ lines.append(f"**%changelog line(s) mentioning this CVE:**{sub}")
+ lines.extend(["", "```", ex, "```", ""])
+ else:
+ lines.extend(["**%changelog line(s) mentioning this CVE:** *No matching lines* ", ""])
+
+ if d.spec_file_log_change.strip():
+ ex, trunc = _cap_text_excerpt(d.spec_file_log_change, max_excerpt)
+ hdr = "**All spec lines matching CVE grep (may include Patch, changelog, comments):**"
+ if trunc:
+ hdr += f" *({max_excerpt} char excerpt)*"
+ lines.append(hdr)
+ lines.extend(["", "```", ex, "```", ""])
+ else:
+ lines.extend(["**All spec lines matching CVE grep:** *None*", ""])
+
+ if d.build_log_patch_applied.strip():
+ ex, trunc = _cap_text_excerpt(d.build_log_patch_applied, max_excerpt)
+ hdr = "**Build log line(s) matching CVE grep:**"
+ if trunc:
+ hdr += f" *({max_excerpt} char excerpt)*"
+ lines.append(hdr)
+ lines.extend(["", "```", ex, "```", ""])
+ else:
+ lines.extend(["**Build log line(s) matching CVE grep:** *None or build log not available* ", ""])
+
+ # Spec tarball reference (Source0/Version) for delivery-model context
+ if d.spec_version_line or d.spec_source0_line:
+ lines.append("**Spec tarball reference:**")
+ if d.spec_version_line:
+ lines.append(f"- `{d.spec_version_line}`")
+ if d.spec_source0_line:
+ lines.append(f"- `{d.spec_source0_line}`")
+ lines.append("")
+
+ return lines
+
+
+def _format_downstream_for_report(report: DownstreamSearchReport | None) -> str:
+ """Format target package analysis results for prompt injection.
+
+ This section reports whether the TARGET package (the one being scanned)
+ contains a CVE-specific patch file.
+ """
+ if report is None:
+ return "Target package analysis did not produce results."
+
+ lines = []
+ lines.append(f"**Target Package Patch Available:** {report.is_patch_file_available}")
+
+ if report.is_patch_file_available:
+ lines.append(f"**Target Patch File:** `{report.patch_file_name}`")
+ lines.append(f"**Referenced in Spec:** {report.is_patch_in_spec_file}")
+ if report.spec_file_log_change:
+ lines.append(f"**Target Spec Changelog:**\n```\n{report.spec_file_log_change[:500]}\n```")
+ lines.append(f"**Applied in Build:** {report.is_patch_applied_in_build}")
+ if report.build_log_patch_applied:
+ lines.append(f"**Build Log Evidence:**\n```\n{report.build_log_patch_applied[:500]}\n```")
+
+ if report.parsed_patch:
+ lines.append(f"\n**Parsed Patch ({len(report.parsed_patch.files)} files):**")
+ for pf in report.parsed_patch.files[:5]:
+ added = sum(len(h.added_lines) for h in pf.hunks)
+ removed = sum(len(h.removed_lines) for h in pf.hunks)
+ lines.append(f"- `{pf.target_path}` (+{added}/-{removed} lines)")
+ if len(report.parsed_patch.files) > 5:
+ lines.append(f" (+{len(report.parsed_patch.files) - 5} more files)")
+ else:
+ lines.append("No CVE-specific patch file found in target package.")
+
+ return "\n".join(lines)
+
+
+def _format_upstream_for_report(report: UpstreamSearchReport | None) -> str:
+ """Format reference intel gathering results for prompt injection.
+
+ This section reports TWO distinct pieces of information:
+ 1. Rebase indicator: Checked TARGET's spec file for CVE mention
+ 2. Reference package: Downloaded a known-fixed package from intel to extract patch patterns
+ """
+ if report is None:
+ return "Reference intel gathering did not produce results."
+
+ lines = []
+
+ # Part 1: Rebase indicator (checked in TARGET's spec file)
+ rebase_status = report.is_code_fixed_by_rebase
+ if rebase_status == "unknown":
+ lines.append("**Target Rebase Indicator:** not found (no CVE mention in target's spec file)")
+ elif rebase_status == "yes":
+ lines.append("**Target Rebase Indicator:** found (CVE mentioned in target's spec changelog)")
+ else:
+ lines.append(f"**Target Rebase Indicator:** {rebase_status}")
+
+ if report.spec_file_log_change:
+ lines.append(f"**Target Spec Changelog Match:**\n```\n{report.spec_file_log_change[:500]}\n```")
+
+ # Part 2: Reference package (downloaded from intel for comparison)
+ if report.is_fixed_srpm_is_needed:
+ if report.reference_package_nvr:
+ lines.append(f"**Reference Fixed Package:** `{report.reference_package_nvr}` (from intel)")
+ else:
+ lines.append("**Reference Fixed Package:** Available (from intel)")
+ lines.append(f"**Reference Patch File:** `{report.fixed_srpm_file_name}`")
+ if report.fixed_parsed_patch:
+ lines.append(f"\n**Reference Patch ({len(report.fixed_parsed_patch.files)} files):**")
+ for pf in report.fixed_parsed_patch.files[:5]:
+ added = sum(len(h.added_lines) for h in pf.hunks)
+ removed = sum(len(h.removed_lines) for h in pf.hunks)
+ lines.append(f"- `{pf.target_path}` (+{added}/-{removed} lines)")
+
+
+ if report.reason_code_fixed_by_rebase:
+ lines.append(f"\n**Rebase Reasoning:** {report.reason_code_fixed_by_rebase}")
+
+ return "\n".join(lines)
+
+
+def _format_l2_for_report(l2_result: L2BuildResult | None) -> str:
+ """Format L2 Build Agent results for prompt injection.
+
+ When L2 results are present and contain an override verdict, this produces
+ a structured context block that instructs the LLM to treat L2 findings as
+ ground truth for exploitability (since L2 analyzes actual compiled binaries).
+
+ Returns an empty string if L2 results are None or have no override verdict.
+ """
+ if l2_result is None or l2_result.l2_override_verdict is None:
+ return ""
+
+ lines = [
+ "",
+ f"**L2 Verdict:** {l2_result.l2_override_verdict}",
+ f"**Compilation Status:** {l2_result.compilation_status}",
+ ]
+
+ if l2_result.compilation_evidence:
+ lines.append(f"**Evidence:** {l2_result.compilation_evidence}")
+
+ if l2_result.hardening_flags:
+ flags_str = ", ".join(l2_result.hardening_flags[:10])
+ if len(l2_result.hardening_flags) > 10:
+ flags_str += f" (+{len(l2_result.hardening_flags) - 10} more)"
+ lines.append(f"**Hardening Flags:** {flags_str}")
+
+ if l2_result.hardening_rationale:
+ lines.append(f"**Hardening Rationale:** {l2_result.hardening_rationale}")
+
+ if l2_result.hardening_relevant is not None:
+ lines.append(f"**Hardening Relevant to CVE:** {l2_result.hardening_relevant}")
+
+ lines.append("")
+
+ return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# Report generation pipeline
+# ---------------------------------------------------------------------------
+
+MAX_REPORT_CODE_SNIPPETS_VULNERABLE = 3
+MAX_REPORT_CODE_SNIPPETS_FIX = 3
+
+
+def _normalize_snippet_path(path: str) -> str:
+ """Stable comparison key for patch vs affected file paths."""
+ p = path.strip().replace("\\", "/")
+ while p.startswith("./"):
+ p = p[2:]
+ if p.startswith(("a/", "b/")):
+ p = p[2:]
+ return p.lower()
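+
+# Illustrative normalizations (paths invented):
+#   "b/crypto/sm2/sm2_crypt.c" -> "crypto/sm2/sm2_crypt.c"
+#   "./src\Parser.C"           -> "src/parser.c"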
+
+
+def _snippet_matches_any_affected_path(snippet_path: str, affected_files: list[str]) -> bool:
+ if not affected_files:
+ return False
+ norm_snip = _normalize_snippet_path(snippet_path)
+ snip_base = Path(snippet_path).name.lower()
+ for af in affected_files:
+ norm_af = _normalize_snippet_path(af)
+ if norm_snip == norm_af:
+ return True
+ if snip_base and snip_base == Path(af).name.lower():
+ return True
+ if norm_snip.endswith(norm_af) or norm_af.endswith(norm_snip):
+ return True
+ return False
+
+
+def _rank_patch_snippets_for_relevance(
+ snippets: list[CodeSnippet],
+ affected_files: list[str],
+) -> list[CodeSnippet]:
+ """Paths matching affected_files first; preserve original order within each bucket."""
+ if not affected_files:
+ return list(snippets)
+ indexed = list(enumerate(snippets))
+ indexed.sort(
+ key=lambda pair: (
+ 0 if _snippet_matches_any_affected_path(pair[1].file_path, affected_files) else 1,
+ pair[0],
+ ),
+ )
+ return [s for _, s in indexed]
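+
+# Example (hypothetical): with affected_files=["crypto/sm2/sm2_crypt.c"], the
+# stable sort above moves snippets from that file ahead of Makefile or test
+# snippets while preserving original order inside each bucket.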
+
+
+def _cap_snippets_by_type(
+ snippets: list[CodeSnippet],
+ *,
+ max_vulnerable: int = MAX_REPORT_CODE_SNIPPETS_VULNERABLE,
+ max_fix: int = MAX_REPORT_CODE_SNIPPETS_FIX,
+) -> list[CodeSnippet]:
+ """Keep insertion order; at most max_vulnerable vulnerable and max_fix fix snippets."""
+ n_vuln = n_fix = 0
+ out: list[CodeSnippet] = []
+ for s in snippets:
+ if s.snippet_type == "vulnerable":
+ if n_vuln >= max_vulnerable:
+ continue
+ n_vuln += 1
+ out.append(s)
+ elif s.snippet_type == "fix":
+ if n_fix >= max_fix:
+ continue
+ n_fix += 1
+ out.append(s)
+ else:
+ out.append(s)
+ return out
+
+
+def _extract_downstream_patch_code_snippets(
+ downstream_report: DownstreamSearchReport | None,
+) -> list[CodeSnippet]:
+ """Extract vulnerable/fix snippets from the downstream parsed patch only.
+
+ For purely additive patches (no removed lines), shows context lines
+ as "vulnerable" since they represent the code lacking the fix.
+ """
+ if not downstream_report or not downstream_report.parsed_patch:
+ return []
+ snippets: list[CodeSnippet] = []
+ for pf in downstream_report.parsed_patch.files:
+ for hunk in pf.hunks:
+ if hunk.removed_lines:
+ snippets.append(CodeSnippet(
+ file_path=pf.target_path.removeprefix("b/").removeprefix("a/"),  # lstrip("ab/") would eat leading path chars
+ line_number=hunk.source_start,
+ code="\n".join(hunk.removed_lines[:10]),
+ snippet_type="vulnerable",
+ source="downstream_patch",
+ ))
+ elif hunk.context_lines and hunk.added_lines:
+ snippets.append(CodeSnippet(
+ file_path=pf.target_path.removeprefix("b/").removeprefix("a/"),
+ line_number=hunk.source_start,
+ code="\n".join(hunk.context_lines[:10]),
+ snippet_type="vulnerable",
+ source="downstream_patch",
+ ))
+ if hunk.added_lines:
+ snippets.append(CodeSnippet(
+ file_path=pf.target_path.removeprefix("b/").removeprefix("a/"),
+ line_number=hunk.target_start,
+ code="\n".join(hunk.added_lines[:10]),
+ snippet_type="fix",
+ source="downstream_patch",
+ ))
+ return snippets
+
+
+def _extract_code_snippets(
+ downstream_report: DownstreamSearchReport | None,
+ upstream_report: UpstreamSearchReport | None,
+) -> list[CodeSnippet]:
+ """Extract code snippets from parsed patches.
+
+ For purely additive patches (no removed lines), shows context lines
+ as "vulnerable" since they represent the code lacking the fix.
+ """
+ snippets: list[CodeSnippet] = _extract_downstream_patch_code_snippets(downstream_report)
+
+ if upstream_report and upstream_report.fixed_parsed_patch:
+ for pf in upstream_report.fixed_parsed_patch.files:
+ for hunk in pf.hunks:
+ if hunk.removed_lines:
+ snippets.append(CodeSnippet(
+ file_path=pf.target_path.removeprefix("b/").removeprefix("a/"),
+ line_number=hunk.source_start,
+ code="\n".join(hunk.removed_lines[:10]),
+ snippet_type="vulnerable",
+ source="upstream_patch",
+ ))
+ elif hunk.context_lines and hunk.added_lines:
+ snippets.append(CodeSnippet(
+ file_path=pf.target_path.removeprefix("b/").removeprefix("a/"),
+ line_number=hunk.source_start,
+ code="\n".join(hunk.context_lines[:10]),
+ snippet_type="vulnerable",
+ source="upstream_patch",
+ ))
+ if hunk.added_lines:
+ snippets.append(CodeSnippet(
+ file_path=pf.target_path.removeprefix("b/").removeprefix("a/"),
+ line_number=hunk.target_start,
+ code="\n".join(hunk.added_lines[:10]),
+ snippet_type="fix",
+ source="upstream_patch",
+ ))
+
+ return snippets
+
+
+async def extract_l1_verdict(
+ llm,
+ vuln_id: str,
+ target_package: str,
+ final_answer: str,
+ tracer,
+) -> L1VerdictExtraction:
+ """Use LLM to extract structured verdict from L1 agent's final answer.
+
+ Parameters
+ ----------
+ llm:
+ LangChain LLM for verdict extraction.
+ vuln_id:
+ CVE identifier (e.g. "CVE-2026-5121").
+ target_package:
+ Name of the package being investigated.
+ final_answer:
+ The L1 agent's final answer text.
+ tracer:
+ Request-scoped tracing context.
+
+ Returns
+ -------
+ L1VerdictExtraction
+ Structured verdict with confidence and reasoning.
+ """
+ verdict_llm = llm.with_structured_output(L1VerdictExtraction)
+ prompt = L1_VERDICT_EXTRACTION_PROMPT.format(
+ vuln_id=vuln_id,
+ target_package=target_package,
+ final_answer=final_answer,
+ )
+ with tracer.push_active_function("extract_l1_verdict", input_data={"vuln_id": vuln_id}) as span:
+ result = await verdict_llm.ainvoke([SystemMessage(content=prompt)])
+ span.set_output({
+ "preliminary_verdict": result.preliminary_verdict,
+ "confidence": result.confidence,
+ })
+ logger.info(
+ "extract_l1_verdict: verdict=%s confidence=%.2f",
+ result.preliminary_verdict, result.confidence,
+ )
+ return result
+
+
+async def generate_code_agent_report(
+ *,
+ llm,
+ vuln_id: str,
+ target_package: str,
+ descriptions: list[tuple[str, str]],
+ downstream_report: DownstreamSearchReport | None,
+ upstream_report: UpstreamSearchReport | None,
+ l1_agent_answer: str | None,
+ tracer,
+ policy_context: str = "",
+ l2_result: L2BuildResult | None = None,
+) -> CodeAgentReport:
+ """Generate the final L1 Code Agent investigation report.
+
+ Synthesizes results from downstream search, upstream search, L1 agent analysis,
+ and optionally L2 build analysis into a comprehensive, auditable report with
+ a clear verdict.
+
+ Parameters
+ ----------
+ llm:
+ LangChain LLM for report generation.
+ vuln_id:
+ CVE identifier (e.g. "CVE-2026-5121").
+ target_package:
+ Name of the package being investigated.
+ descriptions:
+ ``(source_name, text)`` pairs from CVE intel.
+ downstream_report:
+ Output of downstream search (may be None).
+ upstream_report:
+ Output of upstream search (may be None).
+ l1_agent_answer:
+ Final answer from the L1 ReAct agent (may be None).
+ tracer:
+ Request-scoped tracing context.
+ policy_context:
+ Pre-formatted NVR posture and RHSA excerpt context for the LLM prompt.
+ l2_result:
+ Output of L2 build analysis (may be None). When present, L2 verdicts
+ override L1 findings as L2 analyzes actual compiled binaries.
+
+ Returns
+ -------
+ CodeAgentReport
+ Structured report with verdict, evidence, and recommendations.
+ """
+ from langchain_core.messages import HumanMessage, SystemMessage
+
+ cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions)
+
+ downstream_section = _format_downstream_for_report(downstream_report)
+ upstream_section = _format_upstream_for_report(upstream_report)
+ l1_agent_section = l1_agent_answer or "L1 agent did not produce a final answer."
+ l2_context_section = _format_l2_for_report(l2_result)
+
+ policy_context_section = f"\n{policy_context}\n\n" if policy_context else ""
+
+ prompt_text = CODE_AGENT_REPORT_PROMPT.format(
+ vuln_id=vuln_id,
+ target_package=target_package,
+ cve_description=cve_description,
+ policy_context_section=policy_context_section,
+ downstream_section=downstream_section,
+ upstream_section=upstream_section,
+ l1_agent_section=l1_agent_section,
+ l2_context_section=l2_context_section,
+ )
+
+ report_llm = llm.with_structured_output(CodeAgentReport)
+
+ has_l2_override = l2_result is not None and l2_result.l2_override_verdict is not None
+ with tracer.push_active_function(
+ "generate_report",
+ input_data={
+ "vuln_id": vuln_id,
+ "target_package": target_package,
+ "has_downstream_patch": downstream_report.is_patch_file_available if downstream_report else False,
+ "has_upstream_patch": upstream_report.is_fixed_srpm_is_needed if upstream_report else False,
+ "has_l1_answer": l1_agent_answer is not None,
+ "has_l2_override": has_l2_override,
+ },
+ ) as span:
+ messages = [
+ SystemMessage(content=prompt_text),
+ HumanMessage(content="Generate the report."),
+ ]
+ report: CodeAgentReport = await report_llm.ainvoke(messages)
+
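+ # Snippet-source precedence (mirrors the branches below): a parsed
+ # downstream patch wins, then the upstream fixed-version patch; the LLM's
+ # own structured-output snippets are kept only as a fallback.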
+ snippet_source = "unchanged"
+ downstream_patch_snippet_count_pre_cap = 0
+ if downstream_report and downstream_report.parsed_patch:
+ raw = _extract_downstream_patch_code_snippets(downstream_report)
+ downstream_patch_snippet_count_pre_cap = len(raw)
+ ranked = _rank_patch_snippets_for_relevance(raw, report.affected_files)
+ report.code_snippets = _cap_snippets_by_type(ranked)
+ snippet_source = "downstream_patch"
+ elif upstream_report and upstream_report.fixed_parsed_patch:
+ raw = _extract_code_snippets(downstream_report, upstream_report)
+ ranked = _rank_patch_snippets_for_relevance(raw, report.affected_files)
+ report.code_snippets = _cap_snippets_by_type(ranked)
+ snippet_source = "upstream_patch"
+ elif not report.code_snippets:
+ report.code_snippets = _extract_code_snippets(downstream_report, upstream_report)
+
+ span.set_output({
+ "justification_label": report.justification_label,
+ "affected_files_count": len(report.affected_files),
+ "caveats_count": len(report.caveats),
+ "code_snippets_count": len(report.code_snippets),
+ "snippet_source": snippet_source,
+ "downstream_patch_snippet_count_pre_cap": downstream_patch_snippet_count_pre_cap,
+ })
+
+ logger.info(
+ "generate_code_agent_report: justification=%s",
+ report.justification_label,
+ )
+
+ return report
+
+
+# ---------------------------------------------------------------------------
+# Diff and patch helpers
+# ---------------------------------------------------------------------------
+
+
+def download_patch_and_gen_diff(
+ fix_info: dict,
+ brew_downloader: BrewDownloader,
+ source_dir: Path,
+ patch_dir: Path,
+) -> Path | None:
+ """Download the patched SRPM and extract it into ``patch_dir``.
+
+ Tree-diff generation against ``source_dir`` is currently disabled, so the
+ function always returns ``None`` after downloading and extracting.
+ """
+ from exploit_iq_commons.utils.source_rpm_downloader import SourceRPMDownloader
+
+ srpm_path = brew_downloader.download_patched_srpm_by_nevra(fix_info["nevra"])
+ if srpm_path is None:
+ srpm_path = brew_downloader.download_patched_srpm(fix_info["name"], fix_info["version"], fix_info["release"])
+ if srpm_path is not None:
+ patch_dir.mkdir(parents=True, exist_ok=True)
+ shutil.copy2(srpm_path, patch_dir)
+ SourceRPMDownloader.extract_src_rpm(srpm_path, patch_dir)
+
+ # Tree-diff generation is intentionally disabled for now:
+ # diff_text = _generate_tree_diff(source_dir, patch_dir)
+ # diff_output_path = patch_dir.parent / "locate.diff"
+ # diff_output_path.write_text(diff_text, encoding="utf-8")
+ # return diff_output_path
+ return None
+
+
+# ---------------------------------------------------------------------------
+# Spec/build log parsing helpers
+# ---------------------------------------------------------------------------
+
+_SPEC_PATCH_RE = re.compile(r"^Patch(\d+)\s*:\s*(.+)$", re.IGNORECASE)
+
+
+def _parse_spec_patch_directives(
+ inspector, spec_path: Path,
+) -> list[tuple[int, str, str]]:
+ """Return ``[(index, filename, raw_line), ...]`` from ``PatchN:`` lines."""
+ matches = inspector.grep_content(_SPEC_PATCH_RE.pattern, spec_path)
+ results: list[tuple[int, str, str]] = []
+ for m in matches:
+ hit = _SPEC_PATCH_RE.match(m.line_content.strip())
+ if hit:
+ results.append((int(hit.group(1)), hit.group(2).strip(), m.line_content.strip()))
+ return results
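+
+# Example (comment only, hypothetical spec line):
+#   "Patch7: libfoo-CVE-2026-5121-bounds-check.patch"
+# parses to (7, "libfoo-CVE-2026-5121-bounds-check.patch", "<raw line>").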
+
+
+def _extract_spec_changelog(inspector, spec_path: Path) -> str | None:
+ """Return text after the ``%changelog`` directive, or ``None``."""
+ content = inspector.read_file(spec_path)
+ idx = content.find("%changelog")
+ if idx == -1:
+ return None
+ return content[idx + len("%changelog"):]
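+
+# Example (comment only): for spec content ending "...\n%changelog\n* Tue ...",
+# the returned string is everything after the "%changelog" token, beginning
+# with the newline that follows it.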
+
+
+_BINARY_FILE_EXTENSIONS = frozenset({
+ '.uu', '.uue', '.iso', '.bin', '.gz', '.bz2', '.xz', '.zip', '.tar', '.tgz', '.tbz2',
+ '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.ico', '.webp',
+ '.pdf', '.doc', '.docx', '.xls', '.xlsx',
+ '.exe', '.dll', '.so', '.dylib', '.a', '.o', '.obj',
+ '.pyc', '.pyo', '.class', '.jar', '.war',
+ '.woff', '.woff2', '.ttf', '.otf', '.eot',
+ '.mp3', '.mp4', '.wav', '.avi', '.mov', '.mkv',
+ '.db', '.sqlite', '.sqlite3',
+})
+
+
+def _is_binary_file_path(path: str) -> bool:
+ """Check if file path has a binary file extension."""
+ path_lower = path.lower()
+ return any(path_lower.endswith(ext) for ext in _BINARY_FILE_EXTENSIONS)
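+
+# Example behavior (comment only): "docs/diagram.PNG" -> True (the extension
+# match is case-insensitive), "src/reader.c" -> False.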
+
+
+def parse_patch_file(patch_path: Path) -> ParsedPatch | None:
+ """Parse a downstream .patch file into structured data.
+
+ Returns None if the file cannot be parsed.
+ """
+ try:
+ diff_text = patch_path.read_text(encoding="utf-8", errors="replace")
+ patch_set = PatchSet.from_string(diff_text)
+ except Exception:
+ logger.warning("parse_patch_file: failed to parse %s", patch_path, exc_info=True)
+ return None
+
+ files: list[PatchFile] = []
+ for patched_file in patch_set:
+ if patched_file.is_binary_file:
+ continue
+ if _is_binary_file_path(patched_file.target_file):
+ continue
+
+ hunks: list[PatchHunk] = []
+ for hunk in patched_file:
+ context, removed, added = [], [], []
+ for line in hunk:
+ if line.is_context:
+ context.append(str(line.value).rstrip("\n"))
+ elif line.is_removed:
+ removed.append(str(line.value).rstrip("\n"))
+ elif line.is_added:
+ added.append(str(line.value).rstrip("\n"))
+
+ hunks.append(PatchHunk(
+ source_start=hunk.source_start,
+ source_length=hunk.source_length,
+ target_start=hunk.target_start,
+ target_length=hunk.target_length,
+ context_lines=context,
+ removed_lines=removed,
+ added_lines=added,
+ ))
+
+ files.append(PatchFile(
+ source_path=patched_file.source_file,
+ target_path=patched_file.target_file,
+ hunks=hunks,
+ is_new_file=patched_file.is_added_file,
+ is_deleted_file=patched_file.is_removed_file,
+ ))
+
+ return ParsedPatch(patch_filename=patch_path.name, files=files)
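+
+# Sketch of the returned structure (field names as defined above; the path is
+# hypothetical):
+#   parsed = parse_patch_file(Path("fix-CVE-2026-5121.patch"))
+#   parsed.files[0].target_path           -> e.g. "b/libfoo/reader.c"
+#   parsed.files[0].hunks[0].added_lines  -> the "+" lines of the first hunk
+# Binary content is skipped twice: via unidiff's is_binary_file flag and via
+# the extension blocklist above.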
+
+
+# ---------------------------------------------------------------------------
+# Downstream search pipeline
+# ---------------------------------------------------------------------------
+async def downstream_search_preprocess(
+ *,
+ llm,
+ vuln_id: str,
+ descriptions: list[tuple[str, str]],
+ source_path: Path,
+ build_log_path: Path | None,
+ tracer,
+) -> DownstreamSearchReport:
+ """Build the downstream search pipeline."""
+ from vuln_analysis.tools.source_inspector import SourceInspector
+ inspector = SourceInspector(source_path)
+
+ cve_pattern = re.escape(vuln_id)
+ report = DownstreamSearchReport()
+ with tracer.push_active_function("Is_patch_file_available", input_data={"vuln_id": vuln_id}) as span:
+ patch_files = inspector.find_files("*.patch", recursive=False)
+ cve_patches = [p for p in patch_files if re.search(cve_pattern, p.name, re.IGNORECASE)]
+ if cve_patches:
+ report.is_patch_file_available = True
+ patch_file = cve_patches[0]
+ else:
+ report.is_patch_file_available = False
+ report.is_patch_in_spec_file = False
+ return report
+
+ # patch_file is always set here: the early return above fires when no
+ # CVE-specific patch file exists.
+ report.patch_file_name = patch_file.name
+
+ with tracer.push_active_function(
+ "Is_patch_in_spec_file", input_data={"patch_file_name": patch_file.name}
+ ) as span:
+ spec_files = inspector.find_files("*.spec", recursive=False)
+ spec_path = spec_files[0] if spec_files else None
+
+ if not spec_path:
+ report.is_patch_in_spec_file = False
+ else:
+ cve_c = re.compile(cve_pattern, re.IGNORECASE)
+ for _idx, fname, raw_line in _parse_spec_patch_directives(inspector, spec_path):
+ if cve_c.search(fname):
+ report.spec_patch_directives_for_cve.append(raw_line)
+ chlog = _extract_spec_changelog(inspector, spec_path)
+ if chlog:
+ cve_in_cl = [ln for ln in chlog.splitlines() if cve_c.search(ln)]
+ report.spec_changelog_cve_lines = "\n".join(cve_in_cl)
+ grep_spec_matches = inspector.grep_content(cve_pattern, spec_path)
+ if grep_spec_matches:
+ report.is_patch_in_spec_file = True
+ report.spec_file_log_change = "\n".join(m.line_content for m in grep_spec_matches)
+ else:
+ report.is_patch_in_spec_file = False
+
+ # Extract Source0: and Version: lines for delivery-model context
+ source0_matches = inspector.grep_content(r"^Source0:", spec_path)
+ if source0_matches:
+ report.spec_source0_line = source0_matches[0].line_content.strip()
+ version_matches = inspector.grep_content(r"^Version:", spec_path)
+ if version_matches:
+ report.spec_version_line = version_matches[0].line_content.strip()
+
+ with tracer.push_active_function(
+ "Is_patch_applied_in_build", input_data={"patch_file_name": patch_file.name}
+ ) as span:
+ if build_log_path and build_log_path.exists():
+ build_inspector = SourceInspector(build_log_path.parent)
+ build_log_matches = build_inspector.grep_content(cve_pattern, build_log_path)
+ if build_log_matches:
+ report.is_patch_applied_in_build = True
+ report.build_log_patch_applied = "\n".join(m.line_content for m in build_log_matches)
+ else:
+ report.is_patch_applied_in_build = False
+ else:
+ report.is_patch_applied_in_build = False
+
+ with tracer.push_active_function("Extract_patch_details", input_data={"patch_file_name": patch_file.name}) as span:
+ details = parse_patch_file(patch_file)
+ if details:
+ report.parsed_patch = details
+ else:
+ report.parsed_patch = None
+
+ return report
+
+
+async def upstream_search_preprocess(
+ *,
+ vuln_id: str,
+ source_path: Path,
+ fix_info: dict,
+ brew_downloader: BrewDownloader,
+ patch_dir: Path,
+ tracer,
+) -> UpstreamSearchReport:
+ """Build the upstream search pipeline."""
+ from vuln_analysis.tools.source_inspector import SourceInspector
+ inspector = SourceInspector(source_path)
+ report = UpstreamSearchReport()
+ cve_pattern = re.escape(vuln_id)
+ need_to_find_code = True
+ # Store reference package NVR from fix_info if available
+ if fix_info and fix_info.get("nevra"):
+ report.reference_package_nvr = fix_info["nevra"]
+
+ with tracer.push_active_function("Is_upstream_fixed_by_rebase", input_data={"vuln_id": vuln_id}) as span:
+ spec_files = inspector.find_files("*.spec", recursive=False)
+ spec_path = spec_files[0] if spec_files else None
+
+ if not spec_path:
+ report.is_code_fixed_by_rebase = "unknown"
+ else:
+ grep_spec_matches = inspector.grep_content(cve_pattern, spec_path)
+ if grep_spec_matches:
+ report.is_code_fixed_by_rebase = "yes"
+ report.spec_file_log_change = "\n".join(m.line_content for m in grep_spec_matches)
+ else:
+ report.is_code_fixed_by_rebase = "unknown"
+ span.set_output({
+ "is_code_fixed_by_rebase": report.is_code_fixed_by_rebase,
+ "spec_file_log_change": report.spec_file_log_change,
+ })
+
+ if fix_info and brew_downloader is not None and not patch_dir.exists():
+ with tracer.push_active_function(
+ "download_rpm_patch", input_data={"fix_info": fix_info}
+ ) as span:
+ try:
+ download_patch_and_gen_diff(fix_info, brew_downloader, source_path, patch_dir)
+ span.set_output({"patch_dir_exists": patch_dir.exists()})
+ except Exception as e:
+ logger.warning("locate: failed to download/extract patched SRPM: %s", e)
+ span.set_output({"error": str(e), "patch_dir_exists": False})
+
+ if patch_dir.exists():
+ patch_inspector = SourceInspector(patch_dir)
+ with tracer.push_active_function("is_patch_downsteam_patch_file", input_data={"patch_dir": patch_dir}) as span:
+
+ patch_files = patch_inspector.find_files("*.patch", recursive=False)
+ cve_patches = [p for p in patch_files if re.search(cve_pattern, p.name, re.IGNORECASE)]
+ if cve_patches:
+ report.is_fixed_srpm_is_needed = True
+ report.fixed_srpm_file_name = cve_patches[0].name
+ report.fixed_parsed_patch = parse_patch_file(cve_patches[0])
+ return report
+ else:
+ report.is_fixed_srpm_is_needed = False
+ span.set_output({
+ "is_fixed_srpm_is_needed": report.is_fixed_srpm_is_needed})
+
+ # need_to_find_code is currently always True, so the OSV fallback runs
+ # whenever no CVE-specific patch was found in the extracted SRPM above.
+ if not patch_dir.exists() or need_to_find_code:
+ from vuln_analysis.utils.osv_patch_retriever import OSVPatchRetriever
+ with tracer.push_active_function("search_for_code_in_osv", input_data={"vuln_id": vuln_id}) as span:
+ async with aiohttp.ClientSession() as session:
+ retriever = OSVPatchRetriever(session=session)
+ result = await retriever.get_fix_patch(vuln_id, fix_info["version"], fix_info["name"])
+ if result and result.parsed_patch:
+ report.fixed_parsed_patch = result.parsed_patch
+ report.fixed_srpm_file_name = result.patch_url
+ report.is_fixed_srpm_is_needed = True
+ report.osv_result = result
+ span.set_output({
+ "osv_commit_message": report.osv_result.commit_message,
+ })
+ return report
+
+
+# ---------------------------------------------------------------------------
+# L1 Agent Prompt Templates (Patch Available Scenario)
+# ---------------------------------------------------------------------------
+
+L1_AGENT_SYS_PROMPT_PATCH_AVAILABLE = (
+ "You are a security analyst investigating whether a CVE fix has been applied to a package.\n"
+ "A downstream patch file exists and has been analyzed.\n\n"
+ "VULNERABILITY_INTEL contains DOWNSTREAM_PATCH_STATUS and extracted patterns from the patch.\n"
+ "The source code index contains the UNPATCHED tarball; the patch is applied at BUILD time.\n\n"
+ "YOUR TASK: Verify (1) vulnerable code exists in source, (2) fix pattern is absent.\n"
+ "Both outcomes are EXPECTED when DOWNSTREAM_PATCH_STATUS is APPLIED.\n\n"
+ "CRITICAL RULES:\n"
+ "- If DOWNSTREAM_PATCH_STATUS is APPLIED, the package is PATCHED (patch applied at build time).\n"
+ "- Finding vulnerable code in source is EXPECTED (source is unpatched tarball).\n"
+ "- NOT finding fix pattern in source is EXPECTED (fix is in patch file, not tarball).\n"
+ "- Both findings together confirm the patch will correctly fix the code at build time.\n\n"
+ "ANSWER QUALITY:\n"
+ "- Cite specific file paths and line numbers from tool results.\n"
+ "- Quote the actual code found, not just describe it.\n"
+ "- Confirm the patch addresses the vulnerable code found.\n"
+ "- State confidence level based on evidence quality."
+)
+
+L1_AGENT_SYS_PROMPT_UPSTREAM_PATCH = (
+ "You are a security analyst verifying that a package is VULNERABLE to a CVE.\n"
+ "The TARGET package does NOT contain a CVE-specific patch file.\n"
+ "However, patterns have been extracted from the patch in a FIXED RPM version.\n\n"
+ "VULNERABILITY_INTEL contains patterns extracted from the fixed version's patch.\n\n"
+ "YOUR TASK: Verify the TARGET package contains the vulnerable code and LACKS the fix.\n\n"
+ "VERIFICATION STRATEGY:\n"
+ "1. FIRST search for the VULNERABLE code pattern (from VULNERABLE_PATTERNS).\n"
+ " - Use function names, variable names, or unique code snippets.\n"
+ " - The vulnerable code SHOULD exist in the target package.\n"
+ "2. If vulnerable code is found, search for the FIX code pattern (from FIX_PATTERNS).\n"
+ " - The fix code should NOT exist in the target package.\n"
+ "3. CONCLUSION:\n"
+ " - If vulnerable code EXISTS and fix is ABSENT → Package is VULNERABLE.\n"
+ " - If fix code IS found → Package may be patched via rebase (investigate further).\n"
+ " - If neither is found → Use file paths from AFFECTED_FILES to locate relevant code.\n\n"
+ "CRITICAL RULES:\n"
+ "- The patch is from a FIXED version - expect the target to have vulnerable code.\n"
+ "- Use file paths and function names from VULNERABILITY_INTEL to locate code.\n"
+ "- Search for distinctive code patterns, not generic keywords.\n"
+ "- Base conclusions ONLY on tool results, not assumptions.\n\n"
+ "ANSWER QUALITY:\n"
+ "- Cite specific file paths and line numbers from tool results.\n"
+ "- Quote the actual code found, not just describe it.\n"
+ "- Compare found code against both vulnerable and fix patterns.\n"
+ "- Clearly state whether vulnerable code exists and whether fix is absent.\n"
+ "- State confidence level based on evidence quality."
+)
+
+L1_AGENT_SYS_PROMPT_REBASE_FIX = (
+ "You are a security analyst verifying that a CVE fix is PRESENT in a rebased package.\n"
+ "The TARGET package was REBASED to a newer upstream version that claims to fix this CVE.\n\n"
+ "VULNERABILITY_INTEL contains patterns extracted from the upstream fix.\n\n"
+ "YOUR TASK: Verify the TARGET package contains the FIX code (proving rebase was effective).\n\n"
+ "VERIFICATION STRATEGY:\n"
+ "1. FIRST search for the FIX code pattern (from FIX_PATTERNS).\n"
+ " - Use function names, variable names, or unique code snippets.\n"
+ " - The fix code SHOULD exist in the target package (proving rebase worked).\n"
+ "2. If fix code is found, optionally confirm VULNERABLE code is ABSENT.\n"
+ " - The vulnerable code should NOT exist (was replaced by the fix).\n"
+ "3. CONCLUSION:\n"
+ " - If fix code EXISTS → Package is PATCHED via rebase.\n"
+ " - If vulnerable code still EXISTS and fix is ABSENT → Rebase may be incomplete.\n"
+ " - If neither is found → Use file paths from AFFECTED_FILES to locate relevant code.\n\n"
+ "CRITICAL RULES:\n"
+ "- The patch is from a FIXED version - expect the target to have the fix code.\n"
+ "- Use file paths and function names from VULNERABILITY_INTEL to locate code.\n"
+ "- Search for distinctive code patterns, not generic keywords.\n"
+ "- Base conclusions ONLY on tool results, not assumptions.\n\n"
+ "ANSWER QUALITY:\n"
+ "- Cite specific file paths and line numbers from tool results.\n"
+ "- Quote the actual code found, not just describe it.\n"
+ "- Compare found code against both vulnerable and fix patterns.\n"
+ "- Clearly state whether fix code exists, confirming the rebase.\n"
+ "- State confidence level based on evidence quality."
+)
+
+L1_AGENT_PROMPT_TEMPLATE = """{sys_prompt}
+
+CVE ID: {vuln_id}
+Target Package: {target_package}
+
+VULNERABILITY_INTEL:
+{vulnerability_intel}
+
+AVAILABLE TOOLS:
+{tools}
+
+TOOL SELECTION STRATEGY:
+{tool_selection_strategy}
+
+{tool_instructions}
+
+RESPONSE:
+{{"""
+
+L1_AGENT_THOUGHT_INSTRUCTIONS = """
+You will receive KNOWLEDGE (cumulative findings) and LATEST FINDINGS (most recent tool results).
+BEFORE ACTING, you MUST:
+1. Review KNOWLEDGE to see what tools were already called (TOOL_CALL_RECORD entries)
+2. Review LATEST FINDINGS for the most recent tool output analysis
+3. NEVER repeat any action already in TOOL_CALL_RECORD
+4. Your next action MUST build on findings - progress the investigation
+
+PHASE 0 - CHECK PATCH STATUS (PRIORITY):
+ FIRST check VULNERABILITY_INTEL for DOWNSTREAM_PATCH_STATUS.
+ If DOWNSTREAM_PATCH_STATUS is APPLIED:
+ - The source code index contains the UNPATCHED tarball
+ - The patch file is applied at BUILD time, not in the indexed source
+ - Do 2 verification searches, then FINISH with verdict PATCHED
+
+PHASE 1 - INTELLIGENCE (PRE-COMPLETED):
+ Review VULNERABILITY_INTEL above. It contains:
+ - DOWNSTREAM_PATCH_STATUS: APPLIED means package is patched at build time
+ - PATCH_FILE: Name of the patch file
+ - AFFECTED_FILES: Files to verify
+ - VULNERABLE_FUNCTIONS: Functions to search for
+ - VULNERABLE_PATTERNS: Code patterns indicating vulnerability
+ - FIX_PATTERNS: Code patterns indicating the fix (will be ABSENT in source)
+
+PHASE 2 - SOURCE CODE INSPECTION (when DOWNSTREAM_PATCH_STATUS is APPLIED):
+ Do exactly 2 verification searches:
+ 1. Search for vulnerable function/pattern → should FIND it (source is unpatched)
+ 2. Search for fix pattern → should NOT find it (fix is in separate patch file)
+ Both outcomes are EXPECTED and confirm the patch is correct.
+ After both searches, FINISH immediately with PATCHED verdict.
+
+PHASE 3 - VERDICT:
+ If DOWNSTREAM_PATCH_STATUS is APPLIED:
+ - Found vulnerable code + fix absent = PATCHED (patch will fix it at build time)
+ - This is the EXPECTED outcome, not a failure
+ Conclude after 2 searches - do NOT keep searching.
+
+RULES:
+1. You MUST select a tool ONLY from the AVAILABLE TOOLS listed above. Do NOT invent or use any other tool names.
+2. Output valid JSON only. thought < 100 words. final_answer < 150 words.
+3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer.
+4. If DOWNSTREAM_PATCH_STATUS is APPLIED, do max 2 searches then conclude PATCHED.
+5. Do NOT call the same tool with the same input twice - CHECK KNOWLEDGE for prior calls.
+6. When patch is APPLIED: finding vulnerable code = GOOD, not finding fix = GOOD (expected).
+7. If a pattern contains special regex characters, escape them or use literal substrings.
+
+SEARCH STRATEGY:
+If DOWNSTREAM_PATCH_STATUS is APPLIED:
+- Search 1: Find vulnerable function → EXPECTED to find (source is unpatched)
+- Search 2: Check fix pattern → EXPECTED to NOT find (fix is in patch file)
+- After both: FINISH with PATCHED verdict
+If a search returned results:
+- If vulnerable code found and patch is APPLIED, proceed to verify fix is absent
+- After both checks complete, FINISH
+If a pattern wasn't found:
+- Try simpler substrings or partial patterns
+- Try a different tool (Source Grep <-> Code Keyword Search)
+
+EXAMPLES:
+{{"thought": "DOWNSTREAM_PATCH_STATUS is APPLIED. Search for vulnerable function first", "mode": "act", "actions": {{"tool": "Source Grep", "query": "parse_rockridge", "reason": "Verify vulnerable function exists in unpatched source"}}, "final_answer": null}}
+
+
+{{"thought": "Found vulnerable function. Now verify fix pattern is absent (expected since fix is in patch file)", "mode": "act", "actions": {{"tool": "Source Grep", "query": "if (file->pz_log2_bs < 15", "reason": "Confirm fix pattern is absent from source"}}, "final_answer": null}}
+
+
+{{"thought": "Vulnerable code found, fix absent as expected. DOWNSTREAM_PATCH_STATUS is APPLIED so package is PATCHED.", "mode": "finish", "actions": null, "final_answer": "The package is PATCHED. Found vulnerable function at file.c:123. Fix pattern absent from source (expected - fix is in patch file applied at build time). DOWNSTREAM_PATCH_STATUS confirms patch is applied."}}
+
+
+{{"thought": "KNOWLEDGE has sufficient evidence: vulnerable code at X, fix absent", "mode": "finish", "actions": null, "final_answer": "The package is [PATCHED/VULNERABLE]. Found [evidence] at [file:line]. The code [matches/differs from] the patch because [reason]."}}
+"""
+
+L1_AGENT_THOUGHT_UPSTREAM_INSTRUCTIONS = """
+You will receive KNOWLEDGE (cumulative findings) and LATEST FINDINGS (most recent tool results).
+BEFORE ACTING, you MUST:
+1. Review KNOWLEDGE to see what tools were already called (TOOL_CALL_RECORD entries)
+2. Review LATEST FINDINGS for the most recent tool output analysis
+3. NEVER repeat any action already in TOOL_CALL_RECORD
+4. Your next action MUST build on findings - progress the investigation
+
+PHASE 1 - INTELLIGENCE (PRE-COMPLETED):
+ Review VULNERABILITY_INTEL above. It contains:
+ - AFFECTED_FILES: Files to verify
+ - VULNERABLE_FUNCTIONS: Functions to search for
+ - VULNERABLE_PATTERNS: Code patterns indicating vulnerability
+ - FIX_PATTERNS: Code patterns indicating the fix
+ - SEARCH_KEYWORDS: Terms to grep for
+
+PHASE 2 - SOURCE CODE INSPECTION (YOUR TASK):
+ For EACH item in VULNERABLE_FUNCTIONS and AFFECTED_FILES:
+ 1. Search for vulnerable pattern - it SHOULD exist in unpatched target
+ 2. Search for fix pattern - it should NOT exist in unpatched target
+ IMPORTANT: Do NOT stop after finding the first file. Check ALL AFFECTED_FILES.
+
+PHASE 3 - VERDICT:
+ Only conclude when:
+ - ALL AFFECTED_FILES have been searched
+ - ALL VULNERABLE_FUNCTIONS have been located
+ - Evidence is sufficient for confident verdict
+
+RULES:
+1. You MUST select a tool ONLY from the AVAILABLE TOOLS listed above. Do NOT invent or use any other tool names.
+2. Output valid JSON only. thought < 100 words. final_answer < 150 words.
+3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer.
+4. Source Grep: use query field with pattern from VULNERABILITY_INTEL (function name, variable, or code snippet).
+5. Code Keyword Search: use query field for broader searches.
+6. Do NOT call the same tool with the same input twice - CHECK KNOWLEDGE for prior calls.
+7. FIRST search for VULNERABLE code - it SHOULD exist in target.
+8. THEN search for FIX code - it should NOT exist in target.
+9. If a pattern contains special regex characters, escape them or use literal substrings.
+
+SEARCH STRATEGY:
+If a search returned results:
+- Narrow down by searching within that specific file (e.g., "pattern,filename.c")
+- Search for related symbols or variables from the code found
+If a pattern wasn't found:
+- Try simpler substrings or partial patterns
+- Try a different tool (Source Grep <-> Code Keyword Search)
+- Search for file paths from VULNERABILITY_INTEL AFFECTED_FILES
+If KNOWLEDGE shows partial evidence:
+- Investigate other files mentioned in VULNERABILITY_INTEL AFFECTED_FILES
+- Search for key variables from the fix pattern
+
+EXAMPLES:
+{{"thought": "No prior searches in KNOWLEDGE. Search for the vulnerable code pattern from the patch", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Locate vulnerable code that should exist in unpatched target"}}, "final_answer": null}}
+
+
+{{"thought": "KNOWLEDGE shows function found at iso9660.c:2074. Now verify the fix is NOT present", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Check if fix code is absent (confirms vulnerability)"}}, "final_answer": null}}
+
+
+{{"thought": "KNOWLEDGE shows fix pattern not found but need more evidence. Search for key variable in the found file", "mode": "act", "actions": {{"tool": "Source Grep", "query": ",", "reason": "Examine how the vulnerable variable is handled"}}, "final_answer": null}}
+
+
+{{"thought": "KNOWLEDGE shows Source Grep failed. Try Code Keyword Search for the file from patch", "mode": "act", "actions": {{"tool": "Code Keyword Search", "query": "", "reason": "Verify we are looking at the correct file"}}, "final_answer": null}}
+
+
+{{"thought": "KNOWLEDGE has sufficient evidence: vulnerable code at X:Y, fix pattern absent", "mode": "finish", "actions": null, "final_answer": "The package is VULNERABLE. Found vulnerable code pattern at [file:line]: [quote code]. The fix from the patched version is NOT present - searched for [fix pattern] with no matches. The target package lacks the security fix."}}
+
+
+{{"thought": "KNOWLEDGE shows fix code found despite no CVE patch file", "mode": "finish", "actions": null, "final_answer": "The package appears PATCHED via rebase. Found fix code at [file:line]: [quote code]. Although no CVE-specific patch exists, the fix may have been included via upstream version update."}}
+"""
+
+L1_AGENT_THOUGHT_REBASE_INSTRUCTIONS = """
+You will receive KNOWLEDGE (cumulative findings) and LATEST FINDINGS (most recent tool results).
+BEFORE ACTING, you MUST:
+1. Review KNOWLEDGE to see what tools were already called (TOOL_CALL_RECORD entries)
+2. Review LATEST FINDINGS for the most recent tool output analysis
+3. NEVER repeat any action already in TOOL_CALL_RECORD
+4. Your next action MUST build on findings - progress the investigation
+
+PHASE 1 - INTELLIGENCE (PRE-COMPLETED):
+ Review VULNERABILITY_INTEL above. It contains:
+ - AFFECTED_FILES: Files to verify
+ - VULNERABLE_FUNCTIONS: Functions to search for
+ - VULNERABLE_PATTERNS: Code patterns indicating vulnerability
+ - FIX_PATTERNS: Code patterns indicating the fix
+ - SEARCH_KEYWORDS: Terms to grep for
+
+PHASE 2 - SOURCE CODE INSPECTION (YOUR TASK):
+ For EACH item in VULNERABLE_FUNCTIONS and AFFECTED_FILES:
+ 1. Search for fix pattern - it SHOULD exist in rebased target
+ 2. Verify vulnerable pattern is ABSENT from target
+ IMPORTANT: Do NOT stop after finding the first file. Check ALL AFFECTED_FILES.
+
+PHASE 3 - VERDICT:
+ Only conclude when:
+ - ALL AFFECTED_FILES have been searched
+ - ALL VULNERABLE_FUNCTIONS have been located
+ - Evidence is sufficient for confident verdict
+
+RULES:
+1. You MUST select a tool ONLY from the AVAILABLE TOOLS listed above. Do NOT invent or use any other tool names.
+2. Output valid JSON only. thought < 100 words. final_answer < 150 words.
+3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer.
+4. Source Grep: use query field with pattern from VULNERABILITY_INTEL (function name, variable, or code snippet).
+5. Code Keyword Search: use query field for broader searches.
+6. Do NOT call the same tool with the same input twice - CHECK KNOWLEDGE for prior calls.
+7. FIRST search for FIX code - it SHOULD exist in rebased target.
+8. THEN verify VULNERABLE code is ABSENT from target.
+9. If a pattern contains special regex characters, escape them or use literal substrings.
+
+SEARCH STRATEGY:
+If a search returned results:
+- Narrow down by searching within that specific file (e.g., "pattern,filename.c")
+- Search for related symbols or variables from the code found
+If a pattern wasn't found:
+- Try simpler substrings or partial patterns
+- Try a different tool (Source Grep <-> Code Keyword Search)
+- Search for file paths from VULNERABILITY_INTEL AFFECTED_FILES
+If KNOWLEDGE shows partial evidence:
+- Investigate other files mentioned in VULNERABILITY_INTEL AFFECTED_FILES
+- Search for key variables from the fix pattern
+
+EXAMPLES:
+{{"thought": "No prior searches in KNOWLEDGE. Search for the fix code pattern from the patch", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Locate fix code that should exist after rebase"}}, "final_answer": null}}
+
+
+{{"thought": "KNOWLEDGE shows fix pattern not found. Try searching for key variable from the fix", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Find how the fix-related variable is handled"}}, "final_answer": null}}
+
+
+{{"thought": "KNOWLEDGE shows variable found at file.c:100. Search for the full fix pattern in that file", "mode": "act", "actions": {{"tool": "Source Grep", "query": ",file.c", "reason": "Check if fix exists in the located file"}}, "final_answer": null}}
+
+
+{{"thought": "KNOWLEDGE shows fix confirmed. Now verify the vulnerable code is absent", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Check if vulnerable code was removed (confirms fix)"}}, "final_answer": null}}
+
+
+{{"thought": "KNOWLEDGE has sufficient evidence: fix code at X:Y, vulnerable code absent", "mode": "finish", "actions": null, "final_answer": "The package is PATCHED via rebase. Found fix code at [file:line]: [quote code]. The vulnerable code pattern is NOT present - the rebase successfully included the security fix."}}
+
+
+{{"thought": "KNOWLEDGE shows vulnerable code still present despite rebase claim", "mode": "finish", "actions": null, "final_answer": "The rebase may be INCOMPLETE. Found vulnerable code still present at [file:line]: [quote code]. The fix code was not found despite spec indicating rebase fixed this CVE. Manual review required."}}
+"""
+
+
+# ---------------------------------------------------------------------------
+# L1 Observation Node Prompts (Comprehension + Memory Update)
+# ---------------------------------------------------------------------------
+
+L1_COMPREHENSION_PROMPT = """Analyze the tool output and extract key findings for CVE patch verification.
+GOAL: Verify whether {vuln_id} fix is applied to {target_package}
+
+**CRITICAL FIRST CHECK - DO THIS BEFORE ANYTHING ELSE:**
+Examine NEW OUTPUT below. If it is EMPTY, contains only whitespace, or shows an error:
+- findings MUST be: ["FAILED: {tool_used} '{tool_input}' returned empty/no matches"]
+- tool_outcome MUST be: "{tool_used} [{tool_input}] -> NO MATCHES"
+- DO NOT fabricate, infer, or assume any results. STOP HERE.
+
+VULNERABILITY_INTEL:
+{vulnerability_intel}
+
+TOOL USED: {tool_used}
+TOOL INPUT: {tool_input}
+THOUGHT: {last_thought}
+NEW OUTPUT:
+{tool_output}
+
+**ANTI-HALLUCINATION RULES:**
+1. You can ONLY report findings based on text that ACTUALLY APPEARS in NEW OUTPUT above.
+2. Every finding claiming code was "found" MUST include a direct quote from NEW OUTPUT.
+3. If NEW OUTPUT is empty, you CANNOT claim any code was found - report FAILED.
+4. The tool_outcome MUST accurately reflect what NEW OUTPUT shows, not what you expect.
+
+CODE ANALYSIS RULES (only if NEW OUTPUT has content):
+1. READ the actual code snippets in NEW OUTPUT. Compare against VULNERABLE_PATTERNS and FIX_PATTERNS.
+2. For each match found:
+ - Quote the actual line from NEW OUTPUT
+ - State the file:line where it was found
+ - Determine if it matches VULNERABLE or FIX pattern
+3. RECORD file paths and line numbers for all relevant matches.
+
+COVERAGE CHECK:
+Based on VULNERABILITY_INTEL above, assess investigation completeness:
+- Have you searched in ALL files listed in AFFECTED_FILES?
+- Have you found ALL instances of VULNERABLE_FUNCTIONS?
+- Are there OTHER files containing the same vulnerable pattern?
+If coverage is incomplete, note which files/functions remain unchecked.
+
+
+OUTPUT RULES:
+- findings: 2-4 observations. Each positive finding MUST quote actual content from NEW OUTPUT.
+- tool_outcome: "{tool_used} [pattern] -> found in file.c:123" OR "{tool_used} [pattern] -> NO MATCHES"
+RESPONSE:
+{{"""
+
+L1_MEMORY_UPDATE_PROMPT = """Merge new findings into the CVE patch investigation memory.
+GOAL: Verify whether {vuln_id} fix is applied to {target_package}
+PREVIOUS MEMORY: {previous_memory}
+NEW FINDINGS (from tool analysis):
+{findings}
+TOOL CALL RECORD: {tool_outcome}
+
+**CRITICAL: HANDLE FAILURES CORRECTLY**
+If NEW FINDINGS contains "FAILED:" or TOOL CALL RECORD shows "NO MATCHES":
+- Add the failure/no-match to memory verbatim
+- Do NOT convert a failed search into a positive finding
+- "NO MATCHES" for a fix pattern means FIX_CODE_ABSENT, not FIX_CODE_FOUND
+
+MEMORY RULES:
+1. Start from PREVIOUS MEMORY. Append new facts from NEW FINDINGS. No duplicates.
+2. Add TOOL CALL RECORD verbatim so future steps know what was already searched.
+3. If NEW FINDINGS report a failure or no matches, record it as-is. Do NOT infer positive findings.
+
+PATCH VERIFICATION TRACKING:
+- If vulnerable code pattern FOUND (with file:line evidence): add "VULNERABLE_CODE_FOUND: [pattern] in [file:line]"
+- If fix code pattern FOUND (with file:line evidence): add "FIX_CODE_FOUND: [pattern] in [file:line]"
+- If search returned NO MATCHES for vulnerable code: add "VULNERABLE_CODE_ABSENT: [pattern] not found"
+- If search returned NO MATCHES for fix code: add "FIX_CODE_ABSENT: [pattern] not found"
+
+VERDICT EVIDENCE:
+- PATCHED evidence: fix code found AND/OR vulnerable code absent
+- VULNERABLE evidence: vulnerable code found AND fix code absent
+- INCONCLUSIVE: neither pattern found, or conflicting evidence
+
+- results: copy the NEW FINDINGS as-is.
+- memory: updated cumulative findings with search results and evidence tags.
+RESPONSE:
+{{"""
+
+
+# ---------------------------------------------------------------------------
+# L1 Observation Node Prompts (CVE-Description Mode - No Patch Available)
+# ---------------------------------------------------------------------------
+
+L1_COMPREHENSION_PROMPT_CVE_DESC = """Analyze the tool output for CVE patch verification using CVE description context.
+GOAL: Verify whether {vuln_id} fix is applied to {target_package}
+
+**CRITICAL FIRST CHECK - DO THIS BEFORE ANYTHING ELSE:**
+Examine NEW OUTPUT below. If it is EMPTY, contains only whitespace, or shows an error:
+- findings MUST be: ["FAILED: {tool_used} '{tool_input}' returned empty/no matches"]
+- tool_outcome MUST be: "{tool_used} [{tool_input}] -> NO MATCHES"
+- DO NOT fabricate, infer, or assume any results. STOP HERE.
+
+CVE DESCRIPTION:
+{cve_description}
+
+SPEC CHANGELOG (rebase info):
+{spec_log_change}
+
+NOTE: No patch file available. Extract search terms from CVE description.
+
+TOOL USED: {tool_used}
+TOOL INPUT: {tool_input}
+THOUGHT: {last_thought}
+NEW OUTPUT:
+{tool_output}
+
+**ANTI-HALLUCINATION RULES:**
+1. You can ONLY report findings based on text that ACTUALLY APPEARS in NEW OUTPUT above.
+2. Every finding claiming code was "found" MUST include a direct quote from NEW OUTPUT.
+3. If NEW OUTPUT is empty, you CANNOT claim any code was found - report FAILED.
+4. The tool_outcome MUST accurately reflect what NEW OUTPUT shows, not what you expect.
+
+CODE ANALYSIS RULES (only if NEW OUTPUT has content):
+1. EXTRACT key identifiers from the CVE description:
+ - Function names, variable names, API calls
+ - File paths or component names mentioned
+
+2. For each code match in NEW OUTPUT:
+ - Quote the actual line from NEW OUTPUT
+ - Does it relate to the vulnerability described?
+ - Does it show defensive patterns (bounds checking, null validation)?
+ - Record file path and line number as evidence
+
+3. DEFENSIVE PATTERNS indicating a fix:
+ - Input validation, bounds checking, null guards
+ - Resource cleanup, error handling
+
+OUTPUT:
+- findings: 2-4 observations. Each positive finding MUST quote actual content from NEW OUTPUT.
+- tool_outcome: "{tool_used} [pattern] -> found in file.c:123" OR "{tool_used} [pattern] -> NO MATCHES"
+RESPONSE:
+{{"""
+
+L1_MEMORY_UPDATE_PROMPT_CVE_DESC = """Merge findings into CVE patch investigation memory.
+GOAL: Verify whether {vuln_id} fix is applied to {target_package}
+MODE: CVE-description based (no patch patterns)
+
+PREVIOUS MEMORY: {previous_memory}
+NEW FINDINGS: {findings}
+TOOL CALL RECORD: {tool_outcome}
+
+MEMORY RULES:
+1. Append new facts from NEW FINDINGS to PREVIOUS MEMORY. No duplicates.
+2. Add TOOL CALL RECORD verbatim.
+
+CVE-BASED TRACKING:
+- CVE-related code FOUND: "CVE_CODE_FOUND: [symbol] in [file:line]"
+- Defensive pattern FOUND: "DEFENSIVE_CODE_FOUND: [pattern] in [file:line]"
+- Search no match: "SEARCH_NO_MATCH: [pattern]"
+
+VERDICT (CVE-description mode):
+- LIKELY_PATCHED: defensive code found, no vulnerability indicators
+- LIKELY_VULNERABLE: vulnerability patterns found, no defensive code
+- INCONCLUSIVE: insufficient evidence
+
+- results: copy the NEW FINDINGS as-is.
+- memory: updated cumulative findings with evidence tags.
+RESPONSE:
+{{"""
+
+
+# ---------------------------------------------------------------------------
+# L1 Agent Prompt Template (No Patch - CVE Description Mode)
+# ---------------------------------------------------------------------------
+
+L1_AGENT_SYS_PROMPT_REBASE_NO_PATCH = """You are a security analyst verifying that a CVE fix is PRESENT in a rebased package.
+The TARGET package was REBASED to a newer upstream version that claims to fix this CVE.
+NO PATCH FILE IS AVAILABLE - you must use the CVE description to guide your search.
+
+YOUR TASK: Verify the TARGET package contains the fix by searching for:
+1. Code patterns mentioned in the CVE description
+2. Defensive code that would mitigate the vulnerability
+3. Function/symbol names related to the CVE
+
+VERIFICATION STRATEGY (No Patch Mode):
+1. EXTRACT key identifiers from the CVE description:
+ - Function names, API calls, variable names
+ - Vulnerable code constructs described
+ - Fixed/secure code patterns described
+2. SEARCH for these patterns in the target source code
+3. ANALYZE the code to determine if it shows the fix behavior
+4. CONCLUDE based on presence of defensive code and absence of vulnerability indicators"""
+
+L1_AGENT_PROMPT_TEMPLATE_NO_PATCH = """{sys_prompt}
+
+CVE ID: {vuln_id}
+Target Package: {target_package}
+
+VULNERABILITY_INTEL:
+{vulnerability_intel}
+
+AVAILABLE TOOLS:
+{tools}
+
+TOOL SELECTION STRATEGY:
+{tool_selection_strategy}
+
+{tool_instructions}"""
+
+L1_AGENT_THOUGHT_CVE_DESC_INSTRUCTIONS = """
+You will receive KNOWLEDGE (cumulative findings) and LATEST FINDINGS (most recent tool results).
+BEFORE ACTING, you MUST:
+1. Review KNOWLEDGE to see what tools were already called (TOOL_CALL_RECORD entries)
+2. Review LATEST FINDINGS for the most recent tool output analysis
+3. NEVER repeat any action already in TOOL_CALL_RECORD
+4. Your next action MUST build on findings - progress the investigation
+
+PHASE 1 - INTELLIGENCE (PRE-COMPLETED):
+ Review VULNERABILITY_INTEL above. It contains:
+ - AFFECTED_FILES: Files to verify (may be inferred from CVE description)
+ - VULNERABLE_FUNCTIONS: Functions to search for
+ - VULNERABLE_PATTERNS: Code patterns indicating vulnerability
+ - FIX_PATTERNS: Code patterns indicating the fix
+ - SEARCH_KEYWORDS: Terms to grep for
+ - ROOT_CAUSE: Description of the vulnerability mechanism
+
+PHASE 2 - SOURCE CODE INSPECTION (YOUR TASK):
+ For EACH item in VULNERABLE_FUNCTIONS and SEARCH_KEYWORDS:
+ 1. Search for vulnerable code patterns
+ 2. Search for defensive/fix patterns (bounds checks, validation, etc.)
+ IMPORTANT: Do NOT stop after finding the first file. Check ALL potential locations.
+
+PHASE 3 - VERDICT:
+ Only conclude when:
+ - Key files have been searched
+ - Vulnerable functions have been located
+ - Evidence is sufficient for confident verdict
+
+
+RESPONSE FORMAT (JSON):
+You must respond with a JSON object with these fields:
+- thought: Your reasoning based on KNOWLEDGE and VULNERABILITY_INTEL (reference what was already found)
+- mode: "act" (to use a tool) or "finish" (to provide final answer)
+- actions: (only if mode="act") {{"tool": "Tool Name", "query": "search term", "reason": "why this search"}}
+- final_answer: (only if mode="finish") Your conclusion about patch status
+
+RULES:
+1. Do NOT call the same tool with the same input twice - CHECK KNOWLEDGE for prior calls.
+2. If KNOWLEDGE shows a search was done, your next action must be DIFFERENT.
+3. Output valid JSON only. thought < 100 words.
+
+SEARCH STRATEGY:
+If a search returned results:
+- Narrow down by searching within that specific file (e.g., "pattern,filename.c")
+- Search for related symbols or defensive patterns in the found code
+If a pattern wasn't found:
+- Try simpler substrings or partial patterns
+- Try a different tool (Source Grep <-> Code Keyword Search)
+- Search for SEARCH_KEYWORDS from VULNERABILITY_INTEL
+
+EXAMPLES:
+{{"thought": "No prior searches in KNOWLEDGE. CVE mentions zisofs block pointer overflow. Search for zisofs handling.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "zisofs", "reason": "Find CVE-related code handling zisofs"}}, "final_answer": null}}
+
+{{"thought": "KNOWLEDGE shows zisofs found at iso9660.c. CVE mentions bounds check fix. Search for defensive bounds checking in that file.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "pz_log2_bs,iso9660.c", "reason": "Check for bounds validation on the vulnerable variable"}}, "final_answer": null}}
+
+{{"thought": "KNOWLEDGE shows pz_log2_bs used but no bounds check found. Try Code Keyword Search for broader context.", "mode": "act", "actions": {{"tool": "Code Keyword Search", "query": "pz_log2_bs", "reason": "Find all usages to verify no defensive checks exist"}}, "final_answer": null}}
+
+{{"thought": "KNOWLEDGE shows: zisofs at iso9660.c, pz_log2_bs has no bounds check. Evidence sufficient.", "mode": "finish", "actions": null, "final_answer": "The package is LIKELY VULNERABLE. Found zisofs handling at iso9660.c but no bounds checking on pz_log2_bs variable. The CVE describes missing validation on block size which matches the observed code."}}
+"""
+
diff --git a/src/vuln_analysis/functions/cve_build_agent.py b/src/vuln_analysis/functions/cve_build_agent.py
new file mode 100644
index 000000000..8f8c7e709
--- /dev/null
+++ b/src/vuln_analysis/functions/cve_build_agent.py
@@ -0,0 +1,677 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Level 2 Build Agent for Package Vulnerability Checker.
+
+Performs BuildCompilationCheck (Phase 1) and HardeningCheck (Phase 2) to
+determine if vulnerable code identified by L1 is actually compiled into
+the binary and whether hardening flags provide mitigation.
+"""
+
+from pathlib import Path
+from enum import StrEnum
+
+from aiq.builder.builder import Builder
+from aiq.builder.framework_enum import LLMFrameworkEnum
+from aiq.builder.function_info import FunctionInfo
+from aiq.cli.register_workflow import register_function
+from aiq.data_models.function import FunctionBaseConfig
+from pydantic import Field
+
+from exploit_iq_commons.logging.loggers_factory import LoggingFactory, trace_id
+from exploit_iq_commons.data_models.checker_status import (
+ L2BuildResult,
+ format_vulnerability_intel_for_prompt,
+)
+
+from langgraph.graph import StateGraph, START, END
+from langgraph.prebuilt import ToolNode
+from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, RemoveMessage
+
+from nat.builder.context import Context
+from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput
+
+from vuln_analysis.functions.react_internals import CheckerThought, CodeFindings, Observation, FORCED_FINISH_PROMPT
+
+from vuln_analysis.functions.build_agent_graph_defs import (
+ BuildAgentState,
+ BuildHarvestReport,
+ harvest_build_data,
+ L2_CONFIG_PROMPT_TEMPLATE,
+ L2_CONFIG_SYS_PROMPT,
+ L2_CONFIG_THOUGHT_INSTRUCTIONS,
+ L2_COMPREHENSION_PROMPT,
+ L2_MEMORY_UPDATE_PROMPT,
+ L2_HARDENING_PROMPT_TEMPLATE,
+ L2_HARDENING_SYS_PROMPT,
+ L2_HARDENING_THOUGHT_INSTRUCTIONS,
+ L2CompileVerdictExtraction,
+ L2_COMPILATION_VERDICT_PROMPT,
+ L2_HARDENING_VERDICT_PROMPT,
+ L2HardeningVerdictExtraction,
+)
+from vuln_analysis.runtime_context import ctx_state
+import uuid
+import tiktoken
+
+logger = LoggingFactory.get_agent_logger(__name__)
+
+
+class CVEBuildAgentConfig(FunctionBaseConfig, name="cve_build_agent"):
+ """
+ Level 2 Build Agent. Analyzes build artifacts to determine if vulnerable
+ code is compiled into the binary and whether hardening flags mitigate.
+
+ Phase 1: BuildCompilationCheck - Is vulnerable code compiled?
+ Phase 2: HardeningCheck - Do hardening flags mitigate the CVE?
+ """
+
+ base_checker_dir: str = Field(
+ default=".cache/am_cache/checker",
+ description="Root directory for checker-specific artifacts.",
+ )
+ max_iterations: int = Field(
+ default=5,
+ description="The maximum number of iterations for the agent.",
+ )
+ llm_name: str = Field(description="The LLM model to use with the L2 build agent.")
+ tool_names: list[str] = Field(default=[], description="The list of tools to provide to the L2 build agent.")
+ context_window_token_limit: int = Field(default=5000, description="Token limit for context window before pruning old messages.")
+
+
+def _build_tool_strategy(tool_names: list[str]) -> str:
+ """Generate tool usage guidance based on available tools."""
+ strategies = []
+ tool_names_lower = [t.lower().replace("_", " ") for t in tool_names]
+
+ if any("grep" in t for t in tool_names_lower):
+ strategies.append("- Use Source Grep for exact code patterns from patch (function names, variable names, specific code)")
+ if any("keyword" in t or "search" in t for t in tool_names_lower):
+ strategies.append("- Use Code Keyword Search for broader concept searches when grep fails")
+ if any("read" in t for t in tool_names_lower):
+ strategies.append("- Use Read File to examine full context around matches")
+
+ return "\n".join(strategies) if strategies else "Use available tools to search for vulnerable and fixed code patterns."
+
+
+class L2InvestigationPhase(StrEnum):
+ CONFIGURATION = "configuration"
+ HARDENING = "hardening"
+
+
+async def create_graph_build_agent(
+ config: CVEBuildAgentConfig,
+ builder: Builder,
+ state: AgentMorpheusEngineInput,
+ tracer,
+):
+ """Build the L2 Build Agent LangGraph.
+
+ Graph structure:
+ START -> data_harvest_node -> thought_node -+-> END (finish)
+ |
+ +-> tool_node -> observation_node -> thought_node
+ """
+ # Node name constants
+ DATA_HARVEST_NODE = "data_harvest"
+ THOUGHT_NODE = "thought_node"
+ TOOL_NODE = "tool_node"
+ OBSERVATION_NODE = "observation_node"
+ FORCED_FINISH_NODE = "forced_finish"
+ INVESTIGATION_PHASE_NODE = "investigation_phase"
+ llm = await builder.get_llm(llm_name=config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN)
+ tools = builder.get_tools(tool_names=config.tool_names, wrapper_type=LLMFrameworkEnum.LANGCHAIN)
+
+ thought_llm = llm.with_structured_output(CheckerThought)
+ comprehension_llm = llm.with_structured_output(CodeFindings)
+ observation_llm = llm.with_structured_output(Observation)
+ compilation_verdict_llm = llm.with_structured_output(L2CompileVerdictExtraction)
+ hardening_verdict_llm = llm.with_structured_output(L2HardeningVerdictExtraction)
+ tools_node = ToolNode(tools, handle_tool_errors=True) if tools else None
+ enabled_tool_names = [tool.name for tool in tools]
+ tool_descriptions_list = [t.name + ": " + t.description for t in tools]
+ tools_str = "\n".join(tool_descriptions_list)
+ tool_strategy = _build_tool_strategy(enabled_tool_names)
+ # Extract context from state (guaranteed by early exit checks in _arun)
+ ctx = state.info.checker_context
+ assert ctx is not None, "checker_context must exist (checked in _arun)"
+ assert ctx.l1_result is not None, "l1_result must exist (checked in _arun)"
+ assert ctx.artifacts.build_log_path, "build_log_path must exist (checked in _arun)"
+ assert ctx.source_key, "source_key must exist when artifacts exist"
+
+ l1_result = ctx.l1_result
+ artifacts = ctx.artifacts
+ target_package = state.input.image.target_package
+ vuln_id = state.input.scan.vulns[0].vuln_id
+
+ # Paths
+ source_key = ctx.source_key
+ checker_dir = Path(config.base_checker_dir) / source_key
+ build_log_path = Path(artifacts.build_log_path)
+
+ # L1 results - use full VulnerabilityIntel for richer context
+ vulnerability_intel = l1_result.vulnerability_intel
+ vulnerability_intel_str = format_vulnerability_intel_for_prompt(vulnerability_intel) if vulnerability_intel else "No intel available"
+ l1_preliminary_verdict = l1_result.preliminary_verdict
+
+ # Extract CWE ID from intel (if available)
+ cwe_id = None
+ intel_list = state.info.intel
+ intel = intel_list[0]
+ if intel.nvd and intel.nvd.cwe_id:
+ cwe_id = intel.nvd.cwe_id
+ logger.info("build_agent: CWE ID from intel: %s", cwe_id)
+
+ _tiktoken_enc = tiktoken.get_encoding("cl100k_base")
+
+ investigation_stack: list[L2InvestigationPhase] = []
+ investigation_stack.append(L2InvestigationPhase.HARDENING)
+ investigation_stack.append(L2InvestigationPhase.CONFIGURATION)
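+ # LIFO stack: CONFIGURATION is pushed last, so investigation_stack[-1]
+ # selects the configuration phase first; HARDENING takes over once the
+ # configuration phase is popped (presumably elsewhere in the graph flow).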
+ # -------------------------------------------------------------------------
+ # Helper and node definitions
+ # -------------------------------------------------------------------------
+ def _count_tokens(text: str) -> int:
+ """Count tokens using tiktoken cl100k_base encoding (~90-95% accurate for Llama 3.1)."""
+ try:
+ return len(_tiktoken_enc.encode(text))
+ except Exception:
+ return len(text) // 4
+
+ def _estimate_tokens(runtime_prompt: str, messages: list, observation: Observation | None) -> int:
+ """Estimate the token count thought_node will send to the LLM."""
+ parts = [runtime_prompt]
+ for msg in messages:
+ if hasattr(msg, "content") and isinstance(msg.content, str):
+ parts.append(msg.content)
+ if observation is not None:
+ for item in (observation.memory or []):
+ parts.append(item)
+ for item in (observation.results or []):
+ parts.append(item)
+ return _count_tokens("\n".join(parts))
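+ # Example (comment only): _estimate_tokens(prompt, messages, obs) joins the
+ # prompt, every string message body, and the observation's memory/results
+ # entries with newlines, then counts tokens once, so the estimate tracks the
+ # context that thought_node actually assembles.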
+ # -------------------------------------------------------------------------
+ # Data Harvest Node
+ # -------------------------------------------------------------------------
+ async def build_runtime_prompt(harvest_report: BuildHarvestReport) -> str:
+ """Generate the runtime prompt for the current investigation phase."""
+ current_phase = investigation_stack[-1]
+ if current_phase == L2InvestigationPhase.CONFIGURATION:
+ return L2_CONFIG_PROMPT_TEMPLATE.format(
+ sys_prompt=L2_CONFIG_SYS_PROMPT,
+ vuln_id=vuln_id,
+ target_package=target_package.name,
+ vulnerability_intel=vulnerability_intel_str,
+ l1_preliminary_verdict=l1_preliminary_verdict,
+ disabled_features=harvest_report.disabled_features,
+ spec_disabled_features=harvest_report.spec_disabled_features,
+ tools=tools_str,
+ tool_instructions=L2_CONFIG_THOUGHT_INSTRUCTIONS,
+ )
+ if current_phase == L2InvestigationPhase.HARDENING:
+ return L2_HARDENING_PROMPT_TEMPLATE.format(
+ sys_prompt=L2_HARDENING_SYS_PROMPT,
+ vuln_id=vuln_id,
+ target_package=target_package.name,
+ cwe_id=cwe_id,
+ expected_hardening_table=harvest_report.expected_hardening,
+ tools=tools_str,
+ tool_instructions=L2_HARDENING_THOUGHT_INSTRUCTIONS,
+ )
+ raise ValueError(f"Unknown investigation phase: {current_phase}")
+
+ async def data_harvest_node(state: BuildAgentState) -> dict:
+ """Harvest build data from artifacts.
+
+ Extracts structured data from the build log before the ReAct loop starts.
+ """
+ logger.info("data_harvest_node: starting")
+
+ with tracer.push_active_function("data_harvest", input_data={}) as span:
+ # Find spec file if available
+ spec_path = None
+ if checker_dir and checker_dir.exists():
+ spec_files = list((checker_dir / "source").glob("*.spec"))
+ spec_path = spec_files[0] if spec_files else None
+
+ harvest_report = await harvest_build_data(
+ build_log_path=build_log_path,
+ spec_path=spec_path,
+ cwe_id=cwe_id,
+ )
+
+ runtime_prompt = await build_runtime_prompt(harvest_report)
+
+ affected_files_count = len(vulnerability_intel.affected_files) if vulnerability_intel else 0
+ span.set_output({
+ "disabled_features_count": len(harvest_report.disabled_features),
+ "spec_disabled_features_count": len(harvest_report.spec_disabled_features),
+ "expected_hardening_count": len(harvest_report.expected_hardening),
+ "vulnerability_intel_files_count": affected_files_count,
+ })
+
+ return {
+ "harvest_report": harvest_report,
+ "vulnerability_intel_str": vulnerability_intel_str,
+ "l1_preliminary_verdict": l1_preliminary_verdict,
+ "runtime_prompt": runtime_prompt,
+ "messages": [AIMessage(content="Build data harvested, beginning analysis.")],
+ }
+
+ async def thought_node(state: BuildAgentState) -> dict:
+ """Generate next thought/action using the LLM.
+
+ ReAct reasoning step for the build analysis loop.
+ """
+ step_num = state.get("step", 0)
+ logger.info("thought_node: starting step %d", step_num)
+
+ runtime_prompt = state.get("runtime_prompt")
+ _messages = [SystemMessage(content=runtime_prompt)] + state["messages"]
+
+ with tracer.push_active_function("thought_node", input_data={}) as span:
+ obs = state.get("observation", None)
+ if obs is not None:
+ memory_list = obs.memory if obs.memory else ["No prior knowledge."]
+ recent_findings = obs.results if obs.results else ["No recent findings."]
+ memory_context = "\n".join(f"- {m}" for m in memory_list)
+ findings_context = "\n".join(f"- {f}" for f in recent_findings)
+ context_block = f"KNOWLEDGE:\n{memory_context}\nLATEST FINDINGS:\n{findings_context}"
+                messages.append(SystemMessage(content=context_block))
+
+            response: CheckerThought = await thought_llm.ainvoke(messages)
+
+ if response.mode == "finish":
+ ai_message = AIMessage(content=response.final_answer or "Analysis complete.")
+ else:
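+                # Emit the structured action as a LangChain tool call so the
+                # graph's tool node can execute it and return a ToolMessage.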
+ tool_name = response.actions.tool
+ arguments = response.actions.query
+ tool_call_id = str(uuid.uuid4())
+ ai_message = AIMessage(
+ content=response.thought,
+ tool_calls=[{"name": tool_name, "args": {"query": arguments}, "id": tool_call_id}]
+ )
+ span.set_output({
+ "thought": response.thought,
+ "mode": response.mode,
+ "actions": response.actions,
+ "final_answer": response.final_answer,
+ })
+
+ return {
+ "messages": [ai_message],
+ "thought": response,
+ "step": step_num + 1,
+ "max_steps": config.max_iterations,
+ }
+
+ async def observation_node(state: BuildAgentState) -> dict:
+ """Process tool output: comprehension -> memory update for build analysis."""
+ logger.info("observation_node: starting")
+ tool_message = state["messages"][-1]
+ last_thought = state.get("thought")
+ if not last_thought:
+ return {
+ "messages": [AIMessage(content="No thought found")],
+ }
+ last_thought_text = last_thought.thought
+ tool_used = last_thought.actions.tool
+ tool_input_detail = last_thought.actions.query
+        obs = state.get("observation")
+        previous_memory = obs.memory if obs else ["No data gathered yet."]
+
+ harvest_report = state.get("harvest_report") or BuildHarvestReport()
+ target_package_name = target_package.name if target_package else "unknown"
+
+ with tracer.push_active_function("observation_node", input_data=f"tool used:{tool_used} + {tool_input_detail}") as span:
+ tool_output_for_llm = tool_message.content
+
+ # Step 1: Comprehension - extract findings from tool output
+ comp_prompt = L2_COMPREHENSION_PROMPT.format(
+ vuln_id=vuln_id,
+ target_package=target_package_name,
+ vulnerability_intel=vulnerability_intel_str,
+ disabled_features=", ".join(harvest_report.disabled_features) if harvest_report.disabled_features else "None",
+ spec_disabled_features=", ".join(harvest_report.spec_disabled_features) if harvest_report.spec_disabled_features else "None",
+ tool_used=tool_used,
+ tool_input=tool_input_detail,
+ last_thought=last_thought_text,
+ tool_output=tool_output_for_llm[:8000],
+ )
+ code_findings: CodeFindings = await comprehension_llm.ainvoke([SystemMessage(content=comp_prompt)])
+ findings_text = "\n".join(f"- {f}" for f in code_findings.findings)
+
+ # Step 2: Memory update - merge findings into cumulative memory
+ mem_prompt = L2_MEMORY_UPDATE_PROMPT.format(
+ vuln_id=vuln_id,
+ target_package=target_package_name,
+ previous_memory="\n".join(f"- {m}" for m in previous_memory) if isinstance(previous_memory, list) else previous_memory,
+ findings=findings_text,
+ tool_outcome=code_findings.tool_outcome,
+ )
+ new_observation: Observation = await observation_llm.ainvoke([SystemMessage(content=mem_prompt)])
+
+ messages = state["messages"]
+ active_prompt = state.get("runtime_prompt") or ""
+ estimated = _estimate_tokens(active_prompt, messages, new_observation)
+ orig_estimated = estimated
+ prune_messages = []
+ if estimated > config.context_window_token_limit and len(messages) > 3:
+ with tracer.push_active_function("context_pruning", input_data={"estimated_tokens": estimated, "limit": config.context_window_token_limit}) as prune_span:
+
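+                # Drop the oldest messages first; the returned RemoveMessage entries
+                # are applied as deletions by the graph's message reducer.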
+ for msg in messages:
+ prune_messages.append(RemoveMessage(id=msg.id))
+ estimated -= _count_tokens(msg.content) if hasattr(msg, "content") and isinstance(msg.content, str) else 0
+ if estimated <= config.context_window_token_limit:
+ break
+ logger.info(
+ "Context pruning: removed %d messages, estimated tokens now ~%d (limit %d)",
+ len(prune_messages), estimated, config.context_window_token_limit,
+ )
+ prune_span.set_output({
+ "pruning_triggered": len(prune_messages) > 0,
+ "messages_pruned": len(prune_messages),
+ "tokens_before": orig_estimated,
+ "tokens_after": estimated,
+ })
+
+ span.set_output({
+ "last_thought_text": last_thought_text,
+ "tool_output_for_llm": tool_output_for_llm[:500],
+ "findings": code_findings.findings,
+ "tool_outcome": code_findings.tool_outcome,
+ "new_memory": new_observation.memory,
+ "amount_of_orig_tokens": orig_estimated,
+ "amount_of_estimated_tokens": estimated,
+ })
+ return {
+ "messages": prune_messages,
+ "observation": new_observation,
+ }
+
+ async def forced_finish_node(state: BuildAgentState) -> dict:
+ """Force finish when max iterations reached.
+
+ Invokes the LLM with FORCED_FINISH_PROMPT to generate a final answer
+ based on evidence gathered so far.
+ """
+ step_num = state.get("step", 0)
+ with tracer.push_active_function("forced_finish_node", input_data=f"step:{step_num}") as span:
+ try:
+ active_prompt = state.get("runtime_prompt") or ""
+ messages = [SystemMessage(content=active_prompt)] + state["messages"]
+ messages.append(HumanMessage(content=FORCED_FINISH_PROMPT))
+
+ obs = state.get("observation")
+ if obs is not None and obs.memory:
+ memory_context = "\n".join(f"- {m}" for m in obs.memory)
+ messages.append(SystemMessage(content=f"KNOWLEDGE:\n{memory_context}"))
+
+ response: CheckerThought = await thought_llm.ainvoke(messages)
+
+ if response.mode == "finish" and response.final_answer:
+ ai_message = AIMessage(content=response.final_answer)
+ final_answer = response.final_answer
+ else:
+ final_answer = "Unable to determine compilation status within iteration limit."
+ ai_message = AIMessage(content=final_answer)
+ response = CheckerThought(
+ thought=response.thought or "Max steps exceeded",
+ mode="finish",
+ actions=None,
+ final_answer=final_answer,
+ )
+
+ span.set_output({"final_answer_length": len(final_answer), "step": step_num})
+ return {
+ "messages": [ai_message],
+ "thought": response,
+ "step": step_num,
+ "max_steps": state.get("max_steps", config.max_iterations),
+ "observation": state.get("observation"),
+ "output": final_answer,
+ }
+ except Exception as e:
+ logger.exception("forced_finish_node failed at step %d", step_num)
+ span.set_output({"error": str(e), "exception_type": type(e).__name__, "step": step_num})
+ raise
+
+ async def should_continue(state: BuildAgentState) -> str:
+ """Route based on thought mode."""
+ thought = state.get("thought")
+ if thought is not None and thought.mode == "finish":
+ return INVESTIGATION_PHASE_NODE
+ if state.get("step", 0) >= state.get("max_steps", config.max_iterations):
+ return FORCED_FINISH_NODE
+ return TOOL_NODE
+
+
+ async def is_investigation_finished(state: BuildAgentState) -> str:
+ """Check if the investigation is finished."""
+ if len(investigation_stack) == 0:
+ return END
+ return THOUGHT_NODE
+
+ async def investigation_phase_node(state: BuildAgentState) -> dict:
+ """Determine the next investigation phase."""
+ if len(investigation_stack) == 0:
+ raise ValueError("Investigation stack is empty")
+
+ final_answer = None
+ thought = state.get("thought")
+ if thought and thought.mode == "finish":
+ final_answer = thought.final_answer
+
+ current_phase = investigation_stack[-1]
+ with tracer.push_active_function("investigation_phase_node", input_data=f"phase :{current_phase}") as span:
+ investigation_stack.pop()
+ if current_phase == L2InvestigationPhase.CONFIGURATION:
+                verdict: L2CompileVerdictExtraction = await compilation_verdict_llm.ainvoke(
+                    [SystemMessage(content=L2_COMPILATION_VERDICT_PROMPT.format(final_answer=final_answer))]
+                )
+ span.set_output({
+ "compilation_status": verdict.compilation_status,
+ "confidence": verdict.confidence,
+ "reasoning": verdict.reasoning,
+ })
+ if verdict.compilation_status == "not_compiled":
+ return {"L2CompileVerdict": verdict}
+ else:
+ # Check for architecture mismatch before proceeding to hardening
+ harvest_report = state.get("harvest_report")
+ l1_arch = vulnerability_intel.affected_architectures if vulnerability_intel else "both"
+ build_arch = harvest_report.build_architecture if harvest_report else "unknown"
+
+ if l1_arch != "both" and build_arch != "unknown" and l1_arch != build_arch:
+ logger.info(
+ "investigation_phase_node: Architecture mismatch - CVE affects %s, build is %s. Skipping hardening.",
+ l1_arch, build_arch
+ )
+ # Override verdict with architecture mismatch reasoning
+ arch_verdict = L2CompileVerdictExtraction(
+ compilation_status="compiled",
+ confidence=1.0,
+ reasoning=f"Architecture mismatch: CVE affects {l1_arch} only, build is {build_arch}. Vulnerability cannot occur on this architecture."
+ )
+ span.set_output({
+ "architecture_mismatch": True,
+ "l1_arch": l1_arch,
+ "build_arch": build_arch,
+ })
+ # Clear investigation stack to skip hardening phase
+ investigation_stack.clear()
+ return {"L2CompileVerdict": arch_verdict}
+
+ # Normal path: proceed to hardening phase
+ preprocess_data = state.get("harvest_report") or BuildHarvestReport()
+ runtime_prompt = await build_runtime_prompt(preprocess_data)
+ messages = state["messages"]
+ prune_messages = []
+ for msg in messages:
+ prune_messages.append(RemoveMessage(id=msg.id))
+ span.set_output({
+ "runtime_prompt": runtime_prompt,
+ })
+ return {
+ "runtime_prompt": runtime_prompt,
+ "thought": None,
+ "observation": None,
+ "step": 0,
+ "messages": prune_messages,
+ "L2CompileVerdict": verdict,
+ }
+ else:
+                # Current phase was HARDENING: extract the hardening verdict.
+                verdict: L2HardeningVerdictExtraction = await hardening_verdict_llm.ainvoke(
+                    [SystemMessage(content=L2_HARDENING_VERDICT_PROMPT.format(final_answer=final_answer))]
+                )
+ span.set_output({
+ "hardening_status": verdict.hardening_status,
+ "confidence": verdict.confidence,
+ "reasoning": verdict.reasoning,
+ })
+ return {
+ "L2HardeningVerdict": verdict,
+ }
+ # -------------------------------------------------------------------------
+ # Build graph
+ # -------------------------------------------------------------------------
+
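+    # Topology: START -> data_harvest -> thought -> {tool -> observation -> thought}*,
+    # with forced_finish once the step cap is hit; investigation_phase pops the phase
+    # stack and either loops back to thought (next phase) or ends the run.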
+ flow = StateGraph(BuildAgentState)
+
+ flow.add_node(DATA_HARVEST_NODE, data_harvest_node)
+ flow.add_node(THOUGHT_NODE, thought_node)
+ flow.add_node(FORCED_FINISH_NODE, forced_finish_node)
+ flow.add_node(OBSERVATION_NODE, observation_node)
+ flow.add_node(TOOL_NODE, tools_node)
+ flow.add_node(INVESTIGATION_PHASE_NODE, investigation_phase_node)
+
+ flow.add_edge(START, DATA_HARVEST_NODE)
+ flow.add_edge(DATA_HARVEST_NODE, THOUGHT_NODE)
+ edge_map = {INVESTIGATION_PHASE_NODE: INVESTIGATION_PHASE_NODE, FORCED_FINISH_NODE: FORCED_FINISH_NODE, TOOL_NODE: TOOL_NODE}
+ flow.add_conditional_edges(THOUGHT_NODE, should_continue, edge_map)
+ flow.add_edge(TOOL_NODE, OBSERVATION_NODE)
+ flow.add_edge(OBSERVATION_NODE, THOUGHT_NODE)
+ flow.add_edge(FORCED_FINISH_NODE, INVESTIGATION_PHASE_NODE)
+ flow.add_conditional_edges(INVESTIGATION_PHASE_NODE, is_investigation_finished, {END: END, THOUGHT_NODE: THOUGHT_NODE})
+
+ app = flow.compile()
+ return app
+
+
+@register_function(config_type=CVEBuildAgentConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN])
+async def cve_build_agent(config: CVEBuildAgentConfig, builder: Builder):
+ """Level 2 Build Agent entry point."""
+
+ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput:
+ """Run L2 build analysis and populate l2_result on checker_context."""
+ trace_id.set(message.input.scan.id)
+ tracer = Context.get()
+
+ # Set ctx_state for tools
+ from types import SimpleNamespace
+ workflow_state = SimpleNamespace(original_input=message, info=message.info)
+ ctx_state.set(workflow_state)
+
+ logger.info("build_agent: starting L2 investigation")
+
+ ctx = message.info.checker_context
+ if not ctx or not ctx.l1_result:
+ logger.warning("build_agent: no L1 result available, skipping L2")
+ return message
+
+ if not ctx.artifacts or not ctx.artifacts.build_log_path:
+ logger.warning("build_agent: no build log available, skipping L2")
+ return message
+
+ # Build and run the graph
+ build_agent_graph = await create_graph_build_agent(config, builder, message, tracer)
+ initial_state: BuildAgentState = {
+ "messages": [HumanMessage(content="Begin L2 build analysis")],
+ "step": 0,
+ "max_steps": config.max_iterations,
+ }
+
+ with tracer.push_active_function("l2_build_agent_graph", input_data=initial_state["messages"][0].content):
+ # Each phase: (max_iterations * 3 react nodes) + data_harvest/forced_finish/investigation_phase
+ # Two phases (CONFIG + HARDENING) when code is compiled
+ steps_per_phase = (config.max_iterations * 3) + 4
+ recursion_limit = steps_per_phase * 2 + 5 # buffer for edge cases
+ result = await build_agent_graph.ainvoke(
+ initial_state,
+ config={"recursion_limit": recursion_limit},
+ )
+
+ logger.info("build_agent: L2 investigation finished")
+
+ # Extract verdict from result
+        compile_verdict = result.get("L2CompileVerdict")
+        hardening_verdict = result.get("L2HardeningVerdict")
+        if compile_verdict is None:
+            logger.warning("build_agent: graph finished without a compilation verdict, skipping L2 result")
+            return message
+        hardening_reason = None
+        hardening_flags = []
+        if compile_verdict.compilation_status == "not_compiled":
+ hardening_relevant = False
+ l2_override_verdict = "not_vulnerable"
+ elif hardening_verdict is None:
+ # Architecture mismatch case: compiled but hardening was skipped
+ hardening_relevant = False
+ l2_override_verdict = "not_vulnerable"
+ else:
+ hardening_relevant = True
+ hardening_reason = hardening_verdict.reasoning
+ hardening_flags = hardening_verdict.hardening_flags or []
+ if hardening_verdict.hardening_status == "mitigated":
+ l2_override_verdict = "vulnerable_mitigated"
+ else:
+ l2_override_verdict = None
+
+ # Build L2 result
+ l2_result = L2BuildResult(
+ compilation_status=compile_verdict.compilation_status,
+ compilation_confidence=compile_verdict.confidence,
+ compilation_evidence=compile_verdict.reasoning,
+ hardening_relevant=hardening_relevant,
+ hardening_flags=hardening_flags,
+ hardening_rationale=hardening_reason,
+ l2_override_verdict=l2_override_verdict,
+ )
+
+ with tracer.push_active_function(
+ "l2_agent_finish",
+ input_data={"compilation_status": l2_result.compilation_status},
+ ) as span:
+ span.set_output({
+ "compilation_status": l2_result.compilation_status,
+ "compilation_confidence": l2_result.compilation_confidence,
+ "l2_override_verdict": l2_result.l2_override_verdict,
+ })
+
+ # Store result on checker_context
+ if message.info.checker_context is not None:
+ message.info.checker_context.l2_result = l2_result
+ else:
+ logger.warning("build_agent: checker_context is None, cannot store l2_result")
+
+ logger.info(
+ "build_agent: L2 result - status=%s, confidence=%.2f, override=%s",
+ l2_result.compilation_status,
+ l2_result.compilation_confidence,
+ l2_result.l2_override_verdict,
+ )
+
+ return message
+
+ yield FunctionInfo.from_fn(
+ _arun,
+ description="Level 2 Build Agent: analyzes build artifacts for compilation status and hardening",
+ )
diff --git a/src/vuln_analysis/functions/cve_checker_report.py b/src/vuln_analysis/functions/cve_checker_report.py
new file mode 100644
index 000000000..80f020ff4
--- /dev/null
+++ b/src/vuln_analysis/functions/cve_checker_report.py
@@ -0,0 +1,696 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+CVE Checker Report Generation Function.
+
+This module provides the report generation node for the L1/L2 pipeline.
+It consumes L1InvestigationResult (and optionally L2BuildResult) from
+checker_context and produces the final AgentMorpheusOutput.
+"""
+
+import warnings
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Literal
+
+from aiq.builder.builder import Builder
+from aiq.builder.framework_enum import LLMFrameworkEnum
+from aiq.builder.function_info import FunctionInfo
+from aiq.cli.register_workflow import register_function
+from aiq.data_models.function import FunctionBaseConfig
+from pydantic import Field
+
+from exploit_iq_commons.logging.loggers_factory import LoggingFactory, trace_id
+from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput
+from exploit_iq_commons.data_models.checker_status import L1InvestigationResult, L2BuildResult
+
+from nat.builder.context import Context
+from vuln_analysis.data_models.output import (
+ AgentMorpheusEngineOutput,
+ AgentMorpheusOutput,
+ ChecklistItemOutput,
+ JustificationOutput,
+ OutputPayload,
+)
+from vuln_analysis.functions.code_agent_graph_defs import (
+ CodeAgentReport,
+ CodeSnippet,
+ DownstreamSearchReport,
+ UpstreamSearchReport,
+ generate_code_agent_report,
+)
+
+logger = LoggingFactory.get_agent_logger(__name__)
+
+
+_StatusLiteral = Literal["TRUE", "FALSE", "UNKNOWN"]
+
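+# Map justification labels to the tri-state status emitted in the final report:
+# TRUE = exploitable, FALSE = not affected, UNKNOWN = uncertain.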
+_JUSTIFICATION_LABEL_TO_STATUS: dict[str, _StatusLiteral] = {
+ "code_not_present": "FALSE",
+ "code_not_reachable": "FALSE",
+ "protected_by_mitigating_control": "FALSE",
+ "protected_by_compiler": "FALSE",
+ "requires_environment": "FALSE",
+ "vulnerable": "TRUE",
+ "uncertain": "UNKNOWN",
+}
+
+
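+# Truncation caps that keep the policy context block compact in LLM prompts.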
+_POLICY_MAX_RPM_LIST_ITEMS = 5
+_POLICY_RHSA_STATEMENT_CAP = 400
+_POLICY_MAX_PACKAGE_STATE_ITEMS = 8
+
+
+@dataclass
+class ReportBlocks:
+ """Formatted report blocks - each piece of data formatted once for UI output."""
+
+ # Package info
+ package_name: str
+ package_version: str
+ package_release: str
+ package_arch: str
+
+ # CVE info
+ cve_id: str
+ cve_description: str
+
+ # Verdict
+ justification_label: str
+ executive_summary: str
+
+ # Evidence
+ evidence_chain: list[str]
+ affected_files: list[str]
+
+ # Extracted facts from downstream search
+ patch_file_name: str
+ spec_patch_directives: list[str]
+ build_log_evidence: str
+ spec_changelog_cve_lines: str
+ spec_version_line: str
+ spec_source0_line: str
+ is_patch_file_available: bool
+ is_patch_in_spec_file: bool
+ is_patch_applied_in_build: bool
+
+ # Code snippets
+ vulnerable_snippets: list[CodeSnippet]
+ fix_snippets: list[CodeSnippet]
+
+ @property
+ def package_header_md(self) -> str:
+ """Format package metadata as Markdown header."""
+ version_release = f"{self.package_version}-{self.package_release}" if self.package_release else self.package_version
+ return f"**Package:** `{self.package_name}-{version_release}` ({self.package_arch})"
+
+ @property
+ def evidence_chain_md(self) -> str:
+ """Format audit-ready evidence chain as Markdown.
+
+ Structure follows the 3-pillar model for TARGET package audit readability:
+ - Status Summary table for at-a-glance verification
+ - Target Patch Metadata (the "What")
+ - Integration Evidence (the "Plan" - spec file directives)
+ - Execution Evidence (the "Action" - build logs)
+ - Source Validation (the "Result" - L1 agent findings)
+ """
+ lines: list[str] = ["## Evidence Chain", ""]
+
+ # Categorize evidence items
+ patch_keywords = ("patch", "spec", "patchn", "directive", "target", "reference")
+ build_keywords = ("build", "applied", "log")
+ code_keywords = ("code", "function", "vulnerable", "fix", "found", "source", "l1", "agent")
+
+ patch_evidence: list[str] = []
+ build_evidence: list[str] = []
+ code_evidence: list[str] = []
+ other_evidence: list[str] = []
+
+ for ev in self.evidence_chain[:8]:
+ ev_lower = ev.lower()
+ if any(kw in ev_lower for kw in patch_keywords):
+ patch_evidence.append(ev)
+ elif any(kw in ev_lower for kw in build_keywords):
+ build_evidence.append(ev)
+ elif any(kw in ev_lower for kw in code_keywords):
+ code_evidence.append(ev)
+ else:
+ other_evidence.append(ev)
+
+ # Status Summary - at-a-glance verification of TARGET package (using bullets for UI compatibility)
+ lines.append("### Status Summary (Target Package)")
+ lines.append("")
+ patch_check = "PASS" if self.is_patch_file_available else "FAIL"
+ spec_check = "PASS" if self.is_patch_in_spec_file else "FAIL"
+ build_check = "PASS" if self.is_patch_applied_in_build else "FAIL"
+ lines.append(f"- **Target patch file exists:** {patch_check}")
+ lines.append(f"- **Referenced in target spec:** {spec_check}")
+ lines.append(f"- **Applied in target build:** {build_check}")
+ lines.append("")
+
+ # Section 1: Target Patch Metadata
+ if self.patch_file_name or patch_evidence:
+ lines.append("### 1. Patch Metadata")
+ lines.append("")
+ if self.patch_file_name:
+ lines.append(f"- **Target patch file:** `{self.patch_file_name}`")
+ for ev in patch_evidence:
+ lines.append(f"- {ev}")
+ lines.append("")
+
+ # Section 2: Integration Evidence (Spec File) - the "Plan"
+ has_integration = self.spec_patch_directives or self.spec_changelog_cve_lines.strip()
+ if has_integration:
+ lines.append("### 2. Integration Evidence (Spec File)")
+ lines.append("")
+
+ if self.spec_patch_directives:
+ declarations = [d for d in self.spec_patch_directives if d.strip().startswith("Patch")]
+ applications = [d for d in self.spec_patch_directives if d.strip().startswith("%patch")]
+
+ if declarations:
+ lines.append("**Patch declaration:**")
+ lines.append("")
+ lines.append("```ini")
+ lines.append("\n".join(declarations))
+ lines.append("```")
+ lines.append("")
+
+ if applications:
+ lines.append("**Patch application directive:**")
+ lines.append("")
+ lines.append("```ini")
+ lines.append("\n".join(applications))
+ lines.append("```")
+ lines.append("")
+
+ if self.spec_changelog_cve_lines.strip():
+ lines.append("**Changelog entry:**")
+ lines.append("")
+ lines.append("```ini")
+ lines.append(self.spec_changelog_cve_lines.strip())
+ lines.append("```")
+ lines.append("")
+
+ # Section 3: Execution Evidence (Build Log) - the "Action"
+ if self.build_log_evidence.strip() or build_evidence:
+ lines.append("### 3. Execution Evidence (Build Log)")
+ lines.append("")
+
+ for ev in build_evidence:
+ lines.append(f"- {ev}")
+ if build_evidence:
+ lines.append("")
+
+ if self.build_log_evidence.strip():
+ lines.append("**Build output:**")
+ lines.append("")
+ lines.append("```bash")
+ lines.append(self.build_log_evidence.strip())
+ lines.append("```")
+ lines.append("")
+
+ # Section 4: Source Validation - the "Result"
+ if code_evidence:
+ lines.append("### 4. Source Validation")
+ lines.append("")
+ for ev in code_evidence:
+ lines.append(f"- {ev}")
+ lines.append("")
+
+ # Section 5: Tarball Reference
+ if self.spec_version_line or self.spec_source0_line:
+ lines.append("### 5. Tarball Reference")
+ lines.append("")
+ if self.spec_version_line:
+ lines.append(f"- `{self.spec_version_line}`")
+ if self.spec_source0_line:
+ lines.append(f"- `{self.spec_source0_line}`")
+ lines.append("")
+
+ # Additional evidence (uncategorized)
+ if other_evidence:
+ lines.append("### Additional Evidence")
+ lines.append("")
+ for ev in other_evidence:
+ lines.append(f"- {ev}")
+ lines.append("")
+
+ return "\n".join(lines)
+
+ @property
+ def affected_files_md(self) -> str:
+ """Format affected files as Markdown list."""
+ if not self.affected_files:
+ return ""
+ lines = ["**Affected Files:**"]
+ for f in self.affected_files[:10]:
+ lines.append(f"- `{f}`")
+ return "\n".join(lines)
+
+
+def _build_report_blocks(
+ message: AgentMorpheusEngineInput,
+ code_agent_report: CodeAgentReport,
+ cve_description: str,
+ downstream_report: DownstreamSearchReport | None,
+) -> ReportBlocks:
+ """Extract and format all report data into blocks."""
+ target_package = message.input.image.target_package
+
+ # Extract code snippets by type
+ vulnerable_snippets = [s for s in code_agent_report.code_snippets if s.snippet_type == "vulnerable"]
+ fix_snippets = [s for s in code_agent_report.code_snippets if s.snippet_type == "fix"]
+
+ # Extract facts from downstream report
+ patch_file_name = ""
+ spec_patch_directives: list[str] = []
+ build_log_evidence = ""
+ spec_changelog_cve_lines = ""
+ spec_version_line = ""
+ spec_source0_line = ""
+ is_patch_file_available = False
+ is_patch_in_spec_file = False
+ is_patch_applied_in_build = False
+
+ if downstream_report:
+ patch_file_name = downstream_report.patch_file_name or ""
+ spec_patch_directives = downstream_report.spec_patch_directives_for_cve or []
+ build_log_evidence = downstream_report.build_log_patch_applied or ""
+ spec_changelog_cve_lines = downstream_report.spec_changelog_cve_lines or ""
+ spec_version_line = downstream_report.spec_version_line or ""
+ spec_source0_line = downstream_report.spec_source0_line or ""
+ is_patch_file_available = downstream_report.is_patch_file_available
+ is_patch_in_spec_file = downstream_report.is_patch_in_spec_file
+ is_patch_applied_in_build = downstream_report.is_patch_applied_in_build
+
+ return ReportBlocks(
+ package_name=target_package.name if target_package else "unknown",
+        package_version=(target_package.version or "") if target_package else "",
+        package_release=(target_package.release or "") if target_package else "",
+        package_arch=(target_package.arch or "x86_64") if target_package else "x86_64",
+ cve_id=message.input.scan.vulns[0].vuln_id if message.input.scan.vulns else "",
+ cve_description=cve_description,
+ justification_label=code_agent_report.justification_label,
+ executive_summary=code_agent_report.executive_summary,
+ evidence_chain=list(code_agent_report.evidence_chain),
+ affected_files=list(code_agent_report.affected_files),
+ patch_file_name=patch_file_name,
+ spec_patch_directives=spec_patch_directives,
+ build_log_evidence=build_log_evidence,
+ spec_changelog_cve_lines=spec_changelog_cve_lines,
+ spec_version_line=spec_version_line,
+ spec_source0_line=spec_source0_line,
+ is_patch_file_available=is_patch_file_available,
+ is_patch_in_spec_file=is_patch_in_spec_file,
+ is_patch_applied_in_build=is_patch_applied_in_build,
+ vulnerable_snippets=vulnerable_snippets,
+ fix_snippets=fix_snippets,
+ )
+
+
+def _format_policy_context_for_report(
+ *,
+ target_nvr: str,
+ identify_result,
+ intel,
+) -> str:
+ """Build a context block for the LLM prompt covering NVR posture and RHSA excerpts."""
+ lines: list[str] = []
+
+ if target_nvr:
+ lines.append(f"**Scanned target NVR:** `{target_nvr}`")
+
+ if identify_result:
+ affected = identify_result.affected_rpm_list or []
+ fixed = identify_result.fixed_rpm_list or []
+
+ if affected:
+ shown = affected[:_POLICY_MAX_RPM_LIST_ITEMS]
+ suffix = f" (+ {len(affected) - len(shown)} more)" if len(affected) > len(shown) else ""
+ lines.append(f"**Affected NVRs from identify:** {', '.join(f'`{n}`' for n in shown)}{suffix}")
+ lines.append(f" - is_target_package_affected: `{identify_result.is_target_package_affected.value}`")
+
+ if fixed:
+ shown = fixed[:_POLICY_MAX_RPM_LIST_ITEMS]
+ suffix = f" (+ {len(fixed) - len(shown)} more)" if len(fixed) > len(shown) else ""
+ lines.append(f"**Fixed NVRs from identify:** {', '.join(f'`{n}`' for n in shown)}{suffix}")
+ lines.append(f" - is_target_package_fixed: `{identify_result.is_target_package_fixed.value}`")
+
+ rhsa = None
+ if intel and len(intel) > 0:
+ rhsa = intel[0].rhsa
+
+ if rhsa:
+ if rhsa.statement:
+ stmt = rhsa.statement
+ if len(stmt) > _POLICY_RHSA_STATEMENT_CAP:
+ stmt = stmt[:_POLICY_RHSA_STATEMENT_CAP] + " …"
+ lines.append(f"**RHSA statement excerpt:** {stmt}")
+
+ if rhsa.upstream_fix:
+ lines.append(f"**RHSA upstream_fix:** `{rhsa.upstream_fix}`")
+
+ pkg_states = rhsa.package_state or []
+ if pkg_states:
+ lines.append("**RHSA package_state:**")
+ for ps in pkg_states[:_POLICY_MAX_PACKAGE_STATE_ITEMS]:
+ parts = []
+ if ps.product_name:
+ parts.append(ps.product_name)
+ if ps.package_name:
+ parts.append(f"pkg={ps.package_name}")
+ if ps.fix_state:
+ parts.append(f"fix_state={ps.fix_state}")
+ if parts:
+ lines.append(f" - {' | '.join(parts)}")
+ if len(pkg_states) > _POLICY_MAX_PACKAGE_STATE_ITEMS:
+ lines.append(f" - (+ {len(pkg_states) - _POLICY_MAX_PACKAGE_STATE_ITEMS} more)")
+
+ return "\n".join(lines)
+
+
+def _apply_l2_verdict(
+ report: CodeAgentReport,
+ l2_result: L2BuildResult,
+) -> CodeAgentReport:
+ """Apply L2 Build Agent verdict overrides to the CodeAgentReport.
+
+ .. deprecated::
+ This function is deprecated. L2 results are now passed directly to
+ `generate_code_agent_report()` so the LLM can synthesize L1 and L2
+ findings into a cohesive narrative. This function will be removed
+ in a future release.
+ """
+ warnings.warn(
+ "_apply_l2_verdict is deprecated. L2 results are now integrated "
+ "directly into the LLM prompt via generate_code_agent_report().",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ if l2_result.l2_override_verdict is None:
+ return report
+
+ updated_fields = {}
+
+ if l2_result.l2_override_verdict == "not_vulnerable":
+ evidence = l2_result.compilation_evidence or ""
+ if "Architecture mismatch" in evidence:
+ # Architecture-based not affected - vulnerability cannot occur on this platform
+ updated_fields["justification_label"] = "requires_environment"
+ updated_fields["executive_summary"] = (
+ f"{report.executive_summary}\n\n"
+ f"**L2 Override:** {evidence} "
+ f"Vulnerability condition cannot occur on this architecture."
+ )
+ elif l2_result.compilation_status == "not_compiled":
+ updated_fields["justification_label"] = "code_not_present"
+ updated_fields["executive_summary"] = (
+ f"{report.executive_summary}\n\n"
+ f"**L2 Override:** Vulnerable code is NOT compiled into the binary. "
+ f"Evidence: {evidence or 'Build analysis confirmed exclusion.'}"
+ )
+ else:
+ updated_fields["justification_label"] = "code_not_reachable"
+ updated_fields["executive_summary"] = (
+ f"{report.executive_summary}\n\n"
+ f"**L2 Override:** Code determined not vulnerable by L2 analysis."
+ )
+
+ elif l2_result.l2_override_verdict == "vulnerable_mitigated":
+ if l2_result.hardening_relevant and l2_result.hardening_flags:
+ updated_fields["justification_label"] = "protected_by_compiler"
+ flags_str = ", ".join(l2_result.hardening_flags[:5])
+ updated_fields["executive_summary"] = (
+ f"{report.executive_summary}\n\n"
+ f"**L2 Override:** Vulnerability mitigated by compiler hardening flags: {flags_str}. "
+ f"Rationale: {l2_result.hardening_rationale or 'Hardening flags provide protection.'}"
+ )
+ else:
+ updated_fields["justification_label"] = "protected_by_mitigating_control"
+ updated_fields["executive_summary"] = (
+ f"{report.executive_summary}\n\n"
+ f"**L2 Override:** Vulnerability mitigated by build-time controls."
+ )
+
+ if updated_fields:
+ evidence = list(report.evidence_chain)
+ evidence.append(f"L2 Build Agent: {l2_result.l2_override_verdict}")
+ if l2_result.compilation_evidence:
+ evidence.append(f"L2 compilation evidence: {l2_result.compilation_evidence}")
+ if l2_result.hardening_rationale:
+ evidence.append(f"L2 hardening rationale: {l2_result.hardening_rationale}")
+ updated_fields["evidence_chain"] = evidence
+
+ return report.model_copy(update=updated_fields)
+
+ return report
+
+
+def _build_analysis(
+ message: AgentMorpheusEngineInput,
+ code_agent_report: CodeAgentReport,
+ intel_score: int,
+ cve_description: str = "",
+ downstream_report: DownstreamSearchReport | None = None,
+) -> list[AgentMorpheusEngineOutput]:
+ """Build the final analysis output from the code agent report using ReportBlocks.
+
+ Output structure (no duplication):
+ - summary: Package header + executive summary (brief)
+    - reason: Evidence chain (with extracted facts)
+ - checklist: CVE desc, Affected files, Vulnerable code, Fix code (expandable items)
+ """
+ # Build report blocks from inputs
+ blocks = _build_report_blocks(message, code_agent_report, cve_description, downstream_report)
+
+ label = blocks.justification_label
+ status: _StatusLiteral = _JUSTIFICATION_LABEL_TO_STATUS.get(label, "UNKNOWN")
+
+ # Build summary: Package header + executive summary
+ summary = f"{blocks.package_header_md}\n\n{blocks.executive_summary}"
+
+    # Build reason: the evidence chain with extracted facts. Code snippets and
+    # affected files go into checklist items instead, so nothing is duplicated.
+    reason = blocks.evidence_chain_md
+
+ # Build checklist items in order: CVE desc, Affected files, Vulnerable code, Fix code
+ checklist_items: list[ChecklistItemOutput] = []
+
+ # [0] CVE Description
+ if blocks.cve_description:
+ checklist_items.append(ChecklistItemOutput(
+ input="CVE Description",
+ response=blocks.cve_description,
+ ))
+
+ # [1] Affected Files
+ if blocks.affected_files:
+ files_md = "\n".join(f"- `{f}`" for f in blocks.affected_files[:10])
+ checklist_items.append(ChecklistItemOutput(
+ input="Affected Files",
+ response=files_md,
+ ))
+
+ # [2] Vulnerable Code
+ if blocks.vulnerable_snippets:
+ snippet = blocks.vulnerable_snippets[0]
+ checklist_items.append(ChecklistItemOutput(
+ input=f"Vulnerable Code (`{snippet.file_path}`)",
+ response=f"Line {snippet.line_number or 'N/A'}:\n```\n{snippet.code}\n```",
+ ))
+
+ # [3] Fix Code
+ if blocks.fix_snippets:
+ snippet = blocks.fix_snippets[0]
+ checklist_items.append(ChecklistItemOutput(
+ input=f"Fix Code (`{snippet.file_path}`)",
+ response=f"Line {snippet.line_number or 'N/A'}:\n```\n{snippet.code}\n```",
+ ))
+
+ return [
+ AgentMorpheusEngineOutput(
+ vuln_id=intel.vuln_id,
+ checklist=checklist_items,
+ summary=summary,
+ justification=JustificationOutput(
+ label=label,
+ reason=reason,
+ status=status,
+ ),
+ intel_score=intel_score,
+ cvss=None,
+ )
+ for intel in (message.info.intel if message.info and message.info.intel else [])
+ ]
+
+
+class CVECheckerReportConfig(FunctionBaseConfig, name="cve_checker_report"):
+ """Configuration for the CVE Checker Report generation function."""
+ base_checker_dir: str = Field(
+ default=".cache/am_cache/checker",
+ description="Root directory for checker-specific artifacts.",
+ )
+ llm_name: str = Field(description="The LLM model to use for report generation.")
+
+
+@register_function(config_type=CVECheckerReportConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN])
+async def cve_checker_report(config: CVECheckerReportConfig, builder: Builder):
+ """Report generation function for the L1/L2 checker pipeline."""
+
+ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput:
+ """Generate the final checker report from L1 (and optionally L2) results."""
+ trace_id.set(message.input.scan.id)
+ tracer = Context.get()
+
+ logger.info("cve_checker_report: starting report generation")
+
+ ctx = message.info.checker_context
+ if ctx is None or ctx.l1_result is None:
+ logger.error("cve_checker_report: no L1 result available")
+ return AgentMorpheusOutput(
+ input=message.input,
+ info=message.info,
+ output=OutputPayload(
+ analysis=[
+ AgentMorpheusEngineOutput(
+ vuln_id=intel.vuln_id,
+ checklist=[],
+ summary="Rpm scanning investigation did not produce results.",
+ justification=JustificationOutput(
+ label="uncertain",
+ reason="Rpm scanning investigation did not produce results.",
+ status="UNKNOWN",
+ ),
+ intel_score=0,
+ cvss=None,
+ )
+ for intel in (message.info.intel if message.info and message.info.intel else [])
+ ],
+ vex=None,
+ ),
+ )
+
+ l1_result = ctx.l1_result
+ l2_result = ctx.l2_result
+
+ downstream_report: DownstreamSearchReport | None = None
+ upstream_report: UpstreamSearchReport | None = None
+
+ if l1_result.downstream_report:
+ downstream_report = DownstreamSearchReport.model_validate(l1_result.downstream_report)
+ if l1_result.upstream_report:
+ upstream_report = UpstreamSearchReport.model_validate(l1_result.upstream_report)
+
+        vuln_id = message.input.scan.vulns[0].vuln_id if message.input.scan.vulns else ""
+ target_package = message.input.image.target_package
+ target_package_name = target_package.name if target_package else "unknown"
+ intel = message.info.intel
+
+ descriptions: list[tuple[str, str]] = []
+ if intel:
+ a_intel = intel[0]
+ if a_intel.ghsa:
+ cve_text = a_intel.ghsa.description or a_intel.ghsa.summary or ""
+ if cve_text:
+ descriptions.append(("ghsa", cve_text))
+ if a_intel.ubuntu and a_intel.ubuntu.description:
+ descriptions.append(("ubuntu", a_intel.ubuntu.description))
+
+ version = (target_package.version or "") if target_package else ""
+ release = (target_package.release or "") if target_package else ""
+ target_nvr = f"{target_package_name}-{version}-{release}" if target_package_name else ""
+
+ policy_context = _format_policy_context_for_report(
+ target_nvr=target_nvr,
+ identify_result=ctx.identify_result,
+ intel=intel,
+ )
+
+ llm = await builder.get_llm(llm_name=config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN)
+
+ with tracer.push_active_function("generate_code_agent_report", input_data={"vuln_id": vuln_id}):
+ code_agent_report: CodeAgentReport = await generate_code_agent_report(
+ llm=llm,
+ vuln_id=vuln_id,
+ target_package=target_package_name,
+ descriptions=descriptions,
+ downstream_report=downstream_report,
+ upstream_report=upstream_report,
+ l1_agent_answer=l1_result.l1_agent_answer,
+ tracer=tracer,
+ policy_context=policy_context,
+ l2_result=l2_result,
+ )
+
+ source_key = ctx.source_key
+ if source_key:
+ report_dir = Path(config.base_checker_dir) / source_key / "report"
+ report_dir.mkdir(parents=True, exist_ok=True)
+ suffix = f"-{target_package_name}" if target_package_name else ""
+ if version:
+ suffix += f"-{version}"
+ if release:
+ suffix += f"-{release}"
+ report_path = report_dir / f"L1_report_{vuln_id}{suffix}.md"
+ report_path.write_text(code_agent_report.to_markdown(
+ vuln_id=vuln_id,
+ target_package=target_package_name,
+ version=version,
+ release=release,
+ downstream_report=downstream_report,
+ ))
+ logger.info("cve_checker_report: wrote report to %s", report_path)
+
+ with tracer.push_active_function(
+ "report_finish",
+ input_data={
+ "justification_label": code_agent_report.justification_label,
+ "has_l2_override": l2_result is not None and l2_result.l2_override_verdict is not None,
+ },
+ ) as span:
+ span.set_output({
+ "executive_summary": code_agent_report.executive_summary,
+ "affected_files": code_agent_report.affected_files,
+ })
+        intel_score = intel[0].intel_score if intel else 0
+
+ cve_description = ""
+ if descriptions:
+ cve_description = descriptions[0][1]
+
+ return AgentMorpheusOutput(
+ input=message.input,
+ info=message.info,
+ output=OutputPayload(
+ analysis=_build_analysis(
+ message,
+ code_agent_report,
+ intel_score,
+ cve_description=cve_description,
+ downstream_report=downstream_report,
+ ),
+ vex=None,
+ ),
+ )
+
+ yield FunctionInfo.from_fn(
+ _arun,
+ description="Generate final checker report from L1/L2 investigation results",
+ )
diff --git a/src/vuln_analysis/functions/cve_checker_segmentation.py b/src/vuln_analysis/functions/cve_checker_segmentation.py
new file mode 100644
index 000000000..da00a36d9
--- /dev/null
+++ b/src/vuln_analysis/functions/cve_checker_segmentation.py
@@ -0,0 +1,176 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import time
+from pathlib import Path
+
+from aiq.builder.builder import Builder
+from aiq.builder.framework_enum import LLMFrameworkEnum
+from aiq.builder.function_info import FunctionInfo
+from aiq.cli.register_workflow import register_function
+from aiq.data_models.function import FunctionBaseConfig
+from pydantic import Field
+
+from exploit_iq_commons.logging.loggers_factory import LoggingFactory
+from langchain.docstore.document import Document
+from exploit_iq_commons.utils.document_embedding import MultiLanguageRecursiveCharacterTextSplitter, ExtendedLanguageParser
+from langchain_community.document_loaders.generic import GenericLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+logger = LoggingFactory.get_agent_logger(__name__)
+
+_BUILD_FILE_NAMES = {"Makefile", "GNUmakefile", "configure"}
+LANG_PARSER_EXTENSIONS = {".c", ".h", ".cpp", ".hpp", ".py", ".go", ".java", ".js", ".ts"}
+TEXT_FILE_EXTENSIONS = {".spec", ".patch", ".conf", ".cfg", ".sh", ".m4", ".ac", ".am", ".in", ".txt", ".md", ".rst"}
+
+
+class RpmDocumentEmbedding:
+    """Chunk extracted RPM sources for indexing: language-aware splitting for
+    code files, plain recursive splitting for spec/patch/build/text files."""
+
+    def __init__(self, source_dir: Path, chunk_size: int = 800, chunk_overlap: int = 160):
+ self.source_dir = source_dir
+ self.lang_splitter = MultiLanguageRecursiveCharacterTextSplitter(
+ chunk_size=chunk_size, chunk_overlap=chunk_overlap,
+ )
+ self.text_splitter = RecursiveCharacterTextSplitter(
+ chunk_size=1000, chunk_overlap=200,
+ )
+
+ def load_and_chunk_code(self) -> list[Document]:
+ loader = GenericLoader.from_filesystem(
+ self.source_dir,
+ glob="**/*",
+ suffixes=list(LANG_PARSER_EXTENSIONS),
+ parser=ExtendedLanguageParser(),
+ )
+ try:
+ documents = loader.load()
+ except Exception as e:
+ logger.warning("LanguageParser failed on %s: %s", self.source_dir, e)
+ return []
+ return self.lang_splitter.split_documents(documents)
+
+ def load_and_chunk_files(self) -> list[Document]:
+ documents: list[Document] = []
+ for root, _, files in os.walk(self.source_dir):
+ for file in files:
+ if any(file.endswith(ext) for ext in TEXT_FILE_EXTENSIONS) or file in _BUILD_FILE_NAMES:
+ file_path = os.path.join(root, file)
+ try:
+                        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
+ content = f.read()
+ documents.append(Document(page_content=content, metadata={"source": file_path}))
+ except Exception as e:
+ logger.warning("Error reading %s: %s", file_path, e)
+ continue
+ return self.text_splitter.split_documents(documents)
+
+ def load_and_chunk_all(self) -> list[Document]:
+ documents = self.load_and_chunk_code()
+ documents.extend(self.load_and_chunk_files())
+ return documents
+
+
+class CVECheckerSegmentationConfig(FunctionBaseConfig, name="cve_checker_segmentation"):
+ """
+ Builds a scoped Tantivy lexical code index from extracted RPM source files.
+ Reads source directories populated by source_acquisition, indexes them,
+ and sets info.vdb.code_index_path for downstream checker nodes.
+ """
+ base_checker_dir: str = Field(
+ default=".cache/am_cache/checker",
+ description="Root directory for checker-specific artifacts.",
+ )
+ base_code_index_dir: str = Field(
+ default=".cache/am_cache/code_index",
+ description="Base directory for Tantivy code index storage.",
+ )
+ include_extensions: list[str] = Field(
+ default=[
+ ".c", ".h", ".cpp", ".hpp", ".py", ".go", ".java", ".js",
+ ".ts", ".spec", ".patch", ".conf", ".cfg", ".sh", ".m4",
+ ".ac", ".am", ".in", ".txt", ".md", ".rst",
+ ],
+ description="File extensions to include when building the code index.",
+ )
+
+
+@register_function(config_type=CVECheckerSegmentationConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN])
+async def cve_checker_segmentation(config: CVECheckerSegmentationConfig, builder: Builder):
+ from exploit_iq_commons.data_models.info import AgentMorpheusInfo
+ from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput
+ from vuln_analysis.utils.full_text_search import FullTextSearch
+
+ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput:
+ if not message.info.checker_context or not message.info.checker_context.source_key:
+ logger.info("checker_segmentation: no checker_context.source_keys, skipping indexing")
+ return message
+
+ source_key = message.info.checker_context.source_key
+ if not source_key:
+ logger.info("checker_segmentation: no source_key, skipping indexing")
+ return message
+
+ index_path = FullTextSearch.get_index_directory(config.base_code_index_dir, source_key)
+
+ if index_path.exists():
+ logger.info("checker_segmentation: cache hit on code index: %s", index_path)
+ else:
+ start = time.time()
+ fts = FullTextSearch(cache_path=str(index_path))
+
+ source_dir = Path(config.base_checker_dir) / source_key / "source"
+ if not source_dir.is_dir():
+ logger.warning("checker_segmentation: source dir missing: %s", source_dir)
+ return message
+
+ logger.info("checker_segmentation: indexing source dir %s", source_dir)
+ document_embedding = RpmDocumentEmbedding(source_dir=source_dir)
+ documents = document_embedding.load_and_chunk_all()
+
+ fts.add_documents_from_langchain_chunks(documents)
+
+ elapsed = time.time() - start
+ logger.info("checker_segmentation: indexing completed in %.2fs at %s", elapsed, index_path)
+
+ message.info.vdb = AgentMorpheusInfo.VdbPaths(code_index_path=str(index_path))
+ return message
+
+ yield FunctionInfo.from_fn(
+ _arun,
+ description="Build scoped Tantivy code index from extracted checker sources",
+ )
+
+
+def _index_build_files(fts, source_dir: Path) -> None:
+ """Walk source_dir for extensionless build files and add them to the index."""
+ docs: list[tuple[str, str]] = []
+ for root, _, files in os.walk(source_dir):
+ for fname in files:
+ if fname in _BUILD_FILE_NAMES:
+ fpath = os.path.join(root, fname)
+ try:
+ with open(fpath, "r", encoding="utf-8", errors="replace") as f:
+ docs.append((fpath, f.read()))
+ except Exception as exc:
+ logger.warning("checker_segmentation: error reading %s: %s", fpath, exc)
+ if docs:
+ fts.add_documents(docs)
+ logger.info("checker_segmentation: indexed %d build files from %s", len(docs), source_dir)
diff --git a/src/vuln_analysis/functions/cve_http_output.py b/src/vuln_analysis/functions/cve_http_output.py
index b6b475645..a328faafa 100644
--- a/src/vuln_analysis/functions/cve_http_output.py
+++ b/src/vuln_analysis/functions/cve_http_output.py
@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
+from dataclasses import dataclass
from http import HTTPStatus
from datetime import datetime
from aiq.builder.builder import Builder
@@ -22,13 +23,21 @@
from pydantic import Field
from exploit_iq_commons.logging.loggers_factory import LoggingFactory, trace_id
+from exploit_iq_commons.data_models.checker_status import (
+ CHECKER_FAILURE_ERROR_TYPES,
+ PACKAGE_CHECKER_STATUS_DESCRIPTIONS,
+)
+from exploit_iq_commons.data_models.common import PipelineMode, TypedBaseModel
+from exploit_iq_commons.data_models.input import SourceDocumentsInfo
from vuln_analysis.data_models.job import Job, LocalDateTime
-from exploit_iq_commons.data_models.common import TypedBaseModel
import typing
-from typing import Any
+from typing import Any, TYPE_CHECKING
import os
import re
+if TYPE_CHECKING:
+ from vuln_analysis.data_models.output import AgentMorpheusOutput, FailureReport
+
logger = LoggingFactory.get_agent_logger(__name__)
@@ -91,43 +100,113 @@ class CVEHttpOutputConfig(FunctionBaseConfig, name="cve_http_output"):
mlops_config: MLOpsConfig = Field(..., description="MLOps configuration")
+@dataclass
+class OutputPayload:
+ """Encapsulates the HTTP output payload details."""
+ json: str
+ url: str
+ skip_mlops: bool
+
+
+def _build_output_payload(
+ message: "AgentMorpheusOutput",
+ config: CVEHttpOutputConfig,
+ default_json: str,
+) -> OutputPayload:
+ """
+ Determine the payload to send - either the full output or a failure report.
+
+ Returns an OutputPayload with the appropriate JSON, URL, and skip_mlops flag.
+ """
+ from vuln_analysis.data_models.output import FailureReport
+
+ default_url = config.url + config.endpoint
+ failure_url = config.url + config.failure_endpoint
+
+ if message.input.code_index_success is False:
+ repo_url = message.input.image.source_info[0].git_repo if message.input.image.source_info else "unknown"
+ report = FailureReport(
+ scan_id=message.input.scan.id,
+ error_type="processing-error",
+ error_message=f"Failed to clone repository {repo_url}--{message.input.failure_reason}",
+ )
+ logger.info(f"Code index failed for scan {message.input.scan.id}, sending failure report to {failure_url}")
+ return OutputPayload(json=report.model_dump_json(by_alias=True), url=failure_url, skip_mlops=True)
+
+ checker_ctx = message.info.checker_context
+ if checker_ctx and checker_ctx.status in CHECKER_FAILURE_ERROR_TYPES:
+ error_type = CHECKER_FAILURE_ERROR_TYPES[checker_ctx.status]
+ error_msg = PACKAGE_CHECKER_STATUS_DESCRIPTIONS.get(
+ checker_ctx.status,
+ f"Checker failed with status {checker_ctx.status}"
+ )
+ cve_id = message.input.scan.vulns[0].vuln_id if message.input.scan.vulns else "unknown"
+ pkg_name = message.input.image.target_package.name if message.input.image.target_package else "unknown"
+ report = FailureReport(
+ scan_id=message.input.scan.id,
+ error_type=error_type,
+ error_message=f"{error_msg} (CVE: {cve_id}, package: {pkg_name})",
+ )
+ logger.info(
+ f"Checker early exit for scan {message.input.scan.id} with status {checker_ctx.status}, "
+ f"sending failure report to {failure_url}"
+ )
+ return OutputPayload(json=report.model_dump_json(by_alias=True), url=failure_url, skip_mlops=True)
+
+ return OutputPayload(json=default_json, url=default_url, skip_mlops=False)
+
+
@register_function(config_type=CVEHttpOutputConfig)
async def output_to_http(config: CVEHttpOutputConfig, builder: Builder): # pylint: disable=unused-argument
- from vuln_analysis.data_models.output import AgentMorpheusOutput, FailureReport
+ from vuln_analysis.data_models.output import AgentMorpheusOutput
from vuln_analysis.utils import http_utils
async def _arun(message: AgentMorpheusOutput) -> AgentMorpheusOutput:
trace_id.set(message.input.scan.id)
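+        # In package-checker mode, record the resolved source URL and target NVR on
+        # source_info so the emitted output reflects what was actually analyzed.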
+ if message.input.image.pipeline_mode == PipelineMode.PACKAGE_CHECKER:
+ checker_ctx = message.info.checker_context
+ artifacts = checker_ctx.artifacts if checker_ctx else None
+ source_url = artifacts.source_url if artifacts else None
+ target = message.input.image.target_package
+ if source_url and target:
+ nvr = f"{target.name}-{target.version}-{target.release}"
+ message.input.image.source_info = [
+ SourceDocumentsInfo(type="code", git_repo=source_url, ref=nvr)
+ ]
+
model_json = message.model_dump_json(by_alias=True)
- url = config.url + config.endpoint
+
+ # Save JSON for debugging - compare with local markdown reports
+ from pathlib import Path
+ debug_output_dir = Path(".cache/am_cache/checker_json_output")
+ debug_output_dir.mkdir(parents=True, exist_ok=True)
+ vuln_id = message.input.scan.vulns[0].vuln_id if message.input.scan.vulns else "unknown"
+ json_file = debug_output_dir / f"{message.input.scan.id}_{vuln_id}.json"
+ json_file.write_text(model_json)
+ logger.info(f"Saved JSON output to {json_file}")
+
headers = {'Content-type': 'application/json', 'traceId': trace_id.get()}
auth_header = get_auth_header(config)
if auth_header is not None:
headers['Authorization'] = auth_header
- verify = True
- if config.verify_path:
- verify = config.verify_path
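+        # requests-style semantics: verify may be a CA bundle path or a boolean.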
+ verify = config.verify_path if config.verify_path else True
+
+ payload = _build_output_payload(message, config, model_json)
try:
- skipped_mlops = False
- if message.input.code_index_success is False:
- repo_url = message.input.image.source_info[0].git_repo if message.input.image.source_info else "unknown"
- failure_report = FailureReport(
- scan_id=message.input.scan.id,
- error_type="processing-error",
- error_message=f"Failed to clone repository {repo_url}--{message.input.failure_reason}",
- )
- failure_url = config.url + config.failure_endpoint
- logger.info(f"Code index failed for scan {message.input.scan.id}, sending failure report to {failure_url}")
- model_json = failure_report.model_dump_json(by_alias=True)
- url = failure_url
- skipped_mlops = True
- logger.info(f"Sending output to {url}")
- http_utils.request_with_retry(request_kwargs={
- "url": url, "method": "POST", "data": model_json.encode('utf-8'), "headers": headers, "verify": verify
- }, accept_status_codes=(HTTPStatus.OK, HTTPStatus.CREATED, HTTPStatus.ACCEPTED))
- if config.enable_mlops and not skipped_mlops:
+ logger.info(f"Sending output to {payload.url}")
+ http_utils.request_with_retry(
+ request_kwargs={
+ "url": payload.url,
+ "method": "POST",
+ "data": payload.json.encode('utf-8'),
+ "headers": headers,
+ "verify": verify,
+ },
+ accept_status_codes=(HTTPStatus.OK, HTTPStatus.CREATED, HTTPStatus.ACCEPTED),
+ )
+ if config.enable_mlops and not payload.skip_mlops:
mlops_url = None
try:
job = _extract_job_data(message)
@@ -143,9 +222,9 @@ async def _arun(message: AgentMorpheusOutput) -> AgentMorpheusOutput:
except Exception as mlops_e:
logger.error('Unable to send job to MLOps API at %s. Error: %s', mlops_url, mlops_e)
except Exception as e:
- logger.error('Unable to send output response to %s. Error: %s', url, e)
+ logger.error('Unable to send output response to %s. Error: %s', payload.url, e)
else:
- logger.info('Successfully sent output to %s', url)
+ logger.info('Successfully sent output to %s', payload.url)
return message
diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py
new file mode 100644
index 000000000..d06e7e1c9
--- /dev/null
+++ b/src/vuln_analysis/functions/cve_package_code_agent.py
@@ -0,0 +1,773 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+import uuid
+from pathlib import Path
+from typing import Literal
+
+import tiktoken
+
+from aiq.builder.builder import Builder
+from aiq.builder.framework_enum import LLMFrameworkEnum
+from aiq.builder.function_info import FunctionInfo
+from aiq.cli.register_workflow import register_function
+from aiq.data_models.function import FunctionBaseConfig
+from pydantic import Field
+
+from exploit_iq_commons.logging.loggers_factory import LoggingFactory, trace_id
+from exploit_iq_commons.data_models.checker_status import (
+ L1InvestigationResult,
+ format_vulnerability_intel_for_prompt,
+)
+
+from langgraph.graph import StateGraph, START, END
+from langgraph.prebuilt import ToolNode
+from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, RemoveMessage
+
+from nat.builder.context import Context
+from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput
+from vuln_analysis.functions.code_agent_graph_defs import (
+ CodeAgentState,
+ DownstreamSearchReport,
+ UpstreamSearchReport,
+ downstream_search_preprocss,
+ upstream_search_preprocess,
+ extract_l1_verdict,
+ L1_AGENT_SYS_PROMPT_PATCH_AVAILABLE,
+ L1_AGENT_SYS_PROMPT_UPSTREAM_PATCH,
+ L1_AGENT_SYS_PROMPT_REBASE_FIX,
+ L1_AGENT_SYS_PROMPT_REBASE_NO_PATCH,
+ L1_AGENT_PROMPT_TEMPLATE,
+ L1_AGENT_PROMPT_TEMPLATE_NO_PATCH,
+ L1_AGENT_THOUGHT_INSTRUCTIONS,
+ L1_AGENT_THOUGHT_UPSTREAM_INSTRUCTIONS,
+ L1_AGENT_THOUGHT_REBASE_INSTRUCTIONS,
+ L1_AGENT_THOUGHT_CVE_DESC_INSTRUCTIONS,
+ L1_COMPREHENSION_PROMPT,
+ L1_MEMORY_UPDATE_PROMPT,
+ VulnerabilityIntel,
+ VULNERABILITY_INTEL_EXTRACTION_PROMPT,
+ format_patch_data_for_intel,
+)
+from vuln_analysis.tools.brew_downloader import BrewDownloader, BrewProfileType, BrewDownloaderError
+
+from vuln_analysis.functions.react_internals import CheckerThought, CodeFindings, Observation, FORCED_FINISH_PROMPT
+from vuln_analysis.runtime_context import ctx_state
+
+logger = LoggingFactory.get_agent_logger(__name__)
+
+
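+# NEVRA pattern: name, optional "epoch:", version, then "release.arch",
+# e.g. "libpq-0:13.20-1.el8_6.x86_64" -> ("libpq", "0", "13.20", "1.el8_6.x86_64").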
+_RPM_NEVRA_RE = re.compile(r"^(.+?)-(?:(\d+):)?(\d\S*?)-(\S+)$")
+
+
+def _parse_fix_info_from_context(ctx, target_name: str) -> dict:
+ """Extract {name, version, release} from checker_context.identify_result.fixed_rpm_list.
+
+ Handles both epoch and non-epoch NEVRAs:
+ - With epoch: libpq-0:13.20-1.el8_6.x86_64
+ - Without epoch: libpq-13.20-1.el8_6.x86_64
+
+ Finds the first NEVRA in the list that matches the target package name.
+ Returns an empty dict if no match is found.
+ """
+ if not ctx or not ctx.identify_result or not ctx.identify_result.fixed_rpm_list:
+ return {}
+ for nevra in ctx.identify_result.fixed_rpm_list:
+ m = _RPM_NEVRA_RE.match(nevra)
+ if not m:
+ continue
+ name = m.group(1)
+ if name.lower() != target_name.lower():
+ continue
+ version = m.group(3)
+ release_arch = m.group(4)
+ release = release_arch.rsplit(".", 1)[0] if "." in release_arch else release_arch
+ clean_nevra = f"{name}-{version}-{release_arch}"
+ return {"nevra": clean_nevra, "name": name, "version": version, "release": release}
+ return {}
+
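+# Illustrative example of the parse above: _parse_fix_info_from_context(ctx, "libpq")
+# with fixed_rpm_list=["libpq-0:13.20-1.el8_6.x86_64"] returns
+#   {"nevra": "libpq-13.20-1.el8_6.x86_64", "name": "libpq",
+#    "version": "13.20", "release": "1.el8_6"}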
+
+def _build_tool_strategy(tool_names: list[str]) -> str:
+ """Generate tool usage guidance based on available tools."""
+ strategies = []
+ tool_names_lower = [t.lower().replace("_", " ") for t in tool_names]
+
+ if any("grep" in t for t in tool_names_lower):
+ strategies.append("- Use Source Grep for exact code patterns from patch (function names, variable names, specific code)")
+ if any("keyword" in t or "search" in t for t in tool_names_lower):
+ strategies.append("- Use Code Keyword Search for broader concept searches when grep fails")
+ if any("read" in t for t in tool_names_lower):
+ strategies.append("- Use Read File to examine full context around matches")
+
+ return "\n".join(strategies) if strategies else "Use available tools to search for vulnerable and fixed code patterns."
+
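+# Illustrative: _build_tool_strategy(["source_grep", "code_keyword_search", "read_file"])
+# emits all three bullets, since the lower-cased names contain "grep", "search",
+# and "read" respectively.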
+
+# ---------------------------------------------------------------------------
+# Policy context formatting for L1 reports (Feedback-2 gap coverage)
+# ---------------------------------------------------------------------------
+
+_POLICY_MAX_RPM_LIST_ITEMS = 5
+_POLICY_RHSA_STATEMENT_CAP = 400
+_POLICY_MAX_PACKAGE_STATE_ITEMS = 8
+
+
+def _format_policy_context_for_l1_report(
+ *,
+ target_nvr: str,
+ identify_result,
+ intel,
+) -> str:
+ """Build a context block for the LLM prompt covering NVR posture and RHSA excerpts.
+
+ Returns an empty string if no meaningful context is available.
+ """
+ lines: list[str] = []
+
+ # 1. Scanned target NVR
+ if target_nvr:
+ lines.append(f"**Scanned target NVR:** `{target_nvr}`")
+
+ # 2. PackageIdentifyResult: affected/fixed lists
+ if identify_result:
+ affected = identify_result.affected_rpm_list or []
+ fixed = identify_result.fixed_rpm_list or []
+
+ if affected:
+ shown = affected[:_POLICY_MAX_RPM_LIST_ITEMS]
+ suffix = f" (+ {len(affected) - len(shown)} more)" if len(affected) > len(shown) else ""
+ lines.append(f"**Affected NVRs from identify:** {', '.join(f'`{n}`' for n in shown)}{suffix}")
+ lines.append(f" - is_target_package_affected: `{identify_result.is_target_package_affected.value}`")
+
+ if fixed:
+ shown = fixed[:_POLICY_MAX_RPM_LIST_ITEMS]
+ suffix = f" (+ {len(fixed) - len(shown)} more)" if len(fixed) > len(shown) else ""
+ lines.append(f"**Fixed NVRs from identify:** {', '.join(f'`{n}`' for n in shown)}{suffix}")
+ lines.append(f" - is_target_package_fixed: `{identify_result.is_target_package_fixed.value}`")
+
+ # 3. RHSA excerpts (if present)
+ rhsa = None
+ if intel:
+ rhsa = intel[0].rhsa
+
+ if rhsa:
+ # Statement excerpt
+ if rhsa.statement:
+ stmt = rhsa.statement
+ if len(stmt) > _POLICY_RHSA_STATEMENT_CAP:
+ stmt = stmt[:_POLICY_RHSA_STATEMENT_CAP] + " …"
+ lines.append(f"**RHSA statement excerpt:** {stmt}")
+
+ # Upstream fix
+ if rhsa.upstream_fix:
+ lines.append(f"**RHSA upstream_fix:** `{rhsa.upstream_fix}`")
+
+ # Package state (compact table-like bullets)
+ pkg_states = rhsa.package_state or []
+ if pkg_states:
+ lines.append("**RHSA package_state:**")
+ for ps in pkg_states[:_POLICY_MAX_PACKAGE_STATE_ITEMS]:
+ parts = []
+ if ps.product_name:
+ parts.append(ps.product_name)
+ if ps.package_name:
+ parts.append(f"pkg={ps.package_name}")
+ if ps.fix_state:
+ parts.append(f"fix_state={ps.fix_state}")
+ if parts:
+ lines.append(f" - {' | '.join(parts)}")
+ if len(pkg_states) > _POLICY_MAX_PACKAGE_STATE_ITEMS:
+ lines.append(f" - (+ {len(pkg_states) - _POLICY_MAX_PACKAGE_STATE_ITEMS} more)")
+
+ return "\n".join(lines)
+
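+# Illustrative output fragment (values invented for the example):
+#   **Scanned target NVR:** `libpq-13.20-1.el8_6`
+#   **RHSA upstream_fix:** `postgresql 13.20`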
+
+class CVEPackageCodeAgentConfig(FunctionBaseConfig, name="cve_package_code_agent"):
+ """
+ Level 1 Package Code Agent. Investigates each CVE using extracted source
+ code and the scoped Tantivy code index built by checker_segmentation.
+
+ Phases: Identify -> Locate -> Verify (see HLD-standalone-checker.md §5).
+ """
+ base_checker_dir: str = Field(
+ default=".cache/am_cache/checker",
+ description="Root directory for checker-specific artifacts.",
+ )
+ base_code_index_dir: str = Field(
+ default=".cache/am_cache/code_index",
+ description="Base directory for Tantivy code index storage.",
+ )
+ base_rpm_dir: str = Field(
+ default=".cache/am_cache/rpms",
+ description="Shared RPM cache directory (for BrewDownloader).",
+ )
+ llm_name: str = Field(description="The LLM model to use with the L1 code agent.")
+ tool_names: list[str] = Field(default=[], description="The list of tools to provide to L1 code agent")
+ max_iterations: int = Field(default=5, description="The maximum number of iterations for the agent.")
+ context_window_token_limit: int = Field(default=5000, description="Token limit for context window before pruning old messages.")
+
+
+async def create_graph_code_agent(config: CVEPackageCodeAgentConfig, builder: Builder, state: AgentMorpheusEngineInput, tracer):
+ # Node name constants
+ THOUGHT_NODE = "think_node"
+ TOOL_NODE = "tool"
+ FORCED_FINISH_NODE = "forced_finish"
+ OBSERVATION_NODE = "observation_node"
+ DOWNSTREAM_SEARCH_NODE = "downstream_search"
+ GATHER_MORE_INFO_NODE = "gather_more_info"
+ L1_AGENT_NODE = "L1_agent"
+
+ llm = await builder.get_llm(llm_name=config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN)
+ tools = builder.get_tools(tool_names=config.tool_names, wrapper_type=LLMFrameworkEnum.LANGCHAIN)
+
+ thought_llm = llm.with_structured_output(CheckerThought)
+ comprehension_llm = llm.with_structured_output(CodeFindings)
+ observation_llm = llm.with_structured_output(Observation)
+ vulnerability_intel_llm = llm.with_structured_output(VulnerabilityIntel)
+ # Get tool names after filtering for dynamic guidance
+ enabled_tool_names = [tool.name for tool in tools]
+ tool_descriptions_list = [t.name + ": " + t.description for t in tools]
+ tools_node = ToolNode(tools, handle_tool_errors=True)
+ tool_strategy = _build_tool_strategy(enabled_tool_names)
+ tools_str = "\n".join(tool_descriptions_list)
+
+ vuln_id = state.input.scan.vulns[0].vuln_id
+ ctx = state.info.checker_context
+ intel = state.info.intel
+ target_package = state.input.image.target_package
+ source_key = ctx.source_key
+
+ _tiktoken_enc = tiktoken.get_encoding("cl100k_base")
+
+ def _count_tokens(text: str) -> int:
+ """Count tokens using tiktoken cl100k_base encoding (~90-95% accurate for Llama 3.1)."""
+ try:
+ return len(_tiktoken_enc.encode(text))
+ except Exception:
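+ # Rough fallback: assume ~4 characters per token.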
+ return len(text) // 4
+
+ def _estimate_tokens(runtime_prompt: str, messages: list, observation: Observation | None) -> int:
+ """Estimate the token count thought_node will send to the LLM."""
+ parts = [runtime_prompt]
+ for msg in messages:
+ if hasattr(msg, "content") and isinstance(msg.content, str):
+ parts.append(msg.content)
+ if observation is not None:
+ for item in (observation.memory or []):
+ parts.append(item)
+ for item in (observation.results or []):
+ parts.append(item)
+ return _count_tokens("\n".join(parts))
+ # -- Locate setup: fix info + BrewDownloader + paths -----------------------
+ primary_intel = intel[0]
+ fix_info = _parse_fix_info_from_context(ctx, target_package.name)
+ checker_dir = Path(config.base_checker_dir) / source_key
+ source_dir = checker_dir / "source"
+ patch_dir = checker_dir / "patch"
+
+ brew_downloader = None
+ if fix_info:
+ try:
+ brew_downloader = BrewDownloader(
+ BrewProfileType.INTERNAL, config.base_rpm_dir, str(checker_dir),
+ )
+ brew_downloader.connect()
+ except BrewDownloaderError as e:
+ logger.warning("locate: BrewDownloader init failed (%s), diff path unavailable", e)
+ brew_downloader = None
+
+ descriptions: list[tuple[str, str]] = []
+ if primary_intel.ghsa:
+ cve_text = primary_intel.ghsa.description or primary_intel.ghsa.summary or ""
+ if cve_text:
+ descriptions.append(("ghsa", cve_text))
+ if primary_intel.ubuntu and primary_intel.ubuntu.description:
+ descriptions.append(("ubuntu", primary_intel.ubuntu.description))
+
+ cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions)
+
+ async def L1_agent(state: CodeAgentState) -> dict:
+ logger.info("L1_agent: starting")
+ downstream_report = state.get("downstream_report")
+ upstream_report = state.get("upstream_report")
+
+ with tracer.push_active_function("Initial_Intelligence_Gathering", input_data={}) as span:
+
+ if downstream_report and downstream_report.is_patch_file_available:
+ parsed_patch = downstream_report.parsed_patch
+ patch_data = format_patch_data_for_intel(parsed_patch)
+ elif upstream_report and upstream_report.is_fixed_srpm_is_needed and upstream_report.fixed_parsed_patch:
+ parsed_patch = upstream_report.fixed_parsed_patch
+ patch_data = format_patch_data_for_intel(parsed_patch)
+ else:
+ patch_data = ""
+
+ vul_prompt = VULNERABILITY_INTEL_EXTRACTION_PROMPT.format(
+ vuln_id=vuln_id,
+ target_package=target_package.name,
+ cve_description=cve_description,
+ patch_data=patch_data,
+ )
+ vulnerability_intel: VulnerabilityIntel = await vulnerability_intel_llm.ainvoke(
+ [SystemMessage(content=vul_prompt)],
+ )
+
+ if downstream_report:
+ vulnerability_intel.is_downstream_patch_available = downstream_report.is_patch_file_available
+ vulnerability_intel.is_patch_applied_in_build = downstream_report.is_patch_applied_in_build
+ vulnerability_intel.patch_file_name = downstream_report.patch_file_name or ""
+
+ span.set_output({
+ "vulnerability_intel": vulnerability_intel.model_dump(),
+ })
+
+ # Use case 1: Downstream patch file is available
+ if downstream_report and downstream_report.is_patch_file_available:
+ runtime_prompt = L1_AGENT_PROMPT_TEMPLATE.format(
+ sys_prompt=L1_AGENT_SYS_PROMPT_PATCH_AVAILABLE,
+ vuln_id=vuln_id,
+ target_package=target_package.name,
+ vulnerability_intel=format_vulnerability_intel_for_prompt(vulnerability_intel),
+ tools=tools_str,
+ tool_selection_strategy=tool_strategy,
+ tool_instructions=L1_AGENT_THOUGHT_INSTRUCTIONS,
+ )
+
+ span.set_output({
+ "mode": "patch_available",
+ "patch_filename": downstream_report.patch_file_name,
+ })
+ # Use case 2: code is fixed by rebase
+ elif upstream_report and upstream_report.is_code_fixed_by_rebase == "yes":
+
+ if upstream_report.is_fixed_srpm_is_needed and upstream_report.fixed_parsed_patch:
+ # Has patch context - use patch-based verification
+ runtime_prompt = L1_AGENT_PROMPT_TEMPLATE.format(
+ sys_prompt=L1_AGENT_SYS_PROMPT_REBASE_FIX,
+ vuln_id=vuln_id,
+ target_package=target_package.name,
+ vulnerability_intel=format_vulnerability_intel_for_prompt(vulnerability_intel),
+ tools=tools_str,
+ tool_selection_strategy=tool_strategy,
+ tool_instructions=L1_AGENT_THOUGHT_REBASE_INSTRUCTIONS,
+ )
+
+ span.set_output({
+ "mode": "rebase_fix_verification",
+ "spec_log_change": upstream_report.spec_file_log_change[:200] if upstream_report.spec_file_log_change else "",
+ })
+ else:
+ # No patch context - use CVE description-based verification
+ runtime_prompt = L1_AGENT_PROMPT_TEMPLATE_NO_PATCH.format(
+ sys_prompt=L1_AGENT_SYS_PROMPT_REBASE_NO_PATCH,
+ vuln_id=vuln_id,
+ target_package=target_package.name,
+ vulnerability_intel=format_vulnerability_intel_for_prompt(vulnerability_intel),
+ tools=tools_str,
+ tool_selection_strategy=tool_strategy,
+ tool_instructions=L1_AGENT_THOUGHT_CVE_DESC_INSTRUCTIONS,
+ )
+
+ span.set_output({
+ "mode": "rebase_fix_cve_description",
+ "spec_log_change": upstream_report.spec_file_log_change[:200] if upstream_report.spec_file_log_change else "",
+ })
+ # Use case 3: no patch was found in the target package, but a patch exists in
+ # the fixed RPM that the CVE advisory references
+ elif upstream_report and upstream_report.fixed_parsed_patch:
+ runtime_prompt = L1_AGENT_PROMPT_TEMPLATE.format(
+ sys_prompt=L1_AGENT_SYS_PROMPT_UPSTREAM_PATCH,
+ vuln_id=vuln_id,
+ target_package=target_package.name,
+ vulnerability_intel=format_vulnerability_intel_for_prompt(vulnerability_intel),
+ tools=tools_str,
+ tool_selection_strategy=tool_strategy,
+ tool_instructions=L1_AGENT_THOUGHT_UPSTREAM_INSTRUCTIONS,
+ )
+
+ span.set_output({
+ "mode": "upstream_patch_verification",
+ "patch_filename": upstream_report.fixed_srpm_file_name,
+ })
+ else:
+ # Use case 4: Default prompt - no patch context, use VulnerabilityIntel from CVE description
+ runtime_prompt = L1_AGENT_PROMPT_TEMPLATE_NO_PATCH.format(
+ sys_prompt=L1_AGENT_SYS_PROMPT_REBASE_NO_PATCH,
+ vuln_id=vuln_id,
+ target_package=target_package.name,
+ vulnerability_intel=format_vulnerability_intel_for_prompt(vulnerability_intel),
+ tools=tools_str,
+ tool_selection_strategy=tool_strategy,
+ tool_instructions=L1_AGENT_THOUGHT_CVE_DESC_INSTRUCTIONS,
+ )
+ span.set_output({
+ "mode": "no_patch",
+ })
+
+ messages = state.get("messages", [])
+ remove_messages = [RemoveMessage(id=msg.id) for msg in messages if msg.id]
+
+ return {
+ "runtime_prompt": runtime_prompt,
+ "vulnerability_intel": vulnerability_intel,
+ "messages": remove_messages,
+ }
+
+ async def should_continue_downstream(state: CodeAgentState) -> str:
+ downstream_report = state.get("downstream_report")
+ if downstream_report and downstream_report.is_patch_file_available:
+ return "L1_agent"
+ else:
+ return "gather_more_info"
+
+ async def downstream_search(state: CodeAgentState) -> dict:
+ logger.info("downstream_search: starting")
+
+ build_log = ctx.artifacts.build_log_path if ctx and ctx.artifacts else None
+ with tracer.push_active_function("downstream_search", input_data={}) as span:
+ report: DownstreamSearchReport = await downstream_search_preprocss(
+ llm=llm,
+ vuln_id=vuln_id,
+ descriptions=descriptions,
+ source_path=Path(source_dir),
+ build_log_path=Path(build_log) if build_log else None,
+ tracer=tracer,
+ )
+ span.set_output({
+ "is_patch_file_available": report.is_patch_file_available,
+ "is_patch_in_spec_file": report.is_patch_in_spec_file,
+ "spec_file_log_change": report.spec_file_log_change,
+ "is_patch_applied_in_build": report.is_patch_applied_in_build,
+ "build_log_patch_applied": report.build_log_patch_applied,
+ "parsed_patch": report.parsed_patch.patch_filename if report.parsed_patch else None,
+ })
+
+ return {
+ "downstream_report": report,
+ "messages": [AIMessage(content="Downstream flow preprocess completed")],
+ }
+
+ async def gather_more_info(state: CodeAgentState) -> dict:
+ logger.info("gather_more_info: starting")
+ with tracer.push_active_function("gather_more_info", input_data={}) as span:
+ report: UpstreamSearchReport = await upstream_search_preprocess(
+ vuln_id=vuln_id,
+ fix_info=fix_info,
+ brew_downloader=brew_downloader,
+ patch_dir=Path(patch_dir),
+ source_path=Path(source_dir),
+ tracer=tracer,
+ )
+
+ span.set_output({
+ "is_fixed_srpm_is_needed": report.is_fixed_srpm_is_needed,
+ "is_rebase_fix": report.is_code_fixed_by_rebase == "yes",
+ })
+ return {
+ "messages": [AIMessage(content="Gathering more information...")],
+ "upstream_report": report,
+ }
+
+ async def thought_node(state: CodeAgentState) -> dict:
+ """Generate next thought/action using the LLM."""
+ step_num = state.get("step", 0)
+ logger.info("thought_node: starting step %d", step_num)
+ runtime_prompt = state.get("runtime_prompt") or "You are a security analyst investigating a CVE."
+ messages = [SystemMessage(content=runtime_prompt)] + state["messages"]
+ with tracer.push_active_function("thought_node", input_data={}) as span:
+ obs = state.get("observation", None)
+ if obs is not None:
+ memory_list = obs.memory if obs.memory else ["No prior knowledge."]
+ recent_findings = obs.results if obs.results else ["No recent findings."]
+ memory_context = "\n".join(f"- {m}" for m in memory_list)
+ findings_context = "\n".join(f"- {f}" for f in recent_findings)
+ context_block = f"KNOWLEDGE:\n{memory_context}\nLATEST FINDINGS:\n{findings_context}"
+ messages.append(SystemMessage(content=context_block))
+ response: CheckerThought = await thought_llm.ainvoke(messages)
+ if response.mode == "finish":
+ ai_message = AIMessage(content=response.final_answer)
+ else:
+ tool_name = response.actions.tool
+ arguments = response.actions.query
+ tool_call_id = str(uuid.uuid4())
+ ai_message = AIMessage(
+ content=response.thought,
+ tool_calls=[{"name": tool_name, "args": {"query": arguments}, "id": tool_call_id}]
+ )
+ span.set_output({
+ "thought": response.thought,
+ "mode": response.mode,
+ "actions": response.actions,
+ "final_answer": response.final_answer,
+ })
+ return {
+ "messages": [ai_message],
+ "thought": response,
+ "step": step_num + 1,
+ "max_steps": config.max_iterations,
+ }
+
+ async def forced_finish_node(state: CodeAgentState) -> dict:
+ """Force finish when max iterations reached.
+
+ Invokes the LLM with FORCED_FINISH_PROMPT to generate a final answer
+ based on evidence gathered so far.
+ """
+ step_num = state.get("step", 0)
+ with tracer.push_active_function("forced_finish_node", input_data=f"step:{step_num}") as span:
+ try:
+ active_prompt = state.get("runtime_prompt")
+ messages = [SystemMessage(content=active_prompt)] + state["messages"]
+ messages.append(HumanMessage(content=FORCED_FINISH_PROMPT))
+
+ obs = state.get("observation")
+ if obs is not None and obs.memory:
+ memory_context = "\n".join(f"- {m}" for m in obs.memory)
+ messages.append(SystemMessage(content=f"KNOWLEDGE:\n{memory_context}"))
+
+ response: CheckerThought = await thought_llm.ainvoke(messages)
+
+ if response.mode == "finish" and response.final_answer:
+ ai_message = AIMessage(content=response.final_answer)
+ final_answer = response.final_answer
+ else:
+ final_answer = "Failed to generate a final answer within the maximum allowed steps."
+ ai_message = AIMessage(content=final_answer)
+ response = CheckerThought(
+ thought=response.thought or "Max steps exceeded",
+ mode="finish",
+ actions=None,
+ final_answer=final_answer,
+ )
+
+ span.set_output({"final_answer_length": len(final_answer), "step": step_num})
+ return {
+ "messages": [ai_message],
+ "thought": response,
+ "step": step_num,
+ "max_steps": state.get("max_steps", config.max_iterations),
+ "observation": state.get("observation"),
+ "output": final_answer,
+ }
+ except Exception as e:
+ logger.exception("forced_finish_node failed at step %d", step_num)
+ span.set_output({"error": str(e), "exception_type": type(e).__name__, "step": step_num})
+ raise
+
+ async def observation_node(state: CodeAgentState) -> dict:
+ """Process tool output: comprehension -> memory update with VulnerabilityIntel context."""
+ logger.info("observation_node: starting")
+ tool_message = state["messages"][-1]
+ last_thought = state.get("thought")
+ if not last_thought:
+ return {
+ "messages": [AIMessage(content="No thought found")],
+ }
+ last_thought_text = last_thought.thought
+ tool_used = last_thought.actions.tool
+ tool_input_detail = last_thought.actions.query
+ previous_memory = state.get("observation").memory if state.get("observation") else ["No data gathered yet."]
+
+ vulnerability_intel = state.get("vulnerability_intel")
+ intel_formatted = format_vulnerability_intel_for_prompt(vulnerability_intel) if vulnerability_intel else "No intel available"
+ target_package_name = target_package.name if target_package else "unknown"
+
+ with tracer.push_active_function("observation node", input_data=f"tool used:{tool_used} + {tool_input_detail}") as span:
+ tool_output_for_llm = tool_message.content
+
+ # Step 1: Comprehension - extract key findings from raw tool output
+ comp_prompt = L1_COMPREHENSION_PROMPT.format(
+ vuln_id=vuln_id,
+ target_package=target_package_name,
+ vulnerability_intel=intel_formatted,
+ tool_used=tool_used,
+ tool_input=tool_input_detail,
+ last_thought=last_thought_text,
+ tool_output=tool_output_for_llm[:8000],
+ )
+ code_findings: CodeFindings = await comprehension_llm.ainvoke([SystemMessage(content=comp_prompt)])
+ findings_text = "\n".join(f"- {f}" for f in code_findings.findings)
+
+ # Step 2: Memory update - merge findings into cumulative memory
+ mem_prompt = L1_MEMORY_UPDATE_PROMPT.format(
+ vuln_id=vuln_id,
+ target_package=target_package_name,
+ previous_memory="\n".join(f"- {m}" for m in previous_memory) if isinstance(previous_memory, list) else previous_memory,
+ findings=findings_text,
+ tool_outcome=code_findings.tool_outcome,
+ )
+ new_observation: Observation = await observation_llm.ainvoke([SystemMessage(content=mem_prompt)])
+
+ messages = state["messages"]
+ active_prompt = state.get("runtime_prompt")
+ estimated = _estimate_tokens(active_prompt, messages, new_observation)
+ prune_messages = []
+ orig_estimated = estimated
+
+ if estimated > config.context_window_token_limit:
+ # Prune the oldest messages first until the estimate fits the limit;
+ # the distilled knowledge survives in the Observation memory instead.
+ for msg in messages:
+ prune_messages.append(RemoveMessage(id=msg.id))
+ estimated -= _count_tokens(msg.content) if hasattr(msg, "content") and isinstance(msg.content, str) else 0
+ if estimated <= config.context_window_token_limit:
+ break
+ logger.info(
+ "Context pruning: removed %d messages, estimated tokens now ~%d (limit %d)",
+ len(prune_messages), estimated, config.context_window_token_limit,
+ )
+
+ span.set_output({
+ "last_thought_text": last_thought_text,
+ "tool_output_for_llm": tool_output_for_llm[:500],
+ "findings": code_findings.findings,
+ "tool_outcome": code_findings.tool_outcome,
+ "new_memory": new_observation.memory,
+ "amount_of_orig_tokens": orig_estimated,
+ "amount_of_estimated_tokens": estimated,
+ })
+ return {
+ "messages": prune_messages,
+ "observation": new_observation,
+ }
+
+ async def should_continue(state: CodeAgentState) -> str:
+ thought = state.get("thought", None)
+ if thought is not None and thought.mode == "finish":
+ return END
+ if state.get("step", 0) >= state.get("max_steps", config.max_iterations):
+ return FORCED_FINISH_NODE
+ return TOOL_NODE
+
+ flow = StateGraph(CodeAgentState)
+
+ flow.add_node(DOWNSTREAM_SEARCH_NODE, downstream_search)
+ flow.add_node(GATHER_MORE_INFO_NODE, gather_more_info)
+ flow.add_node(L1_AGENT_NODE, L1_agent)
+ flow.add_node(THOUGHT_NODE, thought_node)
+ flow.add_node(TOOL_NODE, tools_node)
+ flow.add_node(FORCED_FINISH_NODE, forced_finish_node)
+ flow.add_node(OBSERVATION_NODE, observation_node)
+
+ flow.add_edge(START, DOWNSTREAM_SEARCH_NODE)
+ flow.add_conditional_edges(DOWNSTREAM_SEARCH_NODE, should_continue_downstream, {
+ L1_AGENT_NODE: L1_AGENT_NODE,
+ GATHER_MORE_INFO_NODE: GATHER_MORE_INFO_NODE,
+ })
+ flow.add_edge(GATHER_MORE_INFO_NODE, L1_AGENT_NODE)
+ flow.add_edge(L1_AGENT_NODE, THOUGHT_NODE)
+ flow.add_conditional_edges(
+ THOUGHT_NODE,
+ should_continue,
+ {END: END, TOOL_NODE: TOOL_NODE, FORCED_FINISH_NODE: FORCED_FINISH_NODE}
+ )
+ flow.add_edge(TOOL_NODE, OBSERVATION_NODE)
+ flow.add_edge(OBSERVATION_NODE, THOUGHT_NODE)
+ flow.add_edge(FORCED_FINISH_NODE, END)
+
+ app = flow.compile()
+ return app
+
+
+@register_function(config_type=CVEPackageCodeAgentConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN])
+async def cve_package_code_agent(config: CVEPackageCodeAgentConfig, builder: Builder):
+
+ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput:
+ """Run L1 investigation and return intermediate result for routing to L2 or report generation."""
+ trace_id.set(message.input.scan.id)
+ tracer = Context.get()
+
+ # Set ctx_state so tools (e.g., Source Grep, Lexical Search) can access checker_context
+ from types import SimpleNamespace
+ workflow_state = SimpleNamespace(original_input=message, info=message.info)
+ ctx_state.set(workflow_state)
+
+ logger.info("package_code_agent: starting L1 investigation")
+
+ l1_agent_graph = await create_graph_code_agent(config, builder, message, tracer)
+ initial_state: CodeAgentState = {
+ "messages": [HumanMessage(content="Begin L1 CVE investigation")],
+ "step": 0,
+ "max_steps": config.max_iterations,
+ }
+
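+ # Each ReAct iteration traverses up to ~4 graph nodes (think -> tool ->
+ # observation -> back to think), hence the recursion headroom in the
+ # ainvoke call below.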
+ with tracer.push_active_function("l1_agent_graph", input_data=initial_state["messages"][0].content):
+ result = await l1_agent_graph.ainvoke(
+ initial_state,
+ config={"recursion_limit": config.max_iterations * 4},
+ )
+
+ logger.info("package_code_agent: L1 investigation finished")
+
+ final_answer = None
+ thought = result.get("thought")
+ if thought and thought.mode == "finish":
+ final_answer = thought.final_answer
+
+ vuln_id = message.input.scan.vulns[0].vuln_id
+ target_package = message.input.image.target_package
+ target_package_name = target_package.name if target_package else "unknown"
+
+ llm = await builder.get_llm(llm_name=config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN)
+ verdict_extraction = await extract_l1_verdict(
+ llm=llm,
+ vuln_id=vuln_id,
+ target_package=target_package_name,
+ final_answer=final_answer or "No final answer produced.",
+ tracer=tracer,
+ )
+ preliminary_verdict = verdict_extraction.preliminary_verdict
+ confidence = verdict_extraction.confidence
+
+ downstream_report: DownstreamSearchReport | None = result.get("downstream_report")
+ upstream_report: UpstreamSearchReport | None = result.get("upstream_report")
+ vulnerability_intel: VulnerabilityIntel | None = result.get("vulnerability_intel")
+
+ l1_result = L1InvestigationResult(
+ downstream_report=downstream_report.model_dump() if downstream_report else None,
+ upstream_report=upstream_report.model_dump() if upstream_report else None,
+ l1_agent_answer=final_answer,
+ vulnerability_intel=vulnerability_intel,
+ preliminary_verdict=preliminary_verdict,
+ confidence=confidence,
+ )
+
+ with tracer.push_active_function(
+ "l1_agent_finish",
+ input_data={"preliminary_verdict": preliminary_verdict},
+ ) as span:
+ span.set_output({
+ "l1_agent_answer": final_answer[:500] if final_answer else None,
+ "vulnerability_intel": vulnerability_intel,
+ "confidence": l1_result.confidence,
+ })
+
+ if message.info.checker_context is not None:
+ message.info.checker_context.l1_result = l1_result
+ else:
+ logger.warning("package_code_agent: checker_context is None, cannot store l1_result")
+ logger.info(
+ "package_code_agent: L1 result - verdict=%s, confidence=%.2f",
+ preliminary_verdict,
+ l1_result.confidence,
+ )
+ return message
+
+ yield FunctionInfo.from_fn(
+ _arun,
+ description="Level 1 Package Code Agent: investigates CVEs using extracted source and Tantivy code index",
+ )
diff --git a/src/vuln_analysis/functions/cve_source_acquisition.py b/src/vuln_analysis/functions/cve_source_acquisition.py
new file mode 100644
index 000000000..6a2b2dc65
--- /dev/null
+++ b/src/vuln_analysis/functions/cve_source_acquisition.py
@@ -0,0 +1,164 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import hashlib
+import json
+from datetime import datetime, timezone
+from aiq.builder.builder import Builder
+from aiq.builder.framework_enum import LLMFrameworkEnum
+from aiq.builder.function_info import FunctionInfo
+from aiq.cli.register_workflow import register_function
+from aiq.data_models.function import FunctionBaseConfig
+from pydantic import Field
+
+from pathlib import Path
+
+from exploit_iq_commons.data_models.checker_status import PackageCheckerContext, PackageCheckerStatus
+from exploit_iq_commons.data_models.checker_status import AcquiredArtifacts
+from exploit_iq_commons.logging.loggers_factory import LoggingFactory
+
+from exploit_iq_commons.utils.source_rpm_downloader import RPMDependencyManager
+from vuln_analysis.utils.package_identifier import PackageIdentifier
+from vuln_analysis.tools.brew_downloader import BrewDownloader, BrewProfileType, BrewDownloaderError
+from vuln_analysis.functions.cve_calculate_intel_score import CVECalculateIntelScoreConfig
+
+logger = LoggingFactory.get_agent_logger(__name__)
+
+
+class CVESourceAcquisitionConfig(FunctionBaseConfig, name="cve_source_acquisition"):
+ """
+ Downloads source containers, extracts layers, and locates package sources
+ by purl and ecosystem. Populates the pipeline state with source paths for
+ downstream checker segmentation and investigation nodes.
+ """
+ base_git_dir: str = Field(
+ default=".cache/am_cache/git",
+ description="The directory for storing pulled git repositories used for code analysis.",
+ )
+ base_pickle_dir: str = Field(
+ default=".cache/am_cache/pickle",
+ description="The directory used for storing pickled document cache files.",
+ )
+ base_rpm_dir: str = Field(
+ default=".cache/am_cache/rpms",
+ description="The directory used for storing rpm files.",
+ )
+ base_checker_dir: str = Field(
+ default=".cache/am_cache/checker",
+ description="Root directory for checker-specific artifacts (extracted sources, diffs, results).",
+ )
+
+
+@register_function(config_type=CVESourceAcquisitionConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN])
+async def cve_source_acquisition(config: CVESourceAcquisitionConfig, builder: Builder):
+ from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput
+
+ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput:
+ logger.info("source_acquisition: starting source code acquisition")
+
+ rpm_manager = RPMDependencyManager.get_instance()
+ rpm_manager.set_rpm_cache_dir(config.base_rpm_dir)
+ message.info.checker_context = PackageCheckerContext()
+ intel_list = message.info.intel or []
+ vulns = message.input.scan.vulns
+
+ intel_by_vuln = {i.vuln_id: i for i in intel_list}
+ target_package = message.input.image.target_package
+
+ identifier = PackageIdentifier(
+ target_package=target_package,
+ )
+
+ status = PackageCheckerStatus.OK
+
+ intel_score_config = builder.get_function_config("cve_calculate_intel_score")
+ assert isinstance(intel_score_config, CVECalculateIntelScoreConfig)
+
+ for vuln_info in vulns:
+ intel = intel_by_vuln.get(vuln_info.vuln_id)
+
+ if intel_score_config.generate_intel_score and intel:
+ score = intel.get_intel_score()
+ if score < intel_score_config.intel_low_score and not intel_score_config.insist_analysis:
+ logger.info("Intel score %d below threshold %d for %s - skipping",
+ score, intel_score_config.intel_low_score, vuln_info.vuln_id)
+ status = PackageCheckerStatus.PKG_INTEL_LOW_SCORE
+ break
+
+ status, result = identifier.identify(intel)
+ message.info.checker_context.identify_result = result
+ break
+
+ message.info.checker_context.status = status
+ if status != PackageCheckerStatus.OK:
+ return message
+
+ # create identifier key
+ str_identifier_key = f"{target_package.name}-{target_package.version}-{target_package.release}"
+ identifier_key = hashlib.sha256(str_identifier_key.encode()).hexdigest()[:16]
+ message.info.checker_context.source_key = identifier_key
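+ # The key is deterministic: the same NVR string always hashes to the same
+ # 16-hex-char directory name, which enables the cache-hit check below.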
+
+ target_dir = Path(config.base_checker_dir) / identifier_key
+
+ if target_dir.exists() and any(target_dir.iterdir()):
+ logger.info("Source cache hit for %s: %s", identifier_key, target_dir)
+ artifacts = AcquiredArtifacts()
+ artifacts.srpm_path = target_dir / "source"
+ artifacts.build_log_path = target_dir / "logs"
+ artifacts.binary_rpm_path = target_dir / "binaries"
+ metadata_file = target_dir / "metadata.json"
+ if metadata_file.exists():
+ try:
+ metadata = json.loads(metadata_file.read_text(encoding="utf-8"))
+ artifacts.source_url = metadata.get("source_url")
+ except (json.JSONDecodeError, OSError) as e:
+ logger.warning("Failed to read metadata.json: %s", e)
+ message.info.checker_context.artifacts = artifacts
+ return message
+
+ target_dir.mkdir(parents=True, exist_ok=True)
+ try:
+ brew_downloader = BrewDownloader(BrewProfileType.INTERNAL, config.base_rpm_dir, str(target_dir))
+ brew_downloader.connect()
+ artifacts = brew_downloader.download_target_artifacts(target_package.name, target_package.version, target_package.release, target_package.arch)
+ message.info.checker_context.artifacts = artifacts
+
+ nvr = f"{target_package.name}-{target_package.version}-{target_package.release}"
+ metadata = {
+ "source_url": artifacts.source_url,
+ "nvr": nvr,
+ "downloaded_at": datetime.now(timezone.utc).isoformat(),
+ }
+ metadata_file = target_dir / "metadata.json"
+ metadata_file.write_text(json.dumps(metadata, indent=2), encoding="utf-8")
+ logger.info("Wrote metadata to %s", metadata_file)
+ except BrewDownloaderError as e:
+ logger.error("Failed to download patched SRPM: %s", e)
+ message.info.checker_context.status = PackageCheckerStatus.ERROR_FAILED_TO_DOWNLOAD_SRPM
+ return message
+
+ return message
+
+ yield FunctionInfo.from_fn(
+ _arun,
+ input_schema=AgentMorpheusEngineInput,
+ description="Downloads source containers and locates package sources by purl and ecosystem.",
+ )
diff --git a/src/vuln_analysis/functions/react_internals.py b/src/vuln_analysis/functions/react_internals.py
index 7ec52bb27..834fffd86 100644
--- a/src/vuln_analysis/functions/react_internals.py
+++ b/src/vuln_analysis/functions/react_internals.py
@@ -42,6 +42,34 @@ class Thought(BaseModel):
max_length=3000,
)
+
+class CheckerToolCall(BaseModel):
+ """Tool call for RPM checker flow - simpler schema with just query."""
+ tool: str = Field(description="Exact tool name from AVAILABLE_TOOLS")
+ query: str = Field(description="Search pattern for Source Grep or Code Keyword Search")
+ reason: str = Field(description="Briefly explain why this tool helps the investigation")
+
+
+class CheckerThought(BaseModel):
+ """Thought model for RPM checker flow with simplified tool call schema."""
+ thought: str = Field(
+ description="Brief reasoning about next step (max 3-4 sentences)",
+ max_length=3000,
+ )
+ mode: Literal["act", "finish"] = Field(
+ description="'act' to call tools, 'finish' to return final answer"
+ )
+ actions: CheckerToolCall | None = Field(
+ default=None,
+ description="When mode is 'act', the tool to execute"
+ )
+ final_answer: str | None = Field(
+ default=None,
+ description="When mode is 'finish', concise answer with evidence",
+ max_length=3000,
+ )
+
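+# Illustrative "act" instance (invented values, not from a real run):
+#   {"thought": "The fix renames the parsing helper; grep for the new symbol.",
+#    "mode": "act",
+#    "actions": {"tool": "Source Grep", "query": "pq_getmsgbytes",
+#                "reason": "Confirm the fixed symbol exists in the target source"},
+#    "final_answer": null}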
+
class CodeFindings(BaseModel):
"""Compressed code comprehension output from raw tool results."""
findings: list[str] = Field(
@@ -70,6 +98,26 @@ class Classification(BaseModel):
)
+class L1VerdictExtraction(BaseModel):
+ """Lightweight structured output for extracting verdict from L1 final answer."""
+ preliminary_verdict: Literal["vulnerable", "protected", "not_present", "uncertain"] = Field(
+ description=(
+ "Classify the L1 agent's conclusion: "
+ "'protected' if fix/patch applied or code mitigated, "
+ "'not_present' if vulnerable code not found in this version, "
+ "'vulnerable' if vulnerable code confirmed present, "
+ "'uncertain' if evidence is insufficient or conflicting"
+ )
+ )
+ confidence: float = Field(
+ ge=0.0, le=1.0,
+ description="Confidence in the verdict based on evidence strength in the answer"
+ )
+ reasoning: str = Field(
+ description="Brief explanation of why this verdict was chosen"
+ )
+
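+# Illustrative extraction (invented values): {"preliminary_verdict": "protected",
+#   "confidence": 0.85, "reasoning": "The backported fix hunk is present in the
+#   target source tree."}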
+
class PackageSelection(BaseModel):
"""Structured output for selecting the most relevant package from multiple candidates."""
selected_package: str = Field(
diff --git a/src/vuln_analysis/register.py b/src/vuln_analysis/register.py
index dc847a87f..f7cfb2534 100644
--- a/src/vuln_analysis/register.py
+++ b/src/vuln_analysis/register.py
@@ -23,9 +23,12 @@
from aiq.data_models.function import FunctionBaseConfig
from pydantic import Field
+from exploit_iq_commons.data_models.common import PipelineMode
+from exploit_iq_commons.data_models.checker_status import PackageCheckerStatus, PACKAGE_CHECKER_STATUS_DESCRIPTIONS
from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput
from exploit_iq_commons.data_models.input import AgentMorpheusInput
-from vuln_analysis.data_models.output import AgentMorpheusOutput
+from exploit_iq_commons.data_models.info import AgentMorpheusInfo
+from vuln_analysis.data_models.output import AgentMorpheusEngineOutput, AgentMorpheusOutput, JustificationOutput, OutputPayload
from vuln_analysis.data_models.state import AgentMorpheusEngineState
# pylint: disable=unused-import
from vuln_analysis.functions import cve_agent
@@ -36,8 +39,13 @@
from vuln_analysis.functions import cve_generate_vdbs
from vuln_analysis.functions import cve_http_output
from vuln_analysis.functions import cve_justify
+from vuln_analysis.functions import cve_package_code_agent
+from vuln_analysis.functions import cve_checker_segmentation
+from vuln_analysis.functions import cve_source_acquisition
from vuln_analysis.functions import cve_process_sbom
from vuln_analysis.functions import cve_summarize
+from vuln_analysis.functions import cve_checker_report
+from vuln_analysis.functions import cve_build_agent
from vuln_analysis.functions import cve_generate_cvss
from vuln_analysis.functions import cve_generate_vex
from vuln_analysis.functions import health_endpoint
@@ -47,6 +55,7 @@
from vuln_analysis.tools import container_image_analysis_data
from vuln_analysis.tools import local_vdb
from vuln_analysis.tools import serp
+from vuln_analysis.tools import source_grep
from vuln_analysis.utils.error_handling_decorator import catch_pipeline_errors_async
# pylint: enable=unused-import
from vuln_analysis.utils.llm_engine_utils import postprocess_engine_output, finalize_preprocess_engine_input
@@ -75,6 +84,26 @@ class CVEAgentWorkflowConfig(FunctionBaseConfig, name="cve_agent"):
description="Function to output workflow results "
"(e.g. cve_file_output, cve_http_output). "
" If None, only prints to console")
+ cve_source_acquisition_name: str | None = Field(
+ default=None,
+ description="Function name for source acquisition (downloads source containers, locates package sources)",
+ )
+ cve_checker_segmentation_name: str | None = Field(
+ default=None,
+ description="Function name for scoped code indexing of extracted checker sources (Tantivy only)",
+ )
+ cve_package_code_agent_name: str | None = Field(
+ default=None,
+ description="Function name for the Level 1 Package Code Agent (source-level CVE investigation)",
+ )
+ cve_checker_report_name: str | None = Field(
+ default=None,
+ description="Function name for the checker report generation (L1/L2 report synthesis)",
+ )
+ cve_build_agent_name: str | None = Field(
+ default=None,
+ description="Function name for the Level 2 Build Agent (build compilation and hardening check)",
+ )
description: str = Field(default="Vulnerability analysis for container security workflow",
description="Workflow function description")
@@ -99,6 +128,26 @@ async def cve_agent_workflow(config: CVEAgentWorkflowConfig, builder: Builder):
cve_generate_vex_fn = builder.get_function(name=config.cve_generate_vex_name)
cve_generate_cvss_fn = builder.get_function(name=config.cve_generate_cvss_name)
cve_output_fn = builder.get_function(name=config.cve_output_config_name) if config.cve_output_config_name else None
+ cve_source_acquisition_fn = (
+ builder.get_function(name=config.cve_source_acquisition_name)
+ if config.cve_source_acquisition_name else None
+ )
+ cve_checker_segmentation_fn = (
+ builder.get_function(name=config.cve_checker_segmentation_name)
+ if config.cve_checker_segmentation_name else None
+ )
+ cve_package_code_agent_fn = (
+ builder.get_function(name=config.cve_package_code_agent_name)
+ if config.cve_package_code_agent_name else None
+ )
+ cve_checker_report_fn = (
+ builder.get_function(name=config.cve_checker_report_name)
+ if config.cve_checker_report_name else None
+ )
+ cve_build_agent_fn = (
+ builder.get_function(name=config.cve_build_agent_name)
+ if config.cve_build_agent_name else None
+ )
# Define langgraph node functions
@catch_pipeline_errors_async
@@ -183,7 +232,24 @@ async def output_results_node(state: AgentMorpheusOutput) -> AgentMorpheusOutput
"""Outputs results using configured output function"""
return await cve_output_fn.ainvoke(state.model_dump()) if cve_output_fn else state
-
+
+ # --- Package checker path nodes ---
+
+ @catch_pipeline_errors_async
+ async def checker_init_state_node(state: AgentMorpheusInput) -> AgentMorpheusEngineInput:
+ """Bridges AgentMorpheusInput -> AgentMorpheusEngineInput with empty info (skips VDB generation)."""
+ return AgentMorpheusEngineInput(input=state, info=AgentMorpheusInfo())
+
+ @catch_pipeline_errors_async
+ async def checker_fetch_intel_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput:
+ """Fetch intel for CVE input (package checker path). Reuses the same fetch_intel function."""
+ return await cve_fetch_intel_fn.ainvoke(state.model_dump())
+
+ @catch_pipeline_errors_async
+ async def checker_calculate_intel_score_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput:
+ """Calculate intel score for CVE input (package checker path)."""
+ return await cve_calculate_intel_score_fn.ainvoke(state.model_dump())
+
async def check_vdbs_success(state: AgentMorpheusInput) -> str:
"""Checks if the VDBs were successfully generated"""
if state.code_index_success:
@@ -196,7 +262,129 @@ async def failure_node(state: AgentMorpheusInput) -> AgentMorpheusOutput:
from exploit_iq_commons.data_models.info import AgentMorpheusInfo
from vuln_analysis.data_models.output import OutputPayload
return AgentMorpheusOutput(input=state, info=AgentMorpheusInfo(), output=OutputPayload(analysis=[], vex=None))
- # define langgraph
+
+ @catch_pipeline_errors_async
+ async def source_acquisition_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput:
+ """Acquires source code for the target package (source containers, git fallback)."""
+ if cve_source_acquisition_fn:
+ state = await cve_source_acquisition_fn.ainvoke(state.model_dump())
+ else:
+ logger.warning("Source acquisition function not configured, passing state through")
+
+ if state.info.checker_context and state.info.checker_context.status is not None:
+ logger.info(
+ "PackageIdentify aggregate status: %s",
+ state.info.checker_context.status.name,
+ )
+ return state
+
+ @catch_pipeline_errors_async
+ async def checker_segmentation_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput:
+ """Builds scoped Tantivy code index from extracted checker sources."""
+ if cve_checker_segmentation_fn:
+ state = await cve_checker_segmentation_fn.ainvoke(state.model_dump())
+ else:
+ logger.warning("Checker segmentation not configured, skipping indexing")
+ return state
+
+ @catch_pipeline_errors_async
+ async def l1_code_agent_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput:
+ """Level 1 Package Code Agent: investigates CVEs using extracted source and Tantivy code index.
+
+ Returns AgentMorpheusEngineInput with l1_result populated on checker_context.
+ """
+ if cve_package_code_agent_fn:
+ return await cve_package_code_agent_fn.ainvoke(state.model_dump())
+ logger.warning("Package code agent function not configured, passing state through")
+ return state
+
+ def route_after_l1(state: AgentMorpheusEngineInput) -> str:
+ """Route to L2 Build Agent if vulnerable or uncertain, else to report generation."""
+ ctx = state.info.checker_context
+ if ctx and ctx.l1_result:
+ verdict = ctx.l1_result.preliminary_verdict
+ if verdict in ("vulnerable", "uncertain"):
+ return "l2_build_agent"
+ return "generate_report"
+
+ @catch_pipeline_errors_async
+ async def l2_build_agent_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput:
+ """Level 2 Build Agent: BuildCompilationCheck + HardeningCheck.
+
+ Returns AgentMorpheusEngineInput with l2_result populated on checker_context.
+ """
+ if cve_build_agent_fn:
+ return await cve_build_agent_fn.ainvoke(state.model_dump())
+ logger.warning("Build agent function not configured, passing state through")
+ return state
+
+ @catch_pipeline_errors_async
+ async def generate_report_node(state: AgentMorpheusEngineInput) -> AgentMorpheusOutput:
+ """Generate the final checker report from L1/L2 investigation results."""
+ if cve_checker_report_fn:
+ return await cve_checker_report_fn.ainvoke(state.model_dump())
+ logger.warning("Checker report function not configured, producing empty output")
+ return AgentMorpheusOutput(
+ input=state.input,
+ info=state.info,
+ output=OutputPayload(analysis=[], vex=None),
+ )
+
+ @catch_pipeline_errors_async
+ async def checker_early_exit_node(state: AgentMorpheusEngineInput) -> AgentMorpheusOutput:
+ """Produces a proper output when source_acquisition exits with a non-OK status."""
+ ctx = state.info.checker_context
+ status = ctx.status if ctx else None
+ reason = (
+ PACKAGE_CHECKER_STATUS_DESCRIPTIONS[status]
+ if status is not None and status in PACKAGE_CHECKER_STATUS_DESCRIPTIONS
+ else f"Checker exited early with status {status}"
+ )
+ logger.info("checker_early_exit: status=%s reason=%s", status, reason)
+ def _get_justification_label(s: PackageCheckerStatus | None) -> str:
+ if s in (PackageCheckerStatus.PKG_IDENT_NOT_VUL, PackageCheckerStatus.PKG_IDENT_CVE_MISMATCH):
+ return "not_vulnerable"
+ if s == PackageCheckerStatus.PKG_INTEL_LOW_SCORE:
+ return "poor_quality_intel"
+ return "error"
+
+ analysis = [
+ AgentMorpheusEngineOutput(
+ vuln_id=v.vuln_id,
+ checklist=[],
+ summary=reason,
+ justification=JustificationOutput(
+ label=_get_justification_label(status),
+ reason=reason,
+ status="FALSE" if status in (
+ PackageCheckerStatus.PKG_IDENT_NOT_VUL,
+ PackageCheckerStatus.PKG_IDENT_CVE_MISMATCH,
+ ) else "UNKNOWN",
+ ),
+ intel_score=0,
+ cvss=None,
+ )
+ for v in state.input.scan.vulns
+ ]
+ return AgentMorpheusOutput(
+ input=state.input, info=state.info,
+ output=OutputPayload(analysis=analysis, vex=None),
+ )
+
+ def route_after_source_acquisition(state: AgentMorpheusEngineInput):
+ """Route to checker_segmentation (happy path) or early exit on non-OK status."""
+ ctx = state.info.checker_context
+ if ctx and ctx.status == PackageCheckerStatus.OK:
+ return "checker_segmentation"
+ return "checker_early_exit"
+
+ def route_after_add_start_time(state: AgentMorpheusInput):
+ """Route to full pipeline or package checker based on pipeline_mode."""
+ if state.image.pipeline_mode == PipelineMode.PACKAGE_CHECKER:
+ return "checker_init_state"
+ return "generate_vdbs"
# build llm engine subgraph
subgraph_builder = StateGraph(AgentMorpheusEngineState)
@@ -240,8 +428,28 @@ async def call_llm_engine_subgraph_node(message: AgentMorpheusEngineInput):
graph_builder.add_node("add_completed_time", add_completed_time_node)
graph_builder.add_node("output_results", output_results_node)
graph_builder.add_node("failure", failure_node)
+# -- Package checker nodes --
+ graph_builder.add_node("checker_init_state", checker_init_state_node)
+ graph_builder.add_node("checker_fetch_intel", checker_fetch_intel_node)
+ graph_builder.add_node("checker_calculate_intel_score", checker_calculate_intel_score_node)
+ graph_builder.add_node("source_acquisition", source_acquisition_node)
+ graph_builder.add_node("checker_early_exit", checker_early_exit_node)
+ graph_builder.add_node("checker_segmentation", checker_segmentation_node)
+ graph_builder.add_node("l1_code_agent", l1_code_agent_node)
+ graph_builder.add_node("l2_build_agent", l2_build_agent_node)
+ graph_builder.add_node("generate_report", generate_report_node)
+
graph_builder.add_edge(START, "add_start_time")
- graph_builder.add_edge("add_start_time", "generate_vdbs")
+ # Conditional: route to full pipeline or package checker after add_start_time
+ graph_builder.add_conditional_edges(
+ "add_start_time",
+ route_after_add_start_time,
+ {
+ "generate_vdbs": "generate_vdbs",
+ "checker_init_state": "checker_init_state",
+ },
+ )
+
graph_builder.add_conditional_edges("generate_vdbs", check_vdbs_success,{"fetch_intel": "fetch_intel", "failure": "failure"})
graph_builder.add_edge("failure", "add_completed_time")
#graph_builder.add_edge("generate_vdbs", "fetch_intel")
@@ -250,10 +458,39 @@ async def call_llm_engine_subgraph_node(message: AgentMorpheusEngineInput):
graph_builder.add_edge("process_sbom", "check_vuln_deps")
graph_builder.add_edge("check_vuln_deps", "llm_engine")
graph_builder.add_edge("llm_engine", "add_completed_time")
+
+ # Package checker path
+ graph_builder.add_edge("checker_init_state", "checker_fetch_intel")
+ graph_builder.add_edge("checker_fetch_intel", "checker_calculate_intel_score")
+ graph_builder.add_edge("checker_calculate_intel_score", "source_acquisition")
+
+ graph_builder.add_conditional_edges(
+ "source_acquisition",
+ route_after_source_acquisition,
+ {
+ "checker_segmentation": "checker_segmentation",
+ "checker_early_exit": "checker_early_exit",
+ },
+ )
+ graph_builder.add_edge("checker_early_exit", "add_completed_time")
+ graph_builder.add_edge("checker_segmentation", "l1_code_agent")
+ graph_builder.add_conditional_edges(
+ "l1_code_agent",
+ route_after_l1,
+ {
+ "l2_build_agent": "l2_build_agent",
+ "generate_report": "generate_report",
+ },
+ )
+ graph_builder.add_edge("l2_build_agent", "generate_report")
+ graph_builder.add_edge("generate_report", "add_completed_time")
+
+ # Shared tail
graph_builder.add_edge("add_completed_time", "output_results")
graph_builder.add_edge("output_results", END)
graph = graph_builder.compile()
-
+ #graph.get_graph().draw_mermaid_png(output_file_path="checker_flow.png")
+
def convert_str_to_agent_morpheus_input(input: str) -> AgentMorpheusInput:
logger.debug("Converting input to AgentMorpheusInput: %s", input)
try:
diff --git a/src/vuln_analysis/tools/brew_downloader.py b/src/vuln_analysis/tools/brew_downloader.py
new file mode 100644
index 000000000..d0693b46e
--- /dev/null
+++ b/src/vuln_analysis/tools/brew_downloader.py
@@ -0,0 +1,290 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Production Brew Downloader -- fetch SRPMs, build logs, and binary RPMs from Brew (Koji).
+
+Evolved from the PoC at docs/package_analyzer/standalone_checker/brew/brew_downloader.py.
+Storage is split: SRPMs go to the shared rpms/ cache, everything else to checker-specific dirs.
+"""
+
+from __future__ import annotations
+
+import shutil
+from enum import Enum
+from pathlib import Path
+
+import koji
+import requests
+import yaml
+
+from exploit_iq_commons.data_models.checker_status import AcquiredArtifacts
+from exploit_iq_commons.logging.loggers_factory import LoggingFactory
+from exploit_iq_commons.utils.source_rpm_downloader import SourceRPMDownloader
+
+logger = LoggingFactory.get_agent_logger(__name__)
+
+_CONFIGS_DIR = Path(__file__).resolve().parent.parent / "configs" / "brew"
+
+# ---------------------------------------------------------------------------
+# Exceptions
+# ---------------------------------------------------------------------------
+
+class BrewDownloaderError(Exception):
+ """Base for all Brew downloader errors."""
+
+
+class BrewConnectionError(BrewDownloaderError):
+ """Raised when the Brew hub is unreachable or session creation fails."""
+
+
+class BrewBuildNotFoundError(BrewDownloaderError):
+ """Raised when getBuild returns None for the requested NVR."""
+
+
+class BrewDownloadError(BrewDownloaderError):
+ """Raised when an HTTP download of an artifact fails."""
+
+
+class BrewProfileNotImplementedError(BrewDownloaderError):
+ """Raised when a profile type is not yet implemented."""
+
+
+# ---------------------------------------------------------------------------
+# Profile types
+# ---------------------------------------------------------------------------
+
+class BrewProfileType(Enum):
+ INTERNAL = "internal"
+ EXTERNAL = "external"
+
+
+_PROFILE_PATHS: dict[BrewProfileType, Path] = {
+ BrewProfileType.INTERNAL: _CONFIGS_DIR / "internal-user-profile.yml",
+}
+
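+# Typical usage (illustrative sketch; paths match the workflow defaults):
+#   downloader = BrewDownloader(BrewProfileType.INTERNAL,
+#                               ".cache/am_cache/rpms", ".cache/am_cache/checker/<key>")
+#   downloader.connect()
+#   srpm_path = downloader.download_patched_srpm("libpq", "13.20", "1.el8_6")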
+
+# ---------------------------------------------------------------------------
+# BrewDownloader
+# ---------------------------------------------------------------------------
+
+class BrewDownloader:
+ """Downloads RPM artifacts and build logs from Brew (Koji) using a profile YAML.
+
+ Storage destinations:
+ - SRPMs -> ``rpm_cache_dir/{NVR}.src.rpm`` (shared with SourceRPMDownloader)
+ - Build logs -> ``checker_dir/logs/{NVR}-{arch}-build.log``
+ - Binary RPMs -> ``checker_dir/binaries/{NVR}/{NVRA}.rpm``
+ """
+
+ def __init__(self, profile_type: BrewProfileType, rpm_cache_dir: str, checker_dir: str) -> None:
+ if profile_type == BrewProfileType.EXTERNAL:
+ raise BrewProfileNotImplementedError(
+ f"Profile type '{profile_type.value}' is not yet implemented"
+ )
+ profile_path = _PROFILE_PATHS[profile_type]
+ self._profile = self._load_profile(str(profile_path))
+
+ hosts = self._profile["hosts"]["rpm"]
+ self._brew_hub: str = hosts["brew_hub"]
+ self._brew_download: str = hosts["brew_download"]
+ self._default_arch: str = self._profile.get("default_arch", "x86_64")
+ self._download_binary_rpm_enabled: bool = self._profile.get("download_binary_rpm", False)
+ self._auto_fetch_build_log: bool = self._profile.get("build_log", {}).get("auto_fetch", True)
+ self._ssl_verify: bool = self._profile.get("ssl_verify", False)
+
+ self._rpm_cache_dir = Path(rpm_cache_dir)
+ self._rpm_cache_dir.mkdir(parents=True, exist_ok=True)
+
+ self._checker_dir = Path(checker_dir)
+ self._checker_dir.mkdir(parents=True, exist_ok=True)
+
+ self._session: koji.ClientSession | None = None
+ self._pathinfo: koji.PathInfo | None = None
+ self._http = requests.Session()
+
+ # -- properties --------------------------------------------------------
+
+ @property
+ def download_binary_rpm_enabled(self) -> bool:
+ return self._download_binary_rpm_enabled
+
+ @property
+ def default_arch(self) -> str:
+ return self._default_arch
+
+ @property
+ def auto_fetch_build_log(self) -> bool:
+ return self._auto_fetch_build_log
+
+ # -- setup -------------------------------------------------------------
+
+ @staticmethod
+ def _load_profile(path: str) -> dict:
+ with open(path, encoding="utf-8") as fh:
+ return yaml.safe_load(fh)
+
+ def connect(self) -> None:
+ """Create a Koji client session and PathInfo helper from the profile."""
+ logger.info("Connecting to Brew hub: %s", self._brew_hub)
+ try:
+ opts: dict = {}
+ if not self._ssl_verify:
+ opts["no_ssl_verify"] = True
+ self._session = koji.ClientSession(self._brew_hub, opts=opts)
+ self._pathinfo = koji.PathInfo(topdir=self._brew_download)
+ self._http.verify = self._ssl_verify
+ except Exception as exc:
+ raise BrewConnectionError(
+ f"Failed to connect to Brew hub {self._brew_hub}: {exc}"
+ ) from exc
+
+ # -- query -------------------------------------------------------------
+
+ def search_build(self, name: str, version: str, release: str) -> dict | None:
+ """Look up a build by NVR. Returns the build-info dict or ``None``."""
+ nvr = f"{name}-{version}-{release}"
+ logger.info("Searching for build: %s", nvr)
+ build = self._session.getBuild(nvr)
+ if build is None:
+ logger.warning("Build not found: %s", nvr)
+ return None
+ logger.info(
+ "Found build %s (id=%s, volume=%s, task=%s)",
+ build["nvr"], build["id"], build.get("volume_name"), build.get("task_id"),
+ )
+ return build
+
+ # -- downloads ---------------------------------------------------------
+
+ def _download_file(self, url: str, dest: Path) -> Path:
+ """Stream-download *url* to *dest*. Returns the destination path."""
+ logger.info("Downloading %s -> %s", url, dest)
+ dest.parent.mkdir(parents=True, exist_ok=True)
+ try:
+ resp = self._http.get(url, stream=True, timeout=120)
+ resp.raise_for_status()
+ except requests.RequestException as exc:
+ raise BrewDownloadError(f"Failed to download {url}: {exc}") from exc
+ with open(dest, "wb") as fh:
+ for chunk in resp.iter_content(chunk_size=1 << 18): # 256 KB
+ fh.write(chunk)
+ logger.info("Saved %s (%d bytes)", dest.name, dest.stat().st_size)
+ return dest
+
+ def _get_srpm_url(self, build: dict) -> str:
+ """Compute the download URL for the source RPM of *build*."""
+ rpms = self._session.listRPMs(buildID=build["id"], arches="src")
+ if not rpms:
+ raise BrewDownloadError(f"No source RPM found for build {build['nvr']}")
+ rpm_info = rpms[0]
+ return f"{self._pathinfo.build(build)}/{self._pathinfo.rpm(rpm_info)}"
+
+ def download_srpm(self, build: dict) -> Path:
+ """Download the .src.rpm for *build* into the shared RPM cache.
+
+ Skips the download when the destination file already exists and is non-empty.
+ """
+ rpms = self._session.listRPMs(buildID=build["id"], arches="src")
+ if not rpms:
+ raise BrewDownloadError(f"No source RPM found for build {build['nvr']}")
+
+ rpm_info = rpms[0]
+ dest = self._rpm_cache_dir / f"{rpm_info['nvr']}.src.rpm"
+
+ if dest.exists() and dest.stat().st_size > 0:
+ logger.info("SRPM cache hit: %s", dest)
+ return dest
+
+ url = f"{self._pathinfo.build(build)}/{self._pathinfo.rpm(rpm_info)}"
+ return self._download_file(url, dest)
+
+ def download_build_log(self, build: dict, arch: str | None = None) -> Path:
+ """Download ``build.log`` for the given arch into ``checker_dir/logs/``."""
+ arch = arch or self._default_arch
+ url = f"{self._pathinfo.build(build)}/data/logs/{arch}/build.log"
+ dest = self._checker_dir / "logs" / f"{build['nvr']}-{arch}-build.log"
+ return self._download_file(url, dest)
+
+ def download_binary_rpm(self, build: dict, arch: str | None = None) -> Path | None:
+ """Download all binary RPMs for the given arch (excludes debuginfo/debugsource).
+
+ Saves to ``checker_dir/binaries/{NVR}/``. Returns an empty list when no
+ matching RPMs are found.
+ """
+ arch = arch or self._default_arch
+ rpms = self._session.listRPMs(buildID=build["id"], arches=arch)
+ if not rpms:
+ logger.warning("No %s RPMs found for build %s", arch, build["nvr"])
+ return None
+
+ nvr = build["nvr"]
+ build_dir = self._checker_dir / "binaries" / nvr
+
+ for rpm_info in rpms:
+ rpm_name: str = rpm_info["name"]
+ if rpm_name.endswith(("-debuginfo", "-debugsource")):
+ continue
+ url = f"{self._pathinfo.build(build)}/{self._pathinfo.rpm(rpm_info)}"
+ nvra = f"{rpm_info['name']}-{rpm_info['version']}-{rpm_info['release']}.{rpm_info['arch']}"
+ dest = build_dir / f"{nvra}.rpm"
+ self._download_file(url, dest)
+ return build_dir
+
+ def download_patched_srpm(self, name: str, version: str, release: str) -> Path | None:
+ """Download the SRPM for a patched version (from CVE fix info).
+
+ Returns the cached SRPM path, or ``None`` if the patched build is not
+ found in Brew.
+ """
+ build = self.search_build(name, version, release)
+ if build is None:
+ return None
+ return self.download_srpm(build)
+
+ def download_patched_srpm_by_nevra(self, nevra: str) -> Path | None:
+ """Download the SRPM for a patched version (from NEVRA).
+
+ Returns the cached SRPM path, or ``None`` if the patched build is not
+ found in Brew.
+ """
+ build = self._session.getBuild(nevra)
+ if build is None:
+ logger.warning("Build not found: %s", nvr)
+ return None
+ logger.info(
+ "Found build %s (id=%s, volume=%s, task=%s)",
+ build["nvr"], build["id"], build.get("volume_name"), build.get("task_id"),
+ )
+ return self.download_srpm(build)
+
+ def download_target_artifacts(self, name: str, version: str, release: str, arch: str) -> AcquiredArtifacts:
+ """Acquire all artifacts for a build: SRPM (copied and extracted), build log, and optional binary RPMs."""
+ artifacts = AcquiredArtifacts()
+ build = self.search_build(name, version, release)
+ if build is None:
+ raise BrewBuildNotFoundError(f"Build not found for {name}-{version}-{release}")
+
+ artifacts.source_url = self._get_srpm_url(build)
+ cache_srpm_path = self.download_srpm(build)
+
+ srpm_target_path = self._checker_dir / "source"
+ srpm_target_path.mkdir(parents=True, exist_ok=True)
+ shutil.copy2(cache_srpm_path, srpm_target_path)
+ SourceRPMDownloader.extract_src_rpm(cache_srpm_path, srpm_target_path)
+ artifacts.srpm_path = srpm_target_path
+
+ artifacts.build_log_path = self.download_build_log(build, arch)
+ if self._download_binary_rpm_enabled:
+ artifacts.binary_rpm_path = self.download_binary_rpm(build, arch)
+ return artifacts
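+
+ # Usage sketch (illustrative; ``client`` is an instance of this class and the
+ # argument values are placeholders):
+ # client.connect()
+ # artifacts = client.download_target_artifacts("openssl", "3.0.7", "27.el9", "x86_64")
+ # artifacts.srpm_path # extracted sources under checker_dir/source
+ # artifacts.build_log_path # build.log for the requested arch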
diff --git a/src/vuln_analysis/tools/lexical_full_search.py b/src/vuln_analysis/tools/lexical_full_search.py
index 0b24fcc1e..3ef0032f3 100644
--- a/src/vuln_analysis/tools/lexical_full_search.py
+++ b/src/vuln_analysis/tools/lexical_full_search.py
@@ -21,6 +21,7 @@
from pydantic import Field
from exploit_iq_commons.logging.loggers_factory import LoggingFactory
+from exploit_iq_commons.data_models.input import PipelineMode
from vuln_analysis.utils.error_handling_decorator import catch_tool_errors
LEXICAL_CODE_SEARCH = "lexical_code_search"
@@ -33,6 +34,10 @@ class LexicalSearchToolConfig(FunctionBaseConfig, name=LEXICAL_CODE_SEARCH):
Lexical search tool used to search source code.
"""
top_k: int = Field(default=5, description="Top K to use for the lexical search")
+ base_code_index_dir: str = Field(
+ default=".cache/am_cache/code_index",
+ description="Base directory for Tantivy code index storage.",
+ )
@register_function(config_type=LexicalSearchToolConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN])
@@ -43,7 +48,14 @@ async def lexical_search(config: LexicalSearchToolConfig, builder: Builder): #
@catch_tool_errors(LEXICAL_CODE_SEARCH)
async def _arun(query: str) -> str:
workflow_state = ctx_state.get()
- code_index_path = workflow_state.code_index_path
+
+ pipeline_mode = workflow_state.original_input.input.image.pipeline_mode
+ if pipeline_mode == PipelineMode.PACKAGE_CHECKER:
+ source_key = workflow_state.info.checker_context.source_key
+ code_index_path = str(FullTextSearch.get_index_directory(config.base_code_index_dir, source_key))
+ else:
+ code_index_path = workflow_state.info.vdb.code_index_path
+
full_text_search = FullTextSearch(cache_path=code_index_path)
if full_text_search.is_empty():
diff --git a/src/vuln_analysis/tools/source_grep.py b/src/vuln_analysis/tools/source_grep.py
new file mode 100644
index 000000000..496270e97
--- /dev/null
+++ b/src/vuln_analysis/tools/source_grep.py
@@ -0,0 +1,212 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Native Unix grep tool for fast source code searching.
+
+Provides an LLM-callable tool that uses native grep subprocess for
+faster searching compared to Python-based regex scanning.
+"""
+
+from pathlib import Path
+
+from aiq.builder.builder import Builder
+from aiq.builder.framework_enum import LLMFrameworkEnum
+from aiq.builder.function_info import FunctionInfo
+from aiq.cli.register_workflow import register_function
+from aiq.data_models.function import FunctionBaseConfig
+from pydantic import Field
+
+from exploit_iq_commons.logging.loggers_factory import LoggingFactory
+from vuln_analysis.tools.source_inspector import SourceInspector
+from vuln_analysis.utils.error_handling_decorator import catch_tool_errors
+
+SOURCE_GREP = "source_grep"
+
+logger = LoggingFactory.get_agent_logger(__name__)
+
+
+class SourceGrepToolConfig(FunctionBaseConfig, name=SOURCE_GREP):
+ """Fast grep search using native Unix grep subprocess."""
+
+ base_checker_dir: str = Field(
+ default=".cache/am_cache/checker",
+ description="Root directory for checker-specific artifacts.",
+ )
+ max_results: int = Field(
+ default=50,
+ description="Maximum number of grep results to return.",
+ )
+ context_lines: int = Field(
+ default=3,
+ description="Number of context lines around each match.",
+ )
+
+
+VALID_TARGETS = ("source", "logs", "patch")
+
+TARGET_EXTENSIONS: dict[str, list[str]] = {
+ "source": ["*.c", "*.h", "*.cpp", "*.hpp", "*.py", "*.go", "*.java", "*.spec", "*.cmake", "Makefile", "*.mk"],
+ "logs": [], # empty = search all files
+ "patch": ["*.patch", "*.diff"],
+}
+
+
+def _parse_query(query: str) -> tuple[str | list[str], str | None, str, bool]:
+ """Parse query string into (pattern(s), file_glob, target, word_boundary).
+
+ Supports formats:
+ - "pattern" -> search source (default)
+ - "pattern,*.c" -> search source, only .c files
+ - "target:pattern" -> search specific target
+ - "target:pattern,file_glob" -> search target with file filter
+ - "pattern -w" -> search with word boundary (whole words only)
+ - "target:pattern,file_glob -w" -> full format with word boundary
+ - "pattern1;pattern2,file.c" -> multiple patterns (only with file_glob)
+
+ Valid targets: source, logs, patch
+
+ Note: Multiple patterns (separated by ';') are only supported when
+ a file_glob is provided. This prevents overly broad multi-pattern searches.
+ """
+ query = query.strip().strip('"').strip("'")
+
+ word_boundary = False
+ if query.endswith(" -w"):
+ word_boundary = True
+ query = query[:-3].strip()
+
+ target = "source"
+ if ":" in query:
+ prefix, rest = query.split(":", 1)
+ if prefix in VALID_TARGETS:
+ target = prefix
+ query = rest
+
+ if "," in query:
+ parts = query.split(",", 1)
+ pattern_part = parts[0].strip()
+ file_glob = parts[1].strip() if len(parts) > 1 else None
+
+ # Multi-pattern support: only when file_glob is provided
+ if file_glob and ";" in pattern_part:
+ patterns = [p.strip() for p in pattern_part.split(";") if p.strip()]
+ return patterns, file_glob, target, word_boundary
+
+ return pattern_part, file_glob, target, word_boundary
+
+ return query, None, target, word_boundary
+
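+# Illustrative _parse_query results (derived from the format documented above):
+# "archive_read_open" -> ("archive_read_open", None, "source", False)
+# "logs:error:,*.log -w" -> ("error:", "*.log", "logs", True)
+# "a;b,archive_read.c" -> (["a", "b"], "archive_read.c", "source", False)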
+
+def _format_results(pattern: str, matches: list, root: Path) -> str:
+ """Format grep results for LLM consumption."""
+ if not matches:
+ return f"No matches found for '{pattern}'"
+
+ lines = [f"Found {len(matches)} match(es) for '{pattern}':\n"]
+ for i, match in enumerate(matches, 1):
+ try:
+ rel_path = match.file_path.relative_to(root)
+ except ValueError:
+ rel_path = match.file_path
+ lines.append(f"{i}. {rel_path}:{match.match_line_number}")
+ lines.append(f" {match.full_text.strip()}")
+ lines.append("")
+
+ return "\n".join(lines)
+
+
+@register_function(config_type=SourceGrepToolConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN])
+async def source_grep(config: SourceGrepToolConfig, builder: Builder): # pylint: disable=unused-argument
+ from vuln_analysis.runtime_context import ctx_state
+
+ @catch_tool_errors(SOURCE_GREP)
+ async def _arun(query: str) -> str:
+ """Search source code, build logs, or patches using native Unix grep.
+
+ Query format: '[target:]pattern[,file_glob][ -w]'
+
+ Targets:
+ - source (default): Package source code
+ - logs: Build compilation logs
+ - patch: Fixed patches from newer RPM version
+
+ Options:
+ - -w: Match whole words only (word boundary)
+ - Multiple patterns: use ';' separator ONLY with a specific file
+
+ Examples:
+ - 'archive_read_open' - search source files
+ - 'archive_read_open,*.c' - search only .c source files
+ - 'archive_read_open -w' - search for whole word only
+ - 'unsigned int cursor;unsigned int nodes,archive_read.c' - multiple patterns in one file
+ - 'logs:undefined reference' - search build logs for link errors
+ - 'logs:error:' - search build logs for error messages
+ - 'patch:CVE-2026-5121' - find patch for specific CVE
+ - 'patch:archive_read,*.patch' - search in patch files
+ """
+ workflow_state = ctx_state.get()
+
+ checker_context = None
+ if workflow_state.original_input and workflow_state.original_input.info:
+ checker_context = workflow_state.original_input.info.checker_context
+
+ if checker_context is None or not checker_context.source_key:
+ raise ValueError("Checker context or source_key not available in workflow state")
+
+ source_key = checker_context.source_key
+ pattern, file_glob, target, word_boundary = _parse_query(query)
+
+ target_dir = (Path(config.base_checker_dir) / source_key / target).resolve()
+
+ if not target_dir.is_dir():
+ raise ValueError(f"Target directory does not exist: {target_dir}")
+
+ inspector = SourceInspector(target_dir)
+ default_extensions = TARGET_EXTENSIONS.get(target, [])
+
+ logger.info("Source grep: searching for '%s' in %s (target: %s, glob: %s, word_boundary: %s)",
+ pattern, target_dir, target, file_glob or "default extensions", word_boundary)
+
+ matches = await inspector.grep_native(
+ patterns=pattern,
+ file_glob=file_glob,
+ word_boundary=word_boundary,
+ context_lines=config.context_lines,
+ max_results=config.max_results,
+ default_extensions=default_extensions,
+ )
+
+ logger.info("Source grep: found matches for '%s' in target '%s'", pattern, target)
+ return matches
+
+ yield FunctionInfo.from_fn(
+ _arun,
+ description=(
+ "Fast grep search using native Unix grep. "
+ "Query format: '[target:]pattern[,file_glob][ -w]'. "
+ "Targets: 'source' (default) for package source code, "
+ "'logs' for build compilation logs, "
+ "'patch' for fixed patches from newer RPM. "
+ "Add ' -w' suffix for whole-word matching. "
+ "Multiple patterns: use ';' separator ONLY with a specific file, e.g., "
+ "'pattern1;pattern2,filename.c' searches for both patterns in that file. "
+ "Examples: 'archive_read_open' searches source, "
+ "'archive_read_open,*.c' searches only C source files, "
+ "'archive_read_open -w' searches for whole word only, "
+ "'unsigned int cursor;unsigned int nodes,archive_read.c' searches multiple patterns in one file, "
+ "'logs:undefined reference' searches build logs, "
+ "'patch:CVE-2026-5121' searches patch files."
+ ),
+ )
diff --git a/src/vuln_analysis/tools/source_inspector.py b/src/vuln_analysis/tools/source_inspector.py
new file mode 100644
index 000000000..d881c3a9b
--- /dev/null
+++ b/src/vuln_analysis/tools/source_inspector.py
@@ -0,0 +1,219 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Generic filesystem utility for inspecting extracted RPM source trees.
+
+Provides low-level primitives (find, grep, read) that can be composed by
+pipeline code or called by an LLM agent in the future.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import re
+import subprocess
+from dataclasses import dataclass
+from pathlib import Path
+
+
+@dataclass
+class GrepMatch:
+ file_path: Path
+ line_number: int
+ line_content: str
+
+
+class SourceInspector:
+ """Filesystem inspector scoped to a root directory.
+
+ All returned paths are absolute. The class carries no domain-specific
+ logic (RPM, spec, changelog); callers compose the primitives for that.
+ """
+
+ def __init__(self, source_dir: Path) -> None:
+ self._root = source_dir.resolve()
+ if not self._root.is_dir():
+ raise FileNotFoundError(f"source_dir does not exist: {self._root}")
+
+ @property
+ def root(self) -> Path:
+ return self._root
+
+ def find_files(self, pattern: str, recursive: bool = True) -> list[Path]:
+ """Glob over the source tree.
+
+ Parameters
+ ----------
+ pattern:
+ Shell glob pattern, e.g. ``"*.spec"`` or ``"*.patch"``.
+ recursive:
+ If *True*, prepend ``**/`` for a deep search.
+ If *False*, glob the root level only (pattern used as-is).
+ """
+ glob_expr = f"**/{pattern}" if recursive else pattern
+ return sorted(self._root.glob(glob_expr))
+
+ def grep_content(
+ self,
+ pattern: str,
+ file_path: Path | None = None,
+ *,
+ recursive: bool = False,
+ ) -> list[GrepMatch]:
+ """Search file contents for a regex *pattern*.
+
+ Parameters
+ ----------
+ pattern:
+ Regular expression (case-sensitive by default).
+ file_path:
+ If given, search that file only, or (if it is a directory) every file
+ in that directory (one level, regular files only).
+ If the path does not exist, return no matches.
+ If *None*, search every file under *source_dir*
+ (depth controlled by *recursive*).
+ recursive:
+ Only used when *file_path* is ``None``.
+ ``False`` searches only root-level files; ``True`` walks the tree.
+ """
+ regex = re.compile(pattern)
+ matches: list[GrepMatch] = []
+
+ if file_path is not None:
+ resolved = file_path.resolve()
+ if resolved.is_file():
+ targets = [resolved]
+ elif resolved.is_dir():
+ targets = sorted(p for p in resolved.iterdir() if p.is_file())
+ else:
+ targets = []
+ elif recursive:
+ targets = sorted(p for p in self._root.rglob("*") if p.is_file())
+ else:
+ targets = sorted(p for p in self._root.iterdir() if p.is_file())
+
+ for fp in targets:
+ try:
+ lines = fp.read_text(encoding="utf-8", errors="replace").splitlines()
+ except (OSError, UnicodeDecodeError):
+ continue
+ for idx, line in enumerate(lines, start=1):
+ if regex.search(line):
+ matches.append(GrepMatch(file_path=fp, line_number=idx, line_content=line))
+ return matches
+
+ async def grep_native(
+ self,
+ patterns: str | list[str],
+ file_glob: str | None = None,
+ *,
+ case_insensitive: bool = False,
+ word_boundary: bool = False,
+ context_lines: int = 0,
+ max_results: int = 50,
+ default_extensions: list[str] | None = None,
+ ) -> str:
+ """Fast grep using native Unix grep subprocess.
+
+ Parameters
+ ----------
+ patterns:
+ Search pattern(s). Can be a single string or list of patterns.
+ When multiple patterns are provided, matches ANY of them (OR logic).
+ file_glob:
+ Optional file pattern (e.g., ``"*.c"``, ``"*.h"``). If provided,
+ overrides default_extensions.
+ case_insensitive:
+ If *True*, perform case-insensitive matching (``-i`` flag).
+ word_boundary:
+ If *True*, match whole words only (``-w`` flag).
+ context_lines:
+ Lines of context around match (``-C`` flag). Default 0.
+ max_results:
+ Stop after this many matching lines per file (grep ``-m``). Default 50.
+ default_extensions:
+ File patterns to search when *file_glob* is not provided.
+ ``None`` or an empty list searches all files (no ``--include`` filter).
+
+ Returns
+ -------
+ str
+ Raw grep output with matches found.
+ """
+ cmd = ["grep", "-rn", "-I"]
+
+ if case_insensitive:
+ cmd.append("-i")
+ if word_boundary:
+ cmd.append("-w")
+ if context_lines > 0:
+ cmd.extend(["-C", str(context_lines)])
+
+ if file_glob:
+ cmd.extend(["--include", file_glob])
+ elif default_extensions:
+ for ext in default_extensions:
+ cmd.extend(["--include", ext])
+ # else: default_extensions is None or empty -> no --include filter, search all files
+
+ cmd.extend(["-m", str(max_results)])
+
+ # Handle single or multiple patterns
+ if isinstance(patterns, list):
+ for p in patterns:
+ cmd.extend(["-e", p])
+ else:
+ cmd.extend(["--", patterns])
+
+ cmd.append(str(self._root))
+
+ def _run_grep() -> str:
+ result = subprocess.run(
+ cmd,
+ capture_output=True,
+ text=True,
+ errors="replace",
+ )
+ return result.stdout
+
+ stdout = await asyncio.to_thread(_run_grep)
+
+ return stdout
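+
+ # Example of the assembled command (illustrative):
+ # grep_native("foo", file_glob="*.c", word_boundary=True, context_lines=2)
+ # runs roughly: grep -rn -I -w -C 2 --include *.c -m 50 -- foo <root>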
+
+ def read_file(
+ self,
+ file_path: Path,
+ offset: int = 0,
+ max_lines: int | None = None,
+ ) -> str:
+ """Read file content starting from a line *offset*.
+
+ Parameters
+ ----------
+ file_path:
+ Absolute or relative path (resolved against *source_dir*).
+ offset:
+ 0-based line offset to start reading from.
+ max_lines:
+ Maximum number of lines to return. ``None`` means read to EOF.
+ """
+ resolved = file_path if file_path.is_absolute() else (self._root / file_path)
+ lines = resolved.read_text(encoding="utf-8", errors="replace").splitlines()
+ end = (offset + max_lines) if max_lines is not None else len(lines)
+ return "\n".join(lines[offset:end])
diff --git a/src/vuln_analysis/tools/tool_names.py b/src/vuln_analysis/tools/tool_names.py
index 6f46faa61..5771f6e96 100644
--- a/src/vuln_analysis/tools/tool_names.py
+++ b/src/vuln_analysis/tools/tool_names.py
@@ -47,6 +47,9 @@ class ToolNames:
FUNCTION_LIBRARY_VERSION_FINDER = "Function Library Version Finder"
"""Checks in which library version the function is used"""
+ SOURCE_GREP = "Source Grep"
+ """Fast grep search in source code using native Unix grep"""
+
# Export as module-level constants
CODE_SEMANTIC_SEARCH = ToolNames.CODE_SEMANTIC_SEARCH
@@ -58,6 +61,7 @@ class ToolNames:
CVE_WEB_SEARCH = ToolNames.CVE_WEB_SEARCH
CONTAINER_ANALYSIS_DATA = ToolNames.CONTAINER_ANALYSIS_DATA
FUNCTION_LIBRARY_VERSION_FINDER = ToolNames.FUNCTION_LIBRARY_VERSION_FINDER
+SOURCE_GREP = ToolNames.SOURCE_GREP
@@ -72,4 +76,5 @@ class ToolNames:
'CONTAINER_ANALYSIS_DATA',
'FUNCTION_LOCATOR',
'FUNCTION_LIBRARY_VERSION_FINDER',
+ 'SOURCE_GREP',
]
\ No newline at end of file
diff --git a/src/vuln_analysis/utils/clients/nvd_client.py b/src/vuln_analysis/utils/clients/nvd_client.py
index 1b13ff51c..fd6a43d54 100644
--- a/src/vuln_analysis/utils/clients/nvd_client.py
+++ b/src/vuln_analysis/utils/clients/nvd_client.py
@@ -125,19 +125,21 @@ async def _get_cwe_elements(self, cve_obj: dict) -> dict:
those CWEs.
"""
# Get CWE name
- cwe_id = None
+ raw_cwe_id = None
weaknesses = cve_obj.get('weaknesses', [])
- cwe_id = self._get_cwe(weaknesses)
+ raw_cwe_id = self._get_cwe(weaknesses)
cwe_link = None
cwe_name = None
cwe_description = None
cwe_extended_description = None
- if cwe_id is not None:
- if cwe_id.startswith('CWE-'):
- cwe_id = cwe_id.replace('CWE-', '', 1)
+ cwe_id_numeric = None
+ if raw_cwe_id is not None:
+ cwe_id_numeric = raw_cwe_id
+ if cwe_id_numeric.startswith('CWE-'):
+ cwe_id_numeric = cwe_id_numeric.replace('CWE-', '', 1)
- if cwe_id.isnumeric():
- cwe_link = self._cwe_details_url_template.format(CWE_ID=cwe_id)
+ if cwe_id_numeric.isnumeric():
+ cwe_link = self._cwe_details_url_template.format(CWE_ID=cwe_id_numeric)
if cwe_link is not None:
soup = await self._get_soup(cwe_link)
@@ -155,7 +157,9 @@ async def _get_cwe_elements(self, cve_obj: dict) -> dict:
if extended_description_div:
cwe_extended_description = extended_description_div.find('div', class_='indent').text.strip()
+ cwe_id = f"CWE-{cwe_id_numeric}" if cwe_id_numeric and cwe_id_numeric.isnumeric() else raw_cwe_id
return {
+ "cwe_id": cwe_id,
"cwe_name": cwe_name,
"cwe_description": cwe_description,
"cwe_extended_description": cwe_extended_description,
@@ -330,6 +334,7 @@ async def get_intel(self, cve_id: str) -> CveIntelNvd:
cvss_vector=cvss_vector,
cvss_base_score=cvss_base_score,
cvss_severity=cvss_severity,
+ cwe_id=cwe_elements["cwe_id"],
cwe_name=cwe_elements["cwe_name"],
cwe_description=cwe_elements["cwe_description"],
cwe_extended_description=cwe_elements["cwe_extended_description"],
diff --git a/src/vuln_analysis/utils/full_text_search.py b/src/vuln_analysis/utils/full_text_search.py
index 0a02ab991..132602ebf 100644
--- a/src/vuln_analysis/utils/full_text_search.py
+++ b/src/vuln_analysis/utils/full_text_search.py
@@ -194,7 +194,7 @@ def add_documents_from_code_path(self,
code_path: str,
include_extensions: list[str],
use_langparser=True,
- splitter=True):
+ splitter=True, no_extension=()):
"""Create an index from raw files."""
doc_content = []
@@ -218,7 +218,7 @@ def add_documents_from_code_path(self,
for root, _, files in os.walk(code_path):
for file in files:
- if any(file.endswith(ext) for ext in include_extensions):
+ if any(file.endswith(ext) for ext in include_extensions) or file in no_extension:
file_path = os.path.join(root, file)
try:
with open(file_path, "r") as f:
diff --git a/src/vuln_analysis/utils/osv_patch_retriever.py b/src/vuln_analysis/utils/osv_patch_retriever.py
new file mode 100644
index 000000000..aaba10a22
--- /dev/null
+++ b/src/vuln_analysis/utils/osv_patch_retriever.py
@@ -0,0 +1,449 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""OSV Patch Retriever - fetch upstream fix patches from OSV when RPM patches are unavailable."""
+
+from __future__ import annotations
+
+import os
+import re
+from typing import TYPE_CHECKING
+
+import aiohttp
+from pydantic import BaseModel
+from unidiff import PatchSet
+
+from exploit_iq_commons.logging.loggers_factory import LoggingFactory
+from vuln_analysis.utils.async_http_utils import request_with_retry
+from vuln_analysis.functions.code_agent_graph_defs import OSVPatchResult
+
+if TYPE_CHECKING:
+ from vuln_analysis.functions.code_agent_graph_defs import ParsedPatch
+
+logger = LoggingFactory.get_agent_logger(__name__)
+
+_OSV_API_URL = os.environ.get("OSV_API_URL", "https://api.osv.dev/v1/vulns/")
+_OSV_TIMEOUT_SECONDS = int(os.environ.get("OSV_TIMEOUT_SECONDS", "10"))
+_GITHUB_PATCH_TIMEOUT_SECONDS = int(os.environ.get("GITHUB_PATCH_TIMEOUT_SECONDS", "30"))
+
+_BINARY_FILE_EXTENSIONS = frozenset({
+ '.uu', '.uue', '.iso', '.bin', '.gz', '.bz2', '.xz', '.zip', '.tar', '.tgz', '.tbz2',
+ '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.ico', '.webp',
+ '.pdf', '.doc', '.docx', '.xls', '.xlsx',
+ '.exe', '.dll', '.so', '.dylib', '.a', '.o', '.obj',
+ '.pyc', '.pyo', '.class', '.jar', '.war',
+})
+
+_GITHUB_REPO_PATTERN = re.compile(r"https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$")
+
+
+class OSVAffectedRange(BaseModel):
+ """Represents a Git range from an OSV affected block."""
+ repo_url: str | None = None
+ fixed_commit: str | None = None
+ introduced_commit: str | None = None
+
+
+def _is_binary_file_path(path: str) -> bool:
+ """Check if file path has a binary file extension."""
+ path_lower = path.lower()
+ return any(path_lower.endswith(ext) for ext in _BINARY_FILE_EXTENSIONS)
+
+
+def _version_in_range(version: str, introduced: str | None, fixed: str | None) -> bool:
+ """Check if provided upstream version falls within [introduced, fixed) range.
+
+ Returns True if version >= introduced (or introduced is None) AND version < fixed (or fixed is None).
+ """
+ try:
+ from packaging.version import parse as parse_version
+ v = parse_version(version)
+ if introduced:
+ try:
+ if v < parse_version(introduced):
+ return False
+ except Exception:
+ pass
+ if fixed:
+ try:
+ if v >= parse_version(fixed):
+ return False
+ except Exception:
+ pass
+ return True
+ except Exception:
+ return True
+
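+# Illustrative: _version_in_range("3.0.7", introduced="3.0.0", fixed="3.0.8") is
+# True (3.0.0 <= 3.0.7 < 3.0.8), while _version_in_range("3.0.8", "3.0.0", "3.0.8")
+# is False because the fixed version itself is out of range.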
+
+def _parse_patch_content(patch_content: str, patch_filename: str) -> "ParsedPatch | None":
+ """Parse patch content string into structured ParsedPatch model.
+
+ Reuses the same logic as code_agent_graph_defs.parse_patch_file but works on string content.
+ """
+ from vuln_analysis.functions.code_agent_graph_defs import ParsedPatch, PatchFile, PatchHunk
+
+ try:
+ patch_set = PatchSet.from_string(patch_content)
+ except Exception:
+ logger.warning("_parse_patch_content: failed to parse patch content")
+ return None
+
+ files: list[PatchFile] = []
+ for patched_file in patch_set:
+ if patched_file.is_binary_file:
+ continue
+ if _is_binary_file_path(patched_file.target_file):
+ continue
+
+ hunks: list[PatchHunk] = []
+ for hunk in patched_file:
+ context, removed, added = [], [], []
+ for line in hunk:
+ if line.is_context:
+ context.append(str(line.value).rstrip("\n"))
+ elif line.is_removed:
+ removed.append(str(line.value).rstrip("\n"))
+ elif line.is_added:
+ added.append(str(line.value).rstrip("\n"))
+
+ hunks.append(PatchHunk(
+ source_start=hunk.source_start,
+ source_length=hunk.source_length,
+ target_start=hunk.target_start,
+ target_length=hunk.target_length,
+ context_lines=context,
+ removed_lines=removed,
+ added_lines=added,
+ ))
+
+ files.append(PatchFile(
+ source_path=patched_file.source_file,
+ target_path=patched_file.target_file,
+ hunks=hunks,
+ is_new_file=patched_file.is_added_file,
+ is_deleted_file=patched_file.is_removed_file,
+ ))
+
+ return ParsedPatch(patch_filename=patch_filename, files=files)
+
+
+def _extract_commit_metadata(patch_content: str) -> tuple[str | None, str | None, str | None]:
+ """Extract commit message, author, and date from GitHub .patch format.
+
+ GitHub .patch format starts with:
+ From Mon Sep 17 00:00:00 2001
+ From: Author Name
+ Date: Tue, 1 Jan 2024 12:00:00 +0000
+ Subject: [PATCH] Commit message
+
+ Extended commit message...
+ ---
+
+ """
+ lines = patch_content.split('\n')
+ author = None
+ date = None
+ subject_lines = []
+ in_subject = False
+
+ for line in lines:
+ if line.startswith('From:'):
+ author = line[5:].strip()
+ elif line.startswith('Date:'):
+ date = line[5:].strip()
+ elif line.startswith('Subject:'):
+ in_subject = True
+ subject_part = line[8:].strip()
+ if subject_part.startswith('[PATCH'):
+ idx = subject_part.find(']')
+ if idx != -1:
+ subject_part = subject_part[idx + 1:].strip()
+ subject_lines.append(subject_part)
+ elif in_subject:
+ if line.startswith('---') or line.startswith('diff --git'):
+ break
+ if line.strip() == '':
+ in_subject = False
+ else:
+ subject_lines.append(line.strip())
+
+ commit_message = ' '.join(subject_lines).strip() if subject_lines else None
+ return commit_message, author, date
+
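+# Illustrative: a header containing "From: Jane Doe <jane@example.com>",
+# "Date: Tue, 1 Jan 2024 12:00:00 +0000" and "Subject: [PATCH] Fix OOB read"
+# yields ("Fix OOB read", "Jane Doe <jane@example.com>",
+# "Tue, 1 Jan 2024 12:00:00 +0000"); the sample values are placeholders.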
+
+class OSVPatchRetriever:
+ """Retrieve upstream fix patches from OSV when RPM patches are unavailable.
+
+ Usage:
+ async with aiohttp.ClientSession() as session:
+ retriever = OSVPatchRetriever(session=session)
+ result = await retriever.get_fix_patch("CVE-2024-1234", "3.0.7", "openssl")
+ if result and result.parsed_patch:
+ # Use result.parsed_patch for agent context
+ pass
+ """
+
+ def __init__(
+ self,
+ session: aiohttp.ClientSession,
+ osv_timeout: int = _OSV_TIMEOUT_SECONDS,
+ github_timeout: int = _GITHUB_PATCH_TIMEOUT_SECONDS,
+ ):
+ self._session = session
+ self._osv_timeout = aiohttp.ClientTimeout(total=osv_timeout)
+ self._github_timeout = aiohttp.ClientTimeout(total=github_timeout)
+
+ async def get_fix_patch(
+ self,
+ cve_id: str,
+ upstream_version: str,
+ package_name: str | None = None,
+ ) -> OSVPatchResult | None:
+ """Main entry point - orchestrates the full workflow.
+
+ Args:
+ cve_id: CVE identifier (e.g., "CVE-2024-1234")
+ upstream_version: Upstream version from TargetPackage.version (e.g., "3.0.7")
+ package_name: Optional package name to help match the correct affected block
+
+ Returns:
+ OSVPatchResult with patch data, or None if no fix found
+ """
+ try:
+ osv_data = await self._query_osv(cve_id)
+ if not osv_data:
+ return None
+
+ # 1. Try to get the highly-specific patch URL from references first
+ patch_url = self._extract_commit_from_references(osv_data)
+ fixed_commit = None
+ repo_url = None
+ if patch_url:
+ # Extract repo_url and fixed_commit from the patch_url for the result object
+ repo_url = patch_url.split('/commit/')[0] if '/commit/' in patch_url else patch_url.split('/pull/')[0]
+ fixed_commit = patch_url.split('/')[-1].replace('.patch', '')
+ logger.info("OSV: Found precise fix commit in references for %s", cve_id)
+ else:
+ # second try to find the fix commit from the affected block
+ affected = self._find_matching_affected(osv_data, package_name)
+ if not affected:
+ logger.info("OSV: No affected block with fix found for %s", cve_id)
+ return None
+
+ range_info = self._extract_fix_commit(affected)
+ if not range_info.fixed_commit or not range_info.repo_url:
+ logger.info("OSV: No fixed commit found for %s", cve_id)
+ return None
+
+ patch_url = self._build_patch_url(range_info.repo_url, range_info.fixed_commit)
+ fixed_commit = range_info.fixed_commit[:8]
+ repo_url = range_info.repo_url
+ if not patch_url:
+ logger.info("OSV: Could not build patch URL for %s (non-GitHub repo?)", cve_id)
+ return None
+
+ patch_content = await self._fetch_github_patch(patch_url)
+ if not patch_content:
+ return None
+
+ commit_message, commit_author, commit_date = _extract_commit_metadata(patch_content)
+ parsed_patch = _parse_patch_content(patch_content, f"{cve_id}_{fixed_commit}.patch")
+
+ return OSVPatchResult(
+ cve_id=cve_id,
+ fixed_commit=fixed_commit,
+ repo_url=repo_url,
+ patch_url=patch_url,
+ patch_content=patch_content,
+ parsed_patch=parsed_patch,
+ commit_message=commit_message,
+ commit_author=commit_author,
+ commit_date=commit_date,
+ )
+
+ except Exception:
+ logger.warning("OSV patch retrieval failed for %s", cve_id, exc_info=True)
+ return None
+
+ async def _query_osv(self, cve_id: str) -> dict | None:
+ """Query OSV API for CVE data.
+
+ Args:
+ cve_id: CVE identifier
+
+ Returns:
+ OSV vulnerability data dict, or None on failure
+ """
+ url = f"{_OSV_API_URL}{cve_id}"
+ try:
+ async with request_with_retry(
+ session=self._session,
+ request_kwargs={
+ 'method': 'GET',
+ 'url': url,
+ 'timeout': self._osv_timeout,
+ },
+ max_retries=3,
+ sleep_time=0.5,
+ log_on_error=False,
+ ) as response:
+ return await response.json()
+ except aiohttp.ClientResponseError as e:
+ if e.status == 404:
+ logger.info("OSV: CVE %s not found", cve_id)
+ else:
+ logger.warning("OSV query failed for %s: %s", cve_id, e)
+ return None
+ except Exception as e:
+ logger.warning("OSV query failed for %s: %s", cve_id, e)
+ return None
+
+ def _extract_commit_from_references(self, osv_data: dict) -> str | None:
+ """Attempt to find the exact fix commit URL from the OSV references array.
+
+ Args:
+ osv_data: OSV vulnerability data dict
+
+ Returns:
+ The patch URL if found, otherwise None
+ """
+ references = osv_data.get("references", [])
+
+ for ref in references:
+ if ref.get("type") == "FIX":
+ url = ref.get("url", "")
+ # We look for GitHub URLs containing either /commit/ or /pull/
+ if "github.com" in url and ("/commit/" in url or "/pull/" in url):
+ if not url.endswith(".patch"):
+ return f"{url}.patch"
+ return url
+
+ return None
+
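+ # Illustrative: a reference {"type": "FIX", "url":
+ # "https://github.com/madler/zlib/commit/<sha>"} is returned with ".patch"
+ # appended; the repo and <sha> here are placeholders.
+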
+ def _find_matching_affected(
+ self,
+ osv_data: dict,
+ package_name: str | None = None,
+ ) -> dict | None:
+ """Find an affected block that has a GIT range with a fixed commit.
+
+ Args:
+ osv_data: OSV vulnerability data
+ package_name: Accepted for future name-based filtering; not used by the current implementation
+
+ Returns:
+ Matching affected block dict, or None if no match
+ """
+ for affected in osv_data.get("affected", []):
+ for range_block in affected.get("ranges", []):
+ if range_block.get("type") == "GIT":
+ for event in range_block.get("events", []):
+ if "fixed" in event:
+ return affected
+
+ return None
+
+ def _extract_fix_commit(self, affected: dict) -> OSVAffectedRange:
+ """Extract the fixed commit hash and repo URL from an affected block.
+
+ Args:
+ affected: OSV affected block
+
+ Returns:
+ OSVAffectedRange with repo_url and fixed_commit
+ """
+ result = OSVAffectedRange()
+
+ ranges = affected.get("ranges", [])
+ for range_block in ranges:
+ if range_block.get("type") != "GIT":
+ continue
+
+ repo = range_block.get("repo")
+ if repo:
+ result.repo_url = repo
+
+ events = range_block.get("events", [])
+ for event in events:
+ if "introduced" in event and event["introduced"] != "0":
+ result.introduced_commit = event["introduced"]
+ if "fixed" in event:
+ result.fixed_commit = event["fixed"]
+
+ if result.fixed_commit:
+ break
+
+ return result
+
+ def _build_patch_url(self, repo_url: str, commit_sha: str) -> str | None:
+ """Build GitHub patch URL from repo URL and commit SHA.
+
+ Args:
+ repo_url: Git repository URL (e.g., "https://github.com/openssl/openssl")
+ commit_sha: Git commit hash
+
+ Returns:
+ Patch URL (e.g., "https://github.com/openssl/openssl/commit/.patch"),
+ or None if not a GitHub repo
+ """
+ match = _GITHUB_REPO_PATTERN.match(repo_url)
+ if not match:
+ if "github.com" in repo_url:
+ parts = repo_url.rstrip('/').split('/')
+ if len(parts) >= 2:
+ repo_path = '/'.join(parts[-2:]).replace('.git', '')
+ return f"https://github.com/{repo_path}/commit/{commit_sha}.patch"
+ logger.debug("Non-GitHub repo URL: %s", repo_url)
+ return None
+
+ repo_path = match.group(1)
+ return f"https://github.com/{repo_path}/commit/{commit_sha}.patch"
+
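+ # Illustrative: _build_patch_url("https://github.com/openssl/openssl.git", "abc1234")
+ # -> "https://github.com/openssl/openssl/commit/abc1234.patch"; non-GitHub
+ # repos return None.
+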
+ async def _fetch_github_patch(self, patch_url: str) -> str | None:
+ """Download patch content from GitHub.
+
+ Args:
+ patch_url: URL to the .patch file
+
+ Returns:
+ Patch content string, or None on failure
+ """
+ try:
+ async with request_with_retry(
+ session=self._session,
+ request_kwargs={
+ 'method': 'GET',
+ 'url': patch_url,
+ 'timeout': self._github_timeout,
+ },
+ max_retries=3,
+ sleep_time=0.5,
+ log_on_error=False,
+ ) as response:
+ return await response.text()
+ except aiohttp.ClientResponseError as e:
+ if e.status == 404:
+ logger.info("GitHub patch not found: %s", patch_url)
+ else:
+ logger.warning("GitHub patch fetch failed: %s - %s", patch_url, e)
+ return None
+ except Exception as e:
+ logger.warning("GitHub patch fetch failed: %s - %s", patch_url, e)
+ return None
diff --git a/src/vuln_analysis/utils/output_formatter.py b/src/vuln_analysis/utils/output_formatter.py
index 5bbbe5e60..6d3acf591 100644
--- a/src/vuln_analysis/utils/output_formatter.py
+++ b/src/vuln_analysis/utils/output_formatter.py
@@ -109,7 +109,7 @@ def _add_header(markdown_content, model_dict: AgentMorpheusOutput):
markdown_content[cve_id].append(f"# Vulnerability Analysis Report for {cve_id}")
markdown_content[cve_id].append(f"> **Container Analyzed:** `{input_image.name}:{input_image.tag}`\n\n")
# Only add SBOM info if it is a file location
- if input_image.sbom_info.type == "file":
+ if input_image.sbom_info and input_image.sbom_info.type == "file":
markdown_content[cve_id].append(f"> **SBOM Info:** `{input_image.sbom_info}`\n\n")
markdown_content[cve_id].append(f"> **Status:** {_get_expoiltability_text(output.justification.status)}")
diff --git a/src/vuln_analysis/utils/package_identifier.py b/src/vuln_analysis/utils/package_identifier.py
new file mode 100644
index 000000000..6c3b8c6a5
--- /dev/null
+++ b/src/vuln_analysis/utils/package_identifier.py
@@ -0,0 +1,333 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+
+from univers import versions
+
+from exploit_iq_commons.data_models.checker_status import EnumIdentifyResult, PackageCheckerStatus, PackageIdentifyResult
+from exploit_iq_commons.data_models.cve_intel import CveIntel
+from exploit_iq_commons.logging.loggers_factory import LoggingFactory
+from exploit_iq_commons.utils.string_utils import package_names_match
+from exploit_iq_commons.data_models.common import TargetPackage
+
+logger = LoggingFactory.get_agent_logger(__name__)
+
+_RPM_NEVRA_RE = re.compile(r"^(.+?)-(\d+):(.+?)-(.+)$")
+_DIST_TAG_RE = re.compile(r"(el\d+)")
+_ARCH_SUFFIXES = frozenset({"x86_64", "aarch64", "i686", "noarch", "s390x", "ppc64le", "armv7hl", "src"})
+
+
+def _strip_arch_suffix(release_arch: str) -> str:
+ """Remove .arch suffix if present, preserving dist tags like .el6_10."""
+ if "." in release_arch:
+ base, suffix = release_arch.rsplit(".", 1)
+ if suffix in _ARCH_SUFFIXES:
+ return base
+ return release_arch
+
+
+def _extract_dist_tag(release: str) -> str | None:
+ """Extract the RHEL dist-tag family (e.g. 'el8') from a release string."""
+ m = _DIST_TAG_RE.search(release)
+ return m.group(1) if m else None
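+
+# Illustrative: _RPM_NEVRA_RE splits "openssl-1:3.0.7-27.el9.x86_64" into
+# name "openssl", epoch "1", version "3.0.7" and release+arch "27.el9.x86_64";
+# _strip_arch_suffix("27.el9.x86_64") -> "27.el9", _extract_dist_tag("27.el9") -> "el9".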
+
+
+class PackageIdentifier:
+ """
+ Deterministic PackageIdentify phase: resolves package identity from intel,
+ cross-references the SBOM, checks version ranges, and locates RPMs in cache.
+ """
+
+ def __init__(
+ self,
+ target_package: TargetPackage,
+ ):
+ self._target_package = target_package
+
+
+ def identify(self, intel: CveIntel | None) -> tuple[PackageCheckerStatus, PackageIdentifyResult]:
+ """Run PackageIdentify for a single CVE."""
+
+ package_identify = PackageIdentifyResult()
+ status = PackageCheckerStatus.OK
+ if intel is None:
+ status = PackageCheckerStatus.ERROR_PKG_IDENT_NO_INTEL
+ return status, package_identify
+
+ if not self._is_cve_for_target_package(intel):
+ status = PackageCheckerStatus.PKG_IDENT_CVE_MISMATCH
+ package_identify.is_target_package_affected = EnumIdentifyResult.NO
+ return status, package_identify
+
+ package_identify.is_target_package_affected = self._is_target_package_affected(intel, package_identify)
+
+ package_identify.is_target_package_fixed = self._is_target_package_fixed(intel, package_identify)
+
+ if (package_identify.is_target_package_affected == EnumIdentifyResult.NO
+ or package_identify.is_target_package_fixed == EnumIdentifyResult.YES):
+ status = PackageCheckerStatus.PKG_IDENT_NOT_VUL
+
+ return status, package_identify
+
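+ # Usage sketch (illustrative):
+ # status, result = PackageIdentifier(target_package).identify(intel)
+ # if status == PackageCheckerStatus.PKG_IDENT_NOT_VUL:
+ # ... # caller can skip deeper analysis for this CVE
+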
+ # ------------------------------------------------------------------
+ # Internal helpers
+ # ------------------------------------------------------------------
+
+ def _find_and_locate_rpm(self, intel: CveIntel) -> list[str]:
+ """Extract deduplicated RPM package names from RHSA package_state."""
+ packages = self._extract_rhsa(intel)
+ packages = [p for p in packages if "/" not in p.get("package_name", "/")]
+ seen: set[str] = set()
+ names: list[str] = []
+ for pkg in packages:
+ name = pkg.get("package_name")
+ if name and name not in seen:
+ seen.add(name)
+ names.append(name)
+ return names
+
+ def _is_cve_for_target_package(self, intel: CveIntel) -> bool:
+ """Validate CVE applies to target package via RHSA package_state.
+
+ Returns True if validation passes or cannot be performed.
+ Returns False only if RHSA explicitly lists packages and target is NOT among them.
+ """
+ if not intel.rhsa or not intel.rhsa.package_state:
+ return True # No RHSA data to validate against
+
+ target_name = self._target_package.name
+ for ps in intel.rhsa.package_state:
+ if ps.package_name and package_names_match(target_name, ps.package_name):
+ return True
+ return False # RHSA has packages but target not found
+
+ def _is_target_package_affected(
+ self, intel: CveIntel, package_identify: PackageIdentifyResult,
+ ) -> EnumIdentifyResult:
+ """Determine whether the target package is affected by this CVE.
+
+ Task 1: populate affected_rpm_list from RHSA package_state.
+ Task 2: match target package by name + version range.
+ Only returns NO with definitive proof; defaults to UNKNOWN otherwise.
+ """
+ rpm_names = self._find_and_locate_rpm(intel)
+ if not rpm_names:
+ return EnumIdentifyResult.UNKNOWN
+ package_identify.affected_rpm_list = rpm_names
+
+ target_name = self._target_package.name
+ name_matched = any(package_names_match(target_name, name) for name in rpm_names)
+
+ if name_matched:
+ if self._target_package.version:
+ in_range = self._version_in_affected_range(self._target_package.version, intel)
+ return EnumIdentifyResult.YES if in_range else EnumIdentifyResult.NO
+ return EnumIdentifyResult.YES
+
+ if self._target_package.version and intel.nvd and intel.nvd.configurations:
+ in_range = self._version_in_affected_range(self._target_package.version, intel)
+ return EnumIdentifyResult.UNKNOWN if in_range else EnumIdentifyResult.NO
+
+ return EnumIdentifyResult.UNKNOWN
+
+ def _is_target_package_fixed(self, intel: CveIntel, package_identify: PackageIdentifyResult) -> EnumIdentifyResult:
+ """Determine whether the target package is already running the fixed version.
+
+ Task 1: populate fixed_rpm_list from RHSA affected_release.
+ Task 2: compare target version+release against fix NVR.
+ """
+ fix_entries = self._extract_fixed_rpms(intel)
+ if not fix_entries:
+ return EnumIdentifyResult.UNKNOWN
+ package_identify.fixed_rpm_list = [e["nevra"] for e in fix_entries]
+
+ target_name = self._target_package.name
+ matching = [e for e in fix_entries if package_names_match(target_name, e["name"])]
+ if not matching:
+ return EnumIdentifyResult.UNKNOWN
+
+ # NOTE: Version comparison disabled to test Option A (rely entirely on Verify phase).
+ # fixed_rpm_list is still populated for reference/logging.
+ # To re-enable, uncomment the block below.
+ return EnumIdentifyResult.UNKNOWN
+
+ # --- DISABLED: Version comparison logic ---
+ # target_version = self._target_package.version
+ # target_release = self._target_package.release
+ #
+ # fix = matching[0]
+ # try:
+ # target_nvr = f"{target_version}-{target_release}"
+ # fix_nvr = f"{fix['version']}-{fix['release']}"
+ #
+ # target_dist = _extract_dist_tag(target_release) if target_release else None
+ # fix_dist = _extract_dist_tag(fix["release"])
+ # if target_dist and fix_dist and target_dist != fix_dist:
+ # logger.debug(
+ # "Cross-stream fix comparison skipped: target=%s fix=%s",
+ # target_dist, fix_dist,
+ # )
+ # return EnumIdentifyResult.UNKNOWN
+ #
+ # if versions.RpmVersion(target_nvr) >= versions.RpmVersion(fix_nvr):
+ # return EnumIdentifyResult.YES
+ # return EnumIdentifyResult.NO
+ # except Exception as exc:
+ # logger.debug("Fix version comparison failed: %s", exc)
+ # return EnumIdentifyResult.UNKNOWN
+
+
+ def _version_in_affected_range(self, target_version: str, intel: CveIntel) -> bool:
+ """Check if target_version falls within any NVD configuration affected range."""
+ if intel.nvd is None or not intel.nvd.configurations:
+ return True # no range data -> conservatively assume affected
+
+ target_name = self._target_package.name
+ matched_any_config = False
+ for config in intel.nvd.configurations:
+ if not package_names_match(target_name, config.package):
+ continue
+ matched_any_config = True
+ version_range = [
+ config.versionStartExcluding,
+ config.versionEndExcluding,
+ config.versionStartIncluding,
+ config.versionEndIncluding,
+ ]
+ if all(v is None for v in version_range):
+ continue
+ try:
+ if self._check_version_in_range(target_version, version_range):
+ return True
+ except Exception as exc:
+ logger.debug("Version comparison failed for %s: %s", target_version, exc)
+ return True # conservative: assume affected on error
+
+ if not matched_any_config:
+ return True # no NVD data for this package -> conservatively assume affected
+ return False
+
+ @staticmethod
+ def _check_version_in_range(version_to_check: str, version_range: list[str | None]) -> bool:
+ """Reuse the same logic as VulnerableDependencyChecker._check_version_in_range."""
+ ver_start_excl, ver_end_excl, ver_start_incl, ver_end_incl = version_range
+
+ all_versions = [v for v in version_range if v is not None] + [version_to_check]
+ has_el = any("el" in str(v) for v in all_versions)
+ has_deb = any("deb" in str(v) or "ubuntu" in str(v) for v in all_versions)
+
+ if has_el:
+ vfunc = versions.RpmVersion
+ elif has_deb:
+ vfunc = versions.DebianVersion
+ else:
+ vfunc = versions.GenericVersion
+
+ vtc = vfunc(version_to_check)
+ vsi = vfunc(ver_start_incl) if ver_start_incl else None
+ vse = vfunc(ver_start_excl) if ver_start_excl else None
+ vei = vfunc(ver_end_incl) if ver_end_incl else None
+ vee = vfunc(ver_end_excl) if ver_end_excl else None
+
+ if vsi:
+ if not (vsi <= vtc):
+ return False
+ elif vse:
+ if not (vse < vtc):
+ return False
+
+ if vei:
+ if not (vtc <= vei):
+ return False
+ elif vee:
+ if not (vtc < vee):
+ return False
+
+ return True
+
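+ # Illustrative: _check_version_in_range("1.2.3", [None, "2.0.0", "1.0.0", None])
+ # is True (start-inclusive 1.0.0 <= 1.2.3 < end-exclusive 2.0.0, compared as
+ # GenericVersion); strings containing "el" or "deb"/"ubuntu" switch the
+ # comparison to RPM or Debian version semantics.
+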
+ # ------------------------------------------------------------------
+ # Intel extraction
+ # ------------------------------------------------------------------
+
+ @staticmethod
+ def _extract_rhsa(intel: CveIntel) -> list[dict]:
+ if intel.rhsa is None or not intel.rhsa.package_state:
+ return []
+ packages = []
+ for ps in intel.rhsa.package_state:
+ if ps.package_name:
+ packages.append({"package_name": ps.package_name})
+ return packages
+
+ @staticmethod
+ def _extract_fixed_rpms(intel: CveIntel) -> list[dict]:
+ """Extract all fix entries from RHSA affected_release.
+
+ Returns a list of dicts with keys: nevra, name, version, release.
+ """
+ if intel.rhsa is None or not hasattr(intel.rhsa, "affected_release"):
+ return []
+ releases = intel.rhsa.affected_release
+ if not releases:
+ return []
+ results: list[dict] = []
+ seen: set[str] = set()
+ for entry in releases:
+ raw = entry.get("package") if isinstance(entry, dict) else getattr(entry, "package", None)
+ if not raw:
+ continue
+ m = _RPM_NEVRA_RE.match(raw)
+ if not m:
+ continue
+ name = m.group(1)
+ if "/" in name:
+ continue
+ if name in seen:
+ continue
+ seen.add(name)
+ version = m.group(3)
+ release_arch = m.group(4)
+ release = _strip_arch_suffix(release_arch)
+ results.append({"nevra": raw, "name": name, "version": version, "release": release})
+ return results
+
+ @staticmethod
+ def _extract_fix_info(intel: CveIntel | None, resolved_name: str) -> dict:
+ """Extract fix NVR from RHSA affected_release for the resolved package.
+
+ Returns a dict with keys nevra, name, version, release when a matching
+ fix entry is found; empty dict otherwise.
+ """
+ if intel is None or intel.rhsa is None or not hasattr(intel.rhsa, "affected_release"):
+ return {}
+ releases = intel.rhsa.affected_release
+ if not releases:
+ return {}
+ for entry in releases:
+ raw = entry.get("package") if isinstance(entry, dict) else getattr(entry, "package", None)
+ if not raw:
+ continue
+ m = _RPM_NEVRA_RE.match(raw)
+ if not m:
+ continue
+ name = m.group(1)
+ if name.lower() != resolved_name.lower():
+ continue
+ version = m.group(3)
+ release_arch = m.group(4)
+ release = _strip_arch_suffix(release_arch)
+ return {"nevra": raw, "name": name, "version": version, "release": release}
+ return {}
+