From cdf9dd579f29d281edc605153c53d44da5420f03 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Wed, 15 Apr 2026 17:18:09 +0300 Subject: [PATCH 01/46] rpm analyzier mile stone one --- Dockerfile | 1 + pyproject.toml | 1 + .../data_models/checker_status.py | 74 +++++ src/exploit_iq_commons/data_models/common.py | 22 +- src/exploit_iq_commons/data_models/info.py | 2 + src/exploit_iq_commons/data_models/input.py | 16 + .../utils/source_rpm_downloader.py | 3 +- .../configs/brew/internal-user-profile.yml | 24 ++ .../configs/config-http-openai.yml | 19 ++ .../functions/cve_checker_segmentation.py | 122 ++++++++ .../functions/cve_package_code_agent.py | 62 ++++ .../functions/cve_source_acquisition.py | 132 +++++++++ src/vuln_analysis/register.py | 127 +++++++- src/vuln_analysis/tools/brew_downloader.py | 266 +++++++++++++++++ src/vuln_analysis/utils/full_text_search.py | 4 +- src/vuln_analysis/utils/package_identifier.py | 280 ++++++++++++++++++ 16 files changed, 1148 insertions(+), 7 deletions(-) create mode 100644 src/exploit_iq_commons/data_models/checker_status.py create mode 100644 src/vuln_analysis/configs/brew/internal-user-profile.yml create mode 100644 src/vuln_analysis/functions/cve_checker_segmentation.py create mode 100644 src/vuln_analysis/functions/cve_package_code_agent.py create mode 100644 src/vuln_analysis/functions/cve_source_acquisition.py create mode 100644 src/vuln_analysis/tools/brew_downloader.py create mode 100644 src/vuln_analysis/utils/package_identifier.py diff --git a/Dockerfile b/Dockerfile index fa6e6d652..9dd3cdb83 100755 --- a/Dockerfile +++ b/Dockerfile @@ -42,6 +42,7 @@ RUN apt-get update && apt-get install -y \ libarchive-tools \ xz-utils \ libatomic1 \ + libkrb5-dev \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* \ && update-ca-certificates diff --git a/pyproject.toml b/pyproject.toml index 0d4d36bd5..557941873 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ dependencies = [ "litellm<=1.75.8", "csaf-tool==0.3.2", 
"jsonschema>=4.0.0,<5.0.0", + "koji", ] requires-python = ">=3.11,<3.13" description = "NVIDIA AI Blueprint: Vulnerability Analysis for Container Security" diff --git a/src/exploit_iq_commons/data_models/checker_status.py b/src/exploit_iq_commons/data_models/checker_status.py new file mode 100644 index 000000000..d419c6de1 --- /dev/null +++ b/src/exploit_iq_commons/data_models/checker_status.py @@ -0,0 +1,74 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from enum import Enum +from enum import IntEnum +from pathlib import Path + +from pydantic import BaseModel, Field + + +class PackageCheckerStatus(IntEnum): + """Per-CVE status codes produced by the PackageIdentify phase.""" + OK = 0 + ERROR_PKG_IDENT_NO_INTEL = 1 + PKG_IDENT_NOT_VUL = 2 + ERROR_FAILED_TO_DOWNLOAD_SRPM = 3 + + +PACKAGE_CHECKER_STATUS_DESCRIPTIONS: dict[PackageCheckerStatus, str] = { + PackageCheckerStatus.OK: + "Package identified and in affected range -- continue investigation", + PackageCheckerStatus.ERROR_PKG_IDENT_NO_INTEL: + "No Intel found for the package", + PackageCheckerStatus.PKG_IDENT_NOT_VUL: + "Identification state concluded from intel that target package is not vulnerable", + PackageCheckerStatus.ERROR_FAILED_TO_DOWNLOAD_SRPM: + "Failed to download the patched SRPM", +} + +class EnumIdentifyResult(str, Enum): + """Result of the PackageIdentify phase for a single CVE.""" + YES = "yes" + NO = "no" + UNKNOWN = "unknown" + +class PackageIdentifyResult(BaseModel): + """Result of the PackageIdentify phase for a single CVE.""" + affected_rpm_list: list[str] = [] + fixed_rpm_list: list[str] = [] + + is_target_package_affected: EnumIdentifyResult = EnumIdentifyResult.UNKNOWN + is_target_package_fixed: EnumIdentifyResult = EnumIdentifyResult.UNKNOWN + + + +class AcquiredArtifacts(BaseModel): + """Resolved file locations populated by source_acquisition, consumed by downstream checker nodes.""" + srpm_path: Path | None = None + source_dir: Path | None = None + build_log_path: Path | None = None + binary_rpm_path: Path | None = None + patch_source_dir: Path | None = None + patch_diff_path: Path | None = None + + +class PackageCheckerContext(BaseModel): + """Consolidates all checker-specific state on AgentMorpheusInfo.""" + status: PackageCheckerStatus | None = None + source_key: str | None = None + artifacts: AcquiredArtifacts = Field(default_factory=AcquiredArtifacts) + code_index_path: str | None = None + code_vdb_path: str | None = None + 
identify_result: PackageIdentifyResult = Field(default_factory=PackageIdentifyResult) diff --git a/src/exploit_iq_commons/data_models/common.py b/src/exploit_iq_commons/data_models/common.py index 077a98fa9..248e0469c 100644 --- a/src/exploit_iq_commons/data_models/common.py +++ b/src/exploit_iq_commons/data_models/common.py @@ -28,6 +28,17 @@ class AnalysisType(str, Enum): IMAGE = "image" SOURCE = "source" + +class PipelineMode(str, Enum): + """ + Controls which investigation path the pipeline takes after process_sbom. + Orthogonal to AnalysisType (input format) -- any combination is valid. + """ + FULL_PIPELINE = "full_pipeline" + PACKAGE_CHECKER = "package_checker" + + + class HashableModel(BaseModel): """ Subclass of a Pydantic BaseModel that is hashable. Use in objects that need to be hashed for caching purposes. @@ -50,7 +61,16 @@ def __ne__(self, other): def __gt__(self, other): return self.__hash__() > other.__hash__() - +class TargetPackage(HashableModel): + """ + A package to investigate. + """ + name: str + version: str | None = None + release: str | None = None # e.g. "1.el8_2.3" (needed for Brew NVR lookup) + ecosystem: str | None = None + arch: str = "x86_64" # e.g. "x86_64", "aarch64", "s390x", "noarch" + class TypedBaseModel(BaseModel, typing.Generic[_LT]): """ Subclass of Pydantic BaseModel that allows for specifying the object type. Use in Pydantic discriminated unions. 
diff --git a/src/exploit_iq_commons/data_models/info.py b/src/exploit_iq_commons/data_models/info.py index a01f1dda7..4f7bd1ef1 100644 --- a/src/exploit_iq_commons/data_models/info.py +++ b/src/exploit_iq_commons/data_models/info.py @@ -15,6 +15,7 @@ from pydantic import BaseModel +from exploit_iq_commons.data_models.checker_status import PackageCheckerContext from exploit_iq_commons.data_models.cve_intel import CveIntel from exploit_iq_commons.data_models.dependencies import VulnerableDependencies @@ -62,3 +63,4 @@ class SBOMInfo(BaseModel): intel: list[CveIntel] | None = None sbom: SBOMInfo | None = None vulnerable_dependencies: list[VulnerableDependencies] | None = None + checker_context: PackageCheckerContext | None = None diff --git a/src/exploit_iq_commons/data_models/input.py b/src/exploit_iq_commons/data_models/input.py index 897c915d1..72c5382c3 100644 --- a/src/exploit_iq_commons/data_models/input.py +++ b/src/exploit_iq_commons/data_models/input.py @@ -25,12 +25,14 @@ from pydantic import Field from pydantic import Tag from pydantic import field_validator +from pydantic import model_validator from exploit_iq_commons.utils.string_utils import is_valid_cve_id from exploit_iq_commons.utils.string_utils import is_valid_ghsa_id from exploit_iq_commons.utils.dep_tree import Ecosystem from exploit_iq_commons.data_models.common import AnalysisType from exploit_iq_commons.data_models.common import HashableModel +from exploit_iq_commons.data_models.common import PipelineMode , TargetPackage from exploit_iq_commons.data_models.common import TypedBaseModel from exploit_iq_commons.data_models.info import AgentMorpheusInfo from exploit_iq_commons.data_models.info import SBOMPackage @@ -168,9 +170,23 @@ class ImageInfoInput(HashableModel): - "source": Analysis of source code and commitId without SBOM data """ + pipeline_mode: PipelineMode = PipelineMode.FULL_PIPELINE + """ + Controls which investigation path the pipeline takes after process_sbom: + - "full_pipeline": 
Full transitive analysis (check_vuln_deps -> llm_engine) + - "package_checker": Focused package vulnerability checker (package_checker -> checker_output) + """ + target_package: TargetPackage | None = None + source_info: list[SourceDocumentsInfo] sbom_info: SBOMInfoInput | None = None + @model_validator(mode="after") + def validate_target_package(self) -> "ImageInfoInput": + if self.pipeline_mode == PipelineMode.PACKAGE_CHECKER and self.target_package is None: + raise ValueError("target_package is required when pipeline_mode is PACKAGE_CHECKER") + return self + @field_validator('source_info', mode='after') @classmethod def check_conflicting_refs(cls, source_info: list[SourceDocumentsInfo]) -> list[SourceDocumentsInfo]: diff --git a/src/exploit_iq_commons/utils/source_rpm_downloader.py b/src/exploit_iq_commons/utils/source_rpm_downloader.py index 09dc81078..d0549b9d4 100644 --- a/src/exploit_iq_commons/utils/source_rpm_downloader.py +++ b/src/exploit_iq_commons/utils/source_rpm_downloader.py @@ -462,7 +462,8 @@ def parse_sbom(self): logger.info(f"Found {len(packages)} packages in SBOM, platform: {platform_version}") return packages, platform_version - def extract_src_rpm(self, rpm_path: Path, extract_dir: Path): + @staticmethod + def extract_src_rpm(rpm_path: Path, extract_dir: Path): #logger.info(f" Extracting {rpm_path.name} to {extract_dir} ...") extract_dir.mkdir(parents=True, exist_ok=True) try: diff --git a/src/vuln_analysis/configs/brew/internal-user-profile.yml b/src/vuln_analysis/configs/brew/internal-user-profile.yml new file mode 100644 index 000000000..52b4b993d --- /dev/null +++ b/src/vuln_analysis/configs/brew/internal-user-profile.yml @@ -0,0 +1,24 @@ +# Internal User Profile — Red Hat VPN-connected environment +# +# Assumptions: +# - User is on the Red Hat VPN (can reach *.redhat.com internal hosts) +# - Build logs are available via Brew task output + +profile: + name: redhat-internal + +hosts: + rpm: + brew_hub: 
https://brewhub.engineering.redhat.com/brewhub + brew_download: https://download-01.beak-001.prod.iad2.dc.redhat.com/brewroot + git: + dist_git: https://pkgs.devel.redhat.com/cgit + +default_arch: x86_64 + +ssl_verify: false + +build_log: + auto_fetch: true + +download_binary_rpm: false diff --git a/src/vuln_analysis/configs/config-http-openai.yml b/src/vuln_analysis/configs/config-http-openai.yml index a67e18c1b..f3314e087 100644 --- a/src/vuln_analysis/configs/config-http-openai.yml +++ b/src/vuln_analysis/configs/config-http-openai.yml @@ -148,6 +148,21 @@ functions: generate_intel_score: true intel_low_score: 51 insist_analysis: false + cve_source_acquisition: + _type: cve_source_acquisition + base_git_dir: .cache/am_cache/git + base_pickle_dir: .cache/am_cache/pickle + base_rpm_dir: .cache/am_cache/rpms + cve_checker_segmentation: + _type: cve_checker_segmentation + base_checker_dir: .cache/am_cache/checker + base_code_index_dir: .cache/am_cache/code_index + cve_package_checker_probe: + _type: cve_package_checker_probe + probe_log_path: .cache/am_cache/checker/probe_results.jsonl + cve_package_code_agent: + _type: cve_package_code_agent + base_checker_dir: .cache/am_cache/checker health_check: _type: health_check @@ -239,6 +254,10 @@ workflow: cve_summarize_name: cve_summarize cve_justify_name: cve_justify cve_output_config_name: cve_http_output + cve_source_acquisition_name: cve_source_acquisition + cve_checker_segmentation_name: cve_checker_segmentation + cve_package_checker_probe_name: cve_package_checker_probe + cve_package_code_agent_name: cve_package_code_agent eval: general: diff --git a/src/vuln_analysis/functions/cve_checker_segmentation.py b/src/vuln_analysis/functions/cve_checker_segmentation.py new file mode 100644 index 000000000..fb4aa4c52 --- /dev/null +++ b/src/vuln_analysis/functions/cve_checker_segmentation.py @@ -0,0 +1,122 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import time +from pathlib import Path + +from aiq.builder.builder import Builder +from aiq.builder.framework_enum import LLMFrameworkEnum +from aiq.builder.function_info import FunctionInfo +from aiq.cli.register_workflow import register_function +from aiq.data_models.function import FunctionBaseConfig +from pydantic import Field + +from exploit_iq_commons.logging.loggers_factory import LoggingFactory + +logger = LoggingFactory.get_agent_logger(__name__) + +_BUILD_FILE_NAMES = {"Makefile", "GNUmakefile", "configure"} + + +class CVECheckerSegmentationConfig(FunctionBaseConfig, name="cve_checker_segmentation"): + """ + Builds a scoped Tantivy lexical code index from extracted RPM source files. + Reads source directories populated by source_acquisition, indexes them, + and sets info.vdb.code_index_path for downstream checker nodes. 
+ """ + base_checker_dir: str = Field( + default=".cache/am_cache/checker", + description="Root directory for checker-specific artifacts.", + ) + base_code_index_dir: str = Field( + default=".cache/am_cache/code_index", + description="Base directory for Tantivy code index storage.", + ) + include_extensions: list[str] = Field( + default=[ + ".c", ".h", ".cpp", ".hpp", ".py", ".go", ".java", ".js", + ".ts", ".spec", ".patch", ".conf", ".cfg", ".sh", ".m4", + ".ac", ".am", ".in", ".txt", ".md", ".rst", + ], + description="File extensions to include when building the code index.", + ) + + +@register_function(config_type=CVECheckerSegmentationConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) +async def cve_checker_segmentation(config: CVECheckerSegmentationConfig, builder: Builder): + from exploit_iq_commons.data_models.info import AgentMorpheusInfo + from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput + from vuln_analysis.utils.full_text_search import FullTextSearch + + async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + if not message.info.checker_context or not message.info.checker_context.source_key: + logger.info("checker_segmentation: no checker_context.source_keys, skipping indexing") + return message + + source_key = message.info.checker_context.source_key + if not source_key: + logger.info("checker_segmentation: no source_key, skipping indexing") + return message + + index_path = FullTextSearch.get_index_directory(config.base_code_index_dir, source_key) + + if index_path.exists(): + logger.info("checker_segmentation: cache hit on code index: %s", index_path) + else: + start = time.time() + fts = FullTextSearch(cache_path=str(index_path)) + + source_dir = Path(config.base_checker_dir) / source_key / "source" + if not source_dir.is_dir(): + logger.warning("checker_segmentation: source dir missing: %s", source_dir) + return message + + logger.info("checker_segmentation: indexing source dir %s", 
source_dir) + fts.add_documents_from_code_path( + str(source_dir), + config.include_extensions, + use_langparser=False, + splitter=True, + no_extension=_BUILD_FILE_NAMES, + ) + + elapsed = time.time() - start + logger.info("checker_segmentation: indexing completed in %.2fs at %s", elapsed, index_path) + + message.info.vdb = AgentMorpheusInfo.VdbPaths(code_index_path=str(index_path)) + return message + + yield FunctionInfo.from_fn( + _arun, + description="Build scoped Tantivy code index from extracted checker sources", + ) + + +def _index_build_files(fts, source_dir: Path) -> None: + """Walk source_dir for extensionless build files and add them to the index.""" + docs: list[tuple[str, str]] = [] + for root, _, files in os.walk(source_dir): + for fname in files: + if fname in _BUILD_FILE_NAMES: + fpath = os.path.join(root, fname) + try: + with open(fpath, "r", encoding="utf-8", errors="replace") as f: + docs.append((fpath, f.read())) + except Exception as exc: + logger.warning("checker_segmentation: error reading %s: %s", fpath, exc) + if docs: + fts.add_documents(docs) + logger.info("checker_segmentation: indexed %d build files from %s", len(docs), source_dir) diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py new file mode 100644 index 000000000..4f5e3817f --- /dev/null +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -0,0 +1,62 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from aiq.builder.builder import Builder +from aiq.builder.framework_enum import LLMFrameworkEnum +from aiq.builder.function_info import FunctionInfo +from aiq.cli.register_workflow import register_function +from aiq.data_models.function import FunctionBaseConfig +from pydantic import Field + +from exploit_iq_commons.logging.loggers_factory import LoggingFactory + +logger = LoggingFactory.get_agent_logger(__name__) + + +class CVEPackageCodeAgentConfig(FunctionBaseConfig, name="cve_package_code_agent"): + """ + Level 1 Package Code Agent. Investigates each CVE using extracted source + code and the scoped Tantivy code index built by checker_segmentation. + + Phases: Identify -> Locate -> Verify (see HLD-standalone-checker.md §5). 
+ """ + base_checker_dir: str = Field( + default=".cache/am_cache/checker", + description="Root directory for checker-specific artifacts.", + ) + + +@register_function(config_type=CVEPackageCodeAgentConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) +async def cve_package_code_agent(config: CVEPackageCodeAgentConfig, builder: Builder): + from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput + from vuln_analysis.data_models.output import AgentMorpheusOutput + from vuln_analysis.data_models.output import OutputPayload + + async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: + logger.info("package_code_agent: starting L1 investigation") + + # TODO: implement L1 investigation phases (Identify, Locate, Verify) + + logger.info("package_code_agent: finished (stub -- no investigation logic yet)") + return AgentMorpheusOutput( + input=message.input, + info=message.info, + output=OutputPayload(analysis=[], vex=None), + ) + + yield FunctionInfo.from_fn( + _arun, + description="Level 1 Package Code Agent: investigates CVEs using extracted source and Tantivy code index", + ) diff --git a/src/vuln_analysis/functions/cve_source_acquisition.py b/src/vuln_analysis/functions/cve_source_acquisition.py new file mode 100644 index 000000000..60d193347 --- /dev/null +++ b/src/vuln_analysis/functions/cve_source_acquisition.py @@ -0,0 +1,132 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import hashlib +from aiq.builder.builder import Builder +from aiq.builder.framework_enum import LLMFrameworkEnum +from aiq.builder.function_info import FunctionInfo +from aiq.cli.register_workflow import register_function +from aiq.data_models.function import FunctionBaseConfig +from pydantic import Field + +import shutil +from pathlib import Path +from pathlib import PurePath + +from exploit_iq_commons.data_models.checker_status import PackageCheckerContext, PackageCheckerStatus, PackageIdentifyResult +from exploit_iq_commons.data_models.checker_status import AcquiredArtifacts +from exploit_iq_commons.logging.loggers_factory import LoggingFactory + +from exploit_iq_commons.utils.source_rpm_downloader import RPMDependencyManager, SourceRPMDownloader +from vuln_analysis.utils.package_identifier import PackageIdentifier +from vuln_analysis.tools.brew_downloader import BrewDownloader, BrewProfileType , BrewDownloaderError + +logger = LoggingFactory.get_agent_logger(__name__) + + +class CVESourceAcquisitionConfig(FunctionBaseConfig, name="cve_source_acquisition"): + """ + Downloads source containers, extracts layers, and locates package sources + by purl and ecosystem. Populates the pipeline state with source paths for + downstream checker segmentation and investigation nodes. 
+ """ + base_git_dir: str = Field( + default=".cache/am_cache/git", + description="The directory for storing pulled git repositories used for code analysis.", + ) + base_pickle_dir: str = Field( + default=".cache/am_cache/pickle", + description="The directory used for storing pickled document cache files.", + ) + base_rpm_dir: str = Field( + default=".cache/am_cache/rpms", + description="The directory used for storing rpm files.", + ) + base_checker_dir: str = Field( + default=".cache/am_cache/checker", + description="Root directory for checker-specific artifacts (extracted sources, diffs, results).", + ) + + +@register_function(config_type=CVESourceAcquisitionConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) +async def cve_source_acquisition(config: CVESourceAcquisitionConfig, builder: Builder): + from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput + + async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + logger.info("source_acquisition: starting source code acquisition") + + rpm_manager = RPMDependencyManager.get_instance() + rpm_manager.set_rpm_cache_dir(config.base_rpm_dir) + message.info.checker_context = PackageCheckerContext() + intel_list = message.info.intel or [] + vulns = message.input.scan.vulns + + intel_by_vuln = {i.vuln_id: i for i in intel_list} + target_package = message.input.image.target_package + + identifier = PackageIdentifier( + target_package=target_package, + ) + + status = PackageCheckerStatus.OK + per_vuln_results: dict[str, PackageIdentifyResult] = {} + for vuln_info in vulns: + intel = intel_by_vuln.get(vuln_info.vuln_id) + status,result = identifier.identify( intel) + message.info.checker_context.identify_result = result + break + + + message.info.checker_context.status = status + if status != PackageCheckerStatus.OK: + return message + + # create identifier key + str_identifier_key = f"{target_package.name}-{target_package.version}-{target_package.release}" + identifier_key = 
hashlib.sha256(str_identifier_key.encode()).hexdigest()[:16] + message.info.checker_context.source_key = identifier_key + + target_dir = Path(config.base_checker_dir) / identifier_key + + if target_dir.exists() and any(target_dir.iterdir()): + logger.info("Source cache hit for %s: %s", identifier_key, target_dir) + #build artifacts from target_dir + artifacts = AcquiredArtifacts() + artifacts.srpm_path = target_dir / "source" + artifacts.build_log_path = target_dir / "logs" + artifacts.binary_rpm_path = target_dir / "binaries" + message.info.checker_context.artifacts = artifacts + return message + + target_dir.mkdir(parents=True, exist_ok=True) + try: + brew_downloader = BrewDownloader(BrewProfileType.INTERNAL, config.base_rpm_dir, str(target_dir)) + brew_downloader.connect() + artifacts = brew_downloader.download_target_artifacts(target_package.name, target_package.version, target_package.release,target_package.arch) + message.info.checker_context.artifacts = artifacts + except BrewDownloaderError as e: + logger.error("Failed to download patched SRPM: %s", e) + message.info.checker_context.status = PackageCheckerStatus.ERROR_FAILED_TO_DOWNLOAD_SRPM + return message + + + + return message + + yield FunctionInfo.from_fn( + _arun, + input_schema=AgentMorpheusEngineInput, + description="Downloads source containers and locates package sources by purl and ecosystem.", + ) diff --git a/src/vuln_analysis/register.py b/src/vuln_analysis/register.py index dc847a87f..b9eff0d15 100644 --- a/src/vuln_analysis/register.py +++ b/src/vuln_analysis/register.py @@ -23,9 +23,12 @@ from aiq.data_models.function import FunctionBaseConfig from pydantic import Field +from exploit_iq_commons.data_models.common import PipelineMode from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput from exploit_iq_commons.data_models.input import AgentMorpheusInput +from exploit_iq_commons.data_models.info import AgentMorpheusInfo from vuln_analysis.data_models.output import 
AgentMorpheusOutput +from vuln_analysis.data_models.output import OutputPayload from vuln_analysis.data_models.state import AgentMorpheusEngineState # pylint: disable=unused-import from vuln_analysis.functions import cve_agent @@ -36,6 +39,10 @@ from vuln_analysis.functions import cve_generate_vdbs from vuln_analysis.functions import cve_http_output from vuln_analysis.functions import cve_justify +from vuln_analysis.functions import cve_package_checker_probe +from vuln_analysis.functions import cve_package_code_agent +from vuln_analysis.functions import cve_checker_segmentation +from vuln_analysis.functions import cve_source_acquisition from vuln_analysis.functions import cve_process_sbom from vuln_analysis.functions import cve_summarize from vuln_analysis.functions import cve_generate_cvss @@ -75,6 +82,21 @@ class CVEAgentWorkflowConfig(FunctionBaseConfig, name="cve_agent"): description="Function to output workflow results " "(e.g. cve_file_output, cve_http_output). " " If None, only prints to console") + cve_source_acquisition_name: str | None = Field( + default=None, + description="Function name for source acquisition (downloads source containers, locates package sources)", + ) + cve_checker_segmentation_name: str | None = Field( + default=None, + description="Function name for scoped code indexing of extracted checker sources (Tantivy only)", + ) + cve_package_checker_probe_name: str | None = Field( + default=None, + description="Function name for the package checker probe (logs package identification data per CVE)") + cve_package_code_agent_name: str | None = Field( + default=None, + description="Function name for the Level 1 Package Code Agent (source-level CVE investigation)", + ) description: str = Field(default="Vulnerability analysis for container security workflow", description="Workflow function description") @@ -99,6 +121,22 @@ async def cve_agent_workflow(config: CVEAgentWorkflowConfig, builder: Builder): cve_generate_vex_fn = 
builder.get_function(name=config.cve_generate_vex_name) cve_generate_cvss_fn = builder.get_function(name=config.cve_generate_cvss_name) cve_output_fn = builder.get_function(name=config.cve_output_config_name) if config.cve_output_config_name else None + cve_package_checker_probe_fn = ( + builder.get_function(name=config.cve_package_checker_probe_name) + if config.cve_package_checker_probe_name else None + ) + cve_source_acquisition_fn = ( + builder.get_function(name=config.cve_source_acquisition_name) + if config.cve_source_acquisition_name else None + ) + cve_checker_segmentation_fn = ( + builder.get_function(name=config.cve_checker_segmentation_name) + if config.cve_checker_segmentation_name else None + ) + cve_package_code_agent_fn = ( + builder.get_function(name=config.cve_package_code_agent_name) + if config.cve_package_code_agent_name else None + ) # Define langgraph node functions @catch_pipeline_errors_async @@ -183,6 +221,18 @@ async def output_results_node(state: AgentMorpheusOutput) -> AgentMorpheusOutput """Outputs results using configured output function""" return await cve_output_fn.ainvoke(state.model_dump()) if cve_output_fn else state + + # --- Package checker path nodes --- + + @catch_pipeline_errors_async + async def checker_init_state_node(state: AgentMorpheusInput) -> AgentMorpheusEngineInput: + """Bridges AgentMorpheusInput -> AgentMorpheusEngineInput with empty info (skips VDB generation).""" + return AgentMorpheusEngineInput(input=state, info=AgentMorpheusInfo()) + + @catch_pipeline_errors_async + async def checker_fetch_intel_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + """Fetch intel for CVE input (package checker path). 
Reuses the same fetch_intel function.""" + return await cve_fetch_intel_fn.ainvoke(state.model_dump()) async def check_vdbs_success(state: AgentMorpheusInput) -> str: """Checks if the VDBs were successfully generated""" @@ -196,7 +246,50 @@ async def failure_node(state: AgentMorpheusInput) -> AgentMorpheusOutput: from exploit_iq_commons.data_models.info import AgentMorpheusInfo from vuln_analysis.data_models.output import OutputPayload return AgentMorpheusOutput(input=state, info=AgentMorpheusInfo(), output=OutputPayload(analysis=[], vex=None)) - # define langgraph + + + + @catch_pipeline_errors_async + async def source_acquisition_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + """Acquires source code for the target package (source containers, git fallback).""" + if cve_source_acquisition_fn: + state = await cve_source_acquisition_fn.ainvoke(state.model_dump()) + else: + logger.warning("Source acquisition function not configured, passing state through") + + if state.info.checker_context and state.info.checker_context.status is not None: + logger.info( + "PackageIdentify aggregate status: %s", + state.info.checker_context.status.name, + ) + return state + + @catch_pipeline_errors_async + async def checker_segmentation_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + """Builds scoped Tantivy code index from extracted checker sources.""" + if cve_checker_segmentation_fn: + state = await cve_checker_segmentation_fn.ainvoke(state.model_dump()) + else: + logger.warning("Checker segmentation not configured, skipping indexing") + return state + + @catch_pipeline_errors_async + async def code_agent_node(state: AgentMorpheusEngineInput) -> AgentMorpheusOutput: + """Level 1 Package Code Agent: investigates CVEs using extracted source and Tantivy code index.""" + if cve_package_code_agent_fn: + return await cve_package_code_agent_fn.ainvoke(state.model_dump()) + logger.warning("Package code agent function not configured, producing 
empty output") + return AgentMorpheusOutput( + input=state.input, + info=state.info, + output=OutputPayload(analysis=[], vex=None), + ) + + def route_after_add_start_time(state: AgentMorpheusInput): + """Route to full pipeline or package checker based on pipeline_mode.""" + if state.image.pipeline_mode == PipelineMode.PACKAGE_CHECKER: + return "checker_init_state" + return "generate_vdbs" # build llm engine subgraph subgraph_builder = StateGraph(AgentMorpheusEngineState) @@ -240,8 +333,25 @@ async def call_llm_engine_subgraph_node(message: AgentMorpheusEngineInput): graph_builder.add_node("add_completed_time", add_completed_time_node) graph_builder.add_node("output_results", output_results_node) graph_builder.add_node("failure", failure_node) +# -- Package checker nodes -- + graph_builder.add_node("checker_init_state", checker_init_state_node) + graph_builder.add_node("checker_fetch_intel", checker_fetch_intel_node) + + graph_builder.add_node("source_acquisition", source_acquisition_node) + graph_builder.add_node("checker_segmentation", checker_segmentation_node) + graph_builder.add_node("code_agent", code_agent_node) + graph_builder.add_edge(START, "add_start_time") - graph_builder.add_edge("add_start_time", "generate_vdbs") + # Conditional: route to full pipeline or package checker after add_start_time + graph_builder.add_conditional_edges( + "add_start_time", + route_after_add_start_time, + { + "generate_vdbs": "generate_vdbs", + "checker_init_state": "checker_init_state", + }, + ) + graph_builder.add_conditional_edges("generate_vdbs", check_vdbs_success,{"fetch_intel": "fetch_intel", "failure": "failure"}) graph_builder.add_edge("failure", "add_completed_time") #graph_builder.add_edge("generate_vdbs", "fetch_intel") @@ -250,10 +360,21 @@ async def call_llm_engine_subgraph_node(message: AgentMorpheusEngineInput): graph_builder.add_edge("process_sbom", "check_vuln_deps") graph_builder.add_edge("check_vuln_deps", "llm_engine") graph_builder.add_edge("llm_engine", 
"add_completed_time") + + # Package checker path + graph_builder.add_edge("checker_init_state", "checker_fetch_intel") + graph_builder.add_edge("checker_fetch_intel", "source_acquisition") + + graph_builder.add_edge("source_acquisition", "checker_segmentation") + graph_builder.add_edge("checker_segmentation", "code_agent") + graph_builder.add_edge("code_agent", "add_completed_time") + + # Shared tail graph_builder.add_edge("add_completed_time", "output_results") graph_builder.add_edge("output_results", END) graph = graph_builder.compile() - + #graph.get_graph().draw_mermaid_png(output_file_path="checker_flow.png") + def convert_str_to_agent_morpheus_input(input: str) -> AgentMorpheusInput: logger.debug("Converting input to AgentMorpheusInput: %s", input) try: diff --git a/src/vuln_analysis/tools/brew_downloader.py b/src/vuln_analysis/tools/brew_downloader.py new file mode 100644 index 000000000..987fca0f6 --- /dev/null +++ b/src/vuln_analysis/tools/brew_downloader.py @@ -0,0 +1,266 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Production Brew Downloader -- fetch SRPMs, build logs, and binary RPMs from Brew (Koji). + +Evolved from the PoC at docs/package_analyzer/standalone_checker/brew/brew_downloader.py. +Storage is split: SRPMs go to the shared rpms/ cache, everything else to checker-specific dirs. 
+""" + +from __future__ import annotations + +import shutil +from enum import Enum +from pathlib import Path + +import koji +import requests +import yaml + +from exploit_iq_commons.data_models.checker_status import AcquiredArtifacts +from exploit_iq_commons.logging.loggers_factory import LoggingFactory +from exploit_iq_commons.utils.source_rpm_downloader import SourceRPMDownloader + +logger = LoggingFactory.get_agent_logger(__name__) + +_CONFIGS_DIR = Path(__file__).resolve().parent.parent / "configs" / "brew" +# --------------------------------------------------------------------------- +# Exceptions +# --------------------------------------------------------------------------- + +class BrewDownloaderError(Exception): + """Base for all Brew downloader errors.""" + + +class BrewConnectionError(BrewDownloaderError): + """Raised when the Brew hub is unreachable or session creation fails.""" + + +class BrewBuildNotFoundError(BrewDownloaderError): + """Raised when getBuild returns None for the requested NVR.""" + + +class BrewDownloadError(BrewDownloaderError): + """Raised when an HTTP download of an artifact fails.""" + + +class BrewProfileNotImplementedError(BrewDownloaderError): + """Raised when a profile type is not yet implemented.""" + + +# --------------------------------------------------------------------------- +# Profile types +# --------------------------------------------------------------------------- + +class BrewProfileType(Enum): + INTERNAL = "internal" + EXTERNAL = "external" + + +_PROFILE_PATHS: dict[BrewProfileType, Path] = { + BrewProfileType.INTERNAL: _CONFIGS_DIR / "internal-user-profile.yml", +} + + +# --------------------------------------------------------------------------- +# BrewDownloader +# --------------------------------------------------------------------------- + +class BrewDownloader: + """Downloads RPM artifacts and build logs from Brew (Koji) using a profile YAML. 
+ + Storage destinations: + - SRPMs -> ``rpm_cache_dir/{NVR}.src.rpm`` (shared with SourceRPMDownloader) + - Build logs -> ``checker_dir/logs/{NVR}-{arch}-build.log`` + - Binary RPMs -> ``checker_dir/binaries/{NVR}/{NVRA}.rpm`` + """ + + def __init__(self, profile_type: BrewProfileType, rpm_cache_dir: str, checker_dir: str) -> None: + if profile_type == BrewProfileType.EXTERNAL: + raise BrewProfileNotImplementedError( + f"Profile type '{profile_type.value}' is not yet implemented" + ) + profile_path = _PROFILE_PATHS[profile_type] + self._profile = self._load_profile(str(profile_path)) + + hosts = self._profile["hosts"]["rpm"] + self._brew_hub: str = hosts["brew_hub"] + self._brew_download: str = hosts["brew_download"] + self._default_arch: str = self._profile.get("default_arch", "x86_64") + self._download_binary_rpm_enabled: bool = self._profile.get("download_binary_rpm", False) + self._auto_fetch_build_log: bool = self._profile.get("build_log", {}).get("auto_fetch", True) + self._ssl_verify: bool = self._profile.get("ssl_verify", False) + + self._rpm_cache_dir = Path(rpm_cache_dir) + self._rpm_cache_dir.mkdir(parents=True, exist_ok=True) + + self._checker_dir = Path(checker_dir) + self._checker_dir.mkdir(parents=True, exist_ok=True) + + self._session: koji.ClientSession | None = None + self._pathinfo: koji.PathInfo | None = None + self._http = requests.Session() + + # -- properties -------------------------------------------------------- + + @property + def download_binary_rpm_enabled(self) -> bool: + return self._download_binary_rpm_enabled + + @property + def default_arch(self) -> str: + return self._default_arch + + @property + def auto_fetch_build_log(self) -> bool: + return self._auto_fetch_build_log + + # -- setup ------------------------------------------------------------- + + @staticmethod + def _load_profile(path: str) -> dict: + with open(path, encoding="utf-8") as fh: + return yaml.safe_load(fh) + + def connect(self) -> None: + """Create a Koji client 
session and PathInfo helper from the profile.""" + logger.info("Connecting to Brew hub: %s", self._brew_hub) + try: + opts: dict = {} + if not self._ssl_verify: + opts["no_ssl_verify"] = True + self._session = koji.ClientSession(self._brew_hub, opts=opts) + self._pathinfo = koji.PathInfo(topdir=self._brew_download) + self._http.verify = self._ssl_verify + except Exception as exc: + raise BrewConnectionError( + f"Failed to connect to Brew hub {self._brew_hub}: {exc}" + ) from exc + + # -- query ------------------------------------------------------------- + + def search_build(self, name: str, version: str, release: str) -> dict | None: + """Look up a build by NVR. Returns the build-info dict or ``None``.""" + nvr = f"{name}-{version}-{release}" + logger.info("Searching for build: %s", nvr) + build = self._session.getBuild(nvr) + if build is None: + logger.warning("Build not found: %s", nvr) + return None + logger.info( + "Found build %s (id=%s, volume=%s, task=%s)", + build["nvr"], build["id"], build.get("volume_name"), build.get("task_id"), + ) + return build + + # -- downloads --------------------------------------------------------- + + def _download_file(self, url: str, dest: Path) -> Path: + """Stream-download *url* to *dest*. Returns the destination path.""" + logger.info("Downloading %s -> %s", url, dest) + dest.parent.mkdir(parents=True, exist_ok=True) + try: + resp = self._http.get(url, stream=True, timeout=120) + resp.raise_for_status() + except requests.RequestException as exc: + raise BrewDownloadError(f"Failed to download {url}: {exc}") from exc + with open(dest, "wb") as fh: + for chunk in resp.iter_content(chunk_size=1 << 18): # 256 KB + fh.write(chunk) + logger.info("Saved %s (%d bytes)", dest.name, dest.stat().st_size) + return dest + + def download_srpm(self, build: dict) -> Path: + """Download the .src.rpm for *build* into the shared RPM cache. + + Skips the download when the destination file already exists and is non-empty. 
+ """ + rpms = self._session.listRPMs(buildID=build["id"], arches="src") + if not rpms: + raise BrewDownloadError(f"No source RPM found for build {build['nvr']}") + + rpm_info = rpms[0] + dest = self._rpm_cache_dir / f"{rpm_info['nvr']}.src.rpm" + + if dest.exists() and dest.stat().st_size > 0: + logger.info("SRPM cache hit: %s", dest) + return dest + + url = f"{self._pathinfo.build(build)}/{self._pathinfo.rpm(rpm_info)}" + return self._download_file(url, dest) + + def download_build_log(self, build: dict, arch: str | None = None) -> Path: + """Download ``build.log`` for the given arch into ``checker_dir/logs/``.""" + arch = arch or self._default_arch + url = f"{self._pathinfo.build(build)}/data/logs/{arch}/build.log" + dest = self._checker_dir / "logs" / f"{build['nvr']}-{arch}-build.log" + return self._download_file(url, dest) + + def download_binary_rpm(self, build: dict, arch: str | None = None) -> Path | None: + """Download all binary RPMs for the given arch (excludes debuginfo/debugsource). + + Saves to ``checker_dir/binaries/{NVR}/``. Returns an empty list when no + matching RPMs are found. + """ + arch = arch or self._default_arch + rpms = self._session.listRPMs(buildID=build["id"], arches=arch) + if not rpms: + logger.warning("No %s RPMs found for build %s", arch, build["nvr"]) + return None + + nvr = build["nvr"] + build_dir = self._checker_dir / "binaries" / nvr + + downloaded: list[Path] = [] + for rpm_info in rpms: + rpm_name: str = rpm_info["name"] + if rpm_name.endswith(("-debuginfo", "-debugsource")): + continue + url = f"{self._pathinfo.build(build)}/{self._pathinfo.rpm(rpm_info)}" + nvra = f"{rpm_info['name']}-{rpm_info['version']}-{rpm_info['release']}.{rpm_info['arch']}" + dest = build_dir / f"{nvra}.rpm" + self._download_file(url, dest) + downloaded.append(dest) + return build_dir + + def download_patched_srpm(self, name: str, version: str, release: str) -> Path | None: + """Download the SRPM for a patched version (from CVE fix info). 
+ + Returns the cached SRPM path, or ``None`` if the patched build is not + found in Brew. + """ + build = self.search_build(name, version, release) + if build is None: + return None + return self.download_srpm(build) + + def download_target_artifacts(self, name: str, version: str, release: str, arch: str ) -> AcquiredArtifacts | None: + artifacts = AcquiredArtifacts() + build = self.search_build(name, version, release) + if build is None: + raise BrewBuildNotFoundError(f"Build not found for {name}-{version}-{release}") + + cache_srpm_path = self.download_srpm(build) + + srpm_target_path = self._checker_dir / "source" + srpm_target_path.mkdir(parents=True, exist_ok=True) + shutil.copy2(cache_srpm_path, srpm_target_path) + SourceRPMDownloader.extract_src_rpm(cache_srpm_path, srpm_target_path) + artifacts.srpm_path = srpm_target_path + + artifacts.build_log_path = self.download_build_log(build, arch) + if self._download_binary_rpm_enabled: + artifacts.binary_rpm_path = self.download_binary_rpm(build, arch) + return artifacts diff --git a/src/vuln_analysis/utils/full_text_search.py b/src/vuln_analysis/utils/full_text_search.py index 0a02ab991..132602ebf 100644 --- a/src/vuln_analysis/utils/full_text_search.py +++ b/src/vuln_analysis/utils/full_text_search.py @@ -194,7 +194,7 @@ def add_documents_from_code_path(self, code_path: str, include_extensions: list[str], use_langparser=True, - splitter=True): + splitter=True,no_extension=[]): """Create an index from raw files.""" doc_content = [] @@ -218,7 +218,7 @@ def add_documents_from_code_path(self, for root, _, files in os.walk(code_path): for file in files: - if any(file.endswith(ext) for ext in include_extensions): + if any(file.endswith(ext) for ext in include_extensions) or file in no_extension: file_path = os.path.join(root, file) try: with open(file_path, "r") as f: diff --git a/src/vuln_analysis/utils/package_identifier.py b/src/vuln_analysis/utils/package_identifier.py new file mode 100644 index 
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re

from univers import versions

from exploit_iq_commons.data_models.checker_status import EnumIdentifyResult, PackageCheckerStatus, PackageIdentifyResult
from exploit_iq_commons.data_models.cve_intel import CveIntel
from exploit_iq_commons.logging.loggers_factory import LoggingFactory
from exploit_iq_commons.utils.string_utils import package_names_match
from exploit_iq_commons.data_models.common import TargetPackage

logger = LoggingFactory.get_agent_logger(__name__)

# name-epoch:version-release[.arch]; entries without an explicit epoch do not match.
_RPM_NEVRA_RE = re.compile(r"^(.+?)-(\d+):(.+?)-(.+)$")

# Trailing components that are an RPM architecture rather than part of the release.
_RPM_ARCHES = frozenset({
    "noarch", "src", "nosrc",
    "x86_64", "i386", "i486", "i586", "i686",
    "aarch64", "armv7hl", "ppc64", "ppc64le", "s390", "s390x", "riscv64",
})


def _strip_arch(release_arch: str) -> str:
    """Return *release_arch* without a trailing ``.<arch>``, if one is present.

    Only a known architecture is removed, so a release such as ``7.el8_6``
    (no arch suffix) is returned unchanged instead of being cut to ``7``.
    """
    release, dot, suffix = release_arch.rpartition(".")
    if dot and suffix in _RPM_ARCHES:
        return release
    return release_arch


class PackageIdentifier:
    """
    Deterministic PackageIdentify phase: resolves package identity from intel,
    cross-references the SBOM, checks version ranges, and locates RPMs in cache.
    """

    def __init__(
        self,
        target_package: TargetPackage,
    ):
        self._target_package = target_package

    def identify(self, intel: CveIntel | None) -> tuple[PackageCheckerStatus, PackageIdentifyResult]:
        """Run PackageIdentify for a single CVE.

        Returns ``ERROR_PKG_IDENT_NO_INTEL`` when *intel* is ``None``,
        ``PKG_IDENT_NOT_VUL`` when the target is provably not affected or
        already fixed, and ``OK`` otherwise.
        """
        package_identify = PackageIdentifyResult()
        if intel is None:
            return PackageCheckerStatus.ERROR_PKG_IDENT_NO_INTEL, package_identify

        package_identify.is_target_package_affected = self._is_target_package_affected(intel, package_identify)
        package_identify.is_target_package_fixed = self._is_target_package_fixed(intel, package_identify)

        not_vulnerable = (
            package_identify.is_target_package_affected == EnumIdentifyResult.NO
            or package_identify.is_target_package_fixed == EnumIdentifyResult.YES
        )
        status = PackageCheckerStatus.PKG_IDENT_NOT_VUL if not_vulnerable else PackageCheckerStatus.OK
        return status, package_identify

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _find_and_locate_rpm(self, intel: CveIntel) -> list[str]:
        """Extract deduplicated RPM package names from RHSA package_state.

        Names containing ``/`` are dropped; first-seen order is preserved.
        """
        names = (pkg["package_name"] for pkg in self._extract_rhsa(intel))
        return list(dict.fromkeys(name for name in names if "/" not in name))

    def _is_target_package_affected(
        self, intel: CveIntel, package_identify: PackageIdentifyResult,
    ) -> EnumIdentifyResult:
        """Determine whether the target package is affected by this CVE.

        Task 1: populate affected_rpm_list from RHSA package_state.
        Task 2: match target package by name + version range.
        Only returns NO with definitive proof; defaults to UNKNOWN otherwise.
        """
        rpm_names = self._find_and_locate_rpm(intel)
        if not rpm_names:
            return EnumIdentifyResult.UNKNOWN
        package_identify.affected_rpm_list = rpm_names

        target_name = self._target_package.name
        target_version = self._target_package.version
        name_matched = any(package_names_match(target_name, name) for name in rpm_names)

        if name_matched:
            if not target_version:
                return EnumIdentifyResult.YES
            in_range = self._version_in_affected_range(target_version, intel)
            return EnumIdentifyResult.YES if in_range else EnumIdentifyResult.NO

        # Name not listed by RHSA: NVD ranges can still rule the target out,
        # but being inside a range is not proof that it is affected.
        if target_version and intel.nvd and intel.nvd.configurations:
            in_range = self._version_in_affected_range(target_version, intel)
            return EnumIdentifyResult.UNKNOWN if in_range else EnumIdentifyResult.NO

        return EnumIdentifyResult.UNKNOWN

    def _is_target_package_fixed(self, intel: CveIntel, package_identify: PackageIdentifyResult) -> EnumIdentifyResult:
        """Determine whether the target package is already running the fixed version.

        Task 1: populate fixed_rpm_list from RHSA affected_release.
        Task 2: compare target version+release against the fix version+release.

        Returns UNKNOWN when no fix entry matches the target name, when the
        target has no version or release, or when the comparison fails.
        """
        fix_entries = self._extract_fixed_rpms(intel)
        if not fix_entries:
            return EnumIdentifyResult.UNKNOWN
        package_identify.fixed_rpm_list = [e["nevra"] for e in fix_entries]

        target_name = self._target_package.name
        matching = [e for e in fix_entries if package_names_match(target_name, e["name"])]
        if not matching:
            return EnumIdentifyResult.UNKNOWN

        target_version = self._target_package.version
        target_release = self._target_package.release
        if not target_version or not target_release:
            # Without both parts the comparison would run on the literal "None".
            return EnumIdentifyResult.UNKNOWN

        # NOTE(review): only the first matching entry is compared, and
        # _extract_fixed_rpms keeps one entry per name -- confirm this is right
        # when a fix ships in several product streams.
        fix = matching[0]
        try:
            target_vr = f"{target_version}-{target_release}"
            fix_vr = f"{fix['version']}-{fix['release']}"
            if versions.RpmVersion(target_vr) >= versions.RpmVersion(fix_vr):
                return EnumIdentifyResult.YES
            return EnumIdentifyResult.NO
        except Exception as exc:
            logger.debug("Fix version comparison failed: %s", exc)
            return EnumIdentifyResult.UNKNOWN

    def _version_in_affected_range(self, target_version: str, intel: CveIntel) -> bool:
        """Check if target_version falls within any NVD configuration affected range.

        Conservative: returns True when there is no NVD data, when no
        configuration names the target package, or when a comparison raises.
        """
        if intel.nvd is None or not intel.nvd.configurations:
            return True  # no range data -> conservatively assume affected

        target_name = self._target_package.name
        matched_any_config = False
        for config in intel.nvd.configurations:
            if not package_names_match(target_name, config.package):
                continue
            matched_any_config = True
            version_range = [
                config.versionStartExcluding,
                config.versionEndExcluding,
                config.versionStartIncluding,
                config.versionEndIncluding,
            ]
            if all(v is None for v in version_range):
                continue
            try:
                if self._check_version_in_range(target_version, version_range):
                    return True
            except Exception as exc:
                logger.debug("Version comparison failed for %s: %s", target_version, exc)
                return True  # conservative: assume affected on error

        # no NVD data for this package -> conservatively assume affected
        return not matched_any_config

    @staticmethod
    def _check_version_in_range(version_to_check: str, version_range: list[str | None]) -> bool:
        """Reuse the same logic as VulnerableDependencyChecker._check_version_in_range.

        *version_range* is ``[start_excl, end_excl, start_incl, end_incl]``.
        An inclusive bound takes precedence over the exclusive bound on the
        same side.
        """
        ver_start_excl, ver_end_excl, ver_start_incl, ver_end_incl = version_range

        all_versions = [v for v in version_range if v is not None] + [version_to_check]
        has_el = any("el" in str(v) for v in all_versions)
        has_deb = any("deb" in str(v) or "ubuntu" in str(v) for v in all_versions)

        # Pick the comparison scheme from distro markers in the version strings.
        if has_el:
            vfunc = versions.RpmVersion
        elif has_deb:
            vfunc = versions.DebianVersion
        else:
            vfunc = versions.GenericVersion

        vtc = vfunc(version_to_check)
        vsi = vfunc(ver_start_incl) if ver_start_incl else None
        vse = vfunc(ver_start_excl) if ver_start_excl else None
        vei = vfunc(ver_end_incl) if ver_end_incl else None
        vee = vfunc(ver_end_excl) if ver_end_excl else None

        if vsi:
            if not (vsi <= vtc):
                return False
        elif vse:
            if not (vse < vtc):
                return False

        if vei:
            if not (vtc <= vei):
                return False
        elif vee:
            if not (vtc < vee):
                return False

        return True

    # ------------------------------------------------------------------
    # Intel extraction
    # ------------------------------------------------------------------

    @staticmethod
    def _extract_rhsa(intel: CveIntel) -> list[dict]:
        """Return ``{"package_name": ...}`` for every RHSA package_state entry that has a name."""
        if intel.rhsa is None or not intel.rhsa.package_state:
            return []
        return [
            {"package_name": ps.package_name}
            for ps in intel.rhsa.package_state
            if ps.package_name
        ]

    @staticmethod
    def _parse_fix_entry(entry) -> dict | None:
        """Parse one affected_release entry (dict or object with ``package``).

        Returns a dict with keys nevra, name, version, release, or ``None``
        when the entry has no package string or it does not match
        ``_RPM_NEVRA_RE``.
        """
        raw = entry.get("package") if isinstance(entry, dict) else getattr(entry, "package", None)
        if not raw:
            return None
        m = _RPM_NEVRA_RE.match(raw)
        if not m:
            return None
        return {
            "nevra": raw,
            "name": m.group(1),
            "version": m.group(3),
            "release": _strip_arch(m.group(4)),
        }

    @staticmethod
    def _extract_fixed_rpms(intel: CveIntel) -> list[dict]:
        """Extract all fix entries from RHSA affected_release.

        Returns a list of dicts with keys: nevra, name, version, release.
        Names containing ``/`` are skipped and only the first entry per
        package name is kept.
        """
        if intel.rhsa is None or not hasattr(intel.rhsa, "affected_release"):
            return []
        releases = intel.rhsa.affected_release
        if not releases:
            return []
        results: list[dict] = []
        seen: set[str] = set()
        for entry in releases:
            parsed = PackageIdentifier._parse_fix_entry(entry)
            if parsed is None:
                continue
            name = parsed["name"]
            if "/" in name or name in seen:
                continue
            seen.add(name)
            results.append(parsed)
        return results

    @staticmethod
    def _extract_fix_info(intel: CveIntel | None, resolved_name: str) -> dict:
        """Extract fix NVR from RHSA affected_release for the resolved package.

        Returns a dict with keys nevra, name, version, release when a matching
        fix entry is found; empty dict otherwise. The name comparison is
        case-insensitive.
        """
        if intel is None or intel.rhsa is None or not hasattr(intel.rhsa, "affected_release"):
            return {}
        releases = intel.rhsa.affected_release
        if not releases:
            return {}
        wanted = resolved_name.lower()
        for entry in releases:
            parsed = PackageIdentifier._parse_fix_entry(entry)
            if parsed is not None and parsed["name"].lower() == wanted:
                return parsed
        return {}
a/src/vuln_analysis/configs/config-http-openai.yml +++ b/src/vuln_analysis/configs/config-http-openai.yml @@ -162,6 +162,7 @@ functions: probe_log_path: .cache/am_cache/checker/probe_results.jsonl cve_package_code_agent: _type: cve_package_code_agent + llm_name: cve_agent_executor_llm base_checker_dir: .cache/am_cache/checker health_check: _type: health_check diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py new file mode 100644 index 000000000..8fcf1b0e5 --- /dev/null +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -0,0 +1,312 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Graph definitions for the L1 Package Code Agent (Identify -> Locate -> Verify). + +Houses the LangGraph state schema, reflection-pattern schemas, the reusable +``build_reflection_subgraph`` factory, and per-node prompt templates. 
+""" + +from __future__ import annotations + +import logging +import typing +import uuid +from typing import Annotated, Literal + +from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage +from langgraph.graph import END, START, StateGraph +from langgraph.graph.message import add_messages +from langgraph.prebuilt import ToolNode +from pydantic import BaseModel, Field +from typing_extensions import TypedDict + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Graph state +# --------------------------------------------------------------------------- + + +class CodeAgentState(TypedDict): + """LangGraph state for the Identify -> Locate -> Verify graph.""" + messages: Annotated[list[BaseMessage], add_messages] + + +# --------------------------------------------------------------------------- +# Reflection schemas +# --------------------------------------------------------------------------- + + +class ReflectionBase(BaseModel): + """Base schema that all reflection reports must inherit from. + + Provides the contract the conditional edge in ``build_reflection_subgraph`` + relies on -- subclasses add phase-specific fields on top. + """ + instructions: str = Field( + description="Guidance to the generator for the next iteration.") + is_sufficient: bool = Field( + description="True if results are good enough to proceed.") + + +class IdentifyKeywords(BaseModel): + """Generator output: candidate keywords extracted from one intel description.""" + keywords: list[str] = Field( + description=( + "Keywords to search for the vulnerability: " + "function names, symbols, file patterns, etc." + )) + reasoning: str = Field( + description=( + "Why these keywords were chosen, or " + "'no new keywords found' if prior report covered everything." 
+ )) + + +class Keyword(BaseModel): + """Single keyword with reflector-assigned classification and confidence.""" + term: str + keyword_type: Literal["function", "variable", "file", "symbol", "concept"] + confidence: float = Field(default=0.0, ge=0.0, le=1.0) + + +class ReflectionReport(ReflectionBase): + """Reflector output for the Identify phase.""" + approved: list[Keyword] = Field( + description="Keywords that found relevant hits in the source code.") + rejected: list[Keyword] = Field( + description="Keywords with no hits or irrelevant results.") + + +# --------------------------------------------------------------------------- +# Reflection subgraph factory +# --------------------------------------------------------------------------- + + +def build_reflection_subgraph( + *, + name: str, + generator_llm, + reflector_llm, + generate_prompt: str, + reflect_prompt: str, + output_schema: type[BaseModel], + report_schema: type[ReflectionBase], + tools: list[typing.Any] | None = None, + max_reflections: int = 2, +): + """Build a LangGraph subgraph implementing a generate-reflect loop. + + The reflector uses ``ToolNode`` (matching the pattern in + ``cve_agent._create_graph_agent``) so that tool invocation goes through + LangGraph's standard ``AIMessage.tool_calls`` -> ``ToolMessage`` flow. + + Parameters + ---------- + name: + Prefix for node names (e.g. ``"identify"``). + generator_llm: + LangChain LLM for the generate node. + reflector_llm: + LangChain LLM for the reflect node (can be a cheaper/faster model). + generate_prompt: + System prompt for the generator. + reflect_prompt: + System prompt for the reflector. + output_schema: + Pydantic model the generator must return (e.g. ``IdentifyKeywords``). + Used with ``generator_llm.with_structured_output()``. + report_schema: + Must inherit from ``ReflectionBase``. Pydantic model the reflector + must return (e.g. ``ReflectionReport``). + Used with ``reflector_llm.with_structured_output()``. 
+ tools: + Optional LangChain tools (from ``builder.get_tools()``) available to + the reflector for grounded verification via ``ToolNode``. + max_reflections: + Max retry cycles before forcing acceptance. + + Returns + ------- + CompiledStateGraph + A compiled LangGraph that can be invoked with a ``CodeAgentState``. + """ + gen_llm = generator_llm.with_structured_output(output_schema) + ref_llm = reflector_llm.with_structured_output(report_schema) + + gen_node_name = f"{name}_generate" + ref_prepare_name = f"{name}_reflect_prepare" + ref_tool_name = f"{name}_reflect_tool" + ref_synthesize_name = f"{name}_reflect_synthesize" + + tool_node = ToolNode(tools, handle_tool_errors=True) if tools else None + + async def generate(state: CodeAgentState) -> dict: + messages = [SystemMessage(content=generate_prompt)] + list(state["messages"]) + result = await gen_llm.ainvoke(messages) + return {"messages": [AIMessage(content=result.model_dump_json())]} + + async def reflect_prepare(state: CodeAgentState) -> dict: + """Build tool_calls for each keyword so ToolNode can execute them.""" + last_msg = state["messages"][-1] + last_gen_output = last_msg.content if isinstance(last_msg.content, str) else "" + + if not tools: + return {"messages": [AIMessage(content=last_gen_output)]} + + tool_calls = [] + try: + gen_data = output_schema.model_validate_json(last_gen_output) + keywords = getattr(gen_data, "keywords", []) + except Exception: + keywords = [] + + for kw in keywords: + for tool in tools: + tool_calls.append({ + "name": tool.name, + "args": {"query": kw} if not isinstance(kw, dict) else kw, + "id": str(uuid.uuid4()), + }) + + if not tool_calls: + return {"messages": [AIMessage(content=last_gen_output)]} + + return {"messages": [AIMessage(content=last_gen_output, tool_calls=tool_calls)]} + + async def reflect_synthesize(state: CodeAgentState) -> dict: + """Collect tool results and ask the reflector LLM to produce a report.""" + gen_output = "" + tool_results: list[str] = [] + 
+ for msg in state["messages"]: + if isinstance(msg, AIMessage) and isinstance(msg.content, str): + try: + output_schema.model_validate_json(msg.content) + gen_output = msg.content + except Exception: + pass + if hasattr(msg, "name") and hasattr(msg, "content"): + content = msg.content if isinstance(msg.content, str) else str(msg.content) + tool_name = getattr(msg, "name", "unknown") + tool_results.append(f"tool={tool_name}: {content}") + + tool_context = "\n".join(tool_results) if tool_results else "No tool results." + + messages = [ + SystemMessage(content=reflect_prompt), + HumanMessage(content=( + f"Generator output:\n{gen_output}\n\n" + f"Tool verification results:\n{tool_context}" + )), + ] + report = await ref_llm.ainvoke(messages) + return {"messages": [AIMessage(content=report.model_dump_json())]} + + def should_continue(state: CodeAgentState) -> str: + reflection_count = sum( + 1 for m in state["messages"] + if isinstance(m, AIMessage) + and isinstance(m.content, str) + and _is_reflection_report(m.content, report_schema) + ) + if reflection_count >= max_reflections: + logger.info("%s: max reflections (%d) reached, accepting", name, max_reflections) + return "accept" + + last = state["messages"][-1] + last_content = last.content if isinstance(last.content, str) else "" + try: + report = report_schema.model_validate_json(last_content) + if report.is_sufficient: + logger.info("%s: reflector approved, accepting", name) + return "accept" + except Exception: + pass + + logger.info("%s: reflector wants retry (%d/%d)", name, reflection_count, max_reflections) + return "retry" + + flow = StateGraph(CodeAgentState) + flow.add_node(gen_node_name, generate) + flow.add_node(ref_prepare_name, reflect_prepare) + flow.add_node(ref_synthesize_name, reflect_synthesize) + + flow.add_edge(START, gen_node_name) + flow.add_edge(gen_node_name, ref_prepare_name) + + if tool_node: + flow.add_node(ref_tool_name, tool_node) + flow.add_edge(ref_prepare_name, ref_tool_name) + 
flow.add_edge(ref_tool_name, ref_synthesize_name) + else: + flow.add_edge(ref_prepare_name, ref_synthesize_name) + + flow.add_conditional_edges( + ref_synthesize_name, + should_continue, + {"retry": gen_node_name, "accept": END}, + ) + + return flow.compile() + + +def _is_reflection_report(content: str, schema: type[ReflectionBase]) -> bool: + """Check whether a message's content is a valid reflection report.""" + try: + schema.model_validate_json(content) + return True + except Exception: + return False + + +# --------------------------------------------------------------------------- +# Prompt templates +# --------------------------------------------------------------------------- + +IDENTIFY_SYSTEM_PROMPT = ( + "You are a vulnerability analyst performing the **Identify** phase.\n\n" + "You will receive CVE intelligence (descriptions, advisories, affected packages) " + "in the KNOWLEDGE block. Your task is to extract **search keywords** that can be " + "used to locate the vulnerable code in a source tree.\n\n" + "KEYWORD EXTRACTION RULES:\n" + "1. Extract specific function names, method names, and symbols mentioned in the CVE description.\n" + "2. Extract package/module names that contain the vulnerable code.\n" + "3. Extract file name patterns if mentioned (e.g. 'parse.c', 'auth.go').\n" + "4. Prefer exact identifiers over generic terms. 'PQescapeLiteral' is a good keyword; " + "'SQL injection' is not.\n" + "5. If the description mentions a specific API, endpoint, or configuration key, include it.\n" + "6. Include both the short function name and the fully-qualified name if available " + "(e.g. both 'Parse' and 'encoding/xml.Decoder.Parse').\n" + "7. Do NOT include CVE IDs, GHSA IDs, or version numbers as keywords.\n\n" + "Output a structured list of keywords with reasoning." 
+) + +LOCATE_SYSTEM_PROMPT = ( + "You are a vulnerability analyst performing the **Locate** phase.\n" + "Using the components identified in the previous phase, search the\n" + "source code to map each vulnerable function to its concrete file path\n" + "and call sites. Determine whether the vulnerable code is reachable\n" + "from an entry point in the target package." +) + +VERIFY_SYSTEM_PROMPT = ( + "You are a vulnerability analyst performing the **Verify** phase.\n" + "Review the located code paths and confirm whether the vulnerability\n" + "is exploitable in the target package. Produce a final verdict with\n" + "supporting evidence (code snippets, call chains, patch applicability)." +) diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index 4f5e3817f..381e4b278 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -20,7 +20,27 @@ from aiq.data_models.function import FunctionBaseConfig from pydantic import Field -from exploit_iq_commons.logging.loggers_factory import LoggingFactory +from exploit_iq_commons.logging.loggers_factory import LoggingFactory, trace_id + +from langgraph.graph import StateGraph, START, END +from langchain_core.messages import HumanMessage, AIMessage, SystemMessage + +from nat.builder.context import Context +from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput +from vuln_analysis.data_models.output import ( + AgentMorpheusEngineOutput, + AgentMorpheusOutput, + ChecklistItemOutput, + JustificationOutput, + OutputPayload, +) +from vuln_analysis.functions.code_agent_graph_defs import ( + CodeAgentState, + IDENTIFY_SYSTEM_PROMPT, + LOCATE_SYSTEM_PROMPT, + VERIFY_SYSTEM_PROMPT, + IdentifyKeywords, +) logger = LoggingFactory.get_agent_logger(__name__) @@ -36,24 +56,157 @@ class CVEPackageCodeAgentConfig(FunctionBaseConfig, name="cve_package_code_agent 
default=".cache/am_cache/checker", description="Root directory for checker-specific artifacts.", ) + llm_name: str = Field(description="The LLM model to use with the L1 code agent.") + tool_names: list[str] = Field(default=[], description="The list of tools to provide to L1 code agent") + max_iterations: int = Field(default=10, description="The maximum number of iterations for the agent.") + + +async def create_graph_code_agent(config: CVEPackageCodeAgentConfig, builder: Builder, state: AgentMorpheusEngineInput, tracer): + + llm = await builder.get_llm(llm_name=config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN) + vuln_id = state.input.scan.vulns[0].vuln_id + ctx = state.info.checker_context + intel = state.info.intel + + async def identify_node(state: CodeAgentState) -> dict: + logger.info("identify_node: starting") + gen_llm = llm.with_structured_output(IdentifyKeywords) + aIntel = intel[0] + + descriptions: list[tuple[str, str]] = [] + if aIntel.ghsa: + cve_text = aIntel.ghsa.description or aIntel.ghsa.summary or "" + if cve_text: + descriptions.append(("ghsa", cve_text)) + if aIntel.ubuntu and aIntel.ubuntu.description: + descriptions.append(("ubuntu", aIntel.ubuntu.description)) + + all_keywords: list[str] = [] + all_reasoning: list[str] = [] + + for source_name, desc_text in descriptions: + context_block = f"CVE: {vuln_id}\nKNOWLEDGE:\n{desc_text}" + if all_keywords: + context_block += ( + "\n\nALREADY IDENTIFIED KEYWORDS (do not repeat): " + + ", ".join(all_keywords) + ) + messages = [ + SystemMessage(content=IDENTIFY_SYSTEM_PROMPT), + SystemMessage(content=context_block), + ] + with tracer.push_active_function( + f"identify_{source_name}", input_data={"source": source_name} + ) as span: + response: IdentifyKeywords = await gen_llm.ainvoke(messages) + span.set_output({ + "keywords": response.keywords, + "reasoning": response.reasoning, + }) + logger.info("identify_node [%s]: %s", source_name, response.model_dump_json()) + + 
all_keywords.extend(response.keywords) + all_reasoning.append(f"[{source_name}] {response.reasoning}") + + seen: set[str] = set() + unique_keywords: list[str] = [] + for kw in all_keywords: + kw_lower = kw.lower() + if kw_lower not in seen: + seen.add(kw_lower) + unique_keywords.append(kw) + + final = IdentifyKeywords( + keywords=unique_keywords, + reasoning=" | ".join(all_reasoning), + ) + logger.info("identify_node final: %s", final.model_dump_json()) + return {"messages": [AIMessage(content=final.model_dump_json())]} + + async def locate_node(state: CodeAgentState) -> dict: + logger.info("locate_node: starting") + with tracer.push_active_function( + "locate", input_data={"messages_count": len(state["messages"])} + ): + result = "Locate phase complete: vulnerable code paths mapped." + logger.info("locate_node: %s", result) + return {"messages": [AIMessage(content=result)]} + + async def verify_node(state: CodeAgentState) -> dict: + logger.info("verify_node: starting") + with tracer.push_active_function( + "verify", input_data={"messages_count": len(state["messages"])} + ): + result = "Verify phase complete: exploit reachability confirmed." 
+ logger.info("verify_node: %s", result) + return {"messages": [AIMessage(content=result)]} + + flow = StateGraph(CodeAgentState) + flow.add_node("identify", identify_node) + flow.add_node("locate", locate_node) + flow.add_node("verify", verify_node) + + flow.add_edge(START, "identify") + flow.add_edge("identify", "locate") + flow.add_edge("locate", "verify") + flow.add_edge("verify", END) + + app = flow.compile() + app.get_graph().draw_mermaid_png(output_file_path="code_agent_flow.png") + return app + + +def _build_stub_analysis( + message: AgentMorpheusEngineInput, result: dict, +) -> list[AgentMorpheusEngineOutput]: + last_msg = result["messages"][-1].content if result.get("messages") else "no result" + + return [ + AgentMorpheusEngineOutput( + vuln_id=intel.vuln_id, + checklist=[ + ChecklistItemOutput( + input="L1 stub checklist item", + response=last_msg, + ), + ], + summary=f"Stub summary for {intel.vuln_id}: {last_msg}", + justification=JustificationOutput( + label="code_not_present", + reason="Stub L1 agent -- no real analysis performed yet.", + status="UNKNOWN", + ), + intel_score=0, + cvss=None, + ) + for intel in (message.info.intel if message.info and message.info.intel else []) + ] @register_function(config_type=CVEPackageCodeAgentConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) async def cve_package_code_agent(config: CVEPackageCodeAgentConfig, builder: Builder): - from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput - from vuln_analysis.data_models.output import AgentMorpheusOutput - from vuln_analysis.data_models.output import OutputPayload async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: + trace_id.set(message.input.scan.id) + tracer = Context.get() logger.info("package_code_agent: starting L1 investigation") - # TODO: implement L1 investigation phases (Identify, Locate, Verify) + l1_agent_graph = await create_graph_code_agent(config, builder, message, tracer) + initial_state: CodeAgentState = 
{ + "messages": [HumanMessage(content="Begin L1 CVE investigation")], + } + + with tracer.push_active_function("l1_agent_graph", input_data=initial_state["messages"][0].content): + result = await l1_agent_graph.ainvoke(initial_state) + + logger.info("package_code_agent: L1 investigation finished") + with tracer.push_active_function("agent_finish", input_data={"verdict": "not guilty"}): + pass - logger.info("package_code_agent: finished (stub -- no investigation logic yet)") return AgentMorpheusOutput( input=message.input, info=message.info, - output=OutputPayload(analysis=[], vex=None), + output=OutputPayload(analysis=_build_stub_analysis(message, result), vex=None), ) yield FunctionInfo.from_fn( From 7024b9b8b6a9f3932e2856f2eafe4dba544c1e08 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Sun, 19 Apr 2026 15:30:13 +0000 Subject: [PATCH 03/46] Identiy sub graph flow --- .../configs/config-http-openai.yml | 1 + .../functions/code_agent_graph_defs.py | 361 ++++++++---------- .../functions/cve_package_code_agent.py | 76 ++-- 3 files changed, 193 insertions(+), 245 deletions(-) diff --git a/src/vuln_analysis/configs/config-http-openai.yml b/src/vuln_analysis/configs/config-http-openai.yml index 6a79fe33e..99cd91152 100644 --- a/src/vuln_analysis/configs/config-http-openai.yml +++ b/src/vuln_analysis/configs/config-http-openai.yml @@ -164,6 +164,7 @@ functions: _type: cve_package_code_agent llm_name: cve_agent_executor_llm base_checker_dir: .cache/am_cache/checker + base_code_index_dir: .cache/am_cache/code_index health_check: _type: health_check diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index 8fcf1b0e5..acf698799 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -16,23 +16,18 @@ """ Graph definitions for the L1 Package Code Agent (Identify -> Locate -> Verify). 
-Houses the LangGraph state schema, reflection-pattern schemas, the reusable -``build_reflection_subgraph`` factory, and per-node prompt templates. +Houses the LangGraph state schema, structured-output schemas, the +``build_identify_subgraph`` pipeline, and per-node prompt templates. """ from __future__ import annotations import logging -import typing -import uuid -from typing import Annotated, Literal - -from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage -from langgraph.graph import END, START, StateGraph -from langgraph.graph.message import add_messages -from langgraph.prebuilt import ToolNode +from typing import Literal, NotRequired + +from langchain_core.messages import HumanMessage, SystemMessage +from langgraph.graph import MessagesState from pydantic import BaseModel, Field -from typing_extensions import TypedDict logger = logging.getLogger(__name__) @@ -41,9 +36,9 @@ # --------------------------------------------------------------------------- -class CodeAgentState(TypedDict): +class CodeAgentState(MessagesState): """LangGraph state for the Identify -> Locate -> Verify graph.""" - messages: Annotated[list[BaseMessage], add_messages] + identify_report: NotRequired[IdentifyReport | None] # --------------------------------------------------------------------------- @@ -52,10 +47,9 @@ class CodeAgentState(TypedDict): class ReflectionBase(BaseModel): - """Base schema that all reflection reports must inherit from. + """Base schema for phase reports. - Provides the contract the conditional edge in ``build_reflection_subgraph`` - relies on -- subclasses add phase-specific fields on top. + Subclasses add phase-specific fields on top. 
""" instructions: str = Field( description="Guidance to the generator for the next iteration.") @@ -84,196 +78,23 @@ class Keyword(BaseModel): confidence: float = Field(default=0.0, ge=0.0, le=1.0) -class ReflectionReport(ReflectionBase): - """Reflector output for the Identify phase.""" +class KeywordJudgment(BaseModel): + """LLM output for judging a single keyword against its search results.""" + term: str + keyword_type: Literal["function", "variable", "file", "symbol", "concept"] + confidence: float = Field(default=0.0, ge=0.0, le=1.0) + approved: bool = Field(description="True if search results show code related to the vulnerability") + reasoning: str = Field(description="Brief explanation of the decision") + + +class IdentifyReport(ReflectionBase): + """Aggregated report for the Identify phase.""" approved: list[Keyword] = Field( description="Keywords that found relevant hits in the source code.") rejected: list[Keyword] = Field( description="Keywords with no hits or irrelevant results.") -# --------------------------------------------------------------------------- -# Reflection subgraph factory -# --------------------------------------------------------------------------- - - -def build_reflection_subgraph( - *, - name: str, - generator_llm, - reflector_llm, - generate_prompt: str, - reflect_prompt: str, - output_schema: type[BaseModel], - report_schema: type[ReflectionBase], - tools: list[typing.Any] | None = None, - max_reflections: int = 2, -): - """Build a LangGraph subgraph implementing a generate-reflect loop. - - The reflector uses ``ToolNode`` (matching the pattern in - ``cve_agent._create_graph_agent``) so that tool invocation goes through - LangGraph's standard ``AIMessage.tool_calls`` -> ``ToolMessage`` flow. - - Parameters - ---------- - name: - Prefix for node names (e.g. ``"identify"``). - generator_llm: - LangChain LLM for the generate node. - reflector_llm: - LangChain LLM for the reflect node (can be a cheaper/faster model). 
- generate_prompt: - System prompt for the generator. - reflect_prompt: - System prompt for the reflector. - output_schema: - Pydantic model the generator must return (e.g. ``IdentifyKeywords``). - Used with ``generator_llm.with_structured_output()``. - report_schema: - Must inherit from ``ReflectionBase``. Pydantic model the reflector - must return (e.g. ``ReflectionReport``). - Used with ``reflector_llm.with_structured_output()``. - tools: - Optional LangChain tools (from ``builder.get_tools()``) available to - the reflector for grounded verification via ``ToolNode``. - max_reflections: - Max retry cycles before forcing acceptance. - - Returns - ------- - CompiledStateGraph - A compiled LangGraph that can be invoked with a ``CodeAgentState``. - """ - gen_llm = generator_llm.with_structured_output(output_schema) - ref_llm = reflector_llm.with_structured_output(report_schema) - - gen_node_name = f"{name}_generate" - ref_prepare_name = f"{name}_reflect_prepare" - ref_tool_name = f"{name}_reflect_tool" - ref_synthesize_name = f"{name}_reflect_synthesize" - - tool_node = ToolNode(tools, handle_tool_errors=True) if tools else None - - async def generate(state: CodeAgentState) -> dict: - messages = [SystemMessage(content=generate_prompt)] + list(state["messages"]) - result = await gen_llm.ainvoke(messages) - return {"messages": [AIMessage(content=result.model_dump_json())]} - - async def reflect_prepare(state: CodeAgentState) -> dict: - """Build tool_calls for each keyword so ToolNode can execute them.""" - last_msg = state["messages"][-1] - last_gen_output = last_msg.content if isinstance(last_msg.content, str) else "" - - if not tools: - return {"messages": [AIMessage(content=last_gen_output)]} - - tool_calls = [] - try: - gen_data = output_schema.model_validate_json(last_gen_output) - keywords = getattr(gen_data, "keywords", []) - except Exception: - keywords = [] - - for kw in keywords: - for tool in tools: - tool_calls.append({ - "name": tool.name, - "args": 
{"query": kw} if not isinstance(kw, dict) else kw, - "id": str(uuid.uuid4()), - }) - - if not tool_calls: - return {"messages": [AIMessage(content=last_gen_output)]} - - return {"messages": [AIMessage(content=last_gen_output, tool_calls=tool_calls)]} - - async def reflect_synthesize(state: CodeAgentState) -> dict: - """Collect tool results and ask the reflector LLM to produce a report.""" - gen_output = "" - tool_results: list[str] = [] - - for msg in state["messages"]: - if isinstance(msg, AIMessage) and isinstance(msg.content, str): - try: - output_schema.model_validate_json(msg.content) - gen_output = msg.content - except Exception: - pass - if hasattr(msg, "name") and hasattr(msg, "content"): - content = msg.content if isinstance(msg.content, str) else str(msg.content) - tool_name = getattr(msg, "name", "unknown") - tool_results.append(f"tool={tool_name}: {content}") - - tool_context = "\n".join(tool_results) if tool_results else "No tool results." - - messages = [ - SystemMessage(content=reflect_prompt), - HumanMessage(content=( - f"Generator output:\n{gen_output}\n\n" - f"Tool verification results:\n{tool_context}" - )), - ] - report = await ref_llm.ainvoke(messages) - return {"messages": [AIMessage(content=report.model_dump_json())]} - - def should_continue(state: CodeAgentState) -> str: - reflection_count = sum( - 1 for m in state["messages"] - if isinstance(m, AIMessage) - and isinstance(m.content, str) - and _is_reflection_report(m.content, report_schema) - ) - if reflection_count >= max_reflections: - logger.info("%s: max reflections (%d) reached, accepting", name, max_reflections) - return "accept" - - last = state["messages"][-1] - last_content = last.content if isinstance(last.content, str) else "" - try: - report = report_schema.model_validate_json(last_content) - if report.is_sufficient: - logger.info("%s: reflector approved, accepting", name) - return "accept" - except Exception: - pass - - logger.info("%s: reflector wants retry (%d/%d)", name, 
reflection_count, max_reflections) - return "retry" - - flow = StateGraph(CodeAgentState) - flow.add_node(gen_node_name, generate) - flow.add_node(ref_prepare_name, reflect_prepare) - flow.add_node(ref_synthesize_name, reflect_synthesize) - - flow.add_edge(START, gen_node_name) - flow.add_edge(gen_node_name, ref_prepare_name) - - if tool_node: - flow.add_node(ref_tool_name, tool_node) - flow.add_edge(ref_prepare_name, ref_tool_name) - flow.add_edge(ref_tool_name, ref_synthesize_name) - else: - flow.add_edge(ref_prepare_name, ref_synthesize_name) - - flow.add_conditional_edges( - ref_synthesize_name, - should_continue, - {"retry": gen_node_name, "accept": END}, - ) - - return flow.compile() - - -def _is_reflection_report(content: str, schema: type[ReflectionBase]) -> bool: - """Check whether a message's content is a valid reflection report.""" - try: - schema.model_validate_json(content) - return True - except Exception: - return False - - # --------------------------------------------------------------------------- # Prompt templates # --------------------------------------------------------------------------- @@ -296,6 +117,19 @@ def _is_reflection_report(content: str, schema: type[ReflectionBase]) -> bool: "Output a structured list of keywords with reasoning." ) +IDENTIFY_REFLECT_PROMPT = ( + "You are reviewing search results for a single keyword from the **Identify** phase.\n\n" + "You will receive:\n" + "1. The CVE description for context\n" + "2. A keyword extracted from that CVE description\n" + "3. 
Lexical search results from the source code for that keyword\n\n" + "Classify the keyword:\n" + "- keyword_type: one of function, variable, file, symbol, concept\n" + "- confidence: 0.0-1.0 how confident this keyword points to vulnerable code\n" + "- approved: True if the search results show code related to the vulnerability\n" + "- reasoning: brief explanation" +) + LOCATE_SYSTEM_PROMPT = ( "You are a vulnerability analyst performing the **Locate** phase.\n" "Using the components identified in the previous phase, search the\n" @@ -310,3 +144,130 @@ def _is_reflection_report(content: str, schema: type[ReflectionBase]) -> bool: "is exploitable in the target package. Produce a final verdict with\n" "supporting evidence (code snippets, call chains, patch applicability)." ) + + +# --------------------------------------------------------------------------- +# Identify subgraph pipeline +# --------------------------------------------------------------------------- + + +async def build_identify_subgraph( + *, + llm, + descriptions: list[tuple[str, str]], + vuln_id: str, + lexical_search_fn, + tracer, + identify_prompt: str = IDENTIFY_SYSTEM_PROMPT, + reflect_prompt: str = IDENTIFY_REFLECT_PROMPT, +) -> IdentifyReport: + """Extract keywords from CVE descriptions, search for each, and judge relevance. + + Orchestrates a linear pipeline (no LangGraph subgraph): + A) One LLM call per intel source to extract ``IdentifyKeywords``. + B) One lexical search + one LLM judge call per unique keyword. + C) Aggregate judgments into an ``IdentifyReport`` in Python. + + Parameters + ---------- + llm: + LangChain LLM used for both keyword extraction and per-keyword judgment. + descriptions: + ``(source_name, text)`` pairs built from CVE intel (e.g. GHSA, Ubuntu). + vuln_id: + CVE identifier, included in context blocks sent to the LLM. + lexical_search_fn: + ``async (str) -> list[dict]`` -- calls the Tantivy index. 
+ tracer: + Request-scoped ``nat.builder.context.Context`` for span creation. + identify_prompt: + System prompt for keyword extraction. + reflect_prompt: + System prompt for per-keyword judgment. + """ + MAX_CONTENT_CHARS = 500 + + # -- Step A: extract keywords per source ----------------------------------- + gen_llm = llm.with_structured_output(IdentifyKeywords) + all_keywords: list[str] = [] + all_reasoning: list[str] = [] + + for source_name, desc_text in descriptions: + context_block = f"CVE: {vuln_id}\nKNOWLEDGE:\n{desc_text}" + if all_keywords: + context_block += ( + "\n\nALREADY IDENTIFIED KEYWORDS (do not repeat): " + + ", ".join(all_keywords) + ) + messages = [ + SystemMessage(content=identify_prompt), + SystemMessage(content=context_block), + ] + with tracer.push_active_function( + f"identify_{source_name}", input_data={"source": source_name} + ) as span: + response: IdentifyKeywords = await gen_llm.ainvoke(messages) + span.set_output({"keywords": response.keywords, "reasoning": response.reasoning}) + logger.info("build_identify_subgraph [%s]: %s", source_name, response.model_dump_json()) + all_keywords.extend(response.keywords) + all_reasoning.append(f"[{source_name}] {response.reasoning}") + + seen: set[str] = set() + unique_keywords = [ + kw for kw in all_keywords + if not (kw.lower() in seen or seen.add(kw.lower())) + ] + + # -- Step B: per-keyword search + judge ------------------------------------ + judge_llm = llm.with_structured_output(KeywordJudgment) + judgments: list[KeywordJudgment] = [] + cve_context = "\n\n".join(f"[{src}] {txt}" for src, txt in descriptions) + + for kw in unique_keywords: + with tracer.push_active_function(f"judge_{kw}", input_data={"keyword": kw}) as span: + try: + hits = await lexical_search_fn(kw) + except Exception as e: + logger.warning("lexical search failed for '%s': %s", kw, e) + hits = [] + + if isinstance(hits, str): + hits = [] + + results_text = ( + "\n\n".join( + f"File: 
{h['source']}\n{h['content'][:MAX_CONTENT_CHARS]}" + for h in hits + ) + if hits + else "No results found." + ) + + messages = [ + SystemMessage(content=reflect_prompt), + HumanMessage(content=( + f"CVE: {vuln_id}\n{cve_context}\n\n" + f"Keyword: {kw}\n\n" + f"Search results:\n{results_text}" + )), + ] + judgment: KeywordJudgment = await judge_llm.ainvoke(messages) + span.set_output({"approved": judgment.approved, "confidence": judgment.confidence}) + judgments.append(judgment) + + # -- Step C: aggregate into IdentifyReport --------------------------------- + approved: list[Keyword] = [] + rejected: list[Keyword] = [] + for j in judgments: + kw_obj = Keyword(term=j.term, keyword_type=j.keyword_type, confidence=j.confidence) + if j.approved: + approved.append(kw_obj) + else: + rejected.append(kw_obj) + + return IdentifyReport( + approved=approved, + rejected=rejected, + is_sufficient=len(approved) > 0, + instructions="" if approved else "No keywords matched vulnerable code. Consider broader terms.", + ) diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index 381e4b278..272d514a8 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -23,7 +23,7 @@ from exploit_iq_commons.logging.loggers_factory import LoggingFactory, trace_id from langgraph.graph import StateGraph, START, END -from langchain_core.messages import HumanMessage, AIMessage, SystemMessage +from langchain_core.messages import HumanMessage, AIMessage from nat.builder.context import Context from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput @@ -36,11 +36,12 @@ ) from vuln_analysis.functions.code_agent_graph_defs import ( CodeAgentState, - IDENTIFY_SYSTEM_PROMPT, LOCATE_SYSTEM_PROMPT, VERIFY_SYSTEM_PROMPT, - IdentifyKeywords, + IdentifyReport, + build_identify_subgraph, ) +from vuln_analysis.utils.full_text_search import FullTextSearch 
logger = LoggingFactory.get_agent_logger(__name__) @@ -56,6 +57,10 @@ class CVEPackageCodeAgentConfig(FunctionBaseConfig, name="cve_package_code_agent default=".cache/am_cache/checker", description="Root directory for checker-specific artifacts.", ) + base_code_index_dir: str = Field( + default=".cache/am_cache/code_index", + description="Base directory for Tantivy code index storage.", + ) llm_name: str = Field(description="The LLM model to use with the L1 code agent.") tool_names: list[str] = Field(default=[], description="The list of tools to provide to L1 code agent") max_iterations: int = Field(default=10, description="The maximum number of iterations for the agent.") @@ -67,10 +72,16 @@ async def create_graph_code_agent(config: CVEPackageCodeAgentConfig, builder: Bu vuln_id = state.input.scan.vulns[0].vuln_id ctx = state.info.checker_context intel = state.info.intel + source_key = ctx.source_key + index_path = FullTextSearch.get_index_directory(config.base_code_index_dir, source_key) + fts = FullTextSearch(cache_path=str(index_path)) + if fts.is_empty(): + raise ValueError(f"Invalid code index at: {index_path}, index is empty") + async def lexical_search_fn(query: str) -> list: + return fts.search_index(query, top_k=5) async def identify_node(state: CodeAgentState) -> dict: logger.info("identify_node: starting") - gen_llm = llm.with_structured_output(IdentifyKeywords) aIntel = intel[0] descriptions: list[tuple[str, str]] = [] @@ -81,47 +92,22 @@ async def identify_node(state: CodeAgentState) -> dict: if aIntel.ubuntu and aIntel.ubuntu.description: descriptions.append(("ubuntu", aIntel.ubuntu.description)) - all_keywords: list[str] = [] - all_reasoning: list[str] = [] - - for source_name, desc_text in descriptions: - context_block = f"CVE: {vuln_id}\nKNOWLEDGE:\n{desc_text}" - if all_keywords: - context_block += ( - "\n\nALREADY IDENTIFIED KEYWORDS (do not repeat): " - + ", ".join(all_keywords) - ) - messages = [ - 
SystemMessage(content=IDENTIFY_SYSTEM_PROMPT), - SystemMessage(content=context_block), - ] - with tracer.push_active_function( - f"identify_{source_name}", input_data={"source": source_name} - ) as span: - response: IdentifyKeywords = await gen_llm.ainvoke(messages) - span.set_output({ - "keywords": response.keywords, - "reasoning": response.reasoning, - }) - logger.info("identify_node [%s]: %s", source_name, response.model_dump_json()) - - all_keywords.extend(response.keywords) - all_reasoning.append(f"[{source_name}] {response.reasoning}") - - seen: set[str] = set() - unique_keywords: list[str] = [] - for kw in all_keywords: - kw_lower = kw.lower() - if kw_lower not in seen: - seen.add(kw_lower) - unique_keywords.append(kw) - - final = IdentifyKeywords( - keywords=unique_keywords, - reasoning=" | ".join(all_reasoning), - ) - logger.info("identify_node final: %s", final.model_dump_json()) - return {"messages": [AIMessage(content=final.model_dump_json())]} + with tracer.push_active_function("identify", input_data={"vuln_id": vuln_id}) as span: + report: IdentifyReport = await build_identify_subgraph( + llm=llm, + descriptions=descriptions, + vuln_id=vuln_id, + lexical_search_fn=lexical_search_fn, + tracer=tracer, + ) + span.set_output({ + "approved": [kw.term for kw in report.approved], + "rejected": [kw.term for kw in report.rejected], + "is_sufficient": report.is_sufficient, + }) + + logger.info("identify_node: %s", report.model_dump_json()) + return {"identify_report": report} async def locate_node(state: CodeAgentState) -> dict: logger.info("locate_node: starting") From 95543f078cc637dcd3e6bf058acc773fa9b8fd5d Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Mon, 20 Apr 2026 10:50:00 +0000 Subject: [PATCH 04/46] chunk and parse code file and index it to the lexical search --- .../functions/cve_checker_segmentation.py | 72 ++++++++++++++++--- 1 file changed, 63 insertions(+), 9 deletions(-) diff --git a/src/vuln_analysis/functions/cve_checker_segmentation.py 
b/src/vuln_analysis/functions/cve_checker_segmentation.py index fb4aa4c52..da00a36d9 100644 --- a/src/vuln_analysis/functions/cve_checker_segmentation.py +++ b/src/vuln_analysis/functions/cve_checker_segmentation.py @@ -25,11 +25,61 @@ from pydantic import Field from exploit_iq_commons.logging.loggers_factory import LoggingFactory - +from langchain.docstore.document import Document +from exploit_iq_commons.utils.document_embedding import MultiLanguageRecursiveCharacterTextSplitter,ExtendedLanguageParser +from langchain_community.document_loaders.generic import GenericLoader +from langchain.text_splitter import RecursiveCharacterTextSplitter logger = LoggingFactory.get_agent_logger(__name__) _BUILD_FILE_NAMES = {"Makefile", "GNUmakefile", "configure"} - +LANG_PARSER_EXTENSIONS = {".c", ".h", ".cpp", ".hpp", ".py", ".go", ".java", ".js", ".ts"} +TEXT_FILE_EXTENSIONS = {".spec", ".patch", ".conf", ".cfg", ".sh", ".m4",".ac", ".am", ".in", ".txt", ".md", ".rst"} +class RpmDocumentEmbedding: + def __init__(self, source_dir: Path, chunk_size: int = 800, chunk_overlap: int = 160): + self.source_dir = source_dir + self.lang_splitter = MultiLanguageRecursiveCharacterTextSplitter( + chunk_size=chunk_size, chunk_overlap=chunk_overlap, + ) + self.text_splitter = RecursiveCharacterTextSplitter( + chunk_size=1000, chunk_overlap=200, + ) + + def load_and_chunk_code(self) -> list[Document]: + loader = GenericLoader.from_filesystem( + self.source_dir, + glob="**/*", + suffixes=list(LANG_PARSER_EXTENSIONS), + parser=ExtendedLanguageParser(), + ) + try: + documents = loader.load() + except Exception as e: + logger.warning("LanguageParser failed on %s: %s", self.source_dir, e) + return [] + return self.lang_splitter.split_documents(documents) + + def load_and_chunk_files(self) -> list[Document]: + documents: list[Document] = [] + for root, _, files in os.walk(self.source_dir): + for file in files: + if any(file.endswith(ext) for ext in TEXT_FILE_EXTENSIONS) or file in 
_BUILD_FILE_NAMES: + file_path = os.path.join(root, file) + try: + with open(file_path, "r") as f: + content = f.read() + documents.append(Document(page_content=content, metadata={"source": file_path})) + except Exception as e: + logger.warning("Error reading %s: %s", file_path, e) + continue + return self.text_splitter.split_documents(documents) + + def load_and_chunk_all(self) -> list[Document]: + documents = self.load_and_chunk_code() + documents.extend(self.load_and_chunk_files()) + return documents + + + class CVECheckerSegmentationConfig(FunctionBaseConfig, name="cve_checker_segmentation"): """ @@ -85,13 +135,17 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: return message logger.info("checker_segmentation: indexing source dir %s", source_dir) - fts.add_documents_from_code_path( - str(source_dir), - config.include_extensions, - use_langparser=False, - splitter=True, - no_extension=_BUILD_FILE_NAMES, - ) + document_embedding = RpmDocumentEmbedding(source_dir=source_dir) + documents = document_embedding.load_and_chunk_all() + + #fts.add_documents_from_code_path( + # str(source_dir), + # config.include_extensions, + # use_langparser=False, + # splitter=True, + # no_extension=_BUILD_FILE_NAMES, + #) + fts.add_documents_from_langchain_chunks(documents) elapsed = time.time() - start logger.info("checker_segmentation: indexing completed in %.2fs at %s", elapsed, index_path) From d8ae498449977e1e1ef71877832ea05b205d317a Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Mon, 20 Apr 2026 13:59:53 +0000 Subject: [PATCH 05/46] milestone 1 locate vulnerability place --- pyproject.toml | 1 + .../functions/code_agent_graph_defs.py | 353 +++++++++++++++++- .../functions/cve_package_code_agent.py | 125 ++++++- src/vuln_analysis/tools/brew_downloader.py | 17 +- 4 files changed, 473 insertions(+), 23 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 557941873..c3041e8be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 
+36,7 @@ dependencies = [ "csaf-tool==0.3.2", "jsonschema>=4.0.0,<5.0.0", "koji", + "unidiff>=0.7.5", ] requires-python = ">=3.11,<3.13" description = "NVIDIA AI Blueprint: Vulnerability Analysis for Container Security" diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index acf698799..079817a2f 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -23,11 +23,15 @@ from __future__ import annotations import logging +import shutil +import subprocess +from pathlib import Path from typing import Literal, NotRequired from langchain_core.messages import HumanMessage, SystemMessage from langgraph.graph import MessagesState from pydantic import BaseModel, Field +from unidiff import PatchSet logger = logging.getLogger(__name__) @@ -39,6 +43,7 @@ class CodeAgentState(MessagesState): """LangGraph state for the Identify -> Locate -> Verify graph.""" identify_report: NotRequired[IdentifyReport | None] + locate_report: NotRequired[LocateReport | None] # --------------------------------------------------------------------------- @@ -76,6 +81,10 @@ class Keyword(BaseModel): term: str keyword_type: Literal["function", "variable", "file", "symbol", "concept"] confidence: float = Field(default=0.0, ge=0.0, le=1.0) + file_paths: list[str] = Field( + default_factory=list, + description="Source files where this keyword was found during Identify", + ) class KeywordJudgment(BaseModel): @@ -85,6 +94,10 @@ class KeywordJudgment(BaseModel): confidence: float = Field(default=0.0, ge=0.0, le=1.0) approved: bool = Field(description="True if search results show code related to the vulnerability") reasoning: str = Field(description="Brief explanation of the decision") + file_paths: list[str] = Field( + default_factory=list, + description="Source files where this keyword was found during Identify", + ) class IdentifyReport(ReflectionBase): @@ -95,6 
+108,50 @@ class IdentifyReport(ReflectionBase): description="Keywords with no hits or irrelevant results.") +# --------------------------------------------------------------------------- +# Locate schemas +# --------------------------------------------------------------------------- + + +class DiffHunk(BaseModel): + """A single hunk from a unified diff between target and patched source trees.""" + file_path: str + source_start_line: int = Field(description="Start line in the target (investigated) source") + source_end_line: int = Field(description="End line in the target (investigated) source") + patch_start_line: int = Field(description="Start line in the patched (fixed) source") + patch_end_line: int = Field(description="End line in the patched (fixed) source") + content: str = Field(description="Raw diff hunk text including +/- lines") + + +class FileLocation(BaseModel): + """A source file location where vulnerable code was found.""" + file_path: str + line_number: int | None = None + snippet: str = Field(description="Code context around the match") + matched_keywords: list[str] + source: Literal["diff", "tantivy"] = Field( + description="How this location was discovered") + + +class LocationJudgment(BaseModel): + """LLM output: judge whether a located code region is vulnerability-relevant.""" + file_path: str + relevant: bool = Field( + description="True if this location contains vulnerability-relevant code") + confidence: float = Field(default=0.0, ge=0.0, le=1.0) + reasoning: str = Field(description="Brief explanation of the decision") + + +class LocateReport(ReflectionBase): + """Aggregated report for the Locate phase.""" + locations: list[FileLocation] = Field( + description="Source locations where vulnerable code was found.") + diff_available: bool = Field( + description="Whether a patched RPM diff was used for location.") + evidence: str = Field( + description="Human-readable summary for justification.") + + # 
--------------------------------------------------------------------------- # Prompt templates # --------------------------------------------------------------------------- @@ -131,11 +188,19 @@ class IdentifyReport(ReflectionBase): ) LOCATE_SYSTEM_PROMPT = ( - "You are a vulnerability analyst performing the **Locate** phase.\n" - "Using the components identified in the previous phase, search the\n" - "source code to map each vulnerable function to its concrete file path\n" - "and call sites. Determine whether the vulnerable code is reachable\n" - "from an entry point in the target package." + "You are a vulnerability analyst performing the **Locate** phase.\n\n" + "You will receive a code region (either a diff hunk from the patched RPM " + "or a search result from the source code index) along with the CVE context " + "and keywords from the Identify phase.\n\n" + "Your task is to judge whether this code region is **relevant to the " + "vulnerability**:\n" + "- relevant: True if the code change or code region relates to fixing or " + "containing the vulnerability described in the CVE.\n" + "- confidence: 0.0-1.0 how confident you are in the judgment.\n" + "- reasoning: brief explanation of why this region is or is not relevant.\n\n" + "Focus on: function names, variable names, control flow changes, " + "added bounds checks, input validation, or security-relevant API calls " + "that match the CVE description." 
) VERIFY_SYSTEM_PROMPT = ( @@ -234,6 +299,8 @@ async def build_identify_subgraph( if isinstance(hits, str): hits = [] + hit_files = [h["source"] for h in hits if "source" in h] + results_text = ( "\n\n".join( f"File: {h['source']}\n{h['content'][:MAX_CONTENT_CHARS]}" @@ -252,6 +319,7 @@ async def build_identify_subgraph( )), ] judgment: KeywordJudgment = await judge_llm.ainvoke(messages) + judgment.file_paths = hit_files span.set_output({"approved": judgment.approved, "confidence": judgment.confidence}) judgments.append(judgment) @@ -259,7 +327,10 @@ async def build_identify_subgraph( approved: list[Keyword] = [] rejected: list[Keyword] = [] for j in judgments: - kw_obj = Keyword(term=j.term, keyword_type=j.keyword_type, confidence=j.confidence) + kw_obj = Keyword( + term=j.term, keyword_type=j.keyword_type, + confidence=j.confidence, file_paths=j.file_paths, + ) if j.approved: approved.append(kw_obj) else: @@ -271,3 +342,273 @@ async def build_identify_subgraph( is_sufficient=len(approved) > 0, instructions="" if approved else "No keywords matched vulnerable code. 
Consider broader terms.", ) + + +# --------------------------------------------------------------------------- +# Locate helpers +# --------------------------------------------------------------------------- + + +def parse_unified_diff(diff_text: str) -> list[DiffHunk]: + """Parse unified diff text into structured DiffHunk objects using unidiff.""" + patch = PatchSet.from_string(diff_text) + hunks: list[DiffHunk] = [] + for patched_file in patch: + if patched_file.is_binary_file: + continue + for hunk in patched_file: + hunks.append(DiffHunk( + file_path=patched_file.path, + source_start_line=hunk.source_start, + source_end_line=hunk.source_start + hunk.source_length, + patch_start_line=hunk.target_start, + patch_end_line=hunk.target_start + hunk.target_length, + content=str(hunk), + )) + return hunks + + +def _generate_tree_diff(source_dir: Path, patch_dir: Path) -> str: + """Run ``git diff --no-index`` between two directory trees. + + Uses git's built-in binary detection to automatically skip binary files + (RPMs, tarballs, images, etc.) without needing an explicit exclude list. + """ + result = subprocess.run( + ["git", "diff", "--no-index", "--no-color", + str(source_dir), str(patch_dir)], + capture_output=True, timeout=300, + ) + return result.stdout.decode("utf-8", errors="replace") + + +_NON_SOURCE_EXTENSIONS = frozenset({ + ".md", ".txt", ".rst", ".html", ".xml", ".json", ".yaml", ".yml", + ".sgml", ".po", ".pot", ".spec", ".cfg", ".conf", ".1", ".man", +}) + + +def _cross_reference_hunks( + hunks: list[DiffHunk], approved_keywords: list[Keyword], +) -> list[FileLocation]: + """Match diff hunks against approved keywords from the Identify phase. + + Applies two layers of filtering before keyword matching: + 1. Extension filter – skip non-source files (docs, configs, translations). + 2. File-scope filter – when a keyword carries ``file_paths`` from Identify, + only consider hunks whose file basename appears in that list. 
+ """ + locations: list[FileLocation] = [] + for hunk in hunks: + if Path(hunk.file_path).suffix.lower() in _NON_SOURCE_EXTENSIONS: + continue + + hunk_basename = Path(hunk.file_path).name + searchable = (hunk.content + " " + hunk.file_path).lower() + + matched: list[str] = [] + for kw in approved_keywords: + if kw.file_paths and not any( + hunk_basename in fp for fp in kw.file_paths + ): + continue + if kw.term.lower() in searchable: + matched.append(kw.term) + + if matched: + locations.append(FileLocation( + file_path=hunk.file_path, + line_number=hunk.source_start_line, + snippet=hunk.content, + matched_keywords=matched, + source="diff", + )) + return locations + + +async def _tantivy_fallback( + approved_keywords: list[Keyword], + lexical_search_fn, + tracer, +) -> list[FileLocation]: + """Fall back to keyword-only Tantivy search when no patched RPM diff is available.""" + locations: list[FileLocation] = [] + for kw in approved_keywords: + with tracer.push_active_function( + f"locate_search_{kw.term}", input_data={"keyword": kw.term} + ): + try: + hits = await lexical_search_fn(kw.term) + except Exception as e: + logger.warning("locate tantivy search failed for '%s': %s", kw.term, e) + hits = [] + if isinstance(hits, str): + hits = [] + for h in hits: + source_path = h["source"] + if Path(source_path).suffix.lower() in _NON_SOURCE_EXTENSIONS: + continue + locations.append(FileLocation( + file_path=source_path, + line_number=None, + snippet=h["content"], + matched_keywords=[kw.term], + source="tantivy", + )) + return locations + + +# --------------------------------------------------------------------------- +# Locate subgraph pipeline +# --------------------------------------------------------------------------- + + +async def build_locate_pipeline( + *, + llm, + identify_report: IdentifyReport, + vuln_id: str, + descriptions: list[tuple[str, str]], + lexical_search_fn, + fix_info: dict, + brew_downloader, + source_dir: Path, + patch_dir: Path, + tracer, + 
locate_prompt: str = LOCATE_SYSTEM_PROMPT, +) -> LocateReport: + """Locate vulnerable code by diffing patched RPM against target source, + or falling back to keyword-only Tantivy search. + + Steps: + A) Resolve fix NVR from identify_result.fixed_rpm_list. + B) Download patched SRPM via BrewDownloader. + C) Extract to patch_dir. + D) Generate unified diff between source_dir and patch_dir. + E) Parse diff into DiffHunk objects via unidiff. + F) Cross-reference diff hunks with Identify approved keywords. + G) LLM judges each matched location for relevance. + + Fallback: if no fix RPM is available, search Tantivy for each + approved keyword and judge those results instead. + + Parameters + ---------- + llm: + LangChain LLM for per-location judgment. + identify_report: + Output of the Identify phase with approved/rejected keywords. + vuln_id: + CVE identifier. + descriptions: + ``(source_name, text)`` pairs from CVE intel for LLM context. + lexical_search_fn: + ``async (str) -> list[dict]`` -- Tantivy index search. + fix_info: + ``{name, version, release}`` dict from RHSA, or empty dict. + brew_downloader: + ``BrewDownloader`` instance, or ``None`` if Brew is unavailable. + source_dir: + Path to the extracted target source tree. + patch_dir: + Path where the patched source tree will be extracted. + tracer: + Request-scoped ``nat.builder.context.Context`` for span creation. + locate_prompt: + System prompt for per-location LLM judgment. 
+ """ + approved = identify_report.approved + diff_available = False + candidate_locations: list[FileLocation] = [] + + # -- Steps A-F: diff-based path ------------------------------------------- + if fix_info and brew_downloader is not None: + try: + with tracer.push_active_function( + "locate_download_patch", input_data={"fix_info": fix_info} + ) as span: + from exploit_iq_commons.utils.source_rpm_downloader import SourceRPMDownloader + + srpm_path = brew_downloader.download_patched_srpm_by_nevra(fix_info["nevra"]) + if srpm_path is None: + srpm_path = brew_downloader.download_patched_srpm( + fix_info["name"], fix_info["version"], fix_info["release"], + ) + if srpm_path is not None: + patch_dir.mkdir(parents=True, exist_ok=True) + shutil.copy2(srpm_path, patch_dir) + SourceRPMDownloader.extract_src_rpm(srpm_path, patch_dir) + + diff_text = _generate_tree_diff(source_dir, patch_dir) + diff_output_path = patch_dir.parent / "locate.diff" + diff_output_path.write_text(diff_text, encoding="utf-8") + + hunks = parse_unified_diff(diff_text) + candidate_locations = _cross_reference_hunks(hunks, approved) + diff_available = True + span.set_output({ + "srpm": str(srpm_path), + "hunks_total": len(hunks), + "matched_locations": len(candidate_locations), + }) + else: + span.set_output({"srpm": None, "reason": "patched build not found in Brew"}) + logger.info("locate: patched SRPM not found, falling back to Tantivy") + except Exception as e: + logger.warning("locate: diff path failed (%s), falling back to Tantivy", e) + + # -- Fallback: keyword-only Tantivy search -------------------------------- + if not candidate_locations: + with tracer.push_active_function( + "locate_tantivy_fallback", input_data={"keywords": [k.term for k in approved]} + ): + candidate_locations = await _tantivy_fallback(approved, lexical_search_fn, tracer) + + # -- Step G: LLM judges each candidate location --------------------------- + confirmed: list[FileLocation] = [] + if candidate_locations: + 
judge_llm = llm.with_structured_output(LocationJudgment) + cve_context = "\n\n".join(f"[{src}] {txt}" for src, txt in descriptions) + keywords_text = ", ".join(kw.term for kw in approved) + + for loc in candidate_locations: + with tracer.push_active_function( + f"locate_judge_{loc.file_path}", input_data={"file": loc.file_path} + ) as span: + messages = [ + SystemMessage(content=locate_prompt), + HumanMessage(content=( + f"CVE: {vuln_id}\n{cve_context}\n\n" + f"Approved keywords: {keywords_text}\n\n" + f"File: {loc.file_path} (line {loc.line_number or 'N/A'})\n" + f"Source: {loc.source}\n\n" + f"Code region:\n{loc.snippet}" + )), + ] + judgment: LocationJudgment = await judge_llm.ainvoke(messages) + span.set_output({"relevant": judgment.relevant, "confidence": judgment.confidence}) + if judgment.relevant: + confirmed.append(loc) + + # -- Aggregate into LocateReport ------------------------------------------ + is_sufficient = len(confirmed) > 0 + if confirmed: + file_list = ", ".join(sorted({loc.file_path for loc in confirmed})) + evidence = ( + f"Located {len(confirmed)} vulnerability-relevant code region(s) " + f"in: {file_list}." + ) + if diff_available: + evidence += " Evidence sourced from patched RPM diff." + instructions = "" + else: + evidence = "No vulnerability-relevant code locations found in the source tree." + instructions = "No code locations confirmed. Verify phase should assess based on available intel." 
+ + return LocateReport( + locations=confirmed, + diff_available=diff_available, + evidence=evidence, + is_sufficient=is_sufficient, + instructions=instructions, + ) diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index 272d514a8..a16081f51 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -13,6 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import re +from pathlib import Path + from aiq.builder.builder import Builder from aiq.builder.framework_enum import LLMFrameworkEnum from aiq.builder.function_info import FunctionInfo @@ -36,15 +39,46 @@ ) from vuln_analysis.functions.code_agent_graph_defs import ( CodeAgentState, - LOCATE_SYSTEM_PROMPT, - VERIFY_SYSTEM_PROMPT, IdentifyReport, + LocateReport, + VERIFY_SYSTEM_PROMPT, build_identify_subgraph, + build_locate_pipeline, ) +from vuln_analysis.tools.brew_downloader import BrewDownloader, BrewProfileType, BrewDownloaderError from vuln_analysis.utils.full_text_search import FullTextSearch logger = LoggingFactory.get_agent_logger(__name__) +_RPM_NEVRA_RE = re.compile(r"^(.+?)-(?:(\d+):)?(\d\S*?)-(\S+)$") + + +def _parse_fix_info_from_context(ctx, target_name: str) -> dict: + """Extract {name, version, release} from checker_context.identify_result.fixed_rpm_list. + + Handles both epoch and non-epoch NEVRAs: + - With epoch: libpq-0:13.20-1.el8_6.x86_64 + - Without epoch: libpq-13.20-1.el8_6.x86_64 + + Finds the first NEVRA in the list that matches the target package name. + Returns an empty dict if no match is found. 
+ """ + if not ctx or not ctx.identify_result or not ctx.identify_result.fixed_rpm_list: + return {} + for nevra in ctx.identify_result.fixed_rpm_list: + m = _RPM_NEVRA_RE.match(nevra) + if not m: + continue + name = m.group(1) + if name.lower() != target_name.lower(): + continue + version = m.group(3) + release_arch = m.group(4) + release = release_arch.rsplit(".", 1)[0] if "." in release_arch else release_arch + clean_nevra = f"{name}-{version}-{release_arch}" + return {"nevra": clean_nevra, "name": name, "version": version, "release": release} + return {} + class CVEPackageCodeAgentConfig(FunctionBaseConfig, name="cve_package_code_agent"): """ @@ -61,6 +95,10 @@ class CVEPackageCodeAgentConfig(FunctionBaseConfig, name="cve_package_code_agent default=".cache/am_cache/code_index", description="Base directory for Tantivy code index storage.", ) + base_rpm_dir: str = Field( + default=".cache/am_cache/rpms", + description="Shared RPM cache directory (for BrewDownloader).", + ) llm_name: str = Field(description="The LLM model to use with the L1 code agent.") tool_names: list[str] = Field(default=[], description="The list of tools to provide to L1 code agent") max_iterations: int = Field(default=10, description="The maximum number of iterations for the agent.") @@ -72,25 +110,44 @@ async def create_graph_code_agent(config: CVEPackageCodeAgentConfig, builder: Bu vuln_id = state.input.scan.vulns[0].vuln_id ctx = state.info.checker_context intel = state.info.intel + target_package = state.input.image.target_package source_key = ctx.source_key index_path = FullTextSearch.get_index_directory(config.base_code_index_dir, source_key) fts = FullTextSearch(cache_path=str(index_path)) if fts.is_empty(): raise ValueError(f"Invalid code index at: {index_path}, index is empty") + async def lexical_search_fn(query: str) -> list: return fts.search_index(query, top_k=5) + # -- Locate setup: fix info + BrewDownloader + paths ----------------------- + aIntel = intel[0] + fix_info = 
_parse_fix_info_from_context(ctx, target_package.name) + checker_dir = Path(config.base_checker_dir) / source_key + source_dir = checker_dir / "source" + patch_dir = checker_dir / "patch" + + brew_downloader = None + if fix_info: + try: + brew_downloader = BrewDownloader( + BrewProfileType.INTERNAL, config.base_rpm_dir, str(checker_dir), + ) + brew_downloader.connect() + except BrewDownloaderError as e: + logger.warning("locate: BrewDownloader init failed (%s), diff path unavailable", e) + brew_downloader = None + + descriptions: list[tuple[str, str]] = [] + if aIntel.ghsa: + cve_text = aIntel.ghsa.description or aIntel.ghsa.summary or "" + if cve_text: + descriptions.append(("ghsa", cve_text)) + if aIntel.ubuntu and aIntel.ubuntu.description: + descriptions.append(("ubuntu", aIntel.ubuntu.description)) + async def identify_node(state: CodeAgentState) -> dict: logger.info("identify_node: starting") - aIntel = intel[0] - - descriptions: list[tuple[str, str]] = [] - if aIntel.ghsa: - cve_text = aIntel.ghsa.description or aIntel.ghsa.summary or "" - if cve_text: - descriptions.append(("ghsa", cve_text)) - if aIntel.ubuntu and aIntel.ubuntu.description: - descriptions.append(("ubuntu", aIntel.ubuntu.description)) with tracer.push_active_function("identify", input_data={"vuln_id": vuln_id}) as span: report: IdentifyReport = await build_identify_subgraph( @@ -111,12 +168,48 @@ async def identify_node(state: CodeAgentState) -> dict: async def locate_node(state: CodeAgentState) -> dict: logger.info("locate_node: starting") + identify_report = state.get("identify_report") + if identify_report is None or not identify_report.approved: + logger.info("locate_node: no approved keywords, skipping") + return { + "locate_report": LocateReport( + locations=[], diff_available=False, + evidence="Skipped: no approved keywords from Identify phase.", + is_sufficient=False, instructions="No keywords to locate.", + ), + "messages": [AIMessage(content="Locate skipped: no approved 
keywords.")], + } + with tracer.push_active_function( - "locate", input_data={"messages_count": len(state["messages"])} - ): - result = "Locate phase complete: vulnerable code paths mapped." - logger.info("locate_node: %s", result) - return {"messages": [AIMessage(content=result)]} + "locate", + input_data={ + "approved_keywords": [k.term for k in identify_report.approved], + "fix_info": fix_info, + }, + ) as span: + report: LocateReport = await build_locate_pipeline( + llm=llm, + identify_report=identify_report, + vuln_id=vuln_id, + descriptions=descriptions, + lexical_search_fn=lexical_search_fn, + fix_info=fix_info, + brew_downloader=brew_downloader, + source_dir=source_dir, + patch_dir=patch_dir, + tracer=tracer, + ) + span.set_output({ + "locations_count": len(report.locations), + "diff_available": report.diff_available, + "is_sufficient": report.is_sufficient, + }) + if report.diff_available: + ctx.artifacts.patch_source_dir = patch_dir + ctx.artifacts.patch_diff_path = patch_dir.parent / "locate.diff" + + logger.info("locate_node: %s", report.model_dump_json()) + return {"locate_report": report, "messages": [AIMessage(content=report.evidence)]} async def verify_node(state: CodeAgentState) -> dict: logger.info("verify_node: starting") diff --git a/src/vuln_analysis/tools/brew_downloader.py b/src/vuln_analysis/tools/brew_downloader.py index 987fca0f6..15416f0e8 100644 --- a/src/vuln_analysis/tools/brew_downloader.py +++ b/src/vuln_analysis/tools/brew_downloader.py @@ -245,7 +245,22 @@ def download_patched_srpm(self, name: str, version: str, release: str) -> Path | if build is None: return None return self.download_srpm(build) - + + def download_patched_srpm_by_nevra(self, nevra: str) -> Path | None: + """Download the SRPM for a patched version (from NEVRA). + + Returns the cached SRPM path, or ``None`` if the patched build is not + found in Brew. 
+ """ + build = self._session.getBuild(nevra) + if build is None: + logger.warning("Build not found: %s", nevra) + return None + logger.info( + "Found build %s (id=%s, volume=%s, task=%s)", + build["nvr"], build["id"], build.get("volume_name"), build.get("task_id"), + ) + return self.download_srpm(build) def download_target_artifacts(self, name: str, version: str, release: str, arch: str ) -> AcquiredArtifacts | None: artifacts = AcquiredArtifacts() build = self.search_build(name, version, release) From c31255b10d35f17b551180b1ddcf765452c297a4 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Thu, 23 Apr 2026 14:56:04 +0300 Subject: [PATCH 06/46] locate mile stone 2 --- .../functions/code_agent_graph_defs.py | 32 ++++++++--- .../functions/cve_package_code_agent.py | 2 +- src/vuln_analysis/register.py | 53 +++++++++++++++++-- src/vuln_analysis/utils/package_identifier.py | 17 ++++++ 4 files changed, 92 insertions(+), 12 deletions(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index 079817a2f..234ab8761 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -143,11 +143,11 @@ class LocationJudgment(BaseModel): class LocateReport(ReflectionBase): - """Aggregated report for the Locate phase.""" + """Locate phase report (upstream search: diffs target against fixed SRPM).""" locations: list[FileLocation] = Field( description="Source locations where vulnerable code was found.") diff_available: bool = Field( - description="Whether a patched RPM diff was used for location.") + description="Whether a diff against the fixed SRPM was available for location.") evidence: str = Field( description="Human-readable summary for justification.") @@ -384,9 +384,10 @@ def _generate_tree_diff(source_dir: Path, patch_dir: Path) -> str: _NON_SOURCE_EXTENSIONS = frozenset({ ".md", ".txt", ".rst", ".html", ".xml", ".json", ".yaml", ".yml", - 
".sgml", ".po", ".pot", ".spec", ".cfg", ".conf", ".1", ".man", + ".sgml", ".po", ".pot", ".spec", ".cfg", ".conf", ".1", ".man", ".in", }) +LANG_PARSER_EXTENSIONS = {".c", ".h", ".cpp", ".hpp", ".py", ".go", ".java", ".js", ".ts"} def _cross_reference_hunks( hunks: list[DiffHunk], approved_keywords: list[Keyword], @@ -400,7 +401,7 @@ def _cross_reference_hunks( """ locations: list[FileLocation] = [] for hunk in hunks: - if Path(hunk.file_path).suffix.lower() in _NON_SOURCE_EXTENSIONS: + if Path(hunk.file_path).suffix.lower() not in LANG_PARSER_EXTENSIONS: continue hunk_basename = Path(hunk.file_path).name @@ -521,8 +522,25 @@ async def build_locate_pipeline( diff_available = False candidate_locations: list[FileLocation] = [] - # -- Steps A-F: diff-based path ------------------------------------------- - if fix_info and brew_downloader is not None: + # -- Check for cached diff from a prior run -------------------------------- + diff_output_path = patch_dir.parent / "locate.diff" + if diff_output_path.exists() and patch_dir.exists() and any(patch_dir.iterdir()): + with tracer.push_active_function( + "locate_cached_diff", input_data={"diff_path": str(diff_output_path)} + ) as span: + diff_text = diff_output_path.read_text(encoding="utf-8") + hunks = parse_unified_diff(diff_text) + candidate_locations = _cross_reference_hunks(hunks, approved) + diff_available = True + span.set_output({ + "source": "cache", + "hunks_total": len(hunks), + "matched_locations": len(candidate_locations), + }) + logger.info("locate: using cached diff (%d hunks, %d matched)", len(hunks), len(candidate_locations)) + + # -- Steps A-F: diff-based path (only if no cache) ------------------------- + elif fix_info and brew_downloader is not None: try: with tracer.push_active_function( "locate_download_patch", input_data={"fix_info": fix_info} @@ -569,7 +587,6 @@ async def build_locate_pipeline( if candidate_locations: judge_llm = llm.with_structured_output(LocationJudgment) cve_context = 
"\n\n".join(f"[{src}] {txt}" for src, txt in descriptions) - keywords_text = ", ".join(kw.term for kw in approved) for loc in candidate_locations: with tracer.push_active_function( @@ -579,7 +596,6 @@ async def build_locate_pipeline( SystemMessage(content=locate_prompt), HumanMessage(content=( f"CVE: {vuln_id}\n{cve_context}\n\n" - f"Approved keywords: {keywords_text}\n\n" f"File: {loc.file_path} (line {loc.line_number or 'N/A'})\n" f"Source: {loc.source}\n\n" f"Code region:\n{loc.snippet}" diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index a16081f51..b183ec8d9 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -231,7 +231,7 @@ async def verify_node(state: CodeAgentState) -> dict: flow.add_edge("verify", END) app = flow.compile() - app.get_graph().draw_mermaid_png(output_file_path="code_agent_flow.png") + #app.get_graph().draw_mermaid_png(output_file_path="code_agent_flow.png") return app diff --git a/src/vuln_analysis/register.py b/src/vuln_analysis/register.py index b9eff0d15..1b0ee7a30 100644 --- a/src/vuln_analysis/register.py +++ b/src/vuln_analysis/register.py @@ -24,11 +24,11 @@ from pydantic import Field from exploit_iq_commons.data_models.common import PipelineMode +from exploit_iq_commons.data_models.checker_status import PackageCheckerStatus, PACKAGE_CHECKER_STATUS_DESCRIPTIONS from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput from exploit_iq_commons.data_models.input import AgentMorpheusInput from exploit_iq_commons.data_models.info import AgentMorpheusInfo -from vuln_analysis.data_models.output import AgentMorpheusOutput -from vuln_analysis.data_models.output import OutputPayload +from vuln_analysis.data_models.output import AgentMorpheusEngineOutput, AgentMorpheusOutput, JustificationOutput, OutputPayload from vuln_analysis.data_models.state import AgentMorpheusEngineState 
# pylint: disable=unused-import from vuln_analysis.functions import cve_agent @@ -285,6 +285,44 @@ async def code_agent_node(state: AgentMorpheusEngineInput) -> AgentMorpheusOutpu output=OutputPayload(analysis=[], vex=None), ) + @catch_pipeline_errors_async + async def checker_early_exit_node(state: AgentMorpheusEngineInput) -> AgentMorpheusOutput: + """Produces a proper output when source_acquisition exits with a non-OK status.""" + ctx = state.info.checker_context + status = ctx.status if ctx else None + reason = ( + PACKAGE_CHECKER_STATUS_DESCRIPTIONS[status] + if status is not None and status in PACKAGE_CHECKER_STATUS_DESCRIPTIONS + else f"Checker exited early with status {status}" + ) + logger.info("checker_early_exit: status=%s reason=%s", status, reason) + analysis = [ + AgentMorpheusEngineOutput( + vuln_id=v.vuln_id, + checklist=[], + summary=reason, + justification=JustificationOutput( + label="not_vulnerable" if status == PackageCheckerStatus.PKG_IDENT_NOT_VUL else "error", + reason=reason, + status="FALSE" if status == PackageCheckerStatus.PKG_IDENT_NOT_VUL else "UNKNOWN", + ), + intel_score=0, + cvss=None, + ) + for v in state.input.scan.vulns + ] + return AgentMorpheusOutput( + input=state.input, info=state.info, + output=OutputPayload(analysis=analysis, vex=None), + ) + + def route_after_source_acquisition(state: AgentMorpheusEngineInput): + """Route to checker_segmentation (happy path) or early exit on non-OK status.""" + ctx = state.info.checker_context + if ctx and ctx.status == PackageCheckerStatus.OK: + return "checker_segmentation" + return "checker_early_exit" + def route_after_add_start_time(state: AgentMorpheusInput): """Route to full pipeline or package checker based on pipeline_mode.""" if state.image.pipeline_mode == PipelineMode.PACKAGE_CHECKER: @@ -338,6 +376,7 @@ async def call_llm_engine_subgraph_node(message: AgentMorpheusEngineInput): graph_builder.add_node("checker_fetch_intel", checker_fetch_intel_node) 
graph_builder.add_node("source_acquisition", source_acquisition_node) + graph_builder.add_node("checker_early_exit", checker_early_exit_node) graph_builder.add_node("checker_segmentation", checker_segmentation_node) graph_builder.add_node("code_agent", code_agent_node) @@ -365,7 +404,15 @@ async def call_llm_engine_subgraph_node(message: AgentMorpheusEngineInput): graph_builder.add_edge("checker_init_state", "checker_fetch_intel") graph_builder.add_edge("checker_fetch_intel", "source_acquisition") - graph_builder.add_edge("source_acquisition", "checker_segmentation") + graph_builder.add_conditional_edges( + "source_acquisition", + route_after_source_acquisition, + { + "checker_segmentation": "checker_segmentation", + "checker_early_exit": "checker_early_exit", + }, + ) + graph_builder.add_edge("checker_early_exit", "add_completed_time") graph_builder.add_edge("checker_segmentation", "code_agent") graph_builder.add_edge("code_agent", "add_completed_time") diff --git a/src/vuln_analysis/utils/package_identifier.py b/src/vuln_analysis/utils/package_identifier.py index 523a9cf7d..a43eac087 100644 --- a/src/vuln_analysis/utils/package_identifier.py +++ b/src/vuln_analysis/utils/package_identifier.py @@ -25,6 +25,13 @@ logger = LoggingFactory.get_agent_logger(__name__) _RPM_NEVRA_RE = re.compile(r"^(.+?)-(\d+):(.+?)-(.+)$") +_DIST_TAG_RE = re.compile(r"(el\d+)") + + +def _extract_dist_tag(release: str) -> str | None: + """Extract the RHEL dist-tag family (e.g. 
'el8') from a release string.""" + m = _DIST_TAG_RE.search(release) + return m.group(1) if m else None @@ -128,6 +135,16 @@ def _is_target_package_fixed(self, intel: CveIntel, package_identify: PackageIde try: target_nvr = f"{target_version}-{target_release}" fix_nvr = f"{fix['version']}-{fix['release']}" + + target_dist = _extract_dist_tag(target_release) if target_release else None + fix_dist = _extract_dist_tag(fix["release"]) + if target_dist and fix_dist and target_dist != fix_dist: + logger.debug( + "Cross-stream fix comparison skipped: target=%s fix=%s", + target_dist, fix_dist, + ) + return EnumIdentifyResult.UNKNOWN + if versions.RpmVersion(target_nvr) >= versions.RpmVersion(fix_nvr): return EnumIdentifyResult.YES return EnumIdentifyResult.NO From 8f26f2a288f1cd8ed84d81d7c0c32d2c7b8717db Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Thu, 23 Apr 2026 14:00:52 +0000 Subject: [PATCH 07/46] verify step 1 --- .../functions/code_agent_graph_defs.py | 324 +++++++++++++++++- .../functions/cve_package_code_agent.py | 77 ++++- 2 files changed, 382 insertions(+), 19 deletions(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index 234ab8761..51b8fb550 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -23,6 +23,7 @@ from __future__ import annotations import logging +import re import shutil import subprocess from pathlib import Path @@ -44,6 +45,7 @@ class CodeAgentState(MessagesState): """LangGraph state for the Identify -> Locate -> Verify graph.""" identify_report: NotRequired[IdentifyReport | None] locate_report: NotRequired[LocateReport | None] + verify_report: NotRequired[VerifyReport | None] # --------------------------------------------------------------------------- @@ -152,6 +154,46 @@ class LocateReport(ReflectionBase): description="Human-readable summary for justification.") +# 
--------------------------------------------------------------------------- +# Verify schemas +# --------------------------------------------------------------------------- + + +class DownstreamPatchDetail(BaseModel): + """Extracted from a .patch file: shows vulnerable code and the fix.""" + patch_filename: str + file_path: str + vulnerable_code: str = Field( + description="Context lines (no +/- prefix) around the change") + fix_code: str = Field( + description="Added lines (+ prefix stripped)") + line_number: int | None = None + + +class PatchEvidence(BaseModel): + """Evidence of a downstream patch for the CVE.""" + source: Literal[ + "spec_patch_directive", "spec_changelog", "patch_file_content", "build_log" + ] + detail: str + confidence: float = Field(ge=0.0, le=1.0) + patch_details: list[DownstreamPatchDetail] = Field(default_factory=list) + + +class ChangelogJudgment(BaseModel): + """LLM output: judge whether changelog entries describe a fix for the CVE.""" + relevant: bool + confidence: float = Field(ge=0.0, le=1.0) + reasoning: str + + +class VerifyReport(ReflectionBase): + """Verify phase report (downstream search: checks this build's patches and changelog).""" + verdict: Literal["PATCHED", "VULNERABLE", "INCONCLUSIVE"] + evidence: list[PatchEvidence] + summary: str + + # --------------------------------------------------------------------------- # Prompt templates # --------------------------------------------------------------------------- @@ -203,11 +245,21 @@ class LocateReport(ReflectionBase): "that match the CVE description." ) -VERIFY_SYSTEM_PROMPT = ( - "You are a vulnerability analyst performing the **Verify** phase.\n" - "Review the located code paths and confirm whether the vulnerability\n" - "is exploitable in the target package. Produce a final verdict with\n" - "supporting evidence (code snippets, call chains, patch applicability)." 
+VERIFY_CHANGELOG_PROMPT = ( + "You are a vulnerability analyst performing the **Verify** phase.\n\n" + "You will receive:\n" + "1. A CVE ID and its description.\n" + "2. Recent changelog entries from an RPM .spec file.\n\n" + "Your task is to determine whether any changelog entry describes a fix " + "or backport for this specific CVE. Entries may reference CVEs explicitly " + "(e.g. 'Fixes: CVE-2026-5121') or describe the fix without naming the CVE " + "(e.g. 'add bounds check in iso9660 ZER header parsing').\n\n" + "RULES:\n" + "- An explicit CVE ID mention is strong evidence (confidence >= 0.9).\n" + "- A description that matches the vulnerability semantics without naming " + "the CVE is weaker evidence (confidence 0.5-0.8).\n" + "- Unrelated entries should be marked as not relevant.\n\n" + "Output a structured judgment." ) @@ -628,3 +680,265 @@ async def build_locate_pipeline( is_sufficient=is_sufficient, instructions=instructions, ) + + +# --------------------------------------------------------------------------- +# Verify pipeline +# --------------------------------------------------------------------------- + +_SPEC_PATCH_RE = re.compile(r"^Patch(\d+)\s*:\s*(.+)$", re.IGNORECASE) +_BUILDLOG_PATCH_RE = re.compile( + r"/usr/bin/cat\s+/builddir/build/SOURCES/(\S+\.patch)", +) + + +def _parse_spec_patch_directives( + inspector, spec_path: Path, +) -> list[tuple[int, str, str]]: + """Return ``[(index, filename, raw_line), ...]`` from ``PatchN:`` lines.""" + matches = inspector.grep_content(_SPEC_PATCH_RE.pattern, spec_path) + results: list[tuple[int, str, str]] = [] + for m in matches: + hit = _SPEC_PATCH_RE.match(m.line_content.strip()) + if hit: + results.append((int(hit.group(1)), hit.group(2).strip(), m.line_content.strip())) + return results + + +def _extract_spec_changelog(inspector, spec_path: Path) -> str | None: + """Return text after the ``%changelog`` directive, or ``None``.""" + content = inspector.read_file(spec_path) + idx = 
content.find("%changelog") + if idx == -1: + return None + return content[idx + len("%changelog"):] + + +def _parse_build_log_applied_patches( + inspector, build_log_path: Path, +) -> list[str]: + """Return patch filenames that were applied during the RPM build.""" + matches = inspector.grep_content(_BUILDLOG_PATCH_RE.pattern, build_log_path) + filenames: list[str] = [] + for m in matches: + hit = _BUILDLOG_PATCH_RE.search(m.line_content) + if hit: + filenames.append(hit.group(1)) + return filenames + + +def _extract_patch_details( + patch_path: Path, locate_report: LocateReport | None, +) -> list[DownstreamPatchDetail]: + """Parse a ``.patch`` file into ``DownstreamPatchDetail`` entries.""" + try: + diff_text = patch_path.read_text(encoding="utf-8", errors="replace") + patch_set = PatchSet.from_string(diff_text) + except Exception: + logger.warning("verify: failed to parse patch file %s", patch_path) + return [] + + locate_files = set() + if locate_report: + locate_files = {loc.file_path for loc in locate_report.locations} + + details: list[DownstreamPatchDetail] = [] + for patched_file in patch_set: + if patched_file.is_binary_file: + continue + + file_path = patched_file.path + if locate_files and not any( + Path(file_path).name in lf for lf in locate_files + ): + continue + + for hunk in patched_file: + context_lines: list[str] = [] + added_lines: list[str] = [] + for line in hunk: + if line.is_context: + context_lines.append(str(line.value).rstrip("\n")) + elif line.is_added: + added_lines.append(str(line.value).rstrip("\n")) + + if added_lines: + details.append(DownstreamPatchDetail( + patch_filename=patch_path.name, + file_path=file_path, + vulnerable_code="\n".join(context_lines[-10:]), + fix_code="\n".join(added_lines), + line_number=hunk.target_start, + )) + return details + + +async def build_verify_pipeline( + *, + inspector, + llm, + vuln_id: str, + descriptions: list[tuple[str, str]], + identify_report: IdentifyReport | None, + locate_report: 
LocateReport | None, + build_log_path: Path | None, + tracer, +) -> VerifyReport: + """Check whether the target RPM build already applied a downstream patch. + + Three subphases: + 1. **Spec + build-log scan** (deterministic) -- grep for CVE ID in spec + patch directives, changelog, and build log. + 2. **LLM changelog interpretation** -- only when subphase 1 is + inconclusive; feeds changelog entries to the LLM. + 3. **Patch-file evidence extraction** -- parse ``.patch`` files, cross-ref + against Locate results, and extract vulnerable/fix code. + + Parameters + ---------- + inspector: + ``SourceInspector`` scoped to the extracted source directory. + llm: + LangChain LLM for changelog interpretation. + vuln_id: + CVE identifier (e.g. ``"CVE-2026-5121"``). + descriptions: + ``(source_name, text)`` pairs from CVE intel. + identify_report: + Output of the Identify phase (may be ``None``). + locate_report: + Output of the Locate phase (may be ``None``). + build_log_path: + Path to the Koji/Brew build log, or ``None``. + tracer: + Request-scoped tracing context. 
+ """ + evidence: list[PatchEvidence] = [] + cve_pattern = re.escape(vuln_id) + + # ── Subphase 1: deterministic spec + build-log scan ─────────────────── + with tracer.push_active_function( + "verify_spec_scan", input_data={"vuln_id": vuln_id} + ) as span: + spec_files = inspector.find_files("*.spec", recursive=False) + spec_path = spec_files[0] if spec_files else None + + if spec_path: + directives = _parse_spec_patch_directives(inspector, spec_path) + for idx, filename, raw_line in directives: + if re.search(cve_pattern, filename, re.IGNORECASE): + evidence.append(PatchEvidence( + source="spec_patch_directive", + detail=f"Patch{idx}: {filename}", + confidence=0.95, + )) + + changelog = _extract_spec_changelog(inspector, spec_path) + if changelog and re.search(cve_pattern, changelog, re.IGNORECASE): + evidence.append(PatchEvidence( + source="spec_changelog", + detail=f"CVE ID found in %changelog", + confidence=0.95, + )) + + if build_log_path and build_log_path.exists(): + from vuln_analysis.tools.source_inspector import SourceInspector as _SI + blog_inspector = _SI(build_log_path.parent) + applied = _parse_build_log_applied_patches(blog_inspector, build_log_path) + for patch_name in applied: + if re.search(cve_pattern, patch_name, re.IGNORECASE): + evidence.append(PatchEvidence( + source="build_log", + detail=f"Patch applied during build: {patch_name}", + confidence=0.98, + )) + + span.set_output({"evidence_count": len(evidence)}) + + # ── Subphase 2: LLM changelog interpretation (if subphase 1 empty) ─── + if not evidence and spec_path: + changelog = changelog if spec_path and changelog else _extract_spec_changelog(inspector, spec_path) # type: ignore[possibly-undefined] + if changelog: + recent_entries = "\n".join(changelog.strip().splitlines()[:50]) + with tracer.push_active_function( + "verify_llm_changelog", input_data={"vuln_id": vuln_id} + ) as span: + cve_context = "\n\n".join(f"[{src}] {txt}" for src, txt in descriptions) + judge_llm = 
llm.with_structured_output(ChangelogJudgment) + messages = [ + SystemMessage(content=VERIFY_CHANGELOG_PROMPT), + HumanMessage(content=( + f"CVE: {vuln_id}\n{cve_context}\n\n" + f"Changelog entries:\n{recent_entries}" + )), + ] + judgment: ChangelogJudgment = await judge_llm.ainvoke(messages) + span.set_output({ + "relevant": judgment.relevant, + "confidence": judgment.confidence, + }) + if judgment.relevant: + evidence.append(PatchEvidence( + source="spec_changelog", + detail=f"LLM: {judgment.reasoning}", + confidence=judgment.confidence, + )) + + # ── Subphase 3: patch-file evidence extraction ──────────────────────── + with tracer.push_active_function( + "verify_patch_extraction", input_data={"vuln_id": vuln_id} + ) as span: + patch_files = inspector.find_files("*.patch", recursive=False) + cve_patches = [p for p in patch_files if re.search(cve_pattern, p.name, re.IGNORECASE)] + + if cve_patches: + for pp in cve_patches: + details = _extract_patch_details(pp, locate_report) + if details: + evidence.append(PatchEvidence( + source="patch_file_content", + detail=f"Patch {pp.name}: {len(details)} hunk(s) with fix code", + confidence=0.95, + patch_details=details, + )) + elif patch_files and locate_report and locate_report.locations: + for pp in patch_files: + details = _extract_patch_details(pp, locate_report) + if details: + evidence.append(PatchEvidence( + source="patch_file_content", + detail=f"Patch {pp.name}: overlaps Locate files ({len(details)} hunk(s))", + confidence=0.6, + patch_details=details, + )) + + span.set_output({ + "cve_patches": len(cve_patches), + "total_patches": len(patch_files), + "evidence_items": len(evidence), + }) + + # ── Aggregate verdict ───────────────────────────────────────────────── + if evidence: + max_conf = max(e.confidence for e in evidence) + verdict: Literal["PATCHED", "VULNERABLE", "INCONCLUSIVE"] = ( + "PATCHED" if max_conf >= 0.8 else "INCONCLUSIVE" + ) + else: + verdict = "VULNERABLE" + + patch_sources = ", 
".join(sorted({e.source for e in evidence})) if evidence else "none" + summary = ( + f"Verify verdict={verdict} for {vuln_id}. " + f"Evidence sources: {patch_sources}. " + f"{len(evidence)} evidence item(s) found." + ) + + return VerifyReport( + verdict=verdict, + evidence=evidence, + summary=summary, + is_sufficient=verdict != "INCONCLUSIVE", + instructions="" if verdict != "INCONCLUSIVE" + else "Downstream evidence is ambiguous; manual review recommended.", + ) diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index b183ec8d9..f8894e27e 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -15,6 +15,7 @@ import re from pathlib import Path +from typing import Literal from aiq.builder.builder import Builder from aiq.builder.framework_enum import LLMFrameworkEnum @@ -41,9 +42,10 @@ CodeAgentState, IdentifyReport, LocateReport, - VERIFY_SYSTEM_PROMPT, + VerifyReport, build_identify_subgraph, build_locate_pipeline, + build_verify_pipeline, ) from vuln_analysis.tools.brew_downloader import BrewDownloader, BrewProfileType, BrewDownloaderError from vuln_analysis.utils.full_text_search import FullTextSearch @@ -213,12 +215,29 @@ async def locate_node(state: CodeAgentState) -> dict: async def verify_node(state: CodeAgentState) -> dict: logger.info("verify_node: starting") - with tracer.push_active_function( - "verify", input_data={"messages_count": len(state["messages"])} - ): - result = "Verify phase complete: exploit reachability confirmed." 
- logger.info("verify_node: %s", result) - return {"messages": [AIMessage(content=result)]} + from vuln_analysis.tools.source_inspector import SourceInspector + + identify_report = state.get("identify_report") + locate_report = state.get("locate_report") + build_log = ctx.artifacts.build_log_path if ctx and ctx.artifacts else None + + inspector = SourceInspector(source_dir) + report: VerifyReport = await build_verify_pipeline( + inspector=inspector, + llm=llm, + vuln_id=vuln_id, + descriptions=descriptions, + identify_report=identify_report, + locate_report=locate_report, + build_log_path=Path(build_log) if build_log else None, + tracer=tracer, + ) + + logger.info("verify_node: %s", report.model_dump_json()) + return { + "verify_report": report, + "messages": [AIMessage(content=report.summary)], + } flow = StateGraph(CodeAgentState) flow.add_node("identify", identify_node) @@ -235,9 +254,39 @@ async def verify_node(state: CodeAgentState) -> dict: return app -def _build_stub_analysis( +_StatusLiteral = Literal["TRUE", "FALSE", "UNKNOWN"] + +_VERDICT_TO_JUSTIFICATION: dict[str, tuple[str, _StatusLiteral]] = { + "PATCHED": ("code_not_present", "FALSE"), + "VULNERABLE": ("vulnerable", "TRUE"), + "INCONCLUSIVE": ("code_not_present", "UNKNOWN"), +} + + +def _build_analysis( message: AgentMorpheusEngineInput, result: dict, ) -> list[AgentMorpheusEngineOutput]: + verify_report: VerifyReport | None = result.get("verify_report") + + if verify_report is not None: + label, status = _VERDICT_TO_JUSTIFICATION.get( + verify_report.verdict, ("code_not_present", "UNKNOWN"), + ) + patch_snippets: list[str] = [] + for ev in verify_report.evidence: + for pd in ev.patch_details: + patch_snippets.append( + f"[{pd.patch_filename}] {pd.file_path}:{pd.line_number}\n" + f" fix: {pd.fix_code[:200]}" + ) + reason = verify_report.summary + if patch_snippets: + reason += "\n\nDownstream patch details:\n" + "\n".join(patch_snippets[:5]) + else: + label = "code_not_present" + status: 
_StatusLiteral = "UNKNOWN" + reason = "Verify phase did not produce a report." + last_msg = result["messages"][-1].content if result.get("messages") else "no result" return [ @@ -245,15 +294,15 @@ def _build_stub_analysis( vuln_id=intel.vuln_id, checklist=[ ChecklistItemOutput( - input="L1 stub checklist item", + input="L1 Package Code Agent analysis", response=last_msg, ), ], - summary=f"Stub summary for {intel.vuln_id}: {last_msg}", + summary=verify_report.summary if verify_report else f"No verify report for {intel.vuln_id}", justification=JustificationOutput( - label="code_not_present", - reason="Stub L1 agent -- no real analysis performed yet.", - status="UNKNOWN", + label=label, + reason=reason, + status=status, ), intel_score=0, cvss=None, @@ -285,7 +334,7 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: return AgentMorpheusOutput( input=message.input, info=message.info, - output=OutputPayload(analysis=_build_stub_analysis(message, result), vex=None), + output=OutputPayload(analysis=_build_analysis(message, result), vex=None), ) yield FunctionInfo.from_fn( From 436d75feaa776cbbb881e8a95a0305e599dc73eb Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Sat, 25 Apr 2026 10:44:04 +0300 Subject: [PATCH 08/46] clear labels --- src/vuln_analysis/functions/cve_package_code_agent.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index f8894e27e..738be5d52 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -257,9 +257,9 @@ async def verify_node(state: CodeAgentState) -> dict: _StatusLiteral = Literal["TRUE", "FALSE", "UNKNOWN"] _VERDICT_TO_JUSTIFICATION: dict[str, tuple[str, _StatusLiteral]] = { - "PATCHED": ("code_not_present", "FALSE"), + "PATCHED": ("protected_by_mitigating_control", "FALSE"), "VULNERABLE": ("vulnerable", 
"TRUE"), - "INCONCLUSIVE": ("code_not_present", "UNKNOWN"), + "INCONCLUSIVE": ("uncertain", "UNKNOWN"), } @@ -270,7 +270,7 @@ def _build_analysis( if verify_report is not None: label, status = _VERDICT_TO_JUSTIFICATION.get( - verify_report.verdict, ("code_not_present", "UNKNOWN"), + verify_report.verdict, ("uncertain", "UNKNOWN"), ) patch_snippets: list[str] = [] for ev in verify_report.evidence: From 50638eef2daf4d7667148ec3051abf834205f927 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Mon, 27 Apr 2026 13:26:08 +0000 Subject: [PATCH 09/46] last changes --- .../functions/code_agent_graph_defs.py | 206 ++++++++++-------- 1 file changed, 112 insertions(+), 94 deletions(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index 51b8fb550..7de9cc113 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -87,6 +87,10 @@ class Keyword(BaseModel): default_factory=list, description="Source files where this keyword was found during Identify", ) + hit_content: list[str] = Field( + default_factory=list, + description="Snippet content from Tantivy hits (parallel to file_paths)", + ) class KeywordJudgment(BaseModel): @@ -100,6 +104,10 @@ class KeywordJudgment(BaseModel): default_factory=list, description="Source files where this keyword was found during Identify", ) + hit_content: list[str] = Field( + default_factory=list, + description="Snippet content from Tantivy hits (parallel to file_paths)", + ) class IdentifyReport(ReflectionBase): @@ -152,6 +160,10 @@ class LocateReport(ReflectionBase): description="Whether a diff against the fixed SRPM was available for location.") evidence: str = Field( description="Human-readable summary for justification.") + evidence_level: Literal["high", "medium", "low"] = Field( + default="medium", + description="high=diff+LLM confirmed, medium=keyword hits only, low=no locations", + ) # 
--------------------------------------------------------------------------- @@ -372,6 +384,7 @@ async def build_identify_subgraph( ] judgment: KeywordJudgment = await judge_llm.ainvoke(messages) judgment.file_paths = hit_files + judgment.hit_content = [h["content"][:MAX_CONTENT_CHARS] for h in hits if "content" in h] span.set_output({"approved": judgment.approved, "confidence": judgment.confidence}) judgments.append(judgment) @@ -382,6 +395,7 @@ async def build_identify_subgraph( kw_obj = Keyword( term=j.term, keyword_type=j.keyword_type, confidence=j.confidence, file_paths=j.file_paths, + hit_content=j.hit_content, ) if j.approved: approved.append(kw_obj) @@ -479,42 +493,47 @@ def _cross_reference_hunks( return locations -async def _tantivy_fallback( - approved_keywords: list[Keyword], - lexical_search_fn, - tracer, -) -> list[FileLocation]: - """Fall back to keyword-only Tantivy search when no patched RPM diff is available.""" +def _tantivy_candidates(approved_keywords: list[Keyword]) -> list[FileLocation]: + """Build FileLocation objects from Identify phase hits (no re-search). + + Uses hit_content stored on each Keyword during the Identify phase, + avoiding redundant Tantivy queries. 
+ """ locations: list[FileLocation] = [] for kw in approved_keywords: - with tracer.push_active_function( - f"locate_search_{kw.term}", input_data={"keyword": kw.term} - ): - try: - hits = await lexical_search_fn(kw.term) - except Exception as e: - logger.warning("locate tantivy search failed for '%s': %s", kw.term, e) - hits = [] - if isinstance(hits, str): - hits = [] - for h in hits: - source_path = h["source"] - if Path(source_path).suffix.lower() in _NON_SOURCE_EXTENSIONS: - continue - locations.append(FileLocation( - file_path=source_path, - line_number=None, - snippet=h["content"], - matched_keywords=[kw.term], - source="tantivy", - )) + for file_path, content in zip(kw.file_paths, kw.hit_content): + if Path(file_path).suffix.lower() in _NON_SOURCE_EXTENSIONS: + continue + locations.append(FileLocation( + file_path=file_path, + line_number=None, + snippet=content, + matched_keywords=[kw.term], + source="tantivy", + )) return locations # --------------------------------------------------------------------------- # Locate subgraph pipeline # --------------------------------------------------------------------------- - +def downloand_patch_and_gen_diff(fix_info: dict, brew_downloader: BrewDownloader, source_dir: Path, patch_dir: Path) -> Path: + """Download the patched SRPM and generate the diff file between the source and the patched SRPM.""" + from exploit_iq_commons.utils.source_rpm_downloader import SourceRPMDownloader + + srpm_path = brew_downloader.download_patched_srpm_by_nevra(fix_info["nevra"]) + if srpm_path is None: + srpm_path = brew_downloader.download_patched_srpm(fix_info["name"], fix_info["version"], fix_info["release"],) + if srpm_path is not None: + patch_dir.mkdir(parents=True, exist_ok=True) + shutil.copy2(srpm_path, patch_dir) + SourceRPMDownloader.extract_src_rpm(srpm_path, patch_dir) + + diff_text = _generate_tree_diff(source_dir, patch_dir) + diff_output_path = patch_dir.parent / "locate.diff" + diff_output_path.write_text(diff_text, 
encoding="utf-8") + return diff_output_path + return None async def build_locate_pipeline( *, @@ -574,69 +593,36 @@ async def build_locate_pipeline( diff_available = False candidate_locations: list[FileLocation] = [] - # -- Check for cached diff from a prior run -------------------------------- + # -- Compare the upstream files with the fixed rpm files if exist using the diff file -------------------------------- diff_output_path = patch_dir.parent / "locate.diff" - if diff_output_path.exists() and patch_dir.exists() and any(patch_dir.iterdir()): - with tracer.push_active_function( - "locate_cached_diff", input_data={"diff_path": str(diff_output_path)} - ) as span: + diff_file_exists = False + if diff_output_path.exists(): + diff_file_exists = True + elif fix_info and brew_downloader is not None: + diff_output_path = downloand_patch_and_gen_diff(fix_info, brew_downloader, source_dir, patch_dir) + if diff_output_path is not None: + diff_file_exists = True + + if diff_file_exists: + with tracer.push_active_function("locate_diff_file", input_data={"diff_path": str(diff_output_path)}) as span: diff_text = diff_output_path.read_text(encoding="utf-8") hunks = parse_unified_diff(diff_text) candidate_locations = _cross_reference_hunks(hunks, approved) diff_available = True span.set_output({ - "source": "cache", + "source": "diff_file", "hunks_total": len(hunks), - "matched_locations": len(candidate_locations), + "upstream_locations_candidate_count": len(candidate_locations), }) - logger.info("locate: using cached diff (%d hunks, %d matched)", len(hunks), len(candidate_locations)) + logger.info("locate: using cached diff (%d hunks, %d matched)", len(hunks), len(candidate_locations)) + else: + logger.info("locate: no diff file found, falling back to Tantivy") - # -- Steps A-F: diff-based path (only if no cache) ------------------------- - elif fix_info and brew_downloader is not None: - try: - with tracer.push_active_function( - "locate_download_patch", 
input_data={"fix_info": fix_info} - ) as span: - from exploit_iq_commons.utils.source_rpm_downloader import SourceRPMDownloader - - srpm_path = brew_downloader.download_patched_srpm_by_nevra(fix_info["nevra"]) - if srpm_path is None: - srpm_path = brew_downloader.download_patched_srpm( - fix_info["name"], fix_info["version"], fix_info["release"], - ) - if srpm_path is not None: - patch_dir.mkdir(parents=True, exist_ok=True) - shutil.copy2(srpm_path, patch_dir) - SourceRPMDownloader.extract_src_rpm(srpm_path, patch_dir) - - diff_text = _generate_tree_diff(source_dir, patch_dir) - diff_output_path = patch_dir.parent / "locate.diff" - diff_output_path.write_text(diff_text, encoding="utf-8") - - hunks = parse_unified_diff(diff_text) - candidate_locations = _cross_reference_hunks(hunks, approved) - diff_available = True - span.set_output({ - "srpm": str(srpm_path), - "hunks_total": len(hunks), - "matched_locations": len(candidate_locations), - }) - else: - span.set_output({"srpm": None, "reason": "patched build not found in Brew"}) - logger.info("locate: patched SRPM not found, falling back to Tantivy") - except Exception as e: - logger.warning("locate: diff path failed (%s), falling back to Tantivy", e) - - # -- Fallback: keyword-only Tantivy search -------------------------------- - if not candidate_locations: - with tracer.push_active_function( - "locate_tantivy_fallback", input_data={"keywords": [k.term for k in approved]} - ): - candidate_locations = await _tantivy_fallback(approved, lexical_search_fn, tracer) + tantivy_locations = _tantivy_candidates(approved) # -- Step G: LLM judges each candidate location --------------------------- confirmed: list[FileLocation] = [] - if candidate_locations: + if candidate_locations or tantivy_locations: judge_llm = llm.with_structured_output(LocationJudgment) cve_context = "\n\n".join(f"[{src}] {txt}" for src, txt in descriptions) @@ -659,25 +645,57 @@ async def build_locate_pipeline( confirmed.append(loc) # -- Aggregate into 
LocateReport ------------------------------------------ - is_sufficient = len(confirmed) > 0 - if confirmed: - file_list = ", ".join(sorted({loc.file_path for loc in confirmed})) - evidence = ( - f"Located {len(confirmed)} vulnerability-relevant code region(s) " - f"in: {file_list}." - ) - if diff_available: - evidence += " Evidence sourced from patched RPM diff." - instructions = "" - else: - evidence = "No vulnerability-relevant code locations found in the source tree." - instructions = "No code locations confirmed. Verify phase should assess based on available intel." + with tracer.push_active_function( + "locate_aggregate", + input_data={ + "confirmed_count": len(confirmed), + "tantivy_count": len(tantivy_locations), + }, + ) as span: + if confirmed: + final_locations = confirmed + evidence_level: Literal["high", "medium", "low"] = "high" + file_list = ", ".join(sorted({loc.file_path for loc in confirmed})) + evidence = ( + f"Located {len(confirmed)} vulnerability-relevant code region(s) " + f"in: {file_list}. Evidence sourced from patched RPM diff (high confidence)." + ) + instructions = "" + elif tantivy_locations: + final_locations = tantivy_locations + evidence_level = "medium" + file_list = ", ".join(sorted({loc.file_path for loc in tantivy_locations})) + evidence = ( + f"Located {len(tantivy_locations)} candidate code region(s) via keyword search " + f"in: {file_list}. No upstream diff available; downstream verification required." + ) + instructions = "Keyword-only locations require downstream patch/changelog verification." + else: + final_locations = [] + evidence_level = "low" + evidence = "No vulnerability-relevant code locations found in the source tree." + instructions = "No code locations found. Verify phase should assess based on available intel." 
+ + span.set_output({ + "source": "diff" if confirmed else ("tantivy" if tantivy_locations else "none"), + "evidence_level": evidence_level, + "final_locations_count": len(final_locations), + "evidence": evidence, + }) + + logger.info( + "locate: aggregation complete (source=%s, evidence_level=%s, locations=%d)", + "diff" if confirmed else ("tantivy" if tantivy_locations else "none"), + evidence_level, + len(final_locations), + ) return LocateReport( - locations=confirmed, + locations=final_locations, diff_available=diff_available, evidence=evidence, - is_sufficient=is_sufficient, + evidence_level=evidence_level, + is_sufficient=len(final_locations) > 0, instructions=instructions, ) From 89534584c1e5e31c96220da1fe1b4a673900b135 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Tue, 28 Apr 2026 07:24:12 +0000 Subject: [PATCH 10/46] generating report for L1 agent --- .../functions/code_agent_graph_defs.py | 332 ++++++++++++++++++ .../functions/cve_package_code_agent.py | 95 ++++- 2 files changed, 420 insertions(+), 7 deletions(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index 7de9cc113..75f669d17 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -206,6 +206,72 @@ class VerifyReport(ReflectionBase): summary: str +# --------------------------------------------------------------------------- +# Code Agent Report schema +# --------------------------------------------------------------------------- + + +class CodeAgentReport(BaseModel): + """Final L1 Code Agent investigation report synthesizing all phases.""" + confidence: float = Field( + ge=0.0, le=1.0, + description="Overall confidence in the verdict (0.0-1.0)") + justification_label: str = Field( + description=( + "Justification category aligned with VEX: one of " + "code_not_present, protected_by_mitigating_control, vulnerable, uncertain" + )) + 
executive_summary: str = Field( + description="2-3 sentence summary of the investigation findings and verdict") + evidence_chain: list[str] = Field( + description="Ordered list of evidence items tracing the vulnerability through phases") + affected_files: list[str] = Field( + description="Source files where vulnerable code was identified") + patch_analysis: str | None = Field( + default=None, + description="Analysis of downstream patches if any were found") + limitations: list[str] = Field( + default_factory=list, + description="Gaps or uncertainties in the investigation") + recommendation: str = Field( + description="Actionable recommendation based on the verdict") + + def to_markdown(self, vuln_id: str = "", target_package: str = "") -> str: + """Render the report as a formatted markdown string.""" + lines = [ + "# L1 Code Agent Investigation Report", + "", + ] + if vuln_id or target_package: + lines.extend([f"**CVE:** {vuln_id}", f"**Package:** {target_package}", ""]) + + lines.extend([ + f"**Justification:** `{self.justification_label}`", + f"**Confidence:** {self.confidence:.0%}", + "", + "## Executive Summary", + self.executive_summary, + "", + "## Evidence Chain", + ]) + lines.extend(f"- {ev}" for ev in self.evidence_chain) + + if self.affected_files: + lines.extend(["", "## Affected Files"]) + lines.extend(f"- `{f}`" for f in self.affected_files) + + if self.patch_analysis: + lines.extend(["", "## Patch Analysis", self.patch_analysis]) + + if self.limitations: + lines.extend(["", "## Limitations"]) + lines.extend(f"- {lim}" for lim in self.limitations) + + lines.extend(["", "## Recommendation", self.recommendation]) + + return "\n".join(lines) + + # --------------------------------------------------------------------------- # Prompt templates # --------------------------------------------------------------------------- @@ -274,6 +340,272 @@ class VerifyReport(ReflectionBase): "Output a structured judgment." 
) +CODE_AGENT_REPORT_PROMPT = """\ + +You are a security analyst generating the final L1 Code Agent investigation report. +Synthesize the results from all three investigation phases (Identify, Locate, Verify) +into a comprehensive, auditable report with a clear justification and supporting evidence. + + + +CVE: {vuln_id} +Target Package: {target_package} +CVE Description: {cve_description} + + + +## Identify Phase +{identify_section} + +## Locate Phase +{locate_section} + +## Verify Phase +{verify_section} + + + +Generate a structured report following these requirements: + +1. JUSTIFICATION LABEL (select the most appropriate): + - code_not_present: Vulnerable code/function is absent from this package version + - protected_by_mitigating_control: Downstream patch or backport mitigates the vulnerability + - vulnerable: Package is actually vulnerable and needs patching + - uncertain: Insufficient information to determine exploitability + +2. EVIDENCE CHAIN: + - Trace the investigation from keywords -> locations -> verification + - Cite specific files, line numbers, and code snippets + - Reference patch filenames and changelog entries when applicable + +3. LIMITATIONS: + - Note any missing data (no diff available, no build log, etc.) + - Flag low-confidence findings that need manual review + +4. 
RECOMMENDATION: + - Provide actionable guidance based on the justification + - For protected_by_mitigating_control: confirm the fix is deployed + - For vulnerable: recommend patching or mitigation + - For uncertain: suggest next investigation steps + + + +Provide a structured JSON response with: +- confidence: 0.0-1.0 overall confidence +- justification_label: one of the labels above +- executive_summary: 2-3 sentence summary +- evidence_chain: list of evidence items in logical order +- affected_files: list of source files involved +- patch_analysis: analysis of patches (or null if none) +- limitations: list of investigation gaps +- recommendation: actionable next step + +""" + + +# --------------------------------------------------------------------------- +# Report formatting helpers +# --------------------------------------------------------------------------- + +MAX_SNIPPET_CHARS = 500 + + +def _format_identify_for_report(report: IdentifyReport | None) -> str: + """Format Identify phase results for prompt injection.""" + if report is None: + return "Identify phase did not produce results." + + lines = [] + if report.approved: + lines.append(f"**Approved Keywords ({len(report.approved)}):**") + for kw in report.approved: + files_str = ", ".join(kw.file_paths[:3]) if kw.file_paths else "no files" + if len(kw.file_paths) > 3: + files_str += f" (+{len(kw.file_paths) - 3} more)" + lines.append( + f"- `{kw.term}` ({kw.keyword_type}, confidence={kw.confidence:.2f})" + ) + lines.append(f" Found in: {files_str}") + if kw.hit_content: + snippet = kw.hit_content[0][:MAX_SNIPPET_CHARS] + if len(kw.hit_content[0]) > MAX_SNIPPET_CHARS: + snippet += "..." 
+ lines.append(f" Snippet: {snippet}") + else: + lines.append("**No approved keywords found.**") + + if report.rejected: + lines.append(f"\n**Rejected Keywords ({len(report.rejected)}):** " + + ", ".join(kw.term for kw in report.rejected[:5])) + if len(report.rejected) > 5: + lines.append(f" (+{len(report.rejected) - 5} more)") + + lines.append(f"\nPhase sufficient: {report.is_sufficient}") + if report.instructions: + lines.append(f"Instructions: {report.instructions}") + + return "\n".join(lines) + + +def _format_locate_for_report(report: LocateReport | None) -> str: + """Format Locate phase results for prompt injection.""" + if report is None: + return "Locate phase did not produce results." + + lines = [] + lines.append(f"**Evidence Level:** {report.evidence_level}") + lines.append(f"**Diff Available:** {report.diff_available}") + lines.append(f"**Evidence Summary:** {report.evidence}") + + if report.locations: + lines.append(f"\n**Located Code Regions ({len(report.locations)}):**") + for loc in report.locations[:10]: + line_info = f":{loc.line_number}" if loc.line_number else "" + lines.append(f"- `{loc.file_path}{line_info}` (source: {loc.source})") + lines.append(f" Keywords: {', '.join(loc.matched_keywords)}") + snippet = loc.snippet[:MAX_SNIPPET_CHARS] + if len(loc.snippet) > MAX_SNIPPET_CHARS: + snippet += "..." + lines.append(f" ```\n{snippet}\n ```") + if len(report.locations) > 10: + lines.append(f" (+{len(report.locations) - 10} more locations)") + else: + lines.append("\n**No vulnerability-relevant code locations found.**") + + lines.append(f"\nPhase sufficient: {report.is_sufficient}") + if report.instructions: + lines.append(f"Instructions: {report.instructions}") + + return "\n".join(lines) + + +def _format_verify_for_report(report: VerifyReport | None) -> str: + """Format Verify phase results for prompt injection.""" + if report is None: + return "Verify phase did not produce results." 
+ + lines = [] + lines.append(f"**Verdict:** {report.verdict}") + lines.append(f"**Summary:** {report.summary}") + + if report.evidence: + lines.append(f"\n**Evidence Items ({len(report.evidence)}):**") + for ev in report.evidence: + lines.append(f"- Source: {ev.source} (confidence={ev.confidence:.2f})") + lines.append(f" Detail: {ev.detail}") + if ev.patch_details: + lines.append(f" Patch details ({len(ev.patch_details)} hunks):") + for pd in ev.patch_details[:3]: + lines.append(f" - {pd.patch_filename}: {pd.file_path}:{pd.line_number or 'N/A'}") + fix_snippet = pd.fix_code[:200] + if len(pd.fix_code) > 200: + fix_snippet += "..." + lines.append(f" Fix: {fix_snippet}") + if len(ev.patch_details) > 3: + lines.append(f" (+{len(ev.patch_details) - 3} more hunks)") + else: + lines.append("\n**No patch evidence found.**") + + lines.append(f"\nPhase sufficient: {report.is_sufficient}") + if report.instructions: + lines.append(f"Instructions: {report.instructions}") + + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# Report generation pipeline +# --------------------------------------------------------------------------- + + +async def generate_code_agent_report( + *, + llm, + vuln_id: str, + target_package: str, + descriptions: list[tuple[str, str]], + identify_report: IdentifyReport | None, + locate_report: LocateReport | None, + verify_report: VerifyReport | None, + tracer, +) -> CodeAgentReport: + """Generate the final L1 Code Agent investigation report. + + Synthesizes results from all three phases (Identify, Locate, Verify) + into a comprehensive, auditable report with a clear verdict. + + Parameters + ---------- + llm: + LangChain LLM for report generation. + vuln_id: + CVE identifier (e.g. "CVE-2026-5121"). + target_package: + Name of the package being investigated. + descriptions: + ``(source_name, text)`` pairs from CVE intel. + identify_report: + Output of the Identify phase (may be None). 
+ locate_report: + Output of the Locate phase (may be None). + verify_report: + Output of the Verify phase (may be None). + tracer: + Request-scoped tracing context. + + Returns + ------- + CodeAgentReport + Structured report with verdict, evidence, and recommendations. + """ + from langchain_core.messages import HumanMessage, SystemMessage + + cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions) + + identify_section = _format_identify_for_report(identify_report) + locate_section = _format_locate_for_report(locate_report) + verify_section = _format_verify_for_report(verify_report) + + prompt_text = CODE_AGENT_REPORT_PROMPT.format( + vuln_id=vuln_id, + target_package=target_package, + cve_description=cve_description, + identify_section=identify_section, + locate_section=locate_section, + verify_section=verify_section, + ) + + report_llm = llm.with_structured_output(CodeAgentReport) + + with tracer.push_active_function( + "generate_report", + input_data={ + "vuln_id": vuln_id, + "target_package": target_package, + "identify_sufficient": identify_report.is_sufficient if identify_report else False, + "locate_sufficient": locate_report.is_sufficient if locate_report else False, + "verify_verdict": verify_report.verdict if verify_report else None, + }, + ) as span: + messages = [ + SystemMessage(content=prompt_text), + HumanMessage(content="Generate the report."), + ] + report: CodeAgentReport = await report_llm.ainvoke(messages) + span.set_output({ + "confidence": report.confidence, + "justification_label": report.justification_label, + "affected_files_count": len(report.affected_files), + "limitations_count": len(report.limitations), + }) + + logger.info( + "generate_code_agent_report: confidence=%.2f justification=%s", + report.confidence, report.justification_label, + ) + + return report + # --------------------------------------------------------------------------- # Identify subgraph pipeline diff --git 
a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index 738be5d52..7ef6da309 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -40,12 +40,14 @@ ) from vuln_analysis.functions.code_agent_graph_defs import ( CodeAgentState, + CodeAgentReport, IdentifyReport, LocateReport, VerifyReport, build_identify_subgraph, build_locate_pipeline, build_verify_pipeline, + generate_code_agent_report, ) from vuln_analysis.tools.brew_downloader import BrewDownloader, BrewProfileType, BrewDownloaderError from vuln_analysis.utils.full_text_search import FullTextSearch @@ -262,13 +264,40 @@ async def verify_node(state: CodeAgentState) -> dict: "INCONCLUSIVE": ("uncertain", "UNKNOWN"), } +_JUSTIFICATION_LABEL_TO_STATUS: dict[str, _StatusLiteral] = { + "code_not_present": "FALSE", + "code_not_reachable": "FALSE", + "protected_by_mitigating_control": "FALSE", + "protected_by_compiler": "FALSE", + "vulnerable": "TRUE", + "uncertain": "UNKNOWN", +} + def _build_analysis( - message: AgentMorpheusEngineInput, result: dict, + message: AgentMorpheusEngineInput, + result: dict, + code_agent_report: CodeAgentReport | None = None, ) -> list[AgentMorpheusEngineOutput]: verify_report: VerifyReport | None = result.get("verify_report") - if verify_report is not None: + if code_agent_report is not None: + label = code_agent_report.justification_label + status: _StatusLiteral = _JUSTIFICATION_LABEL_TO_STATUS.get(label, "UNKNOWN") + + reason_parts = [code_agent_report.executive_summary] + if code_agent_report.evidence_chain: + reason_parts.append("\n\nEvidence chain:") + reason_parts.extend(f"- {ev}" for ev in code_agent_report.evidence_chain[:5]) + if code_agent_report.patch_analysis: + reason_parts.append(f"\n\nPatch analysis: {code_agent_report.patch_analysis}") + if code_agent_report.limitations: + reason_parts.append("\n\nLimitations:") + reason_parts.extend(f"- 
{lim}" for lim in code_agent_report.limitations) + reason_parts.append(f"\n\nRecommendation: {code_agent_report.recommendation}") + reason = "\n".join(reason_parts) + summary = code_agent_report.executive_summary + elif verify_report is not None: label, status = _VERDICT_TO_JUSTIFICATION.get( verify_report.verdict, ("uncertain", "UNKNOWN"), ) @@ -282,10 +311,12 @@ def _build_analysis( reason = verify_report.summary if patch_snippets: reason += "\n\nDownstream patch details:\n" + "\n".join(patch_snippets[:5]) + summary = verify_report.summary else: label = "code_not_present" - status: _StatusLiteral = "UNKNOWN" + status = "UNKNOWN" reason = "Verify phase did not produce a report." + summary = "No verify report available." last_msg = result["messages"][-1].content if result.get("messages") else "no result" @@ -298,7 +329,7 @@ def _build_analysis( response=last_msg, ), ], - summary=verify_report.summary if verify_report else f"No verify report for {intel.vuln_id}", + summary=summary, justification=JustificationOutput( label=label, reason=reason, @@ -328,13 +359,63 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: result = await l1_agent_graph.ainvoke(initial_state) logger.info("package_code_agent: L1 investigation finished") - with tracer.push_active_function("agent_finish", input_data={"verdict": "not guilty"}): - pass + + vuln_id = message.input.scan.vulns[0].vuln_id + target_package = message.input.image.target_package + target_package_name = target_package.name if target_package else "unknown" + intel = message.info.intel + + descriptions: list[tuple[str, str]] = [] + if intel: + a_intel = intel[0] + if a_intel.ghsa: + cve_text = a_intel.ghsa.description or a_intel.ghsa.summary or "" + if cve_text: + descriptions.append(("ghsa", cve_text)) + if a_intel.ubuntu and a_intel.ubuntu.description: + descriptions.append(("ubuntu", a_intel.ubuntu.description)) + + llm = await builder.get_llm(llm_name=config.llm_name, 
wrapper_type=LLMFrameworkEnum.LANGCHAIN) + code_agent_report: CodeAgentReport = await generate_code_agent_report( + llm=llm, + vuln_id=vuln_id, + target_package=target_package_name, + descriptions=descriptions, + identify_report=result.get("identify_report"), + locate_report=result.get("locate_report"), + verify_report=result.get("verify_report"), + tracer=tracer, + ) + + # Write markdown report for debug/dev + ctx = message.info.checker_context + source_key = ctx.source_key + report_dir = Path(config.base_checker_dir) / source_key / "report" + report_dir.mkdir(parents=True, exist_ok=True) + report_path = report_dir / f"L1_report_{vuln_id}.md" + report_path.write_text(code_agent_report.to_markdown(vuln_id=vuln_id, target_package=target_package_name)) + logger.info("package_code_agent: wrote report to %s", report_path) + + with tracer.push_active_function( + "agent_finish", + input_data={ + "confidence": code_agent_report.confidence, + "justification_label": code_agent_report.justification_label, + }, + ) as span: + span.set_output({ + "executive_summary": code_agent_report.executive_summary, + "affected_files": code_agent_report.affected_files, + "recommendation": code_agent_report.recommendation, + }) return AgentMorpheusOutput( input=message.input, info=message.info, - output=OutputPayload(analysis=_build_analysis(message, result), vex=None), + output=OutputPayload( + analysis=_build_analysis(message, result, code_agent_report), + vex=None, + ), ) yield FunctionInfo.from_fn( From 49bdfddc754aa6082479c203ca10e777cb03c62b Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Tue, 28 Apr 2026 08:37:14 +0000 Subject: [PATCH 11/46] fix L1 report --- .../functions/code_agent_graph_defs.py | 18 +++++++++++++++--- .../functions/cve_package_code_agent.py | 18 ++++++++++++++++-- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index 75f669d17..f3f1d638e 
100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -236,17 +236,29 @@ class CodeAgentReport(BaseModel): recommendation: str = Field( description="Actionable recommendation based on the verdict") - def to_markdown(self, vuln_id: str = "", target_package: str = "") -> str: + def to_markdown( + self, + vuln_id: str = "", + target_package: str = "", + version: str = "", + release: str = "", + ) -> str: """Render the report as a formatted markdown string.""" lines = [ "# L1 Code Agent Investigation Report", "", ] if vuln_id or target_package: - lines.extend([f"**CVE:** {vuln_id}", f"**Package:** {target_package}", ""]) + lines.append(f"**CVE:** {vuln_id} ") + lines.append(f"**Package:** {target_package} ") + if version: + lines.append(f"**Version:** {version} ") + if release: + lines.append(f"**Release:** {release}") + lines.append("") lines.extend([ - f"**Justification:** `{self.justification_label}`", + f"**Justification:** `{self.justification_label}` ", f"**Confidence:** {self.confidence:.0%}", "", "## Executive Summary", diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index 7ef6da309..38d5febd9 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -390,10 +390,24 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: # Write markdown report for debug/dev ctx = message.info.checker_context source_key = ctx.source_key + fix_info = _parse_fix_info_from_context(ctx, target_package_name) report_dir = Path(config.base_checker_dir) / source_key / "report" report_dir.mkdir(parents=True, exist_ok=True) - report_path = report_dir / f"L1_report_{vuln_id}.md" - report_path.write_text(code_agent_report.to_markdown(vuln_id=vuln_id, target_package=target_package_name)) + # Build filename: L1_report_CVE-XXXX-package-version-release.md 
+ version = fix_info.get("version", "") + release = fix_info.get("release", "") + suffix = f"-{target_package_name}" if target_package_name else "" + if version: + suffix += f"-{version}" + if release: + suffix += f"-{release}" + report_path = report_dir / f"L1_report_{vuln_id}{suffix}.md" + report_path.write_text(code_agent_report.to_markdown( + vuln_id=vuln_id, + target_package=target_package_name, + version=version, + release=release, + )) logger.info("package_code_agent: wrote report to %s", report_path) with tracer.push_active_function( From 70bd805e50fb9821e15da2e302079435a9a29bea Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Tue, 28 Apr 2026 13:31:23 +0300 Subject: [PATCH 12/46] update prompt --- .../functions/code_agent_graph_defs.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index f3f1d638e..9561434e6 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -385,8 +385,19 @@ def to_markdown( - vulnerable: Package is actually vulnerable and needs patching - uncertain: Insufficient information to determine exploitability + PHASE PRECEDENCE RULES: + - If Verify phase provides definitive evidence (PATCHED or VULNERABLE with confidence >= 0.8), + this takes precedence over failures in Identify or Locate phases. + - Failed keyword/location searches do NOT imply vulnerability if patches are confirmed downstream. + - A "PATCHED" verdict from Verify maps to "protected_by_mitigating_control". + - A "VULNERABLE" verdict from Verify maps to "vulnerable". + - Only use "uncertain" when Verify phase is INCONCLUSIVE or no phases produced strong evidence. + 2. 
EVIDENCE CHAIN: - - Trace the investigation from keywords -> locations -> verification + - Trace the investigation through available phases + - If Identify/Locate succeeded: cite keywords -> locations -> verification + - If Identify/Locate failed but Verify succeeded: explain that patches were found + directly in build artifacts/changelogs, making keyword search unnecessary - Cite specific files, line numbers, and code snippets - Reference patch filenames and changelog entries when applicable @@ -404,6 +415,9 @@ def to_markdown( Provide a structured JSON response with: - confidence: 0.0-1.0 overall confidence + * >= 0.8: High certainty (definitive patch match or clear vulnerability) + * 0.5-0.79: Moderate certainty (semantic match, needs verification) + * < 0.5: Low certainty (flag for manual review) - justification_label: one of the labels above - executive_summary: 2-3 sentence summary - evidence_chain: list of evidence items in logical order @@ -411,6 +425,9 @@ def to_markdown( - patch_analysis: analysis of patches (or null if none) - limitations: list of investigation gaps - recommendation: actionable next step + +Ensure all code snippets and special characters within JSON string values are properly escaped +(e.g., quotes as \", backslashes as \\, newlines as \\n) to maintain valid JSON format. 
""" From ceea1555b60b2c5458ce52abe523cee138737f02 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Thu, 30 Apr 2026 05:52:38 +0000 Subject: [PATCH 13/46] Save changes before change in design --- .../functions/code_agent_graph_defs.py | 349 +++++++++++++----- .../functions/cve_package_code_agent.py | 6 +- src/vuln_analysis/utils/package_identifier.py | 66 ++-- 3 files changed, 300 insertions(+), 121 deletions(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index 9561434e6..d011f163e 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -27,7 +27,10 @@ import shutil import subprocess from pathlib import Path -from typing import Literal, NotRequired +from typing import Literal, NotRequired, TYPE_CHECKING + +if TYPE_CHECKING: + from vuln_analysis.tools.brew_downloader import BrewDownloader from langchain_core.messages import HumanMessage, SystemMessage from langgraph.graph import MessagesState @@ -102,8 +105,11 @@ class KeywordJudgment(BaseModel): reasoning: str = Field(description="Brief explanation of the decision") file_paths: list[str] = Field( default_factory=list, - description="Source files where this keyword was found during Identify", - ) + description="Source files where this keyword was found during Identify",) + +class KeywordHit(BaseModel): + judgment: KeywordJudgment + hit_content: list[str] = Field( default_factory=list, description="Snippet content from Tantivy hits (parallel to file_paths)", @@ -199,6 +205,13 @@ class ChangelogJudgment(BaseModel): reasoning: str +class PatchContentJudgment(BaseModel): + """LLM output: judge whether patch content fixes the CVE.""" + relevant: bool + confidence: float = Field(ge=0.0, le=1.0) + reasoning: str + + class VerifyReport(ReflectionBase): """Verify phase report (downstream search: checks this build's patches and changelog).""" verdict: Literal["PATCHED", 
"VULNERABLE", "INCONCLUSIVE"] @@ -222,7 +235,7 @@ class CodeAgentReport(BaseModel): "code_not_present, protected_by_mitigating_control, vulnerable, uncertain" )) executive_summary: str = Field( - description="2-3 sentence summary of the investigation findings and verdict") + description="3-5 sentence summary of the investigation findings and verdict") evidence_chain: list[str] = Field( description="Ordered list of evidence items tracing the vulnerability through phases") affected_files: list[str] = Field( @@ -302,7 +315,9 @@ def to_markdown( "5. If the description mentions a specific API, endpoint, or configuration key, include it.\n" "6. Include both the short function name and the fully-qualified name if available " "(e.g. both 'Parse' and 'encoding/xml.Decoder.Parse').\n" - "7. Do NOT include CVE IDs, GHSA IDs, or version numbers as keywords.\n\n" + "7. Do NOT include CVE IDs, GHSA IDs, or version numbers as keywords.\n" + "8. Do NOT include the TARGET PACKAGE name as a keyword - you are already searching within " + "its source tree. Generic package names produce too many matches.\n\n" "Output a structured list of keywords with reasoning." ) @@ -352,6 +367,22 @@ def to_markdown( "Output a structured judgment." ) +VERIFY_PATCH_CONTENT_PROMPT = ( + "You are a vulnerability analyst performing the **Verify** phase.\n\n" + "You will receive:\n" + "1. A CVE ID and its description.\n" + "2. 
Patch content (diff hunks) from a patch file.\n\n" + "Your task is to determine whether this patch fixes or mitigates the CVE.\n\n" + "RULES:\n" + "- Look for code changes that address the vulnerability (bounds checks, input validation, " + "null checks, API changes, etc.).\n" + "- If the patch modifies functions/files mentioned in the CVE description, confidence is higher.\n" + "- If the patch adds security-relevant logic matching the CVE fix pattern, confidence >= 0.8.\n" + "- If the patch touches related code but the fix is unclear, confidence 0.5-0.7.\n" + "- If the patch is unrelated to the vulnerability, mark as not relevant.\n\n" + "Output a structured judgment." +) + CODE_AGENT_REPORT_PROMPT = """\ You are a security analyst generating the final L1 Code Agent investigation report. @@ -646,6 +677,7 @@ async def build_identify_subgraph( llm, descriptions: list[tuple[str, str]], vuln_id: str, + package_name: str, lexical_search_fn, tracer, identify_prompt: str = IDENTIFY_SYSTEM_PROMPT, @@ -666,6 +698,9 @@ async def build_identify_subgraph( ``(source_name, text)`` pairs built from CVE intel (e.g. GHSA, Ubuntu). vuln_id: CVE identifier, included in context blocks sent to the LLM. + package_name: + Target package name (e.g. 'libarchive'). Passed to the LLM to avoid + extracting it as a keyword since we are already searching within its source. lexical_search_fn: ``async (str) -> list[dict]`` -- calls the Tantivy index. 
tracer: @@ -683,7 +718,7 @@ async def build_identify_subgraph( all_reasoning: list[str] = [] for source_name, desc_text in descriptions: - context_block = f"CVE: {vuln_id}\nKNOWLEDGE:\n{desc_text}" + context_block = f"CVE: {vuln_id}\nTARGET PACKAGE: {package_name}\nKNOWLEDGE:\n{desc_text}" if all_keywords: context_block += ( "\n\nALREADY IDENTIFIED KEYWORDS (do not repeat): " @@ -710,30 +745,19 @@ async def build_identify_subgraph( # -- Step B: per-keyword search + judge ------------------------------------ judge_llm = llm.with_structured_output(KeywordJudgment) - judgments: list[KeywordJudgment] = [] + judgments: list[KeywordHit] = [] cve_context = "\n\n".join(f"[{src}] {txt}" for src, txt in descriptions) for kw in unique_keywords: with tracer.push_active_function(f"judge_{kw}", input_data={"keyword": kw}) as span: try: - hits = await lexical_search_fn(kw) + results_text = await lexical_search_fn(kw) except Exception as e: logger.warning("lexical search failed for '%s': %s", kw, e) - hits = [] - - if isinstance(hits, str): - hits = [] - - hit_files = [h["source"] for h in hits if "source" in h] + results_text = "No results found." - results_text = ( - "\n\n".join( - f"File: {h['source']}\n{h['content'][:MAX_CONTENT_CHARS]}" - for h in hits - ) - if hits - else "No results found." - ) + if results_text is None: + results_text = "No results found." 
messages = [ SystemMessage(content=reflect_prompt), @@ -743,20 +767,26 @@ async def build_identify_subgraph( f"Search results:\n{results_text}" )), ] - judgment: KeywordJudgment = await judge_llm.ainvoke(messages) - judgment.file_paths = hit_files - judgment.hit_content = [h["content"][:MAX_CONTENT_CHARS] for h in hits if "content" in h] + try: + judgment: KeywordJudgment = await judge_llm.ainvoke(messages) + except Exception as e: + logger.warning("LLM judgment failed for keyword '%s': %s", kw, e) + span.set_output({"error": str(e)}) + continue + + judgmentHit = KeywordHit(judgment=judgment, hit_content=[results_text]) span.set_output({"approved": judgment.approved, "confidence": judgment.confidence}) - judgments.append(judgment) + judgments.append(judgmentHit) # -- Step C: aggregate into IdentifyReport --------------------------------- approved: list[Keyword] = [] rejected: list[Keyword] = [] - for j in judgments: + for jh in judgments: + j = jh.judgment kw_obj = Keyword( term=j.term, keyword_type=j.keyword_type, confidence=j.confidence, file_paths=j.file_paths, - hit_content=j.hit_content, + hit_content=jh.hit_content, ) if j.approved: approved.append(kw_obj) @@ -776,34 +806,80 @@ async def build_identify_subgraph( # --------------------------------------------------------------------------- -def parse_unified_diff(diff_text: str) -> list[DiffHunk]: +def parse_unified_diff(diff_text: str, tracer) -> list[DiffHunk]: """Parse unified diff text into structured DiffHunk objects using unidiff.""" - patch = PatchSet.from_string(diff_text) + # Remove "\ No newline at end of file" markers that unidiff can't parse + cleaned_diff = "\n".join( + line for line in diff_text.splitlines() + if not line.startswith("\\ No newline at end of file") + ) + + try: + patch = PatchSet.from_string(cleaned_diff) + except Exception as e: + with tracer.push_active_function("parse_unified_diff_error", input_data={"error": str(e)}) as span: + span.set_output({"error": str(e), "error_type": 
type(e).__name__}) + logger.warning("Failed to parse diff: %s", e) + return [] + hunks: list[DiffHunk] = [] for patched_file in patch: if patched_file.is_binary_file: continue - for hunk in patched_file: - hunks.append(DiffHunk( - file_path=patched_file.path, - source_start_line=hunk.source_start, - source_end_line=hunk.source_start + hunk.source_length, - patch_start_line=hunk.target_start, - patch_end_line=hunk.target_start + hunk.target_length, - content=str(hunk), - )) + if patched_file.is_added_file or patched_file.is_removed_file: + continue + if patched_file.added > 0 or patched_file.removed > 0: + for hunk in patched_file: + hunks.append(DiffHunk( + file_path=patched_file.path, + source_start_line=hunk.source_start, + source_end_line=hunk.source_start + hunk.source_length, + patch_start_line=hunk.target_start, + patch_end_line=hunk.target_start + hunk.target_length, + content=str(hunk), + )) return hunks +_DIFF_EXCLUDE_PATTERNS = [ + ":(exclude)*.S", ":(exclude)*.s", ":(exclude)*.asm", # Assembly (causes parser errors) + ":(exclude)*.pod", ":(exclude)*.pl", # Perl docs/scripts + ":(exclude)*.pem", ":(exclude)*.cnf", # Certificates/config + ":(exclude)*.sh", ":(exclude)*.bat", ":(exclude)*.com", # Shell/batch scripts +] + + def _generate_tree_diff(source_dir: Path, patch_dir: Path) -> str: """Run ``git diff --no-index`` between two directory trees. Uses git's built-in binary detection to automatically skip binary files (RPMs, tarballs, images, etc.) without needing an explicit exclude list. + Excludes assembly and non-source files that cause parser issues. 
""" result = subprocess.run( ["git", "diff", "--no-index", "--no-color", - str(source_dir), str(patch_dir)], + str(source_dir), str(patch_dir), "--"] + _DIFF_EXCLUDE_PATTERNS, + capture_output=True, timeout=300, + ) + return result.stdout.decode("utf-8", errors="replace") + + +def _generate_targeted_diff(source_dir: Path, patch_dir: Path, target_basenames: set[str]) -> str: + """Generate diff only for specific files between two trees. + + Instead of diffing all files and filtering afterward, this runs git diff + with file patterns to only diff the files we care about. Much faster and + avoids parser issues from problematic files (assembly, scripts, etc.). + """ + if not target_basenames: + return "" + + # Build file patterns for git diff - match files by basename anywhere in tree + file_patterns = [f"*/{basename}" for basename in target_basenames] + + result = subprocess.run( + ["git", "diff", "--no-index", "--no-color", + str(source_dir), str(patch_dir), "--"] + file_patterns, capture_output=True, timeout=300, ) return result.stdout.decode("utf-8", errors="replace") @@ -816,41 +892,50 @@ def _generate_tree_diff(source_dir: Path, patch_dir: Path) -> str: LANG_PARSER_EXTENSIONS = {".c", ".h", ".cpp", ".hpp", ".py", ".go", ".java", ".js", ".ts"} -def _cross_reference_hunks( - hunks: list[DiffHunk], approved_keywords: list[Keyword], +def _hunks_to_locations( + hunks: list[DiffHunk], + approved_keywords: list[Keyword], ) -> list[FileLocation]: - """Match diff hunks against approved keywords from the Identify phase. - - Applies two layers of filtering before keyword matching: - 1. Extension filter – skip non-source files (docs, configs, translations). - 2. File-scope filter – when a keyword carries ``file_paths`` from Identify, - only consider hunks whose file basename appears in that list. + """Convert hunks to FileLocations, filtering by file and content. + + Applies two filters: + 1. File filter - hunk's file must be in approved_keywords.file_paths + 2. 
Content filter - at least one keyword term must appear in hunk content """ + # Build basename -> keywords lookup + basename_to_keywords: dict[str, list[str]] = {} + for kw in approved_keywords: + for fp in kw.file_paths: + basename = Path(fp).name + basename_to_keywords.setdefault(basename, []).append(kw.term) + locations: list[FileLocation] = [] for hunk in hunks: + basename = Path(hunk.file_path).name + if basename not in basename_to_keywords: + continue + # Extension filter to skip non-code files if Path(hunk.file_path).suffix.lower() not in LANG_PARSER_EXTENSIONS: continue - - hunk_basename = Path(hunk.file_path).name + + # Content filter - check which keywords appear in hunk content searchable = (hunk.content + " " + hunk.file_path).lower() - - matched: list[str] = [] - for kw in approved_keywords: - if kw.file_paths and not any( - hunk_basename in fp for fp in kw.file_paths - ): - continue - if kw.term.lower() in searchable: - matched.append(kw.term) - - if matched: - locations.append(FileLocation( - file_path=hunk.file_path, - line_number=hunk.source_start_line, - snippet=hunk.content, - matched_keywords=matched, - source="diff", - )) + matched_keywords = [ + kw for kw in basename_to_keywords[basename] + if kw.lower() in searchable + ] + + # Only include hunk if at least one keyword matches content + if not matched_keywords: + continue + + locations.append(FileLocation( + file_path=hunk.file_path, + line_number=hunk.source_start_line, + snippet=hunk.content, + matched_keywords=matched_keywords, + source="diff", + )) return locations @@ -878,7 +963,7 @@ def _tantivy_candidates(approved_keywords: list[Keyword]) -> list[FileLocation]: # --------------------------------------------------------------------------- # Locate subgraph pipeline # --------------------------------------------------------------------------- -def downloand_patch_and_gen_diff(fix_info: dict, brew_downloader: BrewDownloader, source_dir: Path, patch_dir: Path) -> Path: +def 
download_patch_and_gen_diff(fix_info: dict, brew_downloader: BrewDownloader, source_dir: Path, patch_dir: Path) -> Path | None: """Download the patched SRPM and generate the diff file between the source and the patched SRPM.""" from exploit_iq_commons.utils.source_rpm_downloader import SourceRPMDownloader @@ -890,10 +975,10 @@ def downloand_patch_and_gen_diff(fix_info: dict, brew_downloader: BrewDownloade shutil.copy2(srpm_path, patch_dir) SourceRPMDownloader.extract_src_rpm(srpm_path, patch_dir) - diff_text = _generate_tree_diff(source_dir, patch_dir) - diff_output_path = patch_dir.parent / "locate.diff" - diff_output_path.write_text(diff_text, encoding="utf-8") - return diff_output_path + #diff_text = _generate_tree_diff(source_dir, patch_dir) + #diff_output_path = patch_dir.parent / "locate.diff" + #diff_output_path.write_text(diff_text, encoding="utf-8") + #return diff_output_path return None async def build_locate_pipeline( @@ -954,28 +1039,49 @@ async def build_locate_pipeline( diff_available = False candidate_locations: list[FileLocation] = [] - # -- Compare the upstream files with the fixed rpm files if exist using the diff file -------------------------------- - diff_output_path = patch_dir.parent / "locate.diff" - diff_file_exists = False - if diff_output_path.exists(): - diff_file_exists = True - elif fix_info and brew_downloader is not None: - diff_output_path = downloand_patch_and_gen_diff(fix_info, brew_downloader, source_dir, patch_dir) - if diff_output_path is not None: - diff_file_exists = True + # -- Download patched SRPM if available and not cached -- + if fix_info and brew_downloader is not None and not patch_dir.exists(): + with tracer.push_active_function( + "locate_download_patch", input_data={"fix_info": fix_info} + ) as span: + try: + download_patch_and_gen_diff(fix_info, brew_downloader, source_dir, patch_dir) + span.set_output({"patch_dir_exists": patch_dir.exists()}) + except Exception as e: + logger.warning("locate: failed to 
download/extract patched SRPM: %s", e) + span.set_output({"error": str(e), "patch_dir_exists": False}) + + # -- Generate targeted diff for only the files from approved keywords -------------------------------- + # Extract target basenames from approved keywords + target_basenames: set[str] = set() + for kw in approved: + for fp in kw.file_paths: + target_basenames.add(Path(fp).name) + + # Check if we can generate targeted diff (need both source and patch dirs) + can_diff = source_dir.exists() and patch_dir.exists() and target_basenames - if diff_file_exists: - with tracer.push_active_function("locate_diff_file", input_data={"diff_path": str(diff_output_path)}) as span: - diff_text = diff_output_path.read_text(encoding="utf-8") - hunks = parse_unified_diff(diff_text) - candidate_locations = _cross_reference_hunks(hunks, approved) + if can_diff: + with tracer.push_active_function( + "locate_targeted_diff", + input_data={"target_files_count": len(target_basenames), "target_basenames": list(target_basenames)[:10]} + ) as span: + # Generate diff only for target files - much faster than full diff + diff_text = _generate_targeted_diff(source_dir, patch_dir, target_basenames) + if diff_text: + diff_output_path = patch_dir.parent / "locate.diff" + diff_output_path.write_text(diff_text, encoding="utf-8") + hunks = parse_unified_diff(diff_text, tracer) + candidate_locations = _hunks_to_locations(hunks, approved) diff_available = True span.set_output({ - "source": "diff_file", + "source": "targeted_diff", "hunks_total": len(hunks), - "upstream_locations_candidate_count": len(candidate_locations), + "target_files_count": len(target_basenames), + "candidate_locations_count": len(candidate_locations), }) - logger.info("locate: using cached diff (%d hunks, %d matched)", len(hunks), len(candidate_locations)) + logger.info("locate: targeted diff for %d files (%d hunks, %d candidates)", + len(target_basenames), len(hunks), len(candidate_locations)) else: logger.info("locate: no diff 
file found, falling back to Tantivy") @@ -1152,6 +1258,60 @@ def _extract_patch_details( return details +async def _analyze_suspected_patches( + *, + llm, + evidence: list[PatchEvidence], + vuln_id: str, + descriptions: list[tuple[str, str]], + tracer, +) -> list[PatchEvidence]: + """Analyze suspected patches (confidence=0.6) with LLM and return updated evidence.""" + SUSPECTED_CONFIDENCE = 0.6 + + suspected = [e for e in evidence if e.confidence == SUSPECTED_CONFIDENCE and e.patch_details] + if not suspected: + return evidence + + cve_context = "\n\n".join(f"[{src}] {txt}" for src, txt in descriptions) + patch_judge_llm = llm.with_structured_output(PatchContentJudgment) + + updated_evidence = [e for e in evidence if e.confidence != SUSPECTED_CONFIDENCE or not e.patch_details] + + for ev in suspected: + patch_content = "\n".join( + f"File: {d.file_path}\nContext:\n{d.vulnerable_code}\nFix:\n{d.fix_code}" + for d in ev.patch_details + ) + patch_name = ev.detail.split(":")[0].replace("Patch ", "") + + with tracer.push_active_function( + f"verify_llm_patch_{patch_name}", input_data={"patch": patch_name} + ) as patch_span: + messages = [ + SystemMessage(content=VERIFY_PATCH_CONTENT_PROMPT), + HumanMessage(content=( + f"CVE: {vuln_id}\n{cve_context}\n\n" + f"Patch: {patch_name}\n\nPatch content:\n{patch_content}" + )), + ] + judgment: PatchContentJudgment = await patch_judge_llm.ainvoke(messages) + patch_span.set_output({ + "relevant": judgment.relevant, + "confidence": judgment.confidence, + }) + + if judgment.relevant: + updated_evidence.append(PatchEvidence( + source="patch_file_content", + detail=f"LLM: {patch_name}: {judgment.reasoning}", + confidence=judgment.confidence, + patch_details=ev.patch_details, + )) + + return updated_evidence + + async def build_verify_pipeline( *, inspector, @@ -1300,9 +1460,12 @@ async def build_verify_pipeline( # ── Aggregate verdict ───────────────────────────────────────────────── if evidence: max_conf = max(e.confidence for e in 
evidence) - verdict: Literal["PATCHED", "VULNERABLE", "INCONCLUSIVE"] = ( - "PATCHED" if max_conf >= 0.8 else "INCONCLUSIVE" - ) + + if max_conf >= 0.8: + verdict = "PATCHED" + else: + verdict = "VULNERABLE" + else: verdict = "VULNERABLE" diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index 38d5febd9..7cac098f2 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -158,6 +158,7 @@ async def identify_node(state: CodeAgentState) -> dict: llm=llm, descriptions=descriptions, vuln_id=vuln_id, + package_name=target_package.name, lexical_search_fn=lexical_search_fn, tracer=tracer, ) @@ -390,12 +391,11 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: # Write markdown report for debug/dev ctx = message.info.checker_context source_key = ctx.source_key - fix_info = _parse_fix_info_from_context(ctx, target_package_name) report_dir = Path(config.base_checker_dir) / source_key / "report" report_dir.mkdir(parents=True, exist_ok=True) # Build filename: L1_report_CVE-XXXX-package-version-release.md - version = fix_info.get("version", "") - release = fix_info.get("release", "") + version = (target_package.version or "") if target_package else "" + release = (target_package.release or "") if target_package else "" suffix = f"-{target_package_name}" if target_package_name else "" if version: suffix += f"-{version}" diff --git a/src/vuln_analysis/utils/package_identifier.py b/src/vuln_analysis/utils/package_identifier.py index a43eac087..0b1f328a3 100644 --- a/src/vuln_analysis/utils/package_identifier.py +++ b/src/vuln_analysis/utils/package_identifier.py @@ -26,6 +26,16 @@ _RPM_NEVRA_RE = re.compile(r"^(.+?)-(\d+):(.+?)-(.+)$") _DIST_TAG_RE = re.compile(r"(el\d+)") +_ARCH_SUFFIXES = frozenset({"x86_64", "aarch64", "i686", "noarch", "s390x", "ppc64le", "armv7hl", "src"}) + + +def 
_strip_arch_suffix(release_arch: str) -> str: + """Remove .arch suffix if present, preserving dist tags like .el6_10.""" + if "." in release_arch: + base, suffix = release_arch.rsplit(".", 1) + if suffix in _ARCH_SUFFIXES: + return base + return release_arch def _extract_dist_tag(release: str) -> str | None: @@ -128,29 +138,35 @@ def _is_target_package_fixed(self, intel: CveIntel, package_identify: PackageIde if not matching: return EnumIdentifyResult.UNKNOWN - target_version = self._target_package.version - target_release = self._target_package.release - - fix = matching[0] - try: - target_nvr = f"{target_version}-{target_release}" - fix_nvr = f"{fix['version']}-{fix['release']}" - - target_dist = _extract_dist_tag(target_release) if target_release else None - fix_dist = _extract_dist_tag(fix["release"]) - if target_dist and fix_dist and target_dist != fix_dist: - logger.debug( - "Cross-stream fix comparison skipped: target=%s fix=%s", - target_dist, fix_dist, - ) - return EnumIdentifyResult.UNKNOWN - - if versions.RpmVersion(target_nvr) >= versions.RpmVersion(fix_nvr): - return EnumIdentifyResult.YES - return EnumIdentifyResult.NO - except Exception as exc: - logger.debug("Fix version comparison failed: %s", exc) - return EnumIdentifyResult.UNKNOWN + # NOTE: Version comparison disabled to test Option A (rely entirely on Verify phase). + # fixed_rpm_list is still populated for reference/logging. + # To re-enable, uncomment the block below. 
+ return EnumIdentifyResult.UNKNOWN + + # --- DISABLED: Version comparison logic --- + # target_version = self._target_package.version + # target_release = self._target_package.release + # + # fix = matching[0] + # try: + # target_nvr = f"{target_version}-{target_release}" + # fix_nvr = f"{fix['version']}-{fix['release']}" + # + # target_dist = _extract_dist_tag(target_release) if target_release else None + # fix_dist = _extract_dist_tag(fix["release"]) + # if target_dist and fix_dist and target_dist != fix_dist: + # logger.debug( + # "Cross-stream fix comparison skipped: target=%s fix=%s", + # target_dist, fix_dist, + # ) + # return EnumIdentifyResult.UNKNOWN + # + # if versions.RpmVersion(target_nvr) >= versions.RpmVersion(fix_nvr): + # return EnumIdentifyResult.YES + # return EnumIdentifyResult.NO + # except Exception as exc: + # logger.debug("Fix version comparison failed: %s", exc) + # return EnumIdentifyResult.UNKNOWN def _version_in_affected_range(self, target_version: str, intel: CveIntel) -> bool: @@ -263,7 +279,7 @@ def _extract_fixed_rpms(intel: CveIntel) -> list[dict]: seen.add(name) version = m.group(3) release_arch = m.group(4) - release = release_arch.rsplit(".", 1)[0] if "." in release_arch else release_arch + release = _strip_arch_suffix(release_arch) results.append({"nevra": raw, "name": name, "version": version, "release": release}) return results @@ -291,7 +307,7 @@ def _extract_fix_info(intel: CveIntel | None, resolved_name: str) -> dict: continue version = m.group(3) release_arch = m.group(4) - release = release_arch.rsplit(".", 1)[0] if "." 
in release_arch else release_arch + release = _strip_arch_suffix(release_arch) return {"nevra": raw, "name": name, "version": version, "release": release} return {} From 66e64a3d47754df7a90d9cb7d729f26a08bbebe9 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Thu, 30 Apr 2026 12:12:23 +0300 Subject: [PATCH 14/46] redesign: preprocess node --- .../configs/config-http-openai.yml | 5 + .../functions/code_agent_graph_defs.py | 173 +++++++++++++++++- .../functions/cve_package_code_agent.py | 37 ++++ src/vuln_analysis/tools/tool_names.py | 5 + 4 files changed, 218 insertions(+), 2 deletions(-) diff --git a/src/vuln_analysis/configs/config-http-openai.yml b/src/vuln_analysis/configs/config-http-openai.yml index 99cd91152..fdaef0d86 100644 --- a/src/vuln_analysis/configs/config-http-openai.yml +++ b/src/vuln_analysis/configs/config-http-openai.yml @@ -80,6 +80,11 @@ functions: Code Keyword Search: _type: lexical_code_search top_k: 5 + Source Grep: + _type: source_grep + base_checker_dir: .cache/am_cache/checker + max_results: 50 + context_lines: 2 CVE Web Search: _type: serp_wrapper max_retries: 5 diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index d011f163e..d514e4cca 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -49,12 +49,30 @@ class CodeAgentState(MessagesState): identify_report: NotRequired[IdentifyReport | None] locate_report: NotRequired[LocateReport | None] verify_report: NotRequired[VerifyReport | None] + downstream_report: NotRequired[DownstreamSearchReport | None] + upstream_report: NotRequired[UpstreamSearchReport | None] # --------------------------------------------------------------------------- # Reflection schemas # --------------------------------------------------------------------------- - +class DownstreamSearchReport(BaseModel): + """Result of a downstream search.""" + is_patch_file_available: 
bool = Field(description="True if a patch file is available") + patch_file_name: str = Field(description="The name of the patch file") + is_patch_in_spec_file: bool = Field(description="True if a patch file is in the spec file") + spec_file_log_change: str = Field(description="The log change of patchin the spec file") + is_patch_applied_in_build: bool = Field(description="True if a patch file is applied in the build") + build_log_patch_applied: str = Field(description="The patch applied in the build log") + parsed_patch: ParsedPatch = Field(description="The parsed patch file") + +class UpstreamSearchReport(BaseModel): + """Result of an upstream search.""" + is_fixed_srpm_is_needed: Literal["yes", "no", "not_needed"] = Field(description="not_needed if the target package is already fixed") + vulnerable_locations: list[FileLocation] = Field(default_factory=list) + reason_cve_code: str = Field(description="Does the CVE description match the code which is vulnerable") + is_code_fixed_by_reabse: bool = Field(description="True if the code is fixed by rebase") + reason_code_fixed_by_rebase: str = Field(description="The reason why the code is fixed by rebase") class ReflectionBase(BaseModel): """Base schema for phase reports. 
@@ -145,7 +163,7 @@ class FileLocation(BaseModel): line_number: int | None = None snippet: str = Field(description="Code context around the match") matched_keywords: list[str] - source: Literal["diff", "tantivy"] = Field( + source: Literal["diff", "tantivy","grep"] = Field( description="How this location was discovered") @@ -177,6 +195,32 @@ class LocateReport(ReflectionBase): # --------------------------------------------------------------------------- +class PatchHunk(BaseModel): + """A single hunk from a downstream patch file.""" + source_start: int + source_length: int + target_start: int + target_length: int + context_lines: list[str] = Field(default_factory=list, description="Unchanged lines") + removed_lines: list[str] = Field(default_factory=list, description="Deleted lines (- stripped)") + added_lines: list[str] = Field(default_factory=list, description="Added lines (+ stripped)") + + +class PatchFile(BaseModel): + """Changes to a single file in a downstream patch.""" + source_path: str + target_path: str + hunks: list[PatchHunk] + is_new_file: bool = False + is_deleted_file: bool = False + + +class ParsedPatch(BaseModel): + """Structured representation of a downstream patch file.""" + patch_filename: str + files: list[PatchFile] + + class DownstreamPatchDetail(BaseModel): """Extracted from a .patch file: shows vulnerable code and the fix.""" patch_filename: str @@ -1212,6 +1256,55 @@ def _parse_build_log_applied_patches( return filenames +def parse_patch_file(patch_path: Path) -> ParsedPatch | None: + """Parse a downstream .patch file into structured data. + + Returns None if the file cannot be parsed. 
+ """ + try: + diff_text = patch_path.read_text(encoding="utf-8", errors="replace") + patch_set = PatchSet.from_string(diff_text) + except Exception: + logger.warning("parse_patch_file: failed to parse %s", patch_path) + return None + + files: list[PatchFile] = [] + for patched_file in patch_set: + if patched_file.is_binary_file: + continue + + hunks: list[PatchHunk] = [] + for hunk in patched_file: + context, removed, added = [], [], [] + for line in hunk: + if line.is_context: + context.append(str(line.value).rstrip("\n")) + elif line.is_removed: + removed.append(str(line.value).rstrip("\n")) + elif line.is_added: + added.append(str(line.value).rstrip("\n")) + + hunks.append(PatchHunk( + source_start=hunk.source_start, + source_length=hunk.source_length, + target_start=hunk.target_start, + target_length=hunk.target_length, + context_lines=context, + removed_lines=removed, + added_lines=added, + )) + + files.append(PatchFile( + source_path=patched_file.source_file, + target_path=patched_file.target_file, + hunks=hunks, + is_new_file=patched_file.is_added_file, + is_deleted_file=patched_file.is_removed_file, + )) + + return ParsedPatch(patch_filename=patch_path.name, files=files) + + def _extract_patch_details( patch_path: Path, locate_report: LocateReport | None, ) -> list[DownstreamPatchDetail]: @@ -1484,3 +1577,79 @@ async def build_verify_pipeline( instructions="" if verdict != "INCONCLUSIVE" else "Downstream evidence is ambiguous; manual review recommended.", ) + + + +#--------------------------------------------------------------------- +# Downstream search pipeline +#--------------------------------------------------------------------- +async def downstream_search_preprocss( + *, + llm, + vuln_id: str, + descriptions: list[tuple[str, str]], + source_path: Path, + build_log_path: Path | None, + tracer, +) -> DownstreamSearchReport: + """Build the downstream search pipeline.""" + from vuln_analysis.tools.source_inspector import SourceInspector + inspector = 
SourceInspector(source_path) + + cve_pattern = re.escape(vuln_id) + report = DownstreamSearchReport() + with tracer.push_active_function("Is_patch_file_available", input_data={"vuln_id": vuln_id}) as span: + patch_files = inspector.find_files("*.patch", recursive=False) + cve_patches = [p for p in patch_files if re.search(cve_pattern, p.name, re.IGNORECASE)] + if cve_patches: + report.is_patch_file_available = True + patch_file = cve_patches[0] + else: + report.is_patch_file_available = False + report.is_patch_in_spec_file = False + report.is_patch_in_spec_file = False + return report + + if not patch_file: + raise ValueError("No patch file found for the CVE") + else: + report.patch_file_name = patch_file.name + + with tracer.push_active_function( + "Is_patch_in_spec_file", input_data={"patch_file_name": patch_file.name} + ) as span: + spec_files = inspector.find_files("*.spec", recursive=False) + spec_path = spec_files[0] if spec_files else None + + if not spec_path: + report.is_patch_in_spec_file = False + else: + grep_spec_matches = inspector.grep_content(cve_pattern, spec_path) + if grep_spec_matches: + report.is_patch_in_spec_file = True + report.spec_file_log_change = "\n".join(m.line_content for m in grep_spec_matches) + else: + report.is_patch_in_spec_file = False + + with tracer.push_active_function( + "Is_patch_applied_in_build", input_data={"patch_file_name": patch_file.name} + ) as span: + if build_log_path and build_log_path.exists(): + build_inspector = SourceInspector(build_log_path.parent) + build_log_matches = build_inspector.grep_content(cve_pattern, build_log_path) + if build_log_matches: + report.is_patch_applied_in_build = True + report.build_log_patch_applied = "\n".join(m.line_content for m in build_log_matches) + else: + report.is_patch_applied_in_build = False + else: + report.is_patch_applied_in_build = False + + with tracer.push_active_function("Extract_patch_details", input_data={"patch_file_name": patch_file.name}) as span: + details = 
parse_patch_file(patch_file) + if details: + report.parsed_patch = details + else: + report.parsed_patch = None + + return report \ No newline at end of file diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index 7cac098f2..788a3515c 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -44,10 +44,12 @@ IdentifyReport, LocateReport, VerifyReport, + DownstreamSearchReport, build_identify_subgraph, build_locate_pipeline, build_verify_pipeline, generate_code_agent_report, + downstream_search_preprocss, ) from vuln_analysis.tools.brew_downloader import BrewDownloader, BrewProfileType, BrewDownloaderError from vuln_analysis.utils.full_text_search import FullTextSearch @@ -241,6 +243,41 @@ async def verify_node(state: CodeAgentState) -> dict: "verify_report": report, "messages": [AIMessage(content=report.summary)], } + async def downstream_search(state: CodeAgentState) -> dict: + logger.info("downstream_search: starting") + + + build_log = ctx.artifacts.build_log_path if ctx and ctx.artifacts else None + with tracer.push_active_function("downstream_search", input_data={}) as span: + report: DownstreamSearchReport = await downstream_search_preprocss( + llm=llm, + vuln_id=vuln_id, + descriptions=descriptions, + source_path=Path(source_dir), + build_log_path=Path(build_log) if build_log else None, + tracer=tracer, + ) + span.set_output({ + "is_patch_file_available": report.is_patch_file_available, + "is_patch_in_spec_file": report.is_patch_in_spec_file, + "spec_file_log_change": report.spec_file_log_change, + "is_patch_applied_in_build": report.is_patch_applied_in_build, + "build_log_patch_applied": report.build_log_patch_applied, + "patch_details": report.patch_details, + }) + + return { + "downstream_report": DownstreamSearchReport( + is_patch_file_available=False, + is_patch_in_spec_file=False, + spec_file_log_change="", + 
is_patch_applied_in_build=False, + build_log_patch_applied="", + patch_details=[], + ), + "messages": [AIMessage(content="Downstream search skipped: no vulnerable locations.")], + } + flow = StateGraph(CodeAgentState) flow.add_node("identify", identify_node) diff --git a/src/vuln_analysis/tools/tool_names.py b/src/vuln_analysis/tools/tool_names.py index 6f46faa61..5771f6e96 100644 --- a/src/vuln_analysis/tools/tool_names.py +++ b/src/vuln_analysis/tools/tool_names.py @@ -47,6 +47,9 @@ class ToolNames: FUNCTION_LIBRARY_VERSION_FINDER = "Function Library Version Finder" """Checks in which library version the function is used""" + SOURCE_GREP = "Source Grep" + """Fast grep search in source code using native Unix grep""" + # Export as module-level constants CODE_SEMANTIC_SEARCH = ToolNames.CODE_SEMANTIC_SEARCH @@ -58,6 +61,7 @@ class ToolNames: CVE_WEB_SEARCH = ToolNames.CVE_WEB_SEARCH CONTAINER_ANALYSIS_DATA = ToolNames.CONTAINER_ANALYSIS_DATA FUNCTION_LIBRARY_VERSION_FINDER = ToolNames.FUNCTION_LIBRARY_VERSION_FINDER +SOURCE_GREP = ToolNames.SOURCE_GREP @@ -72,4 +76,5 @@ class ToolNames: 'CONTAINER_ANALYSIS_DATA', 'FUNCTION_LOCATOR', 'FUNCTION_LIBRARY_VERSION_FINDER', + 'SOURCE_GREP', ] \ No newline at end of file From db9e0fd402dd4215e8f3a254fefbb8890363d0db Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Fri, 1 May 2026 08:39:33 +0000 Subject: [PATCH 15/46] save work --- .../functions/code_agent_graph_defs.py | 296 +++++++++++++++++- .../functions/cve_package_code_agent.py | 219 ++++++------- 2 files changed, 405 insertions(+), 110 deletions(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index d514e4cca..1f841f6a3 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -68,11 +68,32 @@ class DownstreamSearchReport(BaseModel): class UpstreamSearchReport(BaseModel): """Result of an upstream search.""" - 
is_fixed_srpm_is_needed: Literal["yes", "no", "not_needed"] = Field(description="not_needed if the target package is already fixed") + + is_fixed_srpm_is_needed: bool = Field(description="True if a fixed SRPM is needed downstream style patch files") + fixed_srpm_file_name: str = Field(description="The name of the fixed SRPM file") + fixed_parsed_patch: ParsedPatch = Field(description="The parsed fixed SRPM patch file") vulnerable_locations: list[FileLocation] = Field(default_factory=list) - reason_cve_code: str = Field(description="Does the CVE description match the code which is vulnerable") - is_code_fixed_by_reabse: bool = Field(description="True if the code is fixed by rebase") - reason_code_fixed_by_rebase: str = Field(description="The reason why the code is fixed by rebase") + reason_cve_code: str = Field( + default="", + description="Does the CVE description match the code which is vulnerable", + ) + is_code_fixed_by_rebase: Literal["yes", "no", "unknown"] = Field( + default="unknown", + description="yes if the code is fixed by rebase", + ) + spec_file_log_change: str = Field( + default="", + description="The log change of patch in the spec file", + ) + spec_fixed_srpm_change: str = Field( + default="", + description="The change of the fixed SRPM in the spec file", + ) + spec_fixed_srpm_rebase: bool = Field(description="True if the fixed SRPM is rebased in the spec file") + reason_code_fixed_by_rebase: str = Field( + default="", + description="The reason why the code is fixed by rebase", + ) class ReflectionBase(BaseModel): """Base schema for phase reports. 
@@ -1652,4 +1673,269 @@ async def downstream_search_preprocss( else: report.parsed_patch = None - return report \ No newline at end of file + return report + +async def upstream_search_preprocess( + *, + vuln_id: str, + source_path: Path, + fix_info: dict, + brew_downloader: BrewDownloader, + patch_dir: Path, + tracer, +) -> UpstreamSearchReport: + """Build the upstream search pipeline.""" + from vuln_analysis.tools.source_inspector import SourceInspector + inspector = SourceInspector(source_path) + report = UpstreamSearchReport() + cve_pattern = re.escape(vuln_id) + with tracer.push_active_function("Is_upstream_fixed_by_rebase", input_data={"vuln_id": vuln_id}) as span: + spec_files = inspector.find_files("*.spec", recursive=False) + spec_path = spec_files[0] if spec_files else None + + if not spec_path: + report.is_code_fixed_by_rebase = "unknown" + else: + grep_spec_matches = inspector.grep_content(cve_pattern, spec_path) + if grep_spec_matches: + report.is_code_fixed_by_rebase = "yes" + report.spec_file_log_change = "\n".join(m.line_content for m in grep_spec_matches) + else: + report.is_code_fixed_by_rebase = "unknown" + span.set_output({ + "is_code_fixed_by_rebase": report.is_code_fixed_by_rebase, + "spec_file_log_change": report.spec_file_log_change, + }) + + if fix_info and brew_downloader is not None and not patch_dir.exists(): + with tracer.push_active_function( + "download_rpm_patch", input_data={"fix_info": fix_info} + ) as span: + try: + download_patch_and_gen_diff(fix_info, brew_downloader, source_path, patch_dir) + span.set_output({"patch_dir_exists": patch_dir.exists()}) + except Exception as e: + logger.warning("locate: failed to download/extract patched SRPM: %s", e) + span.set_output({"error": str(e), "patch_dir_exists": False}) + + if patch_dir.exists(): + patch_inspector = SourceInspector(patch_dir) + with tracer.push_active_function("is_patch_downsteam_patch_file", input_data={"patch_dir": patch_dir}) as span: + + patch_files = 
patch_inspector.find_files("*.patch", recursive=False) + cve_patches = [p for p in patch_files if re.search(cve_pattern, p.name, re.IGNORECASE)] + if cve_patches: + report.is_fixed_srpm_is_needed = True + report.fixed_srpm_file_name = cve_patches[0].name + report.fixed_parsed_patch = parse_patch_file(cve_patches[0]) + return report + else: + report.is_fixed_srpm_is_needed = False + span.set_output({ + "is_fixed_srpm_is_needed": report.is_fixed_srpm_is_needed}) + with tracer.push_active_function("is_fixed_srpm_rebase", input_data={"spec file"}) as span: + spec_files = patch_inspector.find_files("*.spec", recursive=False) + spec_path = spec_files[0] if spec_files else None + if not spec_path: + report.spec_fixed_srpm_rebase = False + else: + grep_spec_matches = inspector.grep_content(cve_pattern, spec_path) + if grep_spec_matches: + report.spec_fixed_srpm_rebase = True + report.spec_fixed_srpm_change = "\n".join(m.line_content for m in grep_spec_matches) + return report + else: + report.spec_fixed_srpm_rebase = False + span.set_output({ + "spec_fixed_srpm_rebase": report.spec_fixed_srpm_rebase, + "spec_fixed_srpm_change": report.spec_fixed_srpm_change, + }) + + + return report + + +# --------------------------------------------------------------------------- +# L1 Agent Prompt Templates (Patch Available Scenario) +# --------------------------------------------------------------------------- + +L1_AGENT_SYS_PROMPT_PATCH_AVAILABLE = ( + "You are a security analyst investigating whether a CVE fix has been applied to a package.\n" + "You have access to a downstream patch file that contains the fix for this vulnerability.\n\n" + "MANDATORY STEPS (follow in order):\n" + "1. ANALYZE the patch: Identify the vulnerable code (removed lines) and fix code (added lines).\n" + "2. LOCATE the vulnerable code in the source tree using search tools.\n" + "3. VERIFY whether the vulnerable code still exists or has been replaced by the fix.\n" + "4. 
REASON about the vulnerability status based on evidence.\n\n" + "CRITICAL RULES:\n" + "- The patch shows WHAT was vulnerable (- lines) and HOW it was fixed (+ lines).\n" + "- If you find the vulnerable code pattern in the source, the package is VULNERABLE.\n" + "- If you find the fix pattern instead, the package is PATCHED.\n" + "- If neither pattern is found, search for related function/variable names.\n" + "- Base conclusions ONLY on tool results, not assumptions.\n\n" + "ANSWER QUALITY:\n" + "- Cite specific file paths and line numbers from tool results.\n" + "- Quote the actual code found, not just describe it.\n" + "- Explain WHY the code matches or differs from the patch.\n" + "- State confidence level based on evidence quality." +) + +L1_AGENT_PROMPT_TEMPLATE = """{sys_prompt} + + +CVE ID: {vuln_id} +Target Package: {target_package} +CVE Description: {cve_description} + + + +Patch File: {patch_filename} +Files Modified: +{patch_files_summary} + +Key Changes: +{patch_hunks_summary} + + + +{tools} + + + +{tool_selection_strategy} + + +{tool_instructions} + +RESPONSE: +{{""" + +L1_AGENT_THOUGHT_INSTRUCTIONS = """ +1. Output valid JSON only. thought < 100 words. final_answer < 150 words. +2. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer. +3. Source Grep: use query field with pattern from patch (function name, variable, or code snippet). +4. Code Keyword Search: use query field for broader searches. +5. Do NOT call the same tool with the same input twice. +6. ALWAYS search for the vulnerable code pattern FIRST, then the fix pattern. +7. If a pattern contains special regex characters, escape them or use literal substrings. + + +{{"thought": "Search for the vulnerable function from the patch", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Locate vulnerable code pattern"}}, "final_answer": null}} + + +{{"thought": "Found code. 
Now verify if it matches vulnerable or fixed version", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Check if vulnerable pattern exists"}}, "final_answer": null}} + + +{{"thought": "Evidence gathered", "mode": "finish", "actions": null, "final_answer": "The package is [PATCHED/VULNERABLE]. Found [evidence] at [file:line]. The code [matches/differs from] the patch because [reason]."}} +""" + + +# --------------------------------------------------------------------------- +# L1 Agent Helper Functions +# --------------------------------------------------------------------------- + +def format_patch_files_summary(parsed_patch: ParsedPatch | None) -> str: + """Generate a concise summary of files modified by the patch. + + Returns a bullet list showing each file with hunk count and line changes. + + Example output: + - archive_read_support_format_zip.c (3 hunks, +15/-8 lines) + - archive.h (1 hunk, +2/-0 lines, new file) + """ + if not parsed_patch or not parsed_patch.files: + return "No files in patch." + + lines = [] + for pf in parsed_patch.files: + # Extract clean filename from path (strip a/ or b/ prefix) + filename = pf.target_path.lstrip("ab/") + + # Count total added/removed lines across all hunks + added = sum(len(h.added_lines) for h in pf.hunks) + removed = sum(len(h.removed_lines) for h in pf.hunks) + hunk_count = len(pf.hunks) + + # Build descriptor + hunk_word = "hunk" if hunk_count == 1 else "hunks" + desc = f"- {filename} ({hunk_count} {hunk_word}, +{added}/-{removed} lines" + + if pf.is_new_file: + desc += ", new file" + elif pf.is_deleted_file: + desc += ", deleted" + desc += ")" + + lines.append(desc) + + return "\n".join(lines) + + +def format_patch_hunks_summary( + parsed_patch: ParsedPatch | None, + max_hunks: int = 5, + max_lines_per_hunk: int = 5, +) -> str: + """Extract key code changes from patch hunks for LLM context. + + Shows the vulnerable code (removed lines) and fix code (added lines) + in a searchable format. 
Prioritizes hunks with actual code changes + over pure additions/deletions. + + Args: + parsed_patch: The parsed patch structure + max_hunks: Maximum number of hunks to include (default 5) + max_lines_per_hunk: Max lines to show per removed/added section + + Returns: + Formatted string showing file, line numbers, and code changes. + """ + if not parsed_patch or not parsed_patch.files: + return "No patch content available." + + output_lines: list[str] = [] + hunk_count = 0 + + for pf in parsed_patch.files: + filename = pf.target_path.lstrip("ab/") + + for hunk in pf.hunks: + if hunk_count >= max_hunks: + remaining = sum(len(f.hunks) for f in parsed_patch.files) - hunk_count + if remaining > 0: + output_lines.append(f"\n... and {remaining} more hunks") + return "\n".join(output_lines) + + # Skip hunks with no actual changes (just context) + if not hunk.removed_lines and not hunk.added_lines: + continue + + output_lines.append(f"\nFile: {filename} (line {hunk.source_start})") + + # Show removed lines (vulnerable code) + if hunk.removed_lines: + output_lines.append(" VULNERABLE (removed):") + for line in hunk.removed_lines[:max_lines_per_hunk]: + cleaned = line.strip() + if cleaned: + output_lines.append(f" {cleaned}") + if len(hunk.removed_lines) > max_lines_per_hunk: + output_lines.append( + f" ... (+{len(hunk.removed_lines) - max_lines_per_hunk} more lines)" + ) + + # Show added lines (fix code) + if hunk.added_lines: + output_lines.append(" FIX (added):") + for line in hunk.added_lines[:max_lines_per_hunk]: + cleaned = line.strip() + if cleaned: + output_lines.append(f" {cleaned}") + if len(hunk.added_lines) > max_lines_per_hunk: + output_lines.append( + f" ... (+{len(hunk.added_lines) - max_lines_per_hunk} more lines)" + ) + + hunk_count += 1 + + return "\n".join(output_lines) if output_lines else "No code changes in patch." 
\ No newline at end of file diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index 788a3515c..e43b5fa51 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -27,6 +27,7 @@ from exploit_iq_commons.logging.loggers_factory import LoggingFactory, trace_id from langgraph.graph import StateGraph, START, END +from langgraph.prebuilt import ToolNode from langchain_core.messages import HumanMessage, AIMessage from nat.builder.context import Context @@ -45,17 +46,21 @@ LocateReport, VerifyReport, DownstreamSearchReport, + UpstreamSearchReport, build_identify_subgraph, build_locate_pipeline, build_verify_pipeline, generate_code_agent_report, downstream_search_preprocss, + upstream_search_preprocess ) from vuln_analysis.tools.brew_downloader import BrewDownloader, BrewProfileType, BrewDownloaderError from vuln_analysis.utils.full_text_search import FullTextSearch logger = LoggingFactory.get_agent_logger(__name__) + + _RPM_NEVRA_RE = re.compile(r"^(.+?)-(?:(\d+):)?(\d\S*?)-(\S+)$") @@ -111,8 +116,24 @@ class CVEPackageCodeAgentConfig(FunctionBaseConfig, name="cve_package_code_agent async def create_graph_code_agent(config: CVEPackageCodeAgentConfig, builder: Builder, state: AgentMorpheusEngineInput, tracer): + # Node name constants + THOUGHT_NODE = "thought" + TOOL_NODE = "tool" + FORCED_FINISH_NODE = "forced_finish" + OBSERVATION_NODE = "observation" + DOWNSTREAM_SEARCH_NODE = "downstream_search" + GATHER_MORE_INFO_NODE = "gather_more_info" + L1_AGENT_NODE = "L1_agent" llm = await builder.get_llm(llm_name=config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN) + tools = builder.get_tools(tool_names=config.tool_names, wrapper_type=LLMFrameworkEnum.LANGCHAIN) + + # Get tool names after filtering for dynamic guidance + enabled_tool_names = [tool.name for tool in tools] + tool_descriptions_list = [t.name + ": " + t.description for 
t in tools] + tools_node = ToolNode(tools, handle_tool_errors=True) + + vuln_id = state.input.scan.vulns[0].vuln_id ctx = state.info.checker_context intel = state.info.intel @@ -152,97 +173,25 @@ async def lexical_search_fn(query: str) -> list: if aIntel.ubuntu and aIntel.ubuntu.description: descriptions.append(("ubuntu", aIntel.ubuntu.description)) - async def identify_node(state: CodeAgentState) -> dict: - logger.info("identify_node: starting") - - with tracer.push_active_function("identify", input_data={"vuln_id": vuln_id}) as span: - report: IdentifyReport = await build_identify_subgraph( - llm=llm, - descriptions=descriptions, - vuln_id=vuln_id, - package_name=target_package.name, - lexical_search_fn=lexical_search_fn, - tracer=tracer, - ) - span.set_output({ - "approved": [kw.term for kw in report.approved], - "rejected": [kw.term for kw in report.rejected], - "is_sufficient": report.is_sufficient, - }) - - logger.info("identify_node: %s", report.model_dump_json()) - return {"identify_report": report} - - async def locate_node(state: CodeAgentState) -> dict: - logger.info("locate_node: starting") - identify_report = state.get("identify_report") - if identify_report is None or not identify_report.approved: - logger.info("locate_node: no approved keywords, skipping") - return { - "locate_report": LocateReport( - locations=[], diff_available=False, - evidence="Skipped: no approved keywords from Identify phase.", - is_sufficient=False, instructions="No keywords to locate.", - ), - "messages": [AIMessage(content="Locate skipped: no approved keywords.")], - } - - with tracer.push_active_function( - "locate", - input_data={ - "approved_keywords": [k.term for k in identify_report.approved], - "fix_info": fix_info, - }, - ) as span: - report: LocateReport = await build_locate_pipeline( - llm=llm, - identify_report=identify_report, - vuln_id=vuln_id, - descriptions=descriptions, - lexical_search_fn=lexical_search_fn, - fix_info=fix_info, - 
brew_downloader=brew_downloader, - source_dir=source_dir, - patch_dir=patch_dir, - tracer=tracer, - ) + async def L1_agent(state: CodeAgentState) -> dict: + logger.info("L1_agent: starting") + downstream_report = state.get("downstream_report") + with tracer.push_active_function("L1_agent", input_data={}) as span: + runtime_prompt ="You are a helpful assistant that is investigating a CVE." span.set_output({ - "locations_count": len(report.locations), - "diff_available": report.diff_available, - "is_sufficient": report.is_sufficient, - }) - if report.diff_available: - ctx.artifacts.patch_source_dir = patch_dir - ctx.artifacts.patch_diff_path = patch_dir.parent / "locate.diff" - - logger.info("locate_node: %s", report.model_dump_json()) - return {"locate_report": report, "messages": [AIMessage(content=report.evidence)]} - - async def verify_node(state: CodeAgentState) -> dict: - logger.info("verify_node: starting") - from vuln_analysis.tools.source_inspector import SourceInspector - - identify_report = state.get("identify_report") - locate_report = state.get("locate_report") - build_log = ctx.artifacts.build_log_path if ctx and ctx.artifacts else None - - inspector = SourceInspector(source_dir) - report: VerifyReport = await build_verify_pipeline( - inspector=inspector, - llm=llm, - vuln_id=vuln_id, - descriptions=descriptions, - identify_report=identify_report, - locate_report=locate_report, - build_log_path=Path(build_log) if build_log else None, - tracer=tracer, - ) - - logger.info("verify_node: %s", report.model_dump_json()) + "runtime_prompt": runtime_prompt,}) return { - "verify_report": report, - "messages": [AIMessage(content=report.summary)], + "messages": [AIMessage(content="L1 agent completed")], + "identify_report": report, } + + async def should_continue_downstream(state: CodeAgentState) -> str: + downstream_report = state.get("downstream_report") + if downstream_report.is_patch_file_available: + return "L1_agent" + else: + return "gather_more_info" + 
async def downstream_search(state: CodeAgentState) -> dict: logger.info("downstream_search: starting") @@ -267,27 +216,87 @@ async def downstream_search(state: CodeAgentState) -> dict: }) return { - "downstream_report": DownstreamSearchReport( - is_patch_file_available=False, - is_patch_in_spec_file=False, - spec_file_log_change="", - is_patch_applied_in_build=False, - build_log_patch_applied="", - patch_details=[], - ), - "messages": [AIMessage(content="Downstream search skipped: no vulnerable locations.")], + "downstream_report": report, + "messages": [AIMessage(content="Downstream flow preprocess completed")], } + async def gather_more_info(state: CodeAgentState) -> dict: + logger.info("gather_more_info: starting") + with tracer.push_active_function("gather_more_info", input_data={}) as span: + report: UpstreamSearchReport = await upstream_search_preprocess( + vuln_id=vuln_id, + fix_info=fix_info, + brew_downloader=brew_downloader, + patch_dir=Path(patch_dir), + source_path=Path(source_dir), + tracer=tracer, + ) + span.set_output({ + "is_fixed_srpm_is_needed": report.is_fixed_srpm_is_needed, + }) + return { + "messages": [AIMessage(content="Gathering more information...")], + "upstream_report": report, + } + + async def thought_node(state: CodeAgentState) -> dict: + """Generate next thought/action using the LLM.""" + logger.info("thought_node: starting") + # TODO: Implement LLM reasoning to decide next action + return { + "messages": [AIMessage(content="Thinking...")], + } + + async def forced_finish_node(state: CodeAgentState) -> dict: + """Force finish when max iterations reached.""" + logger.info("forced_finish_node: max iterations reached") + # TODO: Implement forced completion logic + return { + "messages": [AIMessage(content="Max iterations reached, forcing finish")], + } + + async def observation_node(state: CodeAgentState) -> dict: + """Process tool output and add observation to state.""" + logger.info("observation_node: starting") + # TODO: Implement 
observation processing + return { + "messages": [AIMessage(content="Observation recorded")], + } + + async def should_continue(state: CodeAgentState) -> str: + thought = state.get("thought", None) + if thought is not None and thought.mode == "finish": + return END + if state.get("step", 0) >= state.get("max_steps", config.max_iterations): + return FORCED_FINISH_NODE + return TOOL_NODE flow = StateGraph(CodeAgentState) - flow.add_node("identify", identify_node) - flow.add_node("locate", locate_node) - flow.add_node("verify", verify_node) - - flow.add_edge(START, "identify") - flow.add_edge("identify", "locate") - flow.add_edge("locate", "verify") - flow.add_edge("verify", END) + + flow.add_node(DOWNSTREAM_SEARCH_NODE, downstream_search) + flow.add_node(GATHER_MORE_INFO_NODE, gather_more_info) + flow.add_node(L1_AGENT_NODE, L1_agent) + flow.add_node(THOUGHT_NODE, thought_node) + flow.add_node(TOOL_NODE, tools_node) + flow.add_node(FORCED_FINISH_NODE, forced_finish_node) + flow.add_node(OBSERVATION_NODE, observation_node) + + flow.add_edge(START, DOWNSTREAM_SEARCH_NODE) + flow.add_conditional_edges(DOWNSTREAM_SEARCH_NODE, should_continue_downstream, { + L1_AGENT_NODE: L1_AGENT_NODE, + GATHER_MORE_INFO_NODE: GATHER_MORE_INFO_NODE, + }) + flow.add_edge(GATHER_MORE_INFO_NODE, L1_AGENT_NODE) + flow.add_edge(L1_AGENT_NODE, THOUGHT_NODE) + flow.add_conditional_edges( + THOUGHT_NODE, + should_continue, + {END: END, TOOL_NODE: TOOL_NODE, FORCED_FINISH_NODE: FORCED_FINISH_NODE} + ) + flow.add_edge(TOOL_NODE, OBSERVATION_NODE) + flow.add_edge(OBSERVATION_NODE, THOUGHT_NODE) + flow.add_edge(FORCED_FINISH_NODE, END) + app = flow.compile() #app.get_graph().draw_mermaid_png(output_file_path="code_agent_flow.png") From b85aa87fc8320fbea8b65539d95f5b440e3122c5 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Sat, 2 May 2026 14:37:05 +0300 Subject: [PATCH 16/46] first React agent loop work --- .../configs/config-http-openai.yml | 7 +- .../functions/code_agent_graph_defs.py | 61 
+++--- .../functions/cve_package_code_agent.py | 135 ++++++++++-- .../functions/react_internals.py | 28 +++ src/vuln_analysis/register.py | 9 +- .../tools/lexical_full_search.py | 14 +- src/vuln_analysis/tools/source_grep.py | 145 +++++++++++++ src/vuln_analysis/tools/source_inspector.py | 205 ++++++++++++++++++ 8 files changed, 551 insertions(+), 53 deletions(-) create mode 100644 src/vuln_analysis/tools/source_grep.py create mode 100644 src/vuln_analysis/tools/source_inspector.py diff --git a/src/vuln_analysis/configs/config-http-openai.yml b/src/vuln_analysis/configs/config-http-openai.yml index fdaef0d86..6326cbbcb 100644 --- a/src/vuln_analysis/configs/config-http-openai.yml +++ b/src/vuln_analysis/configs/config-http-openai.yml @@ -162,14 +162,14 @@ functions: _type: cve_checker_segmentation base_checker_dir: .cache/am_cache/checker base_code_index_dir: .cache/am_cache/code_index - cve_package_checker_probe: - _type: cve_package_checker_probe - probe_log_path: .cache/am_cache/checker/probe_results.jsonl cve_package_code_agent: _type: cve_package_code_agent llm_name: cve_agent_executor_llm base_checker_dir: .cache/am_cache/checker base_code_index_dir: .cache/am_cache/code_index + tool_names: + - Source Grep + - Code Keyword Search health_check: _type: health_check @@ -263,7 +263,6 @@ workflow: cve_output_config_name: cve_http_output cve_source_acquisition_name: cve_source_acquisition cve_checker_segmentation_name: cve_checker_segmentation - cve_package_checker_probe_name: cve_package_checker_probe cve_package_code_agent_name: cve_package_code_agent eval: diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index 1f841f6a3..d86cb4f0b 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -39,6 +39,7 @@ logger = logging.getLogger(__name__) +from vuln_analysis.functions.react_internals import CheckerThought # 
--------------------------------------------------------------------------- # Graph state # --------------------------------------------------------------------------- @@ -51,6 +52,12 @@ class CodeAgentState(MessagesState): verify_report: NotRequired[VerifyReport | None] downstream_report: NotRequired[DownstreamSearchReport | None] upstream_report: NotRequired[UpstreamSearchReport | None] + runtime_prompt: NotRequired[str | None] + last_thought: NotRequired[CheckerThought | None] + step: int = Field(default=0, description="The current step number") + max_steps: int = Field(default=10, description="The maximum number of steps") + output: str = Field(default="", description="The output of the last step") + thought: NotRequired[CheckerThought | None] # --------------------------------------------------------------------------- @@ -58,20 +65,20 @@ class CodeAgentState(MessagesState): # --------------------------------------------------------------------------- class DownstreamSearchReport(BaseModel): """Result of a downstream search.""" - is_patch_file_available: bool = Field(description="True if a patch file is available") - patch_file_name: str = Field(description="The name of the patch file") - is_patch_in_spec_file: bool = Field(description="True if a patch file is in the spec file") - spec_file_log_change: str = Field(description="The log change of patchin the spec file") - is_patch_applied_in_build: bool = Field(description="True if a patch file is applied in the build") - build_log_patch_applied: str = Field(description="The patch applied in the build log") - parsed_patch: ParsedPatch = Field(description="The parsed patch file") + is_patch_file_available: bool = Field(default=False, description="True if a patch file is available") + patch_file_name: str = Field(default="", description="The name of the patch file") + is_patch_in_spec_file: bool = Field(default=False, description="True if a patch file is in the spec file") + spec_file_log_change: str = 
Field(default="", description="The log change of patchin the spec file") + is_patch_applied_in_build: bool = Field(default=False, description="True if a patch file is applied in the build") + build_log_patch_applied: str = Field(default="", description="The patch applied in the build log") + parsed_patch: ParsedPatch | None = Field(default=None, description="The parsed patch file") class UpstreamSearchReport(BaseModel): """Result of an upstream search.""" - is_fixed_srpm_is_needed: bool = Field(description="True if a fixed SRPM is needed downstream style patch files") - fixed_srpm_file_name: str = Field(description="The name of the fixed SRPM file") - fixed_parsed_patch: ParsedPatch = Field(description="The parsed fixed SRPM patch file") + is_fixed_srpm_is_needed: bool = Field(default=False, description="True if a fixed SRPM is needed downstream style patch files") + fixed_srpm_file_name: str = Field(default="", description="The name of the fixed SRPM file") + fixed_parsed_patch: ParsedPatch | None = Field(default=None, description="The parsed fixed SRPM patch file") vulnerable_locations: list[FileLocation] = Field(default_factory=list) reason_cve_code: str = Field( default="", @@ -89,7 +96,7 @@ class UpstreamSearchReport(BaseModel): default="", description="The change of the fixed SRPM in the spec file", ) - spec_fixed_srpm_rebase: bool = Field(description="True if the fixed SRPM is rebased in the spec file") + spec_fixed_srpm_rebase: bool = Field(default=False, description="True if the fixed SRPM is rebased in the spec file") reason_code_fixed_by_rebase: str = Field( default="", description="The reason why the code is fixed by rebase", @@ -1702,7 +1709,7 @@ async def upstream_search_preprocess( report.spec_file_log_change = "\n".join(m.line_content for m in grep_spec_matches) else: report.is_code_fixed_by_rebase = "unknown" - span.set_output({ + span.set_output({ "is_code_fixed_by_rebase": report.is_code_fixed_by_rebase, "spec_file_log_change": 
report.spec_file_log_change, }) @@ -1762,11 +1769,11 @@ async def upstream_search_preprocess( L1_AGENT_SYS_PROMPT_PATCH_AVAILABLE = ( "You are a security analyst investigating whether a CVE fix has been applied to a package.\n" "You have access to a downstream patch file that contains the fix for this vulnerability.\n\n" - "MANDATORY STEPS (follow in order):\n" - "1. ANALYZE the patch: Identify the vulnerable code (removed lines) and fix code (added lines).\n" - "2. LOCATE the vulnerable code in the source tree using search tools.\n" - "3. VERIFY whether the vulnerable code still exists or has been replaced by the fix.\n" - "4. REASON about the vulnerability status based on evidence.\n\n" + "The patch content is provided below in PATCH_CONTEXT. Use it to understand:\n" + "- VULNERABLE code (- lines that were removed)\n" + "- FIX code (+ lines that were added)\n\n" + "YOUR FIRST ACTION must be to search for the vulnerable code pattern using Source Grep or Code Keyword Search.\n" + "Then verify whether the code matches the vulnerable or fixed version.\n\n" "CRITICAL RULES:\n" "- The patch shows WHAT was vulnerable (- lines) and HOW it was fixed (+ lines).\n" "- If you find the vulnerable code pattern in the source, the package is VULNERABLE.\n" @@ -1811,20 +1818,24 @@ async def upstream_search_preprocess( {{""" L1_AGENT_THOUGHT_INSTRUCTIONS = """ -1. Output valid JSON only. thought < 100 words. final_answer < 150 words. -2. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer. -3. Source Grep: use query field with pattern from patch (function name, variable, or code snippet). -4. Code Keyword Search: use query field for broader searches. -5. Do NOT call the same tool with the same input twice. -6. ALWAYS search for the vulnerable code pattern FIRST, then the fix pattern. -7. If a pattern contains special regex characters, escape them or use literal substrings. +1. You MUST select a tool ONLY from . Do NOT invent or use any other tool names. +2. 
Output valid JSON only. thought < 100 words. final_answer < 150 words. +3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer. +4. Source Grep: use query field with pattern from patch (function name, variable, or code snippet). +5. Code Keyword Search: use query field for broader searches. +6. Do NOT call the same tool with the same input twice. +7. ALWAYS search for the vulnerable code pattern FIRST, then the fix pattern. +8. If a pattern contains special regex characters, escape them or use literal substrings. {{"thought": "Search for the vulnerable function from the patch", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Locate vulnerable code pattern"}}, "final_answer": null}} -{{"thought": "Found code. Now verify if it matches vulnerable or fixed version", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Check if vulnerable pattern exists"}}, "final_answer": null}} +{{"thought": "Source Grep found no results. Try broader search with Code Keyword Search", "mode": "act", "actions": {{"tool": "Code Keyword Search", "query": "", "reason": "Broader search for the function"}}, "final_answer": null}} + +{{"thought": "Found code. Now verify if it matches vulnerable or fixed version", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Check if vulnerable pattern exists"}}, "final_answer": null}} + {{"thought": "Evidence gathered", "mode": "finish", "actions": null, "final_answer": "The package is [PATCHED/VULNERABLE]. Found [evidence] at [file:line]. 
The code [matches/differs from] the patch because [reason]."}} """ diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index e43b5fa51..6cfd3e541 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -28,7 +28,7 @@ from langgraph.graph import StateGraph, START, END from langgraph.prebuilt import ToolNode -from langchain_core.messages import HumanMessage, AIMessage +from langchain_core.messages import HumanMessage, AIMessage, SystemMessage from nat.builder.context import Context from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput @@ -52,14 +52,22 @@ build_verify_pipeline, generate_code_agent_report, downstream_search_preprocss, - upstream_search_preprocess + upstream_search_preprocess, + L1_AGENT_SYS_PROMPT_PATCH_AVAILABLE, + L1_AGENT_PROMPT_TEMPLATE, + L1_AGENT_THOUGHT_INSTRUCTIONS, + format_patch_files_summary, + format_patch_hunks_summary, ) from vuln_analysis.tools.brew_downloader import BrewDownloader, BrewProfileType, BrewDownloaderError from vuln_analysis.utils.full_text_search import FullTextSearch -logger = LoggingFactory.get_agent_logger(__name__) +from vuln_analysis.functions.react_internals import CheckerThought +from vuln_analysis.runtime_context import ctx_state +logger = LoggingFactory.get_agent_logger(__name__) +import uuid _RPM_NEVRA_RE = re.compile(r"^(.+?)-(?:(\d+):)?(\d\S*?)-(\S+)$") @@ -91,6 +99,21 @@ def _parse_fix_info_from_context(ctx, target_name: str) -> dict: return {} +def _build_tool_strategy(tool_names: list[str]) -> str: + """Generate tool usage guidance based on available tools.""" + strategies = [] + tool_names_lower = [t.lower().replace("_", " ") for t in tool_names] + + if any("grep" in t for t in tool_names_lower): + strategies.append("- Use Source Grep for exact code patterns from patch (function names, variable names, specific code)") + if any("keyword" in t or 
"search" in t for t in tool_names_lower): + strategies.append("- Use Code Keyword Search for broader concept searches when grep fails") + if any("read" in t for t in tool_names_lower): + strategies.append("- Use Read File to examine full context around matches") + + return "\n".join(strategies) if strategies else "Use available tools to search for vulnerable and fixed code patterns." + + class CVEPackageCodeAgentConfig(FunctionBaseConfig, name="cve_package_code_agent"): """ Level 1 Package Code Agent. Investigates each CVE using extracted source @@ -117,7 +140,7 @@ class CVEPackageCodeAgentConfig(FunctionBaseConfig, name="cve_package_code_agent async def create_graph_code_agent(config: CVEPackageCodeAgentConfig, builder: Builder, state: AgentMorpheusEngineInput, tracer): # Node name constants - THOUGHT_NODE = "thought" + THOUGHT_NODE = "think_node" TOOL_NODE = "tool" FORCED_FINISH_NODE = "forced_finish" OBSERVATION_NODE = "observation" @@ -128,6 +151,7 @@ async def create_graph_code_agent(config: CVEPackageCodeAgentConfig, builder: Bu llm = await builder.get_llm(llm_name=config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN) tools = builder.get_tools(tool_names=config.tool_names, wrapper_type=LLMFrameworkEnum.LANGCHAIN) + thought_llm = llm.with_structured_output(CheckerThought) # Get tool names after filtering for dynamic guidance enabled_tool_names = [tool.name for tool in tools] tool_descriptions_list = [t.name + ": " + t.description for t in tools] @@ -176,18 +200,56 @@ async def lexical_search_fn(query: str) -> list: async def L1_agent(state: CodeAgentState) -> dict: logger.info("L1_agent: starting") downstream_report = state.get("downstream_report") + upstream_report = state.get("upstream_report") + with tracer.push_active_function("L1_agent", input_data={}) as span: - runtime_prompt ="You are a helpful assistant that is investigating a CVE." 
- span.set_output({ - "runtime_prompt": runtime_prompt,}) + if downstream_report and downstream_report.is_patch_file_available: + parsed_patch = downstream_report.parsed_patch + + patch_files_summary = format_patch_files_summary(parsed_patch) + patch_hunks_summary = format_patch_hunks_summary(parsed_patch) + cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions) + tools_str = "\n".join(tool_descriptions_list) + tool_strategy = _build_tool_strategy(enabled_tool_names) + + runtime_prompt = L1_AGENT_PROMPT_TEMPLATE.format( + sys_prompt=L1_AGENT_SYS_PROMPT_PATCH_AVAILABLE, + vuln_id=vuln_id, + target_package=target_package.name, + cve_description=cve_description, + patch_filename=downstream_report.patch_file_name, + patch_files_summary=patch_files_summary, + patch_hunks_summary=patch_hunks_summary, + tools=tools_str, + tool_selection_strategy=tool_strategy, + tool_instructions=L1_AGENT_THOUGHT_INSTRUCTIONS, + ) + + span.set_output({ + "mode": "patch_available", + "patch_filename": downstream_report.patch_file_name, + "patch_files_count": len(parsed_patch.files) if parsed_patch else 0, + }) + else: + cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions) + runtime_prompt = ( + "You are a security analyst investigating a CVE.\n\n" + f"CVE ID: {vuln_id}\n" + f"Target Package: {target_package.name}\n" + f"CVE Description: {cve_description}\n\n" + "No downstream patch file is available. Use upstream analysis to determine vulnerability status." 
+ ) + span.set_output({ + "mode": "no_patch", + }) + return { - "messages": [AIMessage(content="L1 agent completed")], - "identify_report": report, + "runtime_prompt": runtime_prompt, } async def should_continue_downstream(state: CodeAgentState) -> str: downstream_report = state.get("downstream_report") - if downstream_report.is_patch_file_available: + if downstream_report and downstream_report.is_patch_file_available: return "L1_agent" else: return "gather_more_info" @@ -212,7 +274,7 @@ async def downstream_search(state: CodeAgentState) -> dict: "spec_file_log_change": report.spec_file_log_change, "is_patch_applied_in_build": report.is_patch_applied_in_build, "build_log_patch_applied": report.build_log_patch_applied, - "patch_details": report.patch_details, + "parsed_patch": report.parsed_patch.patch_filename if report.parsed_patch else None, }) return { @@ -241,10 +303,33 @@ async def gather_more_info(state: CodeAgentState) -> dict: async def thought_node(state: CodeAgentState) -> dict: """Generate next thought/action using the LLM.""" - logger.info("thought_node: starting") - # TODO: Implement LLM reasoning to decide next action + step_num = state.get("step", 0) + logger.info("thought_node: starting step %d", step_num) + runtime_prompt = state.get("runtime_prompt") or "You are a security analyst investigating a CVE." 
+ messages = [SystemMessage(content=runtime_prompt)] + state["messages"] + with tracer.push_active_function("thought_node", input_data=messages) as span: + response: CheckerThought = await thought_llm.ainvoke(messages) + if response.mode == "finish": + ai_message = AIMessage(content=response.final_answer) + else: + tool_name = response.actions.tool + arguments = response.actions.query + tool_call_id = str(uuid.uuid4()) + ai_message = AIMessage( + content=response.thought, + tool_calls=[{"name": tool_name, "args": {"query": arguments}, "id": tool_call_id}] + ) + span.set_output({ + "thought": response.thought, + "mode": response.mode, + "actions": response.actions, + "final_answer": response.final_answer, + }) return { - "messages": [AIMessage(content="Thinking...")], + "messages": [ai_message], + "thought": response, + "step": step_num + 1, + "max_steps": config.max_iterations, } async def forced_finish_node(state: CodeAgentState) -> dict: @@ -258,7 +343,21 @@ async def forced_finish_node(state: CodeAgentState) -> dict: async def observation_node(state: CodeAgentState) -> dict: """Process tool output and add observation to state.""" logger.info("observation_node: starting") - # TODO: Implement observation processing + tool_message = state["messages"][-1] + last_thought = state.get("thought") + if not last_thought: + return { + "messages": [AIMessage(content="No thought found")], + } + last_thought_text = last_thought.thought + tool_used = last_thought.actions.tool + tool_input_detail = last_thought.actions.query + with tracer.push_active_function("observation node", input_data=f"tool used:{tool_used} + {tool_input_detail}") as span: + tool_output_for_llm = tool_message.content + span.set_output({ + "last_thought_text": last_thought_text, + "tool_output_for_llm": tool_output_for_llm, + }) return { "messages": [AIMessage(content="Observation recorded")], } @@ -395,6 +494,12 @@ async def cve_package_code_agent(config: CVEPackageCodeAgentConfig, builder: Bui async def 
_arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: trace_id.set(message.input.scan.id) tracer = Context.get() + + # Set ctx_state so tools (e.g., Source Grep, Lexical Search) can access checker_context + from types import SimpleNamespace + workflow_state = SimpleNamespace(original_input=message, info=message.info) + ctx_state.set(workflow_state) + logger.info("package_code_agent: starting L1 investigation") l1_agent_graph = await create_graph_code_agent(config, builder, message, tracer) diff --git a/src/vuln_analysis/functions/react_internals.py b/src/vuln_analysis/functions/react_internals.py index 7ec52bb27..5de50a13e 100644 --- a/src/vuln_analysis/functions/react_internals.py +++ b/src/vuln_analysis/functions/react_internals.py @@ -42,6 +42,34 @@ class Thought(BaseModel): max_length=3000, ) + +class CheckerToolCall(BaseModel): + """Tool call for RPM checker flow - simpler schema with just query.""" + tool: str = Field(description="Exact tool name from AVAILABLE_TOOLS") + query: str = Field(description="Search pattern for Source Grep or Code Keyword Search") + reason: str = Field(description="Briefly explain why this tool helps the investigation") + + +class CheckerThought(BaseModel): + """Thought model for RPM checker flow with simplified tool call schema.""" + thought: str = Field( + description="Brief reasoning about next step (max 3-4 sentences)", + max_length=3000, + ) + mode: Literal["act", "finish"] = Field( + description="'act' to call tools, 'finish' to return final answer" + ) + actions: CheckerToolCall | None = Field( + default=None, + description="When mode is 'act', the tool to execute" + ) + final_answer: str | None = Field( + default=None, + description="When mode is 'finish', concise answer with evidence", + max_length=3000, + ) + + class CodeFindings(BaseModel): """Compressed code comprehension output from raw tool results.""" findings: list[str] = Field( diff --git a/src/vuln_analysis/register.py b/src/vuln_analysis/register.py 
index 1b0ee7a30..826aa1f81 100644 --- a/src/vuln_analysis/register.py +++ b/src/vuln_analysis/register.py @@ -39,7 +39,6 @@ from vuln_analysis.functions import cve_generate_vdbs from vuln_analysis.functions import cve_http_output from vuln_analysis.functions import cve_justify -from vuln_analysis.functions import cve_package_checker_probe from vuln_analysis.functions import cve_package_code_agent from vuln_analysis.functions import cve_checker_segmentation from vuln_analysis.functions import cve_source_acquisition @@ -54,6 +53,7 @@ from vuln_analysis.tools import container_image_analysis_data from vuln_analysis.tools import local_vdb from vuln_analysis.tools import serp +from vuln_analysis.tools import source_grep from vuln_analysis.utils.error_handling_decorator import catch_pipeline_errors_async # pylint: enable=unused-import from vuln_analysis.utils.llm_engine_utils import postprocess_engine_output, finalize_preprocess_engine_input @@ -90,9 +90,6 @@ class CVEAgentWorkflowConfig(FunctionBaseConfig, name="cve_agent"): default=None, description="Function name for scoped code indexing of extracted checker sources (Tantivy only)", ) - cve_package_checker_probe_name: str | None = Field( - default=None, - description="Function name for the package checker probe (logs package identification data per CVE)") cve_package_code_agent_name: str | None = Field( default=None, description="Function name for the Level 1 Package Code Agent (source-level CVE investigation)", @@ -121,10 +118,6 @@ async def cve_agent_workflow(config: CVEAgentWorkflowConfig, builder: Builder): cve_generate_vex_fn = builder.get_function(name=config.cve_generate_vex_name) cve_generate_cvss_fn = builder.get_function(name=config.cve_generate_cvss_name) cve_output_fn = builder.get_function(name=config.cve_output_config_name) if config.cve_output_config_name else None - cve_package_checker_probe_fn = ( - builder.get_function(name=config.cve_package_checker_probe_name) - if 
config.cve_package_checker_probe_name else None - ) cve_source_acquisition_fn = ( builder.get_function(name=config.cve_source_acquisition_name) if config.cve_source_acquisition_name else None diff --git a/src/vuln_analysis/tools/lexical_full_search.py b/src/vuln_analysis/tools/lexical_full_search.py index 0b24fcc1e..3ef0032f3 100644 --- a/src/vuln_analysis/tools/lexical_full_search.py +++ b/src/vuln_analysis/tools/lexical_full_search.py @@ -21,6 +21,7 @@ from pydantic import Field from exploit_iq_commons.logging.loggers_factory import LoggingFactory +from exploit_iq_commons.data_models.input import PipelineMode from vuln_analysis.utils.error_handling_decorator import catch_tool_errors LEXICAL_CODE_SEARCH = "lexical_code_search" @@ -33,6 +34,10 @@ class LexicalSearchToolConfig(FunctionBaseConfig, name=LEXICAL_CODE_SEARCH): Lexical search tool used to search source code. """ top_k: int = Field(default=5, description="Top K to use for the lexical search") + base_code_index_dir: str = Field( + default=".cache/am_cache/code_index", + description="Base directory for Tantivy code index storage.", + ) @register_function(config_type=LexicalSearchToolConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) @@ -43,7 +48,14 @@ async def lexical_search(config: LexicalSearchToolConfig, builder: Builder): # @catch_tool_errors(LEXICAL_CODE_SEARCH) async def _arun(query: str) -> str: workflow_state = ctx_state.get() - code_index_path = workflow_state.code_index_path + + pipeline_mode = workflow_state.original_input.input.image.pipeline_mode + if pipeline_mode == PipelineMode.PACKAGE_CHECKER: + source_key = workflow_state.info.checker_context.source_key + code_index_path = str(FullTextSearch.get_index_directory(config.base_code_index_dir, source_key)) + else: + code_index_path = workflow_state.info.vdb.code_index_path + full_text_search = FullTextSearch(cache_path=code_index_path) if full_text_search.is_empty(): diff --git a/src/vuln_analysis/tools/source_grep.py 
b/src/vuln_analysis/tools/source_grep.py new file mode 100644 index 000000000..508560b35 --- /dev/null +++ b/src/vuln_analysis/tools/source_grep.py @@ -0,0 +1,145 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Native Unix grep tool for fast source code searching. + +Provides an LLM-callable tool that uses native grep subprocess for +faster searching compared to Python-based regex scanning. 
+""" + +from pathlib import Path + +from aiq.builder.builder import Builder +from aiq.builder.framework_enum import LLMFrameworkEnum +from aiq.builder.function_info import FunctionInfo +from aiq.cli.register_workflow import register_function +from aiq.data_models.function import FunctionBaseConfig +from pydantic import Field + +from exploit_iq_commons.logging.loggers_factory import LoggingFactory +from vuln_analysis.tools.source_inspector import SourceInspector +from vuln_analysis.utils.error_handling_decorator import catch_tool_errors + +SOURCE_GREP = "source_grep" + +logger = LoggingFactory.get_agent_logger(__name__) + + +class SourceGrepToolConfig(FunctionBaseConfig, name=SOURCE_GREP): + """Fast grep search using native Unix grep subprocess.""" + + base_checker_dir: str = Field( + default=".cache/am_cache/checker", + description="Root directory for checker-specific artifacts.", + ) + max_results: int = Field( + default=50, + description="Maximum number of grep results to return.", + ) + context_lines: int = Field( + default=2, + description="Number of context lines around each match.", + ) + + +def _parse_query(query: str) -> tuple[str, str | None]: + """Parse query string into (pattern, file_glob). 
+ + Supports formats: + - "pattern" -> search all source files + - "pattern,*.c" -> search only .c files + - "pattern,*.h" -> search only headers + """ + query = query.strip().strip('"').strip("'") + if "," in query: + parts = query.split(",", 1) + pattern = parts[0].strip() + file_glob = parts[1].strip() if len(parts) > 1 else None + return pattern, file_glob + return query, None + + +def _format_results(pattern: str, matches: list, root: Path) -> str: + """Format grep results for LLM consumption.""" + if not matches: + return f"No matches found for '{pattern}'" + + lines = [f"Found {len(matches)} match(es) for '{pattern}':\n"] + for i, match in enumerate(matches, 1): + try: + rel_path = match.file_path.relative_to(root) + except ValueError: + rel_path = match.file_path + lines.append(f"{i}. {rel_path}:{match.line_number}") + lines.append(f" {match.line_content.strip()}") + lines.append("") + + return "\n".join(lines) + + +@register_function(config_type=SourceGrepToolConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) +async def source_grep(config: SourceGrepToolConfig, builder: Builder): # pylint: disable=unused-argument + from vuln_analysis.runtime_context import ctx_state + + @catch_tool_errors(SOURCE_GREP) + async def _arun(query: str) -> str: + """Search source code using native Unix grep. 
+ + Query format: 'pattern' or 'pattern,file_glob' + Examples: + - 'GENERAL_NAME_cmp' - search all source files + - 'GENERAL_NAME_cmp,*.c' - search only .c files + - 'archive_read_open,*.h' - search only headers + """ + workflow_state = ctx_state.get() + + checker_context = None + if workflow_state.original_input and workflow_state.original_input.info: + checker_context = workflow_state.original_input.info.checker_context + + if checker_context is None or not checker_context.source_key: + raise ValueError("Checker context or source_key not available in workflow state") + + source_key = checker_context.source_key + source_dir = (Path(config.base_checker_dir) / source_key / "source").resolve() + + if not source_dir.is_dir(): + raise ValueError(f"Source directory does not exist: {source_dir}") + + inspector = SourceInspector(source_dir) + pattern, file_glob = _parse_query(query) + + logger.info("Source grep: searching for '%s' in %s (glob: %s)", + pattern, source_dir, file_glob or "all source files") + + matches = await inspector.grep_native( + pattern=pattern, + file_glob=file_glob, + context_lines=config.context_lines, + max_results=config.max_results, + ) + + logger.info("Source grep: found %d matches for '%s'", len(matches), pattern) + return _format_results(pattern, matches, source_dir) + + yield FunctionInfo.from_fn( + _arun, + description=( + "Fast grep search in source code using native Unix grep. " + "Input: 'pattern' or 'pattern,file_glob'. " + "Examples: 'GENERAL_NAME_cmp' searches all source files, " + "'GENERAL_NAME_cmp,*.c' searches only C files." + ), + ) diff --git a/src/vuln_analysis/tools/source_inspector.py b/src/vuln_analysis/tools/source_inspector.py new file mode 100644 index 000000000..29a23b89c --- /dev/null +++ b/src/vuln_analysis/tools/source_inspector.py @@ -0,0 +1,205 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Generic filesystem utility for inspecting extracted RPM source trees. + +Provides low-level primitives (find, grep, read) that can be composed by +pipeline code or called by an LLM agent in the future. +""" + +from __future__ import annotations + +import asyncio +import re +from dataclasses import dataclass +from pathlib import Path + + +@dataclass +class GrepMatch: + """A single regex match inside a file.""" + file_path: Path + line_number: int + line_content: str + + +class SourceInspector: + """Filesystem inspector scoped to a root directory. + + All returned paths are absolute. The class carries no domain-specific + logic (RPM, spec, changelog); callers compose the primitives for that. + """ + + def __init__(self, source_dir: Path) -> None: + self._root = source_dir.resolve() + if not self._root.is_dir(): + raise FileNotFoundError(f"source_dir does not exist: {self._root}") + + @property + def root(self) -> Path: + return self._root + + def find_files(self, pattern: str, recursive: bool = True) -> list[Path]: + """Glob over the source tree. + + Parameters + ---------- + pattern: + Shell glob pattern, e.g. ``"*.spec"`` or ``"*.patch"``. + recursive: + If *True* use ``**/`` (deep search). + If *False* use ```` (root-level only). 
+ """ + glob_expr = f"**/{pattern}" if recursive else pattern + return sorted(self._root.glob(glob_expr)) + + def grep_content( + self, + pattern: str, + file_path: Path | None = None, + *, + recursive: bool = False, + ) -> list[GrepMatch]: + """Search file contents for a regex *pattern*. + + Parameters + ---------- + pattern: + Regular expression (case-sensitive by default). + file_path: + If given, search that file only, or (if it is a directory) every file + in that directory (one level, regular files only). + If the path does not exist, return no matches. + If *None*, search every file under *source_dir* + (depth controlled by *recursive*). + recursive: + Only used when *file_path* is ``None``. + ``False`` searches only root-level files; ``True`` walks the tree. + """ + regex = re.compile(pattern) + matches: list[GrepMatch] = [] + + if file_path is not None: + resolved = file_path.resolve() + if resolved.is_file(): + targets = [resolved] + elif resolved.is_dir(): + targets = sorted(p for p in resolved.iterdir() if p.is_file()) + else: + targets = [] + elif recursive: + targets = sorted(p for p in self._root.rglob("*") if p.is_file()) + else: + targets = sorted(p for p in self._root.iterdir() if p.is_file()) + + for fp in targets: + try: + lines = fp.read_text(encoding="utf-8", errors="replace").splitlines() + except (OSError, UnicodeDecodeError): + continue + for idx, line in enumerate(lines, start=1): + if regex.search(line): + matches.append(GrepMatch(file_path=fp, line_number=idx, line_content=line)) + return matches + + async def grep_native( + self, + pattern: str, + file_glob: str | None = None, + *, + case_insensitive: bool = False, + context_lines: int = 0, + max_results: int = 50, + ) -> list[GrepMatch]: + """Fast grep using native Unix grep subprocess. + + Parameters + ---------- + pattern: + Search pattern (passed to grep as-is, supports basic regex). + file_glob: + Optional file pattern (e.g., ``"*.c"``, ``"*.h"``). 
If *None*, searches + common source extensions: .c, .h, .cpp, .hpp, .py, .go, .java + case_insensitive: + If *True*, perform case-insensitive matching (``-i`` flag). + context_lines: + Lines of context around match (``-C`` flag). Default 0. + max_results: + Stop after this many matches (``-m`` flag). Default 50. + + Returns + ------- + list[GrepMatch] + Matches found, with file paths relative to source root removed + from line_content for cleaner output. + """ + cmd = ["grep", "-rn", "-I"] + + if case_insensitive: + cmd.append("-i") + if context_lines > 0: + cmd.extend(["-C", str(context_lines)]) + + if file_glob: + cmd.extend(["--include", file_glob]) + else: + for ext in ["*.c", "*.h", "*.cpp", "*.hpp", "*.py", "*.go", "*.java"]: + cmd.extend(["--include", ext]) + + cmd.extend(["-m", str(max_results), "--", pattern, str(self._root)]) + + proc = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, _ = await proc.communicate() + + results: list[GrepMatch] = [] + for line in stdout.decode("utf-8", errors="replace").splitlines(): + parts = line.split(":", 2) + if len(parts) >= 3: + try: + line_num = int(parts[1]) + except ValueError: + continue + results.append(GrepMatch( + file_path=Path(parts[0]), + line_number=line_num, + line_content=parts[2], + )) + return results + + def read_file( + self, + file_path: Path, + offset: int = 0, + max_lines: int | None = None, + ) -> str: + """Read file content starting from a line *offset*. + + Parameters + ---------- + file_path: + Absolute or relative path (resolved against *source_dir*). + offset: + 0-based line offset to start reading from. + max_lines: + Maximum number of lines to return. ``None`` means read to EOF. 
+ """ + resolved = file_path if file_path.is_absolute() else (self._root / file_path) + lines = resolved.read_text(encoding="utf-8", errors="replace").splitlines() + end = (offset + max_lines) if max_lines is not None else len(lines) + return "\n".join(lines[offset:end]) From d18da881f6ef104e1ab46ecade9b4624073a5fac Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Sat, 2 May 2026 16:35:48 +0300 Subject: [PATCH 17/46] improve report for downstream L1 --- .../functions/code_agent_graph_defs.py | 497 ++++++++++++++++-- .../functions/cve_package_code_agent.py | 157 +++++- 2 files changed, 578 insertions(+), 76 deletions(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index d86cb4f0b..a75c9637e 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -68,9 +68,28 @@ class DownstreamSearchReport(BaseModel): is_patch_file_available: bool = Field(default=False, description="True if a patch file is available") patch_file_name: str = Field(default="", description="The name of the patch file") is_patch_in_spec_file: bool = Field(default=False, description="True if a patch file is in the spec file") - spec_file_log_change: str = Field(default="", description="The log change of patchin the spec file") + spec_file_log_change: str = Field( + default="", + description="All lines in the .spec file that match a grep for the CVE id (not changelog-only)", + ) is_patch_applied_in_build: bool = Field(default=False, description="True if a patch file is applied in the build") build_log_patch_applied: str = Field(default="", description="The patch applied in the build log") + spec_patch_directives_for_cve: list[str] = Field( + default_factory=list, + description="Raw PatchN: lines from the spec whose patch filename token matches this CVE", + ) + spec_changelog_cve_lines: str = Field( + default="", + description="Lines from the %changelog 
section of the .spec that mention the CVE", + ) + spec_source0_line: str = Field( + default="", + description="The Source0: line from the spec file (upstream tarball reference)", + ) + spec_version_line: str = Field( + default="", + description="The Version: line from the spec file", + ) parsed_patch: ParsedPatch | None = Field(default=None, description="The parsed patch file") class UpstreamSearchReport(BaseModel): @@ -296,6 +315,17 @@ class VerifyReport(ReflectionBase): # --------------------------------------------------------------------------- +class CodeSnippet(BaseModel): + """A code snippet from the investigation.""" + file_path: str = Field(description="Path to the source file") + line_number: int | None = Field(default=None, description="Starting line number") + code: str = Field(description="The code content") + snippet_type: Literal["vulnerable", "fix", "context"] = Field( + description="Type of snippet: vulnerable code, fix code, or context") + source: Literal["downstream_patch", "upstream_patch", "source_search"] = Field( + description="Where this snippet came from") + + class CodeAgentReport(BaseModel): """Final L1 Code Agent investigation report synthesizing all phases.""" confidence: float = Field( @@ -320,6 +350,9 @@ class CodeAgentReport(BaseModel): description="Gaps or uncertainties in the investigation") recommendation: str = Field( description="Actionable recommendation based on the verdict") + code_snippets: list[CodeSnippet] = Field( + default_factory=list, + description="Structured code snippets showing vulnerable and fix code") def to_markdown( self, @@ -327,6 +360,8 @@ def to_markdown( target_package: str = "", version: str = "", release: str = "", + downstream_report: DownstreamSearchReport | None = None, + policy_context: str | None = None, ) -> str: """Render the report as a formatted markdown string.""" lines = [ @@ -353,6 +388,18 @@ def to_markdown( ]) lines.extend(f"- {ev}" for ev in self.evidence_chain) + if policy_context: + 
lines.extend([ + "", + "## Red Hat / package identification context", + "", + policy_context, + ]) + + if downstream_report is not None: + lines.append("") + lines.extend(_format_extracted_facts_section(downstream_report)) + if self.affected_files: lines.extend(["", "## Affected Files"]) lines.extend(f"- `{f}`" for f in self.affected_files) @@ -366,6 +413,16 @@ def to_markdown( lines.extend(["", "## Recommendation", self.recommendation]) + if self.code_snippets: + lines.extend(["", "## Code Snippets"]) + for snippet in self.code_snippets: + lines.append(f"\n### {snippet.snippet_type.title()} - `{snippet.file_path}`") + if snippet.line_number: + lines.append(f"Line {snippet.line_number} (source: {snippet.source}):") + else: + lines.append(f"(source: {snippet.source})") + lines.append(f"```\n{snippet.code}\n```") + return "\n".join(lines) @@ -458,7 +515,7 @@ def to_markdown( CODE_AGENT_REPORT_PROMPT = """\ You are a security analyst generating the final L1 Code Agent investigation report. -Synthesize the results from all three investigation phases (Identify, Locate, Verify) +Synthesize the results from the downstream search, upstream search, and L1 agent analysis into a comprehensive, auditable report with a clear justification and supporting evidence. @@ -468,15 +525,16 @@ def to_markdown( CVE Description: {cve_description} +{policy_context_section} -## Identify Phase -{identify_section} +## Downstream Search +{downstream_section} -## Locate Phase -{locate_section} +## Upstream Search +{upstream_section} -## Verify Phase -{verify_section} +## L1 Agent Analysis +{l1_agent_section} @@ -488,31 +546,48 @@ def to_markdown( - vulnerable: Package is actually vulnerable and needs patching - uncertain: Insufficient information to determine exploitability - PHASE PRECEDENCE RULES: - - If Verify phase provides definitive evidence (PATCHED or VULNERABLE with confidence >= 0.8), - this takes precedence over failures in Identify or Locate phases. 
- - Failed keyword/location searches do NOT imply vulnerability if patches are confirmed downstream. - - A "PATCHED" verdict from Verify maps to "protected_by_mitigating_control". - - A "VULNERABLE" verdict from Verify maps to "vulnerable". - - Only use "uncertain" when Verify phase is INCONCLUSIVE or no phases produced strong evidence. + PRECEDENCE RULES: + - If a CVE-specific patch file exists AND is applied in build, use "protected_by_mitigating_control". + - If L1 agent found the fix code in source, use "protected_by_mitigating_control". + - If L1 agent found vulnerable code pattern still present, use "vulnerable". + - If upstream shows rebase fixed the issue, use "protected_by_mitigating_control". + - Only use "uncertain" when evidence is conflicting or insufficient. 2. EVIDENCE CHAIN: - - Trace the investigation through available phases - - If Identify/Locate succeeded: cite keywords -> locations -> verification - - If Identify/Locate failed but Verify succeeded: explain that patches were found - directly in build artifacts/changelogs, making keyword search unnecessary - - Cite specific files, line numbers, and code snippets - - Reference patch filenames and changelog entries when applicable - -3. LIMITATIONS: - - Note any missing data (no diff available, no build log, etc.) + - Start with downstream patch availability + - Include L1 agent's code search findings (vulnerable vs fix patterns) + - Reference specific files, line numbers, and code snippets + - Summarize findings; the rendered report places an "Extracted facts" section **after** the Evidence chain with verbatim spec Patch lines, changelog hits, and build log lines (when available)—do not invent `PatchN:` numbers or spec quotes; only state patch indices you could derive from the investigation text below, or point readers to *Extracted facts* for exact lines + +3. 
CODE SNIPPETS: + - Extract key code snippets from patches showing vulnerable and fix code + - Include file paths and line numbers + - Mark each snippet as "vulnerable", "fix", or "context" + - When downstream investigation includes a parsed patch, code_snippets may be filled programmatically from that patch; use an empty code_snippets list if you do not have verbatim lines to copy. + - Always populate affected_files with CVE-relevant source paths so patch hunks can be prioritized. + +4. LIMITATIONS: + - Note any missing data (no patch file, no build log, etc.) - Flag low-confidence findings that need manual review -4. RECOMMENDATION: +5. RECOMMENDATION: - Provide actionable guidance based on the justification - For protected_by_mitigating_control: confirm the fix is deployed - For vulnerable: recommend patching or mitigation - For uncertain: suggest next investigation steps + +6. EXECUTIVE SUMMARY (tie to NVR posture): + - When RED_HAT_PACKAGE_CONTEXT lists fixed/affected NVRs or RHSA excerpts, relate the **scanned NVR** to that posture in plain language (e.g. "The scanned build matches the fixed NVR…" or "Scanned NVR is listed as affected; verify fix deployment"). + - Do NOT invent RHSA IDs or advisory references not present in the context above. + - If no identify context is provided, base the summary solely on downstream/upstream investigation results. + +7. PATCH ANALYSIS (semantic fix narrative): + - When downstream patch evidence exists, briefly describe **what** the fix does: name the function(s) or file(s) and the nature of the change (e.g. "adds range validation 15–17 in parse_rockridge_ZF1"). + - Derive this from Downstream Search summary, patch file names, or L1 agent code excerpts—do NOT invent code or function names absent from investigation results. + +8. 
DELIVERY MODEL: + - When a CVE-named patch file is present, explicitly note that the fix is carried as a separate `%patch` directive while the upstream tarball (`Source0`) version may remain unchanged. + - Encourage citing "Extracted facts" for exact spec `PatchN:` and `Source0`/`Version` lines when shown below. @@ -528,6 +603,8 @@ def to_markdown( - patch_analysis: analysis of patches (or null if none) - limitations: list of investigation gaps - recommendation: actionable next step +- code_snippets: list of code snippets with file_path, line_number, code, snippet_type, source (may be overwritten from the downstream patch when one is parsed) +- affected_files: required for prioritizing patch excerpts when a downstream patch exists Ensure all code snippets and special characters within JSON string values are properly escaped (e.g., quotes as \", backslashes as \\, newlines as \\n) to maintain valid JSON format. @@ -540,6 +617,81 @@ def to_markdown( # --------------------------------------------------------------------------- MAX_SNIPPET_CHARS = 500 +L1_EXTRACTED_FACTS_EXCERPT_CHARS = 2000 + + +def _cap_text_excerpt(text: str, max_chars: int) -> tuple[str, bool]: + """Return (possibly truncated) text and whether truncation occurred.""" + t = text.strip() + if len(t) <= max_chars: + return t, False + return t[: max_chars] + "\n[… truncated …]", True + + +def _format_extracted_facts_section( + d: DownstreamSearchReport, + *, + max_excerpt: int = L1_EXTRACTED_FACTS_EXCERPT_CHARS, +) -> list[str]: + """Build markdown lines for the deterministic *Extracted facts* block.""" + lines: list[str] = [ + "## Extracted facts", + "", + "*Verbatim excerpts from spec/build grep and parsers. 
Narrative sections below are model-generated.*", + "", + ] + lines.append(f"- **Downstream patch file found:** {d.is_patch_file_available}") + if d.patch_file_name: + lines.append(f"- **Patch file name:** `{d.patch_file_name}`") + lines.append(f"- **Patch referenced in spec (CVE grep):** {d.is_patch_in_spec_file}") + lines.append(f"- **Build log shows CVE / patch application:** {d.is_patch_applied_in_build}") + lines.append("") + + if d.spec_patch_directives_for_cve: + lines.append("**Spec `PatchN:` line(s) whose patch filename contains this CVE:**") + block = "\n".join(d.spec_patch_directives_for_cve) + lines.extend(["", "```", block, "```", ""]) + else: + lines.extend(["**Spec `PatchN:` line(s) whose patch filename contains this CVE:** *None found*", ""]) + + if d.spec_changelog_cve_lines.strip(): + ex, trunc = _cap_text_excerpt(d.spec_changelog_cve_lines, max_excerpt) + sub = f" (truncated to ~{max_excerpt} chars)" if trunc else "" + lines.append(f"**%changelog line(s) mentioning this CVE:**{sub}") + lines.extend(["", "```", ex, "```", ""]) + else: + lines.extend(["**%changelog line(s) mentioning this CVE:** *No matching lines* ", ""]) + + if d.spec_file_log_change.strip(): + ex, trunc = _cap_text_excerpt(d.spec_file_log_change, max_excerpt) + hdr = "**All spec lines matching CVE grep (may include Patch, changelog, comments):**" + if trunc: + hdr += f" *({max_excerpt} char excerpt)*" + lines.append(hdr) + lines.extend(["", "```", ex, "```", ""]) + else: + lines.extend(["**All spec lines matching CVE grep:** *None*", ""]) + + if d.build_log_patch_applied.strip(): + ex, trunc = _cap_text_excerpt(d.build_log_patch_applied, max_excerpt) + hdr = "**Build log line(s) matching CVE grep:**" + if trunc: + hdr += f" *({max_excerpt} char excerpt)*" + lines.append(hdr) + lines.extend(["", "```", ex, "```", ""]) + else: + lines.extend(["**Build log line(s) matching CVE grep:** *None or build log not available* ", ""]) + + # Spec tarball reference (Source0/Version) for 
delivery-model context + if d.spec_version_line or d.spec_source0_line: + lines.append("**Spec tarball reference:**") + if d.spec_version_line: + lines.append(f"- `{d.spec_version_line}`") + if d.spec_source0_line: + lines.append(f"- `{d.spec_source0_line}`") + lines.append("") + + return lines def _format_identify_for_report(report: IdentifyReport | None) -> str: @@ -645,10 +797,211 @@ def _format_verify_for_report(report: VerifyReport | None) -> str: return "\n".join(lines) +def _format_downstream_for_report(report: DownstreamSearchReport | None) -> str: + """Format Downstream search results for prompt injection.""" + if report is None: + return "Downstream search did not produce results." + + lines = [] + lines.append(f"**Patch File Available:** {report.is_patch_file_available}") + + if report.is_patch_file_available: + lines.append(f"**Patch File:** `{report.patch_file_name}`") + lines.append(f"**In Spec File:** {report.is_patch_in_spec_file}") + if report.spec_file_log_change: + lines.append(f"**Spec Changelog:**\n```\n{report.spec_file_log_change[:500]}\n```") + lines.append(f"**Applied in Build:** {report.is_patch_applied_in_build}") + if report.build_log_patch_applied: + lines.append(f"**Build Log Evidence:**\n```\n{report.build_log_patch_applied[:500]}\n```") + + if report.parsed_patch: + lines.append(f"\n**Parsed Patch ({len(report.parsed_patch.files)} files):**") + for pf in report.parsed_patch.files[:5]: + added = sum(len(h.added_lines) for h in pf.hunks) + removed = sum(len(h.removed_lines) for h in pf.hunks) + lines.append(f"- `{pf.target_path}` (+{added}/-{removed} lines)") + if len(report.parsed_patch.files) > 5: + lines.append(f" (+{len(report.parsed_patch.files) - 5} more files)") + else: + lines.append("No CVE-specific patch file found in downstream package.") + + return "\n".join(lines) + + +def _format_upstream_for_report(report: UpstreamSearchReport | None) -> str: + """Format Upstream search results for prompt injection.""" + if report is 
None: + return "Upstream search did not produce results." + + lines = [] + lines.append(f"**Fixed by Rebase:** {report.is_code_fixed_by_rebase}") + + if report.spec_file_log_change: + lines.append(f"**Spec Changelog:**\n```\n{report.spec_file_log_change[:500]}\n```") + + if report.is_fixed_srpm_is_needed: + lines.append(f"**Fixed SRPM Available:** Yes") + lines.append(f"**Fixed SRPM File:** `{report.fixed_srpm_file_name}`") + if report.fixed_parsed_patch: + lines.append(f"\n**Fixed Patch ({len(report.fixed_parsed_patch.files)} files):**") + for pf in report.fixed_parsed_patch.files[:5]: + added = sum(len(h.added_lines) for h in pf.hunks) + removed = sum(len(h.removed_lines) for h in pf.hunks) + lines.append(f"- `{pf.target_path}` (+{added}/-{removed} lines)") + + if report.spec_fixed_srpm_rebase: + lines.append(f"**SRPM Rebased:** Yes") + if report.spec_fixed_srpm_change: + lines.append(f"**Rebase Changes:**\n```\n{report.spec_fixed_srpm_change[:500]}\n```") + + if report.vulnerable_locations: + lines.append(f"\n**Vulnerable Locations ({len(report.vulnerable_locations)}):**") + for loc in report.vulnerable_locations[:5]: + line_info = f":{loc.line_number}" if loc.line_number else "" + lines.append(f"- `{loc.file_path}{line_info}`") + if loc.snippet: + snippet = loc.snippet[:200] + "..." 
if len(loc.snippet) > 200 else loc.snippet + lines.append(f" ```\n{snippet}\n ```") + + if report.reason_code_fixed_by_rebase: + lines.append(f"\n**Rebase Reasoning:** {report.reason_code_fixed_by_rebase}") + + return "\n".join(lines) + + # --------------------------------------------------------------------------- # Report generation pipeline # --------------------------------------------------------------------------- +MAX_REPORT_CODE_SNIPPETS_VULNERABLE = 3 +MAX_REPORT_CODE_SNIPPETS_FIX = 3 + + +def _normalize_snippet_path(path: str) -> str: + """Stable comparison key for patch vs affected file paths.""" + p = path.strip().replace("\\", "/") + while p.startswith("./"): + p = p[2:] + if p.startswith("ab/"): + p = p[3:] + return p.lower() + + +def _snippet_matches_any_affected_path(snippet_path: str, affected_files: list[str]) -> bool: + if not affected_files: + return False + norm_snip = _normalize_snippet_path(snippet_path) + snip_base = Path(snippet_path).name.lower() + for af in affected_files: + norm_af = _normalize_snippet_path(af) + if norm_snip == norm_af: + return True + if snip_base and snip_base == Path(af).name.lower(): + return True + if norm_snip.endswith(norm_af) or norm_af.endswith(norm_snip): + return True + return False + + +def _rank_patch_snippets_for_relevance( + snippets: list[CodeSnippet], + affected_files: list[str], +) -> list[CodeSnippet]: + """Paths matching affected_files first; preserve original order within each bucket.""" + if not affected_files: + return list(snippets) + indexed = list(enumerate(snippets)) + indexed.sort( + key=lambda pair: ( + 0 if _snippet_matches_any_affected_path(pair[1].file_path, affected_files) else 1, + pair[0], + ), + ) + return [s for _, s in indexed] + + +def _cap_snippets_by_type( + snippets: list[CodeSnippet], + *, + max_vulnerable: int = MAX_REPORT_CODE_SNIPPETS_VULNERABLE, + max_fix: int = MAX_REPORT_CODE_SNIPPETS_FIX, +) -> list[CodeSnippet]: + """Keep insertion order; at most max_vulnerable 
vulnerable and max_fix fix snippets.""" + n_vuln = n_fix = 0 + out: list[CodeSnippet] = [] + for s in snippets: + if s.snippet_type == "vulnerable": + if n_vuln >= max_vulnerable: + continue + n_vuln += 1 + out.append(s) + elif s.snippet_type == "fix": + if n_fix >= max_fix: + continue + n_fix += 1 + out.append(s) + else: + out.append(s) + return out + + +def _extract_downstream_patch_code_snippets( + downstream_report: DownstreamSearchReport | None, +) -> list[CodeSnippet]: + """Extract vulnerable/fix snippets from the downstream parsed patch only.""" + if not downstream_report or not downstream_report.parsed_patch: + return [] + snippets: list[CodeSnippet] = [] + for pf in downstream_report.parsed_patch.files: + for hunk in pf.hunks: + if hunk.removed_lines: + snippets.append(CodeSnippet( + file_path=pf.target_path.lstrip("ab/"), + line_number=hunk.source_start, + code="\n".join(hunk.removed_lines[:10]), + snippet_type="vulnerable", + source="downstream_patch", + )) + if hunk.added_lines: + snippets.append(CodeSnippet( + file_path=pf.target_path.lstrip("ab/"), + line_number=hunk.target_start, + code="\n".join(hunk.added_lines[:10]), + snippet_type="fix", + source="downstream_patch", + )) + return snippets + + +def _extract_code_snippets( + downstream_report: DownstreamSearchReport | None, + upstream_report: UpstreamSearchReport | None, +) -> list[CodeSnippet]: + """Extract code snippets from parsed patches.""" + snippets: list[CodeSnippet] = _extract_downstream_patch_code_snippets(downstream_report) + + if upstream_report and upstream_report.fixed_parsed_patch: + for pf in upstream_report.fixed_parsed_patch.files: + for hunk in pf.hunks: + if hunk.removed_lines: + snippets.append(CodeSnippet( + file_path=pf.target_path.lstrip("ab/"), + line_number=hunk.source_start, + code="\n".join(hunk.removed_lines[:10]), + snippet_type="vulnerable", + source="upstream_patch", + )) + if hunk.added_lines: + snippets.append(CodeSnippet( + file_path=pf.target_path.lstrip("ab/"), 
+ line_number=hunk.target_start, + code="\n".join(hunk.added_lines[:10]), + snippet_type="fix", + source="upstream_patch", + )) + + return snippets + async def generate_code_agent_report( *, @@ -656,14 +1009,15 @@ async def generate_code_agent_report( vuln_id: str, target_package: str, descriptions: list[tuple[str, str]], - identify_report: IdentifyReport | None, - locate_report: LocateReport | None, - verify_report: VerifyReport | None, + downstream_report: DownstreamSearchReport | None, + upstream_report: UpstreamSearchReport | None, + l1_agent_answer: str | None, tracer, + policy_context: str = "", ) -> CodeAgentReport: """Generate the final L1 Code Agent investigation report. - Synthesizes results from all three phases (Identify, Locate, Verify) + Synthesizes results from downstream search, upstream search, and L1 agent analysis into a comprehensive, auditable report with a clear verdict. Parameters @@ -676,14 +1030,16 @@ async def generate_code_agent_report( Name of the package being investigated. descriptions: ``(source_name, text)`` pairs from CVE intel. - identify_report: - Output of the Identify phase (may be None). - locate_report: - Output of the Locate phase (may be None). - verify_report: - Output of the Verify phase (may be None). + downstream_report: + Output of downstream search (may be None). + upstream_report: + Output of upstream search (may be None). + l1_agent_answer: + Final answer from the L1 ReAct agent (may be None). tracer: Request-scoped tracing context. + policy_context: + Pre-formatted NVR posture and RHSA excerpt context for the LLM prompt. 
Returns ------- @@ -694,17 +1050,27 @@ async def generate_code_agent_report( cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions) - identify_section = _format_identify_for_report(identify_report) - locate_section = _format_locate_for_report(locate_report) - verify_section = _format_verify_for_report(verify_report) + downstream_section = _format_downstream_for_report(downstream_report) + upstream_section = _format_upstream_for_report(upstream_report) + l1_agent_section = l1_agent_answer or "L1 agent did not produce a final answer." + + if policy_context: + policy_context_section = ( + "\n" + + policy_context + + "\n\n" + ) + else: + policy_context_section = "" prompt_text = CODE_AGENT_REPORT_PROMPT.format( vuln_id=vuln_id, target_package=target_package, cve_description=cve_description, - identify_section=identify_section, - locate_section=locate_section, - verify_section=verify_section, + policy_context_section=policy_context_section, + downstream_section=downstream_section, + upstream_section=upstream_section, + l1_agent_section=l1_agent_section, ) report_llm = llm.with_structured_output(CodeAgentReport) @@ -714,9 +1080,9 @@ async def generate_code_agent_report( input_data={ "vuln_id": vuln_id, "target_package": target_package, - "identify_sufficient": identify_report.is_sufficient if identify_report else False, - "locate_sufficient": locate_report.is_sufficient if locate_report else False, - "verify_verdict": verify_report.verdict if verify_report else None, + "has_downstream_patch": downstream_report.is_patch_file_available if downstream_report else False, + "has_upstream_patch": upstream_report.is_fixed_srpm_is_needed if upstream_report else False, + "has_l1_answer": l1_agent_answer is not None, }, ) as span: messages = [ @@ -724,11 +1090,26 @@ async def generate_code_agent_report( HumanMessage(content="Generate the report."), ] report: CodeAgentReport = await report_llm.ainvoke(messages) + + snippet_source = "unchanged" + 
downstream_patch_snippet_count_pre_cap = 0 + if downstream_report and downstream_report.parsed_patch: + raw = _extract_downstream_patch_code_snippets(downstream_report) + downstream_patch_snippet_count_pre_cap = len(raw) + ranked = _rank_patch_snippets_for_relevance(raw, report.affected_files) + report.code_snippets = _cap_snippets_by_type(ranked) + snippet_source = "downstream_patch" + elif not report.code_snippets: + report.code_snippets = _extract_code_snippets(downstream_report, upstream_report) + span.set_output({ "confidence": report.confidence, "justification_label": report.justification_label, "affected_files_count": len(report.affected_files), "limitations_count": len(report.limitations), + "code_snippets_count": len(report.code_snippets), + "snippet_source": snippet_source, + "downstream_patch_snippet_count_pre_cap": downstream_patch_snippet_count_pre_cap, }) logger.info( @@ -1652,12 +2033,28 @@ async def downstream_search_preprocss( if not spec_path: report.is_patch_in_spec_file = False else: - grep_spec_matches = inspector.grep_content(cve_pattern, spec_path) - if grep_spec_matches: - report.is_patch_in_spec_file = True - report.spec_file_log_change = "\n".join(m.line_content for m in grep_spec_matches) - else: - report.is_patch_in_spec_file = False + cve_c = re.compile(cve_pattern, re.IGNORECASE) + for _idx, fname, raw_line in _parse_spec_patch_directives(inspector, spec_path): + if cve_c.search(fname): + report.spec_patch_directives_for_cve.append(raw_line) + chlog = _extract_spec_changelog(inspector, spec_path) + if chlog: + cve_in_cl = [ln for ln in chlog.splitlines() if cve_c.search(ln)] + report.spec_changelog_cve_lines = "\n".join(cve_in_cl) + grep_spec_matches = inspector.grep_content(cve_pattern, spec_path) + if grep_spec_matches: + report.is_patch_in_spec_file = True + report.spec_file_log_change = "\n".join(m.line_content for m in grep_spec_matches) + else: + report.is_patch_in_spec_file = False + + # Extract Source0: and Version: lines for 
delivery-model context + source0_matches = inspector.grep_content(r"^Source0:", spec_path) + if source0_matches: + report.spec_source0_line = source0_matches[0].line_content.strip() + version_matches = inspector.grep_content(r"^Version:", spec_path) + if version_matches: + report.spec_version_line = version_matches[0].line_content.strip() with tracer.push_active_function( "Is_patch_applied_in_build", input_data={"patch_file_name": patch_file.name} diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index 6cfd3e541..787c50fec 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -114,6 +114,85 @@ def _build_tool_strategy(tool_names: list[str]) -> str: return "\n".join(strategies) if strategies else "Use available tools to search for vulnerable and fixed code patterns." +# --------------------------------------------------------------------------- +# Policy context formatting for L1 reports (Feedback-2 gap coverage) +# --------------------------------------------------------------------------- + +_POLICY_MAX_RPM_LIST_ITEMS = 5 +_POLICY_RHSA_STATEMENT_CAP = 400 +_POLICY_MAX_PACKAGE_STATE_ITEMS = 8 + + +def _format_policy_context_for_l1_report( + *, + target_nvr: str, + identify_result, + intel, +) -> str: + """Build a context block for the LLM prompt covering NVR posture and RHSA excerpts. + + Returns an empty string if no meaningful context is available. + """ + lines: list[str] = [] + + # 1. Scanned target NVR + if target_nvr: + lines.append(f"**Scanned target NVR:** `{target_nvr}`") + + # 2. 
PackageIdentifyResult: affected/fixed lists + if identify_result: + affected = identify_result.affected_rpm_list or [] + fixed = identify_result.fixed_rpm_list or [] + + if affected: + shown = affected[:_POLICY_MAX_RPM_LIST_ITEMS] + suffix = f" (+ {len(affected) - len(shown)} more)" if len(affected) > len(shown) else "" + lines.append(f"**Affected NVRs from identify:** {', '.join(f'`{n}`' for n in shown)}{suffix}") + lines.append(f" - is_target_package_affected: `{identify_result.is_target_package_affected.value}`") + + if fixed: + shown = fixed[:_POLICY_MAX_RPM_LIST_ITEMS] + suffix = f" (+ {len(fixed) - len(shown)} more)" if len(fixed) > len(shown) else "" + lines.append(f"**Fixed NVRs from identify:** {', '.join(f'`{n}`' for n in shown)}{suffix}") + lines.append(f" - is_target_package_fixed: `{identify_result.is_target_package_fixed.value}`") + + # 3. RHSA excerpts (if present) + rhsa = None + if intel and len(intel) > 0: + rhsa = intel[0].rhsa + + if rhsa: + # Statement excerpt + if rhsa.statement: + stmt = rhsa.statement + if len(stmt) > _POLICY_RHSA_STATEMENT_CAP: + stmt = stmt[:_POLICY_RHSA_STATEMENT_CAP] + " …" + lines.append(f"**RHSA statement excerpt:** {stmt}") + + # Upstream fix + if rhsa.upstream_fix: + lines.append(f"**RHSA upstream_fix:** `{rhsa.upstream_fix}`") + + # Package state (compact table-like bullets) + pkg_states = rhsa.package_state or [] + if pkg_states: + lines.append("**RHSA package_state:**") + for ps in pkg_states[:_POLICY_MAX_PACKAGE_STATE_ITEMS]: + parts = [] + if ps.product_name: + parts.append(ps.product_name) + if ps.package_name: + parts.append(f"pkg={ps.package_name}") + if ps.fix_state: + parts.append(f"fix_state={ps.fix_state}") + if parts: + lines.append(f" - {' | '.join(parts)}") + if len(pkg_states) > _POLICY_MAX_PACKAGE_STATE_ITEMS: + lines.append(f" - (+ {len(pkg_states) - _POLICY_MAX_PACKAGE_STATE_ITEMS} more)") + + return "\n".join(lines) + + class CVEPackageCodeAgentConfig(FunctionBaseConfig, 
name="cve_package_code_agent"): """ Level 1 Package Code Agent. Investigates each CVE using extracted source @@ -425,7 +504,7 @@ def _build_analysis( result: dict, code_agent_report: CodeAgentReport | None = None, ) -> list[AgentMorpheusEngineOutput]: - verify_report: VerifyReport | None = result.get("verify_report") + downstream_report: DownstreamSearchReport | None = result.get("downstream_report") if code_agent_report is not None: label = code_agent_report.justification_label @@ -437,32 +516,41 @@ def _build_analysis( reason_parts.extend(f"- {ev}" for ev in code_agent_report.evidence_chain[:5]) if code_agent_report.patch_analysis: reason_parts.append(f"\n\nPatch analysis: {code_agent_report.patch_analysis}") + if code_agent_report.code_snippets: + reason_parts.append("\n\nCode snippets:") + for snippet in code_agent_report.code_snippets[:3]: + reason_parts.append(f"- [{snippet.snippet_type}] {snippet.file_path}:{snippet.line_number or 'N/A'}") if code_agent_report.limitations: reason_parts.append("\n\nLimitations:") reason_parts.extend(f"- {lim}" for lim in code_agent_report.limitations) reason_parts.append(f"\n\nRecommendation: {code_agent_report.recommendation}") reason = "\n".join(reason_parts) summary = code_agent_report.executive_summary - elif verify_report is not None: - label, status = _VERDICT_TO_JUSTIFICATION.get( - verify_report.verdict, ("uncertain", "UNKNOWN"), - ) - patch_snippets: list[str] = [] - for ev in verify_report.evidence: - for pd in ev.patch_details: - patch_snippets.append( - f"[{pd.patch_filename}] {pd.file_path}:{pd.line_number}\n" - f" fix: {pd.fix_code[:200]}" - ) - reason = verify_report.summary - if patch_snippets: - reason += "\n\nDownstream patch details:\n" + "\n".join(patch_snippets[:5]) - summary = verify_report.summary + elif downstream_report is not None: + if downstream_report.is_patch_file_available and downstream_report.is_patch_applied_in_build: + label = "protected_by_mitigating_control" + status = "FALSE" + summary = 
f"CVE patch {downstream_report.patch_file_name} found and applied in build." + elif downstream_report.is_patch_file_available: + label = "protected_by_mitigating_control" + status = "FALSE" + summary = f"CVE patch {downstream_report.patch_file_name} found in package." + else: + label = "uncertain" + status = "UNKNOWN" + summary = "No CVE-specific patch file found." + + reason_parts = [summary] + if downstream_report.spec_file_log_change: + reason_parts.append(f"\n\nSpec changelog: {downstream_report.spec_file_log_change[:200]}") + if downstream_report.build_log_patch_applied: + reason_parts.append(f"\n\nBuild log: {downstream_report.build_log_patch_applied[:200]}") + reason = "\n".join(reason_parts) else: - label = "code_not_present" + label = "uncertain" status = "UNKNOWN" - reason = "Verify phase did not produce a report." - summary = "No verify report available." + reason = "No downstream search results available." + summary = "Investigation incomplete." last_msg = result["messages"][-1].content if result.get("messages") else "no result" @@ -528,25 +616,40 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: descriptions.append(("ubuntu", a_intel.ubuntu.description)) llm = await builder.get_llm(llm_name=config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN) + + final_answer = None + thought = result.get("thought") + if thought and thought.mode == "finish": + final_answer = thought.final_answer + + # Build policy context for Feedback-2 gap coverage (NVR posture, RHSA excerpts) + ctx = message.info.checker_context + version = (target_package.version or "") if target_package else "" + release = (target_package.release or "") if target_package else "" + target_nvr = f"{target_package_name}-{version}-{release}" if target_package_name else "" + policy_context = _format_policy_context_for_l1_report( + target_nvr=target_nvr, + identify_result=ctx.identify_result if ctx else None, + intel=intel, + ) + code_agent_report: CodeAgentReport = await 
generate_code_agent_report( llm=llm, vuln_id=vuln_id, target_package=target_package_name, descriptions=descriptions, - identify_report=result.get("identify_report"), - locate_report=result.get("locate_report"), - verify_report=result.get("verify_report"), + downstream_report=result.get("downstream_report"), + upstream_report=result.get("upstream_report"), + l1_agent_answer=final_answer, tracer=tracer, + policy_context=policy_context, ) # Write markdown report for debug/dev - ctx = message.info.checker_context source_key = ctx.source_key report_dir = Path(config.base_checker_dir) / source_key / "report" report_dir.mkdir(parents=True, exist_ok=True) - # Build filename: L1_report_CVE-XXXX-package-version-release.md - version = (target_package.version or "") if target_package else "" - release = (target_package.release or "") if target_package else "" + # Build filename: L1_report_CVE-XXXX-package-version-release.md (version/release already extracted above) suffix = f"-{target_package_name}" if target_package_name else "" if version: suffix += f"-{version}" @@ -558,6 +661,8 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: target_package=target_package_name, version=version, release=release, + downstream_report=result.get("downstream_report"), + policy_context=policy_context, )) logger.info("package_code_agent: wrote report to %s", report_path) From f38d5c88659be38cc5ff65fd34d69d590dad8214 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Sun, 3 May 2026 11:42:46 +0300 Subject: [PATCH 18/46] before report change --- .../functions/code_agent_graph_defs.py | 59 ++++++++++++++++++- .../functions/cve_package_code_agent.py | 36 ++++++++--- 2 files changed, 87 insertions(+), 8 deletions(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index a75c9637e..eebd562c3 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ 
b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -2016,7 +2016,6 @@ async def downstream_search_preprocss( else: report.is_patch_file_available = False report.is_patch_in_spec_file = False - report.is_patch_in_spec_file = False return report if not patch_file: @@ -2184,6 +2183,37 @@ async def upstream_search_preprocess( "- State confidence level based on evidence quality." ) +L1_AGENT_SYS_PROMPT_UPSTREAM_PATCH = ( + "You are a security analyst verifying that a package is VULNERABLE to a CVE.\n" + "The TARGET package does NOT contain a CVE-specific patch file.\n" + "However, you have access to the patch from a FIXED RPM version (upstream/patched build).\n\n" + "The patch content is provided below in PATCH_CONTEXT. Use it to understand:\n" + "- VULNERABLE code (- lines that were removed in the fix)\n" + "- FIX code (+ lines that were added in the fix)\n\n" + "YOUR TASK: Verify the TARGET package contains the vulnerable code and LACKS the fix.\n\n" + "VERIFICATION STRATEGY:\n" + "1. FIRST search for the VULNERABLE code pattern (removed lines from the patch).\n" + " - Use function names, variable names, or unique code snippets from the '- lines'.\n" + " - The vulnerable code SHOULD exist in the target package.\n" + "2. If vulnerable code is found, search for the FIX code pattern (added lines).\n" + " - The fix code should NOT exist in the target package.\n" + "3. 
CONCLUSION:\n" + " - If vulnerable code EXISTS and fix is ABSENT → Package is VULNERABLE.\n" + " - If fix code IS found → Package may be patched via rebase (investigate further).\n" + " - If neither is found → Use file paths from patch to locate relevant code.\n\n" + "CRITICAL RULES:\n" + "- The patch is from a FIXED version - expect the target to have vulnerable code.\n" + "- Use file paths and function names from the patch to locate code.\n" + "- Search for distinctive code patterns, not generic keywords.\n" + "- Base conclusions ONLY on tool results, not assumptions.\n\n" + "ANSWER QUALITY:\n" + "- Cite specific file paths and line numbers from tool results.\n" + "- Quote the actual code found, not just describe it.\n" + "- Compare found code against both vulnerable and fix patterns from the patch.\n" + "- Clearly state whether vulnerable code exists and whether fix is absent.\n" + "- State confidence level based on evidence quality." +) + L1_AGENT_PROMPT_TEMPLATE = """{sys_prompt} @@ -2237,6 +2267,33 @@ async def upstream_search_preprocess( {{"thought": "Evidence gathered", "mode": "finish", "actions": null, "final_answer": "The package is [PATCHED/VULNERABLE]. Found [evidence] at [file:line]. The code [matches/differs from] the patch because [reason]."}} """ +L1_AGENT_THOUGHT_UPSTREAM_INSTRUCTIONS = """ +1. You MUST select a tool ONLY from . Do NOT invent or use any other tool names. +2. Output valid JSON only. thought < 100 words. final_answer < 150 words. +3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer. +4. Source Grep: use query field with pattern from patch (function name, variable, or code snippet). +5. Code Keyword Search: use query field for broader searches. +6. Do NOT call the same tool with the same input twice. +7. FIRST search for VULNERABLE code (removed lines) - it SHOULD exist in target. +8. THEN search for FIX code (added lines) - it should NOT exist in target. +9. 
If a pattern contains special regex characters, escape them or use literal substrings. + + +{{"thought": "Search for the vulnerable code pattern from the patch to confirm it exists in target", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Locate vulnerable code that should exist in unpatched target"}}, "final_answer": null}} + + +{{"thought": "Found vulnerable code. Now verify the fix is NOT present in target", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Check if fix code is absent (confirms vulnerability)"}}, "final_answer": null}} + + +{{"thought": "Fix pattern not found. Search for file from patch to verify code context", "mode": "act", "actions": {{"tool": "Code Keyword Search", "query": "", "reason": "Verify we are looking at the correct file"}}, "final_answer": null}} + + +{{"thought": "Evidence confirms vulnerability: found vulnerable code, fix is absent", "mode": "finish", "actions": null, "final_answer": "The package is VULNERABLE. Found vulnerable code pattern at [file:line]: [quote code]. The fix from the patched version is NOT present - searched for [fix pattern] with no matches. The target package lacks the security fix."}} + + +{{"thought": "Unexpected: fix code found despite no CVE patch file", "mode": "finish", "actions": null, "final_answer": "The package appears PATCHED via rebase. Found fix code at [file:line]: [quote code]. 
Although no CVE-specific patch exists, the fix may have been included via upstream version update."}} +""" + # --------------------------------------------------------------------------- # L1 Agent Helper Functions diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index 787c50fec..8a0fa06c1 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -42,20 +42,16 @@ from vuln_analysis.functions.code_agent_graph_defs import ( CodeAgentState, CodeAgentReport, - IdentifyReport, - LocateReport, - VerifyReport, DownstreamSearchReport, UpstreamSearchReport, - build_identify_subgraph, - build_locate_pipeline, - build_verify_pipeline, generate_code_agent_report, downstream_search_preprocss, upstream_search_preprocess, L1_AGENT_SYS_PROMPT_PATCH_AVAILABLE, + L1_AGENT_SYS_PROMPT_UPSTREAM_PATCH, L1_AGENT_PROMPT_TEMPLATE, L1_AGENT_THOUGHT_INSTRUCTIONS, + L1_AGENT_THOUGHT_UPSTREAM_INSTRUCTIONS, format_patch_files_summary, format_patch_hunks_summary, ) @@ -309,6 +305,33 @@ async def L1_agent(state: CodeAgentState) -> dict: "patch_filename": downstream_report.patch_file_name, "patch_files_count": len(parsed_patch.files) if parsed_patch else 0, }) + elif upstream_report and upstream_report.fixed_parsed_patch: + parsed_patch = upstream_report.fixed_parsed_patch + + patch_files_summary = format_patch_files_summary(parsed_patch) + patch_hunks_summary = format_patch_hunks_summary(parsed_patch) + cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions) + tools_str = "\n".join(tool_descriptions_list) + tool_strategy = _build_tool_strategy(enabled_tool_names) + + runtime_prompt = L1_AGENT_PROMPT_TEMPLATE.format( + sys_prompt=L1_AGENT_SYS_PROMPT_UPSTREAM_PATCH, + vuln_id=vuln_id, + target_package=target_package.name, + cve_description=cve_description, + patch_filename=upstream_report.fixed_srpm_file_name, + 
patch_files_summary=patch_files_summary, + patch_hunks_summary=patch_hunks_summary, + tools=tools_str, + tool_selection_strategy=tool_strategy, + tool_instructions=L1_AGENT_THOUGHT_UPSTREAM_INSTRUCTIONS, + ) + + span.set_output({ + "mode": "upstream_patch_verification", + "patch_filename": upstream_report.fixed_srpm_file_name, + "patch_files_count": len(parsed_patch.files) if parsed_patch else 0, + }) else: cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions) runtime_prompt = ( @@ -477,7 +500,6 @@ async def should_continue(state: CodeAgentState) -> str: app = flow.compile() - #app.get_graph().draw_mermaid_png(output_file_path="code_agent_flow.png") return app From 96cf96e95bf0a411e77bd0fe46da38d8a71bf88d Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Sun, 3 May 2026 12:22:28 +0300 Subject: [PATCH 19/46] fix report --- .../functions/code_agent_graph_defs.py | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index eebd562c3..408b814b7 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -948,7 +948,11 @@ def _cap_snippets_by_type( def _extract_downstream_patch_code_snippets( downstream_report: DownstreamSearchReport | None, ) -> list[CodeSnippet]: - """Extract vulnerable/fix snippets from the downstream parsed patch only.""" + """Extract vulnerable/fix snippets from the downstream parsed patch only. + + For purely additive patches (no removed lines), shows context lines + as "vulnerable" since they represent the code lacking the fix. 
+ """ if not downstream_report or not downstream_report.parsed_patch: return [] snippets: list[CodeSnippet] = [] @@ -962,6 +966,14 @@ def _extract_downstream_patch_code_snippets( snippet_type="vulnerable", source="downstream_patch", )) + elif hunk.context_lines and hunk.added_lines: + snippets.append(CodeSnippet( + file_path=pf.target_path.lstrip("ab/"), + line_number=hunk.source_start, + code="\n".join(hunk.context_lines[:10]), + snippet_type="vulnerable", + source="downstream_patch", + )) if hunk.added_lines: snippets.append(CodeSnippet( file_path=pf.target_path.lstrip("ab/"), @@ -977,7 +989,11 @@ def _extract_code_snippets( downstream_report: DownstreamSearchReport | None, upstream_report: UpstreamSearchReport | None, ) -> list[CodeSnippet]: - """Extract code snippets from parsed patches.""" + """Extract code snippets from parsed patches. + + For purely additive patches (no removed lines), shows context lines + as "vulnerable" since they represent the code lacking the fix. + """ snippets: list[CodeSnippet] = _extract_downstream_patch_code_snippets(downstream_report) if upstream_report and upstream_report.fixed_parsed_patch: @@ -991,6 +1007,14 @@ def _extract_code_snippets( snippet_type="vulnerable", source="upstream_patch", )) + elif hunk.context_lines and hunk.added_lines: + snippets.append(CodeSnippet( + file_path=pf.target_path.lstrip("ab/"), + line_number=hunk.source_start, + code="\n".join(hunk.context_lines[:10]), + snippet_type="vulnerable", + source="upstream_patch", + )) if hunk.added_lines: snippets.append(CodeSnippet( file_path=pf.target_path.lstrip("ab/"), @@ -1099,6 +1123,11 @@ async def generate_code_agent_report( ranked = _rank_patch_snippets_for_relevance(raw, report.affected_files) report.code_snippets = _cap_snippets_by_type(ranked) snippet_source = "downstream_patch" + elif upstream_report and upstream_report.fixed_parsed_patch: + raw = _extract_code_snippets(downstream_report, upstream_report) + ranked = 
_rank_patch_snippets_for_relevance(raw, report.affected_files) + report.code_snippets = _cap_snippets_by_type(ranked) + snippet_source = "upstream_patch" elif not report.code_snippets: report.code_snippets = _extract_code_snippets(downstream_report, upstream_report) From 18e46672ed08f7681ca70f2d18a6b551cc12f981 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Sun, 3 May 2026 14:09:38 +0000 Subject: [PATCH 20/46] cleanup and fix bug --- .../functions/code_agent_graph_defs.py | 1192 +---------------- .../functions/cve_package_code_agent.py | 36 +- src/vuln_analysis/tools/source_inspector.py | 19 +- 3 files changed, 115 insertions(+), 1132 deletions(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index 408b814b7..fed06ed8f 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -14,10 +14,11 @@ # limitations under the License. """ -Graph definitions for the L1 Package Code Agent (Identify -> Locate -> Verify). +Graph definitions for the L1 Package Code Agent. -Houses the LangGraph state schema, structured-output schemas, the -``build_identify_subgraph`` pipeline, and per-node prompt templates. +Houses the LangGraph state schema, structured-output schemas for +DownstreamSearchReport/UpstreamSearchReport pipelines, CodeAgentReport, +and L1 agent prompt templates. 
""" from __future__ import annotations @@ -46,17 +47,14 @@ class CodeAgentState(MessagesState): - """LangGraph state for the Identify -> Locate -> Verify graph.""" - identify_report: NotRequired[IdentifyReport | None] - locate_report: NotRequired[LocateReport | None] - verify_report: NotRequired[VerifyReport | None] + """LangGraph state for the L1 Code Agent (DownstreamSearch -> UpstreamSearch).""" downstream_report: NotRequired[DownstreamSearchReport | None] upstream_report: NotRequired[UpstreamSearchReport | None] runtime_prompt: NotRequired[str | None] last_thought: NotRequired[CheckerThought | None] - step: int = Field(default=0, description="The current step number") - max_steps: int = Field(default=10, description="The maximum number of steps") - output: str = Field(default="", description="The output of the last step") + step: NotRequired[int] + max_steps: NotRequired[int] + output: NotRequired[str] thought: NotRequired[CheckerThought | None] @@ -98,7 +96,6 @@ class UpstreamSearchReport(BaseModel): is_fixed_srpm_is_needed: bool = Field(default=False, description="True if a fixed SRPM is needed downstream style patch files") fixed_srpm_file_name: str = Field(default="", description="The name of the fixed SRPM file") fixed_parsed_patch: ParsedPatch | None = Field(default=None, description="The parsed fixed SRPM patch file") - vulnerable_locations: list[FileLocation] = Field(default_factory=list) reason_cve_code: str = Field( default="", description="Does the CVE description match the code which is vulnerable", @@ -132,113 +129,8 @@ class ReflectionBase(BaseModel): description="True if results are good enough to proceed.") -class IdentifyKeywords(BaseModel): - """Generator output: candidate keywords extracted from one intel description.""" - keywords: list[str] = Field( - description=( - "Keywords to search for the vulnerability: " - "function names, symbols, file patterns, etc." 
- )) - reasoning: str = Field( - description=( - "Why these keywords were chosen, or " - "'no new keywords found' if prior report covered everything." - )) - - -class Keyword(BaseModel): - """Single keyword with reflector-assigned classification and confidence.""" - term: str - keyword_type: Literal["function", "variable", "file", "symbol", "concept"] - confidence: float = Field(default=0.0, ge=0.0, le=1.0) - file_paths: list[str] = Field( - default_factory=list, - description="Source files where this keyword was found during Identify", - ) - hit_content: list[str] = Field( - default_factory=list, - description="Snippet content from Tantivy hits (parallel to file_paths)", - ) - - -class KeywordJudgment(BaseModel): - """LLM output for judging a single keyword against its search results.""" - term: str - keyword_type: Literal["function", "variable", "file", "symbol", "concept"] - confidence: float = Field(default=0.0, ge=0.0, le=1.0) - approved: bool = Field(description="True if search results show code related to the vulnerability") - reasoning: str = Field(description="Brief explanation of the decision") - file_paths: list[str] = Field( - default_factory=list, - description="Source files where this keyword was found during Identify",) - -class KeywordHit(BaseModel): - judgment: KeywordJudgment - - hit_content: list[str] = Field( - default_factory=list, - description="Snippet content from Tantivy hits (parallel to file_paths)", - ) - - -class IdentifyReport(ReflectionBase): - """Aggregated report for the Identify phase.""" - approved: list[Keyword] = Field( - description="Keywords that found relevant hits in the source code.") - rejected: list[Keyword] = Field( - description="Keywords with no hits or irrelevant results.") - - # --------------------------------------------------------------------------- -# Locate schemas -# --------------------------------------------------------------------------- - - -class DiffHunk(BaseModel): - """A single hunk from a unified 
diff between target and patched source trees.""" - file_path: str - source_start_line: int = Field(description="Start line in the target (investigated) source") - source_end_line: int = Field(description="End line in the target (investigated) source") - patch_start_line: int = Field(description="Start line in the patched (fixed) source") - patch_end_line: int = Field(description="End line in the patched (fixed) source") - content: str = Field(description="Raw diff hunk text including +/- lines") - - -class FileLocation(BaseModel): - """A source file location where vulnerable code was found.""" - file_path: str - line_number: int | None = None - snippet: str = Field(description="Code context around the match") - matched_keywords: list[str] - source: Literal["diff", "tantivy","grep"] = Field( - description="How this location was discovered") - - -class LocationJudgment(BaseModel): - """LLM output: judge whether a located code region is vulnerability-relevant.""" - file_path: str - relevant: bool = Field( - description="True if this location contains vulnerability-relevant code") - confidence: float = Field(default=0.0, ge=0.0, le=1.0) - reasoning: str = Field(description="Brief explanation of the decision") - - -class LocateReport(ReflectionBase): - """Locate phase report (upstream search: diffs target against fixed SRPM).""" - locations: list[FileLocation] = Field( - description="Source locations where vulnerable code was found.") - diff_available: bool = Field( - description="Whether a diff against the fixed SRPM was available for location.") - evidence: str = Field( - description="Human-readable summary for justification.") - evidence_level: Literal["high", "medium", "low"] = Field( - default="medium", - description="high=diff+LLM confirmed, medium=keyword hits only, low=no locations", - ) - - -# --------------------------------------------------------------------------- -# Verify schemas +# Patch schemas (used by DownstreamSearchReport and UpstreamSearchReport) # 
--------------------------------------------------------------------------- @@ -268,48 +160,6 @@ class ParsedPatch(BaseModel): files: list[PatchFile] -class DownstreamPatchDetail(BaseModel): - """Extracted from a .patch file: shows vulnerable code and the fix.""" - patch_filename: str - file_path: str - vulnerable_code: str = Field( - description="Context lines (no +/- prefix) around the change") - fix_code: str = Field( - description="Added lines (+ prefix stripped)") - line_number: int | None = None - - -class PatchEvidence(BaseModel): - """Evidence of a downstream patch for the CVE.""" - source: Literal[ - "spec_patch_directive", "spec_changelog", "patch_file_content", "build_log" - ] - detail: str - confidence: float = Field(ge=0.0, le=1.0) - patch_details: list[DownstreamPatchDetail] = Field(default_factory=list) - - -class ChangelogJudgment(BaseModel): - """LLM output: judge whether changelog entries describe a fix for the CVE.""" - relevant: bool - confidence: float = Field(ge=0.0, le=1.0) - reasoning: str - - -class PatchContentJudgment(BaseModel): - """LLM output: judge whether patch content fixes the CVE.""" - relevant: bool - confidence: float = Field(ge=0.0, le=1.0) - reasoning: str - - -class VerifyReport(ReflectionBase): - """Verify phase report (downstream search: checks this build's patches and changelog).""" - verdict: Literal["PATCHED", "VULNERABLE", "INCONCLUSIVE"] - evidence: list[PatchEvidence] - summary: str - - # --------------------------------------------------------------------------- # Code Agent Report schema # --------------------------------------------------------------------------- @@ -430,88 +280,6 @@ def to_markdown( # Prompt templates # --------------------------------------------------------------------------- -IDENTIFY_SYSTEM_PROMPT = ( - "You are a vulnerability analyst performing the **Identify** phase.\n\n" - "You will receive CVE intelligence (descriptions, advisories, affected packages) " - "in the KNOWLEDGE block. 
Your task is to extract **search keywords** that can be " - "used to locate the vulnerable code in a source tree.\n\n" - "KEYWORD EXTRACTION RULES:\n" - "1. Extract specific function names, method names, and symbols mentioned in the CVE description.\n" - "2. Extract package/module names that contain the vulnerable code.\n" - "3. Extract file name patterns if mentioned (e.g. 'parse.c', 'auth.go').\n" - "4. Prefer exact identifiers over generic terms. 'PQescapeLiteral' is a good keyword; " - "'SQL injection' is not.\n" - "5. If the description mentions a specific API, endpoint, or configuration key, include it.\n" - "6. Include both the short function name and the fully-qualified name if available " - "(e.g. both 'Parse' and 'encoding/xml.Decoder.Parse').\n" - "7. Do NOT include CVE IDs, GHSA IDs, or version numbers as keywords.\n" - "8. Do NOT include the TARGET PACKAGE name as a keyword - you are already searching within " - "its source tree. Generic package names produce too many matches.\n\n" - "Output a structured list of keywords with reasoning." -) - -IDENTIFY_REFLECT_PROMPT = ( - "You are reviewing search results for a single keyword from the **Identify** phase.\n\n" - "You will receive:\n" - "1. The CVE description for context\n" - "2. A keyword extracted from that CVE description\n" - "3. 
Lexical search results from the source code for that keyword\n\n" - "Classify the keyword:\n" - "- keyword_type: one of function, variable, file, symbol, concept\n" - "- confidence: 0.0-1.0 how confident this keyword points to vulnerable code\n" - "- approved: True if the search results show code related to the vulnerability\n" - "- reasoning: brief explanation" -) - -LOCATE_SYSTEM_PROMPT = ( - "You are a vulnerability analyst performing the **Locate** phase.\n\n" - "You will receive a code region (either a diff hunk from the patched RPM " - "or a search result from the source code index) along with the CVE context " - "and keywords from the Identify phase.\n\n" - "Your task is to judge whether this code region is **relevant to the " - "vulnerability**:\n" - "- relevant: True if the code change or code region relates to fixing or " - "containing the vulnerability described in the CVE.\n" - "- confidence: 0.0-1.0 how confident you are in the judgment.\n" - "- reasoning: brief explanation of why this region is or is not relevant.\n\n" - "Focus on: function names, variable names, control flow changes, " - "added bounds checks, input validation, or security-relevant API calls " - "that match the CVE description." -) - -VERIFY_CHANGELOG_PROMPT = ( - "You are a vulnerability analyst performing the **Verify** phase.\n\n" - "You will receive:\n" - "1. A CVE ID and its description.\n" - "2. Recent changelog entries from an RPM .spec file.\n\n" - "Your task is to determine whether any changelog entry describes a fix " - "or backport for this specific CVE. Entries may reference CVEs explicitly " - "(e.g. 'Fixes: CVE-2026-5121') or describe the fix without naming the CVE " - "(e.g. 
'add bounds check in iso9660 ZER header parsing').\n\n" - "RULES:\n" - "- An explicit CVE ID mention is strong evidence (confidence >= 0.9).\n" - "- A description that matches the vulnerability semantics without naming " - "the CVE is weaker evidence (confidence 0.5-0.8).\n" - "- Unrelated entries should be marked as not relevant.\n\n" - "Output a structured judgment." -) - -VERIFY_PATCH_CONTENT_PROMPT = ( - "You are a vulnerability analyst performing the **Verify** phase.\n\n" - "You will receive:\n" - "1. A CVE ID and its description.\n" - "2. Patch content (diff hunks) from a patch file.\n\n" - "Your task is to determine whether this patch fixes or mitigates the CVE.\n\n" - "RULES:\n" - "- Look for code changes that address the vulnerability (bounds checks, input validation, " - "null checks, API changes, etc.).\n" - "- If the patch modifies functions/files mentioned in the CVE description, confidence is higher.\n" - "- If the patch adds security-relevant logic matching the CVE fix pattern, confidence >= 0.8.\n" - "- If the patch touches related code but the fix is unclear, confidence 0.5-0.7.\n" - "- If the patch is unrelated to the vulnerability, mark as not relevant.\n\n" - "Output a structured judgment." -) - CODE_AGENT_REPORT_PROMPT = """\ You are a security analyst generating the final L1 Code Agent investigation report. @@ -694,109 +462,6 @@ def _format_extracted_facts_section( return lines -def _format_identify_for_report(report: IdentifyReport | None) -> str: - """Format Identify phase results for prompt injection.""" - if report is None: - return "Identify phase did not produce results." 
- - lines = [] - if report.approved: - lines.append(f"**Approved Keywords ({len(report.approved)}):**") - for kw in report.approved: - files_str = ", ".join(kw.file_paths[:3]) if kw.file_paths else "no files" - if len(kw.file_paths) > 3: - files_str += f" (+{len(kw.file_paths) - 3} more)" - lines.append( - f"- `{kw.term}` ({kw.keyword_type}, confidence={kw.confidence:.2f})" - ) - lines.append(f" Found in: {files_str}") - if kw.hit_content: - snippet = kw.hit_content[0][:MAX_SNIPPET_CHARS] - if len(kw.hit_content[0]) > MAX_SNIPPET_CHARS: - snippet += "..." - lines.append(f" Snippet: {snippet}") - else: - lines.append("**No approved keywords found.**") - - if report.rejected: - lines.append(f"\n**Rejected Keywords ({len(report.rejected)}):** " - + ", ".join(kw.term for kw in report.rejected[:5])) - if len(report.rejected) > 5: - lines.append(f" (+{len(report.rejected) - 5} more)") - - lines.append(f"\nPhase sufficient: {report.is_sufficient}") - if report.instructions: - lines.append(f"Instructions: {report.instructions}") - - return "\n".join(lines) - - -def _format_locate_for_report(report: LocateReport | None) -> str: - """Format Locate phase results for prompt injection.""" - if report is None: - return "Locate phase did not produce results." - - lines = [] - lines.append(f"**Evidence Level:** {report.evidence_level}") - lines.append(f"**Diff Available:** {report.diff_available}") - lines.append(f"**Evidence Summary:** {report.evidence}") - - if report.locations: - lines.append(f"\n**Located Code Regions ({len(report.locations)}):**") - for loc in report.locations[:10]: - line_info = f":{loc.line_number}" if loc.line_number else "" - lines.append(f"- `{loc.file_path}{line_info}` (source: {loc.source})") - lines.append(f" Keywords: {', '.join(loc.matched_keywords)}") - snippet = loc.snippet[:MAX_SNIPPET_CHARS] - if len(loc.snippet) > MAX_SNIPPET_CHARS: - snippet += "..." 
- lines.append(f" ```\n{snippet}\n ```") - if len(report.locations) > 10: - lines.append(f" (+{len(report.locations) - 10} more locations)") - else: - lines.append("\n**No vulnerability-relevant code locations found.**") - - lines.append(f"\nPhase sufficient: {report.is_sufficient}") - if report.instructions: - lines.append(f"Instructions: {report.instructions}") - - return "\n".join(lines) - - -def _format_verify_for_report(report: VerifyReport | None) -> str: - """Format Verify phase results for prompt injection.""" - if report is None: - return "Verify phase did not produce results." - - lines = [] - lines.append(f"**Verdict:** {report.verdict}") - lines.append(f"**Summary:** {report.summary}") - - if report.evidence: - lines.append(f"\n**Evidence Items ({len(report.evidence)}):**") - for ev in report.evidence: - lines.append(f"- Source: {ev.source} (confidence={ev.confidence:.2f})") - lines.append(f" Detail: {ev.detail}") - if ev.patch_details: - lines.append(f" Patch details ({len(ev.patch_details)} hunks):") - for pd in ev.patch_details[:3]: - lines.append(f" - {pd.patch_filename}: {pd.file_path}:{pd.line_number or 'N/A'}") - fix_snippet = pd.fix_code[:200] - if len(pd.fix_code) > 200: - fix_snippet += "..." 
- lines.append(f" Fix: {fix_snippet}") - if len(ev.patch_details) > 3: - lines.append(f" (+{len(ev.patch_details) - 3} more hunks)") - else: - lines.append("\n**No patch evidence found.**") - - lines.append(f"\nPhase sufficient: {report.is_sufficient}") - if report.instructions: - lines.append(f"Instructions: {report.instructions}") - - return "\n".join(lines) - - def _format_downstream_for_report(report: DownstreamSearchReport | None) -> str: """Format Downstream search results for prompt injection.""" if report is None: @@ -854,15 +519,6 @@ def _format_upstream_for_report(report: UpstreamSearchReport | None) -> str: if report.spec_fixed_srpm_change: lines.append(f"**Rebase Changes:**\n```\n{report.spec_fixed_srpm_change[:500]}\n```") - if report.vulnerable_locations: - lines.append(f"\n**Vulnerable Locations ({len(report.vulnerable_locations)}):**") - for loc in report.vulnerable_locations[:5]: - line_info = f":{loc.line_number}" if loc.line_number else "" - lines.append(f"- `{loc.file_path}{line_info}`") - if loc.snippet: - snippet = loc.snippet[:200] + "..." if len(loc.snippet) > 200 else loc.snippet - lines.append(f" ```\n{snippet}\n ```") - if report.reason_code_fixed_by_rebase: lines.append(f"\n**Rebase Reasoning:** {report.reason_code_fixed_by_rebase}") @@ -1150,301 +806,10 @@ async def generate_code_agent_report( # --------------------------------------------------------------------------- -# Identify subgraph pipeline -# --------------------------------------------------------------------------- - - -async def build_identify_subgraph( - *, - llm, - descriptions: list[tuple[str, str]], - vuln_id: str, - package_name: str, - lexical_search_fn, - tracer, - identify_prompt: str = IDENTIFY_SYSTEM_PROMPT, - reflect_prompt: str = IDENTIFY_REFLECT_PROMPT, -) -> IdentifyReport: - """Extract keywords from CVE descriptions, search for each, and judge relevance. 
- - Orchestrates a linear pipeline (no LangGraph subgraph): - A) One LLM call per intel source to extract ``IdentifyKeywords``. - B) One lexical search + one LLM judge call per unique keyword. - C) Aggregate judgments into an ``IdentifyReport`` in Python. - - Parameters - ---------- - llm: - LangChain LLM used for both keyword extraction and per-keyword judgment. - descriptions: - ``(source_name, text)`` pairs built from CVE intel (e.g. GHSA, Ubuntu). - vuln_id: - CVE identifier, included in context blocks sent to the LLM. - package_name: - Target package name (e.g. 'libarchive'). Passed to the LLM to avoid - extracting it as a keyword since we are already searching within its source. - lexical_search_fn: - ``async (str) -> list[dict]`` -- calls the Tantivy index. - tracer: - Request-scoped ``nat.builder.context.Context`` for span creation. - identify_prompt: - System prompt for keyword extraction. - reflect_prompt: - System prompt for per-keyword judgment. - """ - MAX_CONTENT_CHARS = 500 - - # -- Step A: extract keywords per source ----------------------------------- - gen_llm = llm.with_structured_output(IdentifyKeywords) - all_keywords: list[str] = [] - all_reasoning: list[str] = [] - - for source_name, desc_text in descriptions: - context_block = f"CVE: {vuln_id}\nTARGET PACKAGE: {package_name}\nKNOWLEDGE:\n{desc_text}" - if all_keywords: - context_block += ( - "\n\nALREADY IDENTIFIED KEYWORDS (do not repeat): " - + ", ".join(all_keywords) - ) - messages = [ - SystemMessage(content=identify_prompt), - SystemMessage(content=context_block), - ] - with tracer.push_active_function( - f"identify_{source_name}", input_data={"source": source_name} - ) as span: - response: IdentifyKeywords = await gen_llm.ainvoke(messages) - span.set_output({"keywords": response.keywords, "reasoning": response.reasoning}) - logger.info("build_identify_subgraph [%s]: %s", source_name, response.model_dump_json()) - all_keywords.extend(response.keywords) - 
all_reasoning.append(f"[{source_name}] {response.reasoning}") - - seen: set[str] = set() - unique_keywords = [ - kw for kw in all_keywords - if not (kw.lower() in seen or seen.add(kw.lower())) - ] - - # -- Step B: per-keyword search + judge ------------------------------------ - judge_llm = llm.with_structured_output(KeywordJudgment) - judgments: list[KeywordHit] = [] - cve_context = "\n\n".join(f"[{src}] {txt}" for src, txt in descriptions) - - for kw in unique_keywords: - with tracer.push_active_function(f"judge_{kw}", input_data={"keyword": kw}) as span: - try: - results_text = await lexical_search_fn(kw) - except Exception as e: - logger.warning("lexical search failed for '%s': %s", kw, e) - results_text = "No results found." - - if results_text is None: - results_text = "No results found." - - messages = [ - SystemMessage(content=reflect_prompt), - HumanMessage(content=( - f"CVE: {vuln_id}\n{cve_context}\n\n" - f"Keyword: {kw}\n\n" - f"Search results:\n{results_text}" - )), - ] - try: - judgment: KeywordJudgment = await judge_llm.ainvoke(messages) - except Exception as e: - logger.warning("LLM judgment failed for keyword '%s': %s", kw, e) - span.set_output({"error": str(e)}) - continue - - judgmentHit = KeywordHit(judgment=judgment, hit_content=[results_text]) - span.set_output({"approved": judgment.approved, "confidence": judgment.confidence}) - judgments.append(judgmentHit) - - # -- Step C: aggregate into IdentifyReport --------------------------------- - approved: list[Keyword] = [] - rejected: list[Keyword] = [] - for jh in judgments: - j = jh.judgment - kw_obj = Keyword( - term=j.term, keyword_type=j.keyword_type, - confidence=j.confidence, file_paths=j.file_paths, - hit_content=jh.hit_content, - ) - if j.approved: - approved.append(kw_obj) - else: - rejected.append(kw_obj) - - return IdentifyReport( - approved=approved, - rejected=rejected, - is_sufficient=len(approved) > 0, - instructions="" if approved else "No keywords matched vulnerable code. 
Consider broader terms.", - ) - - -# --------------------------------------------------------------------------- -# Locate helpers +# Diff and patch helpers # --------------------------------------------------------------------------- -def parse_unified_diff(diff_text: str, tracer) -> list[DiffHunk]: - """Parse unified diff text into structured DiffHunk objects using unidiff.""" - # Remove "\ No newline at end of file" markers that unidiff can't parse - cleaned_diff = "\n".join( - line for line in diff_text.splitlines() - if not line.startswith("\\ No newline at end of file") - ) - - try: - patch = PatchSet.from_string(cleaned_diff) - except Exception as e: - with tracer.push_active_function("parse_unified_diff_error", input_data={"error": str(e)}) as span: - span.set_output({"error": str(e), "error_type": type(e).__name__}) - logger.warning("Failed to parse diff: %s", e) - return [] - - hunks: list[DiffHunk] = [] - for patched_file in patch: - if patched_file.is_binary_file: - continue - if patched_file.is_added_file or patched_file.is_removed_file: - continue - if patched_file.added > 0 or patched_file.removed > 0: - for hunk in patched_file: - hunks.append(DiffHunk( - file_path=patched_file.path, - source_start_line=hunk.source_start, - source_end_line=hunk.source_start + hunk.source_length, - patch_start_line=hunk.target_start, - patch_end_line=hunk.target_start + hunk.target_length, - content=str(hunk), - )) - return hunks - - -_DIFF_EXCLUDE_PATTERNS = [ - ":(exclude)*.S", ":(exclude)*.s", ":(exclude)*.asm", # Assembly (causes parser errors) - ":(exclude)*.pod", ":(exclude)*.pl", # Perl docs/scripts - ":(exclude)*.pem", ":(exclude)*.cnf", # Certificates/config - ":(exclude)*.sh", ":(exclude)*.bat", ":(exclude)*.com", # Shell/batch scripts -] - - -def _generate_tree_diff(source_dir: Path, patch_dir: Path) -> str: - """Run ``git diff --no-index`` between two directory trees. 
- - Uses git's built-in binary detection to automatically skip binary files - (RPMs, tarballs, images, etc.) without needing an explicit exclude list. - Excludes assembly and non-source files that cause parser issues. - """ - result = subprocess.run( - ["git", "diff", "--no-index", "--no-color", - str(source_dir), str(patch_dir), "--"] + _DIFF_EXCLUDE_PATTERNS, - capture_output=True, timeout=300, - ) - return result.stdout.decode("utf-8", errors="replace") - - -def _generate_targeted_diff(source_dir: Path, patch_dir: Path, target_basenames: set[str]) -> str: - """Generate diff only for specific files between two trees. - - Instead of diffing all files and filtering afterward, this runs git diff - with file patterns to only diff the files we care about. Much faster and - avoids parser issues from problematic files (assembly, scripts, etc.). - """ - if not target_basenames: - return "" - - # Build file patterns for git diff - match files by basename anywhere in tree - file_patterns = [f"*/{basename}" for basename in target_basenames] - - result = subprocess.run( - ["git", "diff", "--no-index", "--no-color", - str(source_dir), str(patch_dir), "--"] + file_patterns, - capture_output=True, timeout=300, - ) - return result.stdout.decode("utf-8", errors="replace") - - -_NON_SOURCE_EXTENSIONS = frozenset({ - ".md", ".txt", ".rst", ".html", ".xml", ".json", ".yaml", ".yml", - ".sgml", ".po", ".pot", ".spec", ".cfg", ".conf", ".1", ".man","*.in", -}) - -LANG_PARSER_EXTENSIONS = {".c", ".h", ".cpp", ".hpp", ".py", ".go", ".java", ".js", ".ts"} - -def _hunks_to_locations( - hunks: list[DiffHunk], - approved_keywords: list[Keyword], -) -> list[FileLocation]: - """Convert hunks to FileLocations, filtering by file and content. - - Applies two filters: - 1. File filter - hunk's file must be in approved_keywords.file_paths - 2. 
Content filter - at least one keyword term must appear in hunk content - """ - # Build basename -> keywords lookup - basename_to_keywords: dict[str, list[str]] = {} - for kw in approved_keywords: - for fp in kw.file_paths: - basename = Path(fp).name - basename_to_keywords.setdefault(basename, []).append(kw.term) - - locations: list[FileLocation] = [] - for hunk in hunks: - basename = Path(hunk.file_path).name - if basename not in basename_to_keywords: - continue - # Extension filter to skip non-code files - if Path(hunk.file_path).suffix.lower() not in LANG_PARSER_EXTENSIONS: - continue - - # Content filter - check which keywords appear in hunk content - searchable = (hunk.content + " " + hunk.file_path).lower() - matched_keywords = [ - kw for kw in basename_to_keywords[basename] - if kw.lower() in searchable - ] - - # Only include hunk if at least one keyword matches content - if not matched_keywords: - continue - - locations.append(FileLocation( - file_path=hunk.file_path, - line_number=hunk.source_start_line, - snippet=hunk.content, - matched_keywords=matched_keywords, - source="diff", - )) - return locations - - -def _tantivy_candidates(approved_keywords: list[Keyword]) -> list[FileLocation]: - """Build FileLocation objects from Identify phase hits (no re-search). - - Uses hit_content stored on each Keyword during the Identify phase, - avoiding redundant Tantivy queries. 
- """ - locations: list[FileLocation] = [] - for kw in approved_keywords: - for file_path, content in zip(kw.file_paths, kw.hit_content): - if Path(file_path).suffix.lower() in _NON_SOURCE_EXTENSIONS: - continue - locations.append(FileLocation( - file_path=file_path, - line_number=None, - snippet=content, - matched_keywords=[kw.term], - source="tantivy", - )) - return locations - - -# --------------------------------------------------------------------------- -# Locate subgraph pipeline -# --------------------------------------------------------------------------- def download_patch_and_gen_diff(fix_info: dict, brew_downloader: BrewDownloader, source_dir: Path, patch_dir: Path) -> Path | None: """Download the patched SRPM and generate the diff file between the source and the patched SRPM.""" from exploit_iq_commons.utils.source_rpm_downloader import SourceRPMDownloader @@ -1463,200 +828,12 @@ def download_patch_and_gen_diff(fix_info: dict, brew_downloader: BrewDownloader, #return diff_output_path return None -async def build_locate_pipeline( - *, - llm, - identify_report: IdentifyReport, - vuln_id: str, - descriptions: list[tuple[str, str]], - lexical_search_fn, - fix_info: dict, - brew_downloader, - source_dir: Path, - patch_dir: Path, - tracer, - locate_prompt: str = LOCATE_SYSTEM_PROMPT, -) -> LocateReport: - """Locate vulnerable code by diffing patched RPM against target source, - or falling back to keyword-only Tantivy search. - - Steps: - A) Resolve fix NVR from identify_result.fixed_rpm_list. - B) Download patched SRPM via BrewDownloader. - C) Extract to patch_dir. - D) Generate unified diff between source_dir and patch_dir. - E) Parse diff into DiffHunk objects via unidiff. - F) Cross-reference diff hunks with Identify approved keywords. - G) LLM judges each matched location for relevance. - - Fallback: if no fix RPM is available, search Tantivy for each - approved keyword and judge those results instead. 
- - Parameters - ---------- - llm: - LangChain LLM for per-location judgment. - identify_report: - Output of the Identify phase with approved/rejected keywords. - vuln_id: - CVE identifier. - descriptions: - ``(source_name, text)`` pairs from CVE intel for LLM context. - lexical_search_fn: - ``async (str) -> list[dict]`` -- Tantivy index search. - fix_info: - ``{name, version, release}`` dict from RHSA, or empty dict. - brew_downloader: - ``BrewDownloader`` instance, or ``None`` if Brew is unavailable. - source_dir: - Path to the extracted target source tree. - patch_dir: - Path where the patched source tree will be extracted. - tracer: - Request-scoped ``nat.builder.context.Context`` for span creation. - locate_prompt: - System prompt for per-location LLM judgment. - """ - approved = identify_report.approved - diff_available = False - candidate_locations: list[FileLocation] = [] - - # -- Download patched SRPM if available and not cached -- - if fix_info and brew_downloader is not None and not patch_dir.exists(): - with tracer.push_active_function( - "locate_download_patch", input_data={"fix_info": fix_info} - ) as span: - try: - download_patch_and_gen_diff(fix_info, brew_downloader, source_dir, patch_dir) - span.set_output({"patch_dir_exists": patch_dir.exists()}) - except Exception as e: - logger.warning("locate: failed to download/extract patched SRPM: %s", e) - span.set_output({"error": str(e), "patch_dir_exists": False}) - - # -- Generate targeted diff for only the files from approved keywords -------------------------------- - # Extract target basenames from approved keywords - target_basenames: set[str] = set() - for kw in approved: - for fp in kw.file_paths: - target_basenames.add(Path(fp).name) - - # Check if we can generate targeted diff (need both source and patch dirs) - can_diff = source_dir.exists() and patch_dir.exists() and target_basenames - - if can_diff: - with tracer.push_active_function( - "locate_targeted_diff", - 
input_data={"target_files_count": len(target_basenames), "target_basenames": list(target_basenames)[:10]} - ) as span: - # Generate diff only for target files - much faster than full diff - diff_text = _generate_targeted_diff(source_dir, patch_dir, target_basenames) - if diff_text: - diff_output_path = patch_dir.parent / "locate.diff" - diff_output_path.write_text(diff_text, encoding="utf-8") - hunks = parse_unified_diff(diff_text, tracer) - candidate_locations = _hunks_to_locations(hunks, approved) - diff_available = True - span.set_output({ - "source": "targeted_diff", - "hunks_total": len(hunks), - "target_files_count": len(target_basenames), - "candidate_locations_count": len(candidate_locations), - }) - logger.info("locate: targeted diff for %d files (%d hunks, %d candidates)", - len(target_basenames), len(hunks), len(candidate_locations)) - else: - logger.info("locate: no diff file found, falling back to Tantivy") - - tantivy_locations = _tantivy_candidates(approved) - - # -- Step G: LLM judges each candidate location --------------------------- - confirmed: list[FileLocation] = [] - if candidate_locations or tantivy_locations: - judge_llm = llm.with_structured_output(LocationJudgment) - cve_context = "\n\n".join(f"[{src}] {txt}" for src, txt in descriptions) - - for loc in candidate_locations: - with tracer.push_active_function( - f"locate_judge_{loc.file_path}", input_data={"file": loc.file_path} - ) as span: - messages = [ - SystemMessage(content=locate_prompt), - HumanMessage(content=( - f"CVE: {vuln_id}\n{cve_context}\n\n" - f"File: {loc.file_path} (line {loc.line_number or 'N/A'})\n" - f"Source: {loc.source}\n\n" - f"Code region:\n{loc.snippet}" - )), - ] - judgment: LocationJudgment = await judge_llm.ainvoke(messages) - span.set_output({"relevant": judgment.relevant, "confidence": judgment.confidence}) - if judgment.relevant: - confirmed.append(loc) - - # -- Aggregate into LocateReport ------------------------------------------ - with 
tracer.push_active_function( - "locate_aggregate", - input_data={ - "confirmed_count": len(confirmed), - "tantivy_count": len(tantivy_locations), - }, - ) as span: - if confirmed: - final_locations = confirmed - evidence_level: Literal["high", "medium", "low"] = "high" - file_list = ", ".join(sorted({loc.file_path for loc in confirmed})) - evidence = ( - f"Located {len(confirmed)} vulnerability-relevant code region(s) " - f"in: {file_list}. Evidence sourced from patched RPM diff (high confidence)." - ) - instructions = "" - elif tantivy_locations: - final_locations = tantivy_locations - evidence_level = "medium" - file_list = ", ".join(sorted({loc.file_path for loc in tantivy_locations})) - evidence = ( - f"Located {len(tantivy_locations)} candidate code region(s) via keyword search " - f"in: {file_list}. No upstream diff available; downstream verification required." - ) - instructions = "Keyword-only locations require downstream patch/changelog verification." - else: - final_locations = [] - evidence_level = "low" - evidence = "No vulnerability-relevant code locations found in the source tree." - instructions = "No code locations found. Verify phase should assess based on available intel." 
- - span.set_output({ - "source": "diff" if confirmed else ("tantivy" if tantivy_locations else "none"), - "evidence_level": evidence_level, - "final_locations_count": len(final_locations), - "evidence": evidence, - }) - - logger.info( - "locate: aggregation complete (source=%s, evidence_level=%s, locations=%d)", - "diff" if confirmed else ("tantivy" if tantivy_locations else "none"), - evidence_level, - len(final_locations), - ) - - return LocateReport( - locations=final_locations, - diff_available=diff_available, - evidence=evidence, - evidence_level=evidence_level, - is_sufficient=len(final_locations) > 0, - instructions=instructions, - ) - # --------------------------------------------------------------------------- -# Verify pipeline +# Spec/build log parsing helpers # --------------------------------------------------------------------------- _SPEC_PATCH_RE = re.compile(r"^Patch(\d+)\s*:\s*(.+)$", re.IGNORECASE) -_BUILDLOG_PATCH_RE = re.compile( - r"/usr/bin/cat\s+/builddir/build/SOURCES/(\S+\.patch)", -) def _parse_spec_patch_directives( @@ -1681,19 +858,6 @@ def _extract_spec_changelog(inspector, spec_path: Path) -> str | None: return content[idx + len("%changelog"):] -def _parse_build_log_applied_patches( - inspector, build_log_path: Path, -) -> list[str]: - """Return patch filenames that were applied during the RPM build.""" - matches = inspector.grep_content(_BUILDLOG_PATCH_RE.pattern, build_log_path) - filenames: list[str] = [] - for m in matches: - hit = _BUILDLOG_PATCH_RE.search(m.line_content) - if hit: - filenames.append(hit.group(1)) - return filenames - - def parse_patch_file(patch_path: Path) -> ParsedPatch | None: """Parse a downstream .patch file into structured data. 
@@ -1743,284 +907,9 @@ def parse_patch_file(patch_path: Path) -> ParsedPatch | None: return ParsedPatch(patch_filename=patch_path.name, files=files) -def _extract_patch_details( - patch_path: Path, locate_report: LocateReport | None, -) -> list[DownstreamPatchDetail]: - """Parse a ``.patch`` file into ``DownstreamPatchDetail`` entries.""" - try: - diff_text = patch_path.read_text(encoding="utf-8", errors="replace") - patch_set = PatchSet.from_string(diff_text) - except Exception: - logger.warning("verify: failed to parse patch file %s", patch_path) - return [] - - locate_files = set() - if locate_report: - locate_files = {loc.file_path for loc in locate_report.locations} - - details: list[DownstreamPatchDetail] = [] - for patched_file in patch_set: - if patched_file.is_binary_file: - continue - - file_path = patched_file.path - if locate_files and not any( - Path(file_path).name in lf for lf in locate_files - ): - continue - - for hunk in patched_file: - context_lines: list[str] = [] - added_lines: list[str] = [] - for line in hunk: - if line.is_context: - context_lines.append(str(line.value).rstrip("\n")) - elif line.is_added: - added_lines.append(str(line.value).rstrip("\n")) - - if added_lines: - details.append(DownstreamPatchDetail( - patch_filename=patch_path.name, - file_path=file_path, - vulnerable_code="\n".join(context_lines[-10:]), - fix_code="\n".join(added_lines), - line_number=hunk.target_start, - )) - return details - - -async def _analyze_suspected_patches( - *, - llm, - evidence: list[PatchEvidence], - vuln_id: str, - descriptions: list[tuple[str, str]], - tracer, -) -> list[PatchEvidence]: - """Analyze suspected patches (confidence=0.6) with LLM and return updated evidence.""" - SUSPECTED_CONFIDENCE = 0.6 - - suspected = [e for e in evidence if e.confidence == SUSPECTED_CONFIDENCE and e.patch_details] - if not suspected: - return evidence - - cve_context = "\n\n".join(f"[{src}] {txt}" for src, txt in descriptions) - patch_judge_llm = 
llm.with_structured_output(PatchContentJudgment) - - updated_evidence = [e for e in evidence if e.confidence != SUSPECTED_CONFIDENCE or not e.patch_details] - - for ev in suspected: - patch_content = "\n".join( - f"File: {d.file_path}\nContext:\n{d.vulnerable_code}\nFix:\n{d.fix_code}" - for d in ev.patch_details - ) - patch_name = ev.detail.split(":")[0].replace("Patch ", "") - - with tracer.push_active_function( - f"verify_llm_patch_{patch_name}", input_data={"patch": patch_name} - ) as patch_span: - messages = [ - SystemMessage(content=VERIFY_PATCH_CONTENT_PROMPT), - HumanMessage(content=( - f"CVE: {vuln_id}\n{cve_context}\n\n" - f"Patch: {patch_name}\n\nPatch content:\n{patch_content}" - )), - ] - judgment: PatchContentJudgment = await patch_judge_llm.ainvoke(messages) - patch_span.set_output({ - "relevant": judgment.relevant, - "confidence": judgment.confidence, - }) - - if judgment.relevant: - updated_evidence.append(PatchEvidence( - source="patch_file_content", - detail=f"LLM: {patch_name}: {judgment.reasoning}", - confidence=judgment.confidence, - patch_details=ev.patch_details, - )) - - return updated_evidence - - -async def build_verify_pipeline( - *, - inspector, - llm, - vuln_id: str, - descriptions: list[tuple[str, str]], - identify_report: IdentifyReport | None, - locate_report: LocateReport | None, - build_log_path: Path | None, - tracer, -) -> VerifyReport: - """Check whether the target RPM build already applied a downstream patch. - - Three subphases: - 1. **Spec + build-log scan** (deterministic) -- grep for CVE ID in spec - patch directives, changelog, and build log. - 2. **LLM changelog interpretation** -- only when subphase 1 is - inconclusive; feeds changelog entries to the LLM. - 3. **Patch-file evidence extraction** -- parse ``.patch`` files, cross-ref - against Locate results, and extract vulnerable/fix code. - - Parameters - ---------- - inspector: - ``SourceInspector`` scoped to the extracted source directory. 
- llm: - LangChain LLM for changelog interpretation. - vuln_id: - CVE identifier (e.g. ``"CVE-2026-5121"``). - descriptions: - ``(source_name, text)`` pairs from CVE intel. - identify_report: - Output of the Identify phase (may be ``None``). - locate_report: - Output of the Locate phase (may be ``None``). - build_log_path: - Path to the Koji/Brew build log, or ``None``. - tracer: - Request-scoped tracing context. - """ - evidence: list[PatchEvidence] = [] - cve_pattern = re.escape(vuln_id) - - # ── Subphase 1: deterministic spec + build-log scan ─────────────────── - with tracer.push_active_function( - "verify_spec_scan", input_data={"vuln_id": vuln_id} - ) as span: - spec_files = inspector.find_files("*.spec", recursive=False) - spec_path = spec_files[0] if spec_files else None - - if spec_path: - directives = _parse_spec_patch_directives(inspector, spec_path) - for idx, filename, raw_line in directives: - if re.search(cve_pattern, filename, re.IGNORECASE): - evidence.append(PatchEvidence( - source="spec_patch_directive", - detail=f"Patch{idx}: {filename}", - confidence=0.95, - )) - - changelog = _extract_spec_changelog(inspector, spec_path) - if changelog and re.search(cve_pattern, changelog, re.IGNORECASE): - evidence.append(PatchEvidence( - source="spec_changelog", - detail=f"CVE ID found in %changelog", - confidence=0.95, - )) - - if build_log_path and build_log_path.exists(): - from vuln_analysis.tools.source_inspector import SourceInspector as _SI - blog_inspector = _SI(build_log_path.parent) - applied = _parse_build_log_applied_patches(blog_inspector, build_log_path) - for patch_name in applied: - if re.search(cve_pattern, patch_name, re.IGNORECASE): - evidence.append(PatchEvidence( - source="build_log", - detail=f"Patch applied during build: {patch_name}", - confidence=0.98, - )) - - span.set_output({"evidence_count": len(evidence)}) - - # ── Subphase 2: LLM changelog interpretation (if subphase 1 empty) ─── - if not evidence and spec_path: - changelog = 
changelog if spec_path and changelog else _extract_spec_changelog(inspector, spec_path) # type: ignore[possibly-undefined] - if changelog: - recent_entries = "\n".join(changelog.strip().splitlines()[:50]) - with tracer.push_active_function( - "verify_llm_changelog", input_data={"vuln_id": vuln_id} - ) as span: - cve_context = "\n\n".join(f"[{src}] {txt}" for src, txt in descriptions) - judge_llm = llm.with_structured_output(ChangelogJudgment) - messages = [ - SystemMessage(content=VERIFY_CHANGELOG_PROMPT), - HumanMessage(content=( - f"CVE: {vuln_id}\n{cve_context}\n\n" - f"Changelog entries:\n{recent_entries}" - )), - ] - judgment: ChangelogJudgment = await judge_llm.ainvoke(messages) - span.set_output({ - "relevant": judgment.relevant, - "confidence": judgment.confidence, - }) - if judgment.relevant: - evidence.append(PatchEvidence( - source="spec_changelog", - detail=f"LLM: {judgment.reasoning}", - confidence=judgment.confidence, - )) - - # ── Subphase 3: patch-file evidence extraction ──────────────────────── - with tracer.push_active_function( - "verify_patch_extraction", input_data={"vuln_id": vuln_id} - ) as span: - patch_files = inspector.find_files("*.patch", recursive=False) - cve_patches = [p for p in patch_files if re.search(cve_pattern, p.name, re.IGNORECASE)] - - if cve_patches: - for pp in cve_patches: - details = _extract_patch_details(pp, locate_report) - if details: - evidence.append(PatchEvidence( - source="patch_file_content", - detail=f"Patch {pp.name}: {len(details)} hunk(s) with fix code", - confidence=0.95, - patch_details=details, - )) - elif patch_files and locate_report and locate_report.locations: - for pp in patch_files: - details = _extract_patch_details(pp, locate_report) - if details: - evidence.append(PatchEvidence( - source="patch_file_content", - detail=f"Patch {pp.name}: overlaps Locate files ({len(details)} hunk(s))", - confidence=0.6, - patch_details=details, - )) - - span.set_output({ - "cve_patches": len(cve_patches), - 
"total_patches": len(patch_files), - "evidence_items": len(evidence), - }) - - # ── Aggregate verdict ───────────────────────────────────────────────── - if evidence: - max_conf = max(e.confidence for e in evidence) - - if max_conf >= 0.8: - verdict = "PATCHED" - else: - verdict = "VULNERABLE" - - else: - verdict = "VULNERABLE" - - patch_sources = ", ".join(sorted({e.source for e in evidence})) if evidence else "none" - summary = ( - f"Verify verdict={verdict} for {vuln_id}. " - f"Evidence sources: {patch_sources}. " - f"{len(evidence)} evidence item(s) found." - ) - - return VerifyReport( - verdict=verdict, - evidence=evidence, - summary=summary, - is_sufficient=verdict != "INCONCLUSIVE", - instructions="" if verdict != "INCONCLUSIVE" - else "Downstream evidence is ambiguous; manual review recommended.", - ) - - - -#--------------------------------------------------------------------- +# --------------------------------------------------------------------------- # Downstream search pipeline -#--------------------------------------------------------------------- +# --------------------------------------------------------------------------- async def downstream_search_preprocss( *, llm, @@ -2243,6 +1132,37 @@ async def upstream_search_preprocess( "- State confidence level based on evidence quality." ) +L1_AGENT_SYS_PROMPT_REBASE_FIX = ( + "You are a security analyst verifying that a CVE fix is PRESENT in a rebased package.\n" + "The TARGET package was REBASED to a newer upstream version that claims to fix this CVE.\n" + "You have access to the upstream patch that shows what the fix looks like.\n\n" + "The patch content is provided below in PATCH_CONTEXT. Use it to understand:\n" + "- VULNERABLE code (- lines that were removed in the fix)\n" + "- FIX code (+ lines that were added in the fix)\n\n" + "YOUR TASK: Verify the TARGET package contains the FIX code (proving rebase was effective).\n\n" + "VERIFICATION STRATEGY:\n" + "1. 
FIRST search for the FIX code pattern (added lines from the patch).\n" + " - Use function names, variable names, or unique code snippets from the '+ lines'.\n" + " - The fix code SHOULD exist in the target package (proving rebase worked).\n" + "2. If fix code is found, optionally confirm VULNERABLE code is ABSENT.\n" + " - The vulnerable code should NOT exist (was replaced by the fix).\n" + "3. CONCLUSION:\n" + " - If fix code EXISTS → Package is PATCHED via rebase.\n" + " - If vulnerable code still EXISTS and fix is ABSENT → Rebase may be incomplete.\n" + " - If neither is found → Use file paths from patch to locate relevant code.\n\n" + "CRITICAL RULES:\n" + "- The patch is from a FIXED version - expect the target to have the fix code.\n" + "- Use file paths and function names from the patch to locate code.\n" + "- Search for distinctive code patterns, not generic keywords.\n" + "- Base conclusions ONLY on tool results, not assumptions.\n\n" + "ANSWER QUALITY:\n" + "- Cite specific file paths and line numbers from tool results.\n" + "- Quote the actual code found, not just describe it.\n" + "- Compare found code against both vulnerable and fix patterns from the patch.\n" + "- Clearly state whether fix code exists, confirming the rebase.\n" + "- State confidence level based on evidence quality." +) + L1_AGENT_PROMPT_TEMPLATE = """{sys_prompt} @@ -2323,6 +1243,30 @@ async def upstream_search_preprocess( {{"thought": "Unexpected: fix code found despite no CVE patch file", "mode": "finish", "actions": null, "final_answer": "The package appears PATCHED via rebase. Found fix code at [file:line]: [quote code]. Although no CVE-specific patch exists, the fix may have been included via upstream version update."}} """ +L1_AGENT_THOUGHT_REBASE_INSTRUCTIONS = """ +1. You MUST select a tool ONLY from . Do NOT invent or use any other tool names. +2. Output valid JSON only. thought < 100 words. final_answer < 150 words. +3. mode="act" REQUIRES actions. 
mode="finish" REQUIRES final_answer. +4. Source Grep: use query field with pattern from patch (function name, variable, or code snippet). +5. Code Keyword Search: use query field for broader searches. +6. Do NOT call the same tool with the same input twice. +7. FIRST search for FIX code (added lines) - it SHOULD exist in rebased target. +8. THEN verify VULNERABLE code (removed lines) is ABSENT from target. +9. If a pattern contains special regex characters, escape them or use literal substrings. + + +{{"thought": "Search for the fix code pattern from the patch to confirm it exists in rebased target", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Locate fix code that should exist after rebase"}}, "final_answer": null}} + + +{{"thought": "Found fix code. Now verify the vulnerable code is absent", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Check if vulnerable code was removed (confirms fix)"}}, "final_answer": null}} + + +{{"thought": "Evidence confirms rebase fix: found fix code, vulnerable code is absent", "mode": "finish", "actions": null, "final_answer": "The package is PATCHED via rebase. Found fix code at [file:line]: [quote code]. The vulnerable code pattern is NOT present - the rebase successfully included the security fix."}} + + +{{"thought": "Unexpected: vulnerable code still present despite rebase claim", "mode": "finish", "actions": null, "final_answer": "The rebase may be INCOMPLETE. Found vulnerable code still present at [file:line]: [quote code]. The fix code was not found despite spec indicating rebase fixed this CVE. 
Manual review required."}} +""" + # --------------------------------------------------------------------------- # L1 Agent Helper Functions diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index 8a0fa06c1..6260f2dd1 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -49,9 +49,11 @@ upstream_search_preprocess, L1_AGENT_SYS_PROMPT_PATCH_AVAILABLE, L1_AGENT_SYS_PROMPT_UPSTREAM_PATCH, + L1_AGENT_SYS_PROMPT_REBASE_FIX, L1_AGENT_PROMPT_TEMPLATE, L1_AGENT_THOUGHT_INSTRUCTIONS, L1_AGENT_THOUGHT_UPSTREAM_INSTRUCTIONS, + L1_AGENT_THOUGHT_REBASE_INSTRUCTIONS, format_patch_files_summary, format_patch_hunks_summary, ) @@ -305,6 +307,33 @@ async def L1_agent(state: CodeAgentState) -> dict: "patch_filename": downstream_report.patch_file_name, "patch_files_count": len(parsed_patch.files) if parsed_patch else 0, }) + elif upstream_report and upstream_report.is_code_fixed_by_rebase == "yes": + parsed_patch = upstream_report.fixed_parsed_patch + + patch_files_summary = format_patch_files_summary(parsed_patch) if parsed_patch else "No patch details available" + patch_hunks_summary = format_patch_hunks_summary(parsed_patch) if parsed_patch else "No patch content available" + cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions) + tools_str = "\n".join(tool_descriptions_list) + tool_strategy = _build_tool_strategy(enabled_tool_names) + + runtime_prompt = L1_AGENT_PROMPT_TEMPLATE.format( + sys_prompt=L1_AGENT_SYS_PROMPT_REBASE_FIX, + vuln_id=vuln_id, + target_package=target_package.name, + cve_description=cve_description, + patch_filename=upstream_report.spec_file_log_change[:100] if upstream_report.spec_file_log_change else "Rebase fix", + patch_files_summary=patch_files_summary, + patch_hunks_summary=patch_hunks_summary, + tools=tools_str, + tool_selection_strategy=tool_strategy, + 
tool_instructions=L1_AGENT_THOUGHT_REBASE_INSTRUCTIONS, + ) + + span.set_output({ + "mode": "rebase_fix_verification", + "spec_log_change": upstream_report.spec_file_log_change[:200] if upstream_report.spec_file_log_change else "", + "patch_files_count": len(parsed_patch.files) if parsed_patch else 0, + }) elif upstream_report and upstream_report.fixed_parsed_patch: parsed_patch = upstream_report.fixed_parsed_patch @@ -615,10 +644,15 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: l1_agent_graph = await create_graph_code_agent(config, builder, message, tracer) initial_state: CodeAgentState = { "messages": [HumanMessage(content="Begin L1 CVE investigation")], + "step": 0, + "max_steps": config.max_iterations, } with tracer.push_active_function("l1_agent_graph", input_data=initial_state["messages"][0].content): - result = await l1_agent_graph.ainvoke(initial_state) + result = await l1_agent_graph.ainvoke( + initial_state, + config={"recursion_limit": config.max_iterations * 4}, + ) logger.info("package_code_agent: L1 investigation finished") diff --git a/src/vuln_analysis/tools/source_inspector.py b/src/vuln_analysis/tools/source_inspector.py index 29a23b89c..fb921086e 100644 --- a/src/vuln_analysis/tools/source_inspector.py +++ b/src/vuln_analysis/tools/source_inspector.py @@ -23,6 +23,7 @@ import asyncio import re +import subprocess from dataclasses import dataclass from pathlib import Path @@ -160,15 +161,19 @@ async def grep_native( cmd.extend(["-m", str(max_results), "--", pattern, str(self._root)]) - proc = await asyncio.create_subprocess_exec( - *cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - stdout, _ = await proc.communicate() + def _run_grep() -> str: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + errors="replace", + ) + return result.stdout + + stdout = await asyncio.to_thread(_run_grep) results: list[GrepMatch] = [] - for line in stdout.decode("utf-8", 
errors="replace").splitlines(): + for line in stdout.splitlines(): parts = line.split(":", 2) if len(parts) >= 3: try: From 2703240dfe7f7751bd566cd510c9c85f4d5ffa3b Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Mon, 4 May 2026 04:47:38 +0000 Subject: [PATCH 21/46] add observation logic --- .../functions/code_agent_graph_defs.py | 68 +++++++++- .../functions/cve_package_code_agent.py | 118 +++++++++++++++++- 2 files changed, 179 insertions(+), 7 deletions(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index fed06ed8f..bbbcc554e 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -40,7 +40,7 @@ logger = logging.getLogger(__name__) -from vuln_analysis.functions.react_internals import CheckerThought +from vuln_analysis.functions.react_internals import CheckerThought,Observation # --------------------------------------------------------------------------- # Graph state # --------------------------------------------------------------------------- @@ -56,6 +56,7 @@ class CodeAgentState(MessagesState): max_steps: NotRequired[int] output: NotRequired[str] thought: NotRequired[CheckerThought | None] + observation: NotRequired[Observation | None] # --------------------------------------------------------------------------- @@ -1268,6 +1269,71 @@ async def upstream_search_preprocess( """ +# --------------------------------------------------------------------------- +# L1 Observation Node Prompts (Comprehension + Memory Update) +# --------------------------------------------------------------------------- + +L1_COMPREHENSION_PROMPT = """Analyze the tool output and extract key findings for CVE patch verification. 
+GOAL: Verify whether {vuln_id} fix is applied to {target_package} +PATCH CONTEXT: +- Vulnerable code patterns (removed lines): {vulnerable_patterns} +- Fix code patterns (added lines): {fix_patterns} + +TOOL USED: {tool_used} +TOOL INPUT: {tool_input} +THOUGHT: {last_thought} +NEW OUTPUT: +{tool_output} + +CODE ANALYSIS RULES: +1. READ the actual code snippets in NEW OUTPUT. Compare against the PATCH CONTEXT patterns. +2. For each match found, determine: + - Does it match the VULNERABLE pattern (code that should be removed/changed)? + - Does it match the FIX pattern (code that should be present after patching)? + - Is it a partial match, context match, or unrelated? +3. RECORD file paths and line numbers for all relevant matches. +4. If no matches found, note which patterns were searched and suggest alternative search terms. + +TOOL-SPECIFIC RULES: +- If NEW OUTPUT is empty or contains an error, findings must state: "FAILED: [tool] [input] - [reason]" +- Source Grep: Check if matches show vulnerable code (needs fix) or fixed code (already patched) +- Code Keyword Search: Use broader context to locate relevant files for follow-up grep + +OUTPUT RULES: +- findings: 2-4 key observations about what the code shows +- tool_outcome: Record the search pattern and result (e.g., "Source Grep [pattern] -> found in file.c:123") +RESPONSE: +{{""" + +L1_MEMORY_UPDATE_PROMPT = """Merge new findings into the CVE patch investigation memory. +GOAL: Verify whether {vuln_id} fix is applied to {target_package} +PREVIOUS MEMORY: {previous_memory} +NEW FINDINGS (from tool analysis): +{findings} +TOOL CALL RECORD: {tool_outcome} + +MEMORY RULES: +1. Start from PREVIOUS MEMORY. Append new facts from NEW FINDINGS. No duplicates. +2. Add TOOL CALL RECORD verbatim so future steps know what was already searched. +3. If NEW FINDINGS report a failure, add the failure to memory. Do NOT infer positive findings. 
+ +PATCH VERIFICATION TRACKING: +- If vulnerable code pattern FOUND: add "VULNERABLE_CODE_FOUND: [pattern] in [file:line]" +- If fix code pattern FOUND: add "FIX_CODE_FOUND: [pattern] in [file:line]" +- If vulnerable code NOT FOUND after searching: add "VULNERABLE_CODE_ABSENT: [pattern] not found" +- If fix code NOT FOUND after searching: add "FIX_CODE_ABSENT: [pattern] not found" + +VERDICT EVIDENCE: +- PATCHED evidence: fix code found AND/OR vulnerable code absent +- VULNERABLE evidence: vulnerable code found AND fix code absent +- INCONCLUSIVE: neither pattern found, or conflicting evidence + +- results: copy the NEW FINDINGS as-is. +- memory: updated cumulative findings with search results and evidence tags. +RESPONSE: +{{""" + + # --------------------------------------------------------------------------- # L1 Agent Helper Functions # --------------------------------------------------------------------------- diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index 6260f2dd1..ce033d6a1 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -28,7 +28,7 @@ from langgraph.graph import StateGraph, START, END from langgraph.prebuilt import ToolNode -from langchain_core.messages import HumanMessage, AIMessage, SystemMessage +from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, RemoveMessage from nat.builder.context import Context from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput @@ -54,18 +54,21 @@ L1_AGENT_THOUGHT_INSTRUCTIONS, L1_AGENT_THOUGHT_UPSTREAM_INSTRUCTIONS, L1_AGENT_THOUGHT_REBASE_INSTRUCTIONS, + L1_COMPREHENSION_PROMPT, + L1_MEMORY_UPDATE_PROMPT, format_patch_files_summary, format_patch_hunks_summary, ) from vuln_analysis.tools.brew_downloader import BrewDownloader, BrewProfileType, BrewDownloaderError from vuln_analysis.utils.full_text_search import 
FullTextSearch -from vuln_analysis.functions.react_internals import CheckerThought +from vuln_analysis.functions.react_internals import CheckerThought, CodeFindings, Observation from vuln_analysis.runtime_context import ctx_state logger = LoggingFactory.get_agent_logger(__name__) import uuid +import tiktoken _RPM_NEVRA_RE = re.compile(r"^(.+?)-(?:(\d+):)?(\d\S*?)-(\S+)$") @@ -213,6 +216,7 @@ class CVEPackageCodeAgentConfig(FunctionBaseConfig, name="cve_package_code_agent llm_name: str = Field(description="The LLM model to use with the L1 code agent.") tool_names: list[str] = Field(default=[], description="The list of tools to provide to L1 code agent") max_iterations: int = Field(default=10, description="The maximum number of iterations for the agent.") + context_window_token_limit: int = Field(default=5000, description="Token limit for context window before pruning old messages.") async def create_graph_code_agent(config: CVEPackageCodeAgentConfig, builder: Builder, state: AgentMorpheusEngineInput, tracer): @@ -220,7 +224,7 @@ async def create_graph_code_agent(config: CVEPackageCodeAgentConfig, builder: Bu THOUGHT_NODE = "think_node" TOOL_NODE = "tool" FORCED_FINISH_NODE = "forced_finish" - OBSERVATION_NODE = "observation" + OBSERVATION_NODE = "observation_node" DOWNSTREAM_SEARCH_NODE = "downstream_search" GATHER_MORE_INFO_NODE = "gather_more_info" L1_AGENT_NODE = "L1_agent" @@ -229,6 +233,8 @@ async def create_graph_code_agent(config: CVEPackageCodeAgentConfig, builder: Bu tools = builder.get_tools(tool_names=config.tool_names, wrapper_type=LLMFrameworkEnum.LANGCHAIN) thought_llm = llm.with_structured_output(CheckerThought) + comprehension_llm = llm.with_structured_output(CodeFindings) + observation_llm = llm.with_structured_output(Observation) # Get tool names after filtering for dynamic guidance enabled_tool_names = [tool.name for tool in tools] tool_descriptions_list = [t.name + ": " + t.description for t in tools] @@ -247,7 +253,28 @@ async def 
create_graph_code_agent(config: CVEPackageCodeAgentConfig, builder: Bu async def lexical_search_fn(query: str) -> list: return fts.search_index(query, top_k=5) + + _tiktoken_enc = tiktoken.get_encoding("cl100k_base") + def _count_tokens(text: str) -> int: + """Count tokens using tiktoken cl100k_base encoding (~90-95% accurate for Llama 3.1).""" + try: + return len(_tiktoken_enc.encode(text)) + except Exception: + return len(text) // 4 + + def _estimate_tokens(runtime_prompt: str, messages: list, observation: Observation | None) -> int: + """Estimate the token count thought_node will send to the LLM.""" + parts = [runtime_prompt] + for msg in messages: + if hasattr(msg, "content") and isinstance(msg.content, str): + parts.append(msg.content) + if observation is not None: + for item in (observation.memory or []): + parts.append(item) + for item in (observation.results or []): + parts.append(item) + return _count_tokens("\n".join(parts)) # -- Locate setup: fix info + BrewDownloader + paths ----------------------- aIntel = intel[0] fix_info = _parse_fix_info_from_context(ctx, target_package.name) @@ -471,8 +498,33 @@ async def forced_finish_node(state: CodeAgentState) -> dict: "messages": [AIMessage(content="Max iterations reached, forcing finish")], } + def _extract_patch_patterns(state: CodeAgentState) -> tuple[str, str]: + """Extract vulnerable and fix patterns from parsed patch in state.""" + downstream_report = state.get("downstream_report") + upstream_report = state.get("upstream_report") + + parsed_patch = None + if downstream_report and downstream_report.parsed_patch: + parsed_patch = downstream_report.parsed_patch + elif upstream_report and upstream_report.fixed_parsed_patch: + parsed_patch = upstream_report.fixed_parsed_patch + + if not parsed_patch or not parsed_patch.files: + return "No patch available", "No patch available" + + vulnerable_lines = [] + fix_lines = [] + for pf in parsed_patch.files: + for hunk in pf.hunks: + 
vulnerable_lines.extend(line.strip() for line in hunk.removed_lines[:3] if line.strip()) + fix_lines.extend(line.strip() for line in hunk.added_lines[:3] if line.strip()) + + vulnerable_patterns = "\n".join(vulnerable_lines[:10]) if vulnerable_lines else "No vulnerable code patterns identified" + fix_patterns = "\n".join(fix_lines[:10]) if fix_lines else "No fix code patterns identified" + return vulnerable_patterns, fix_patterns + async def observation_node(state: CodeAgentState) -> dict: - """Process tool output and add observation to state.""" + """Process tool output: comprehension -> memory update with patch context.""" logger.info("observation_node: starting") tool_message = state["messages"][-1] last_thought = state.get("thought") @@ -483,14 +535,68 @@ async def observation_node(state: CodeAgentState) -> dict: last_thought_text = last_thought.thought tool_used = last_thought.actions.tool tool_input_detail = last_thought.actions.query + previous_memory = state.get("observation").memory if state.get("observation") else ["No data gathered yet."] + + vulnerable_patterns, fix_patterns = _extract_patch_patterns(state) + target_package_name = target_package.name if target_package else "unknown" + with tracer.push_active_function("observation node", input_data=f"tool used:{tool_used} + {tool_input_detail}") as span: tool_output_for_llm = tool_message.content + + # Step 1: Comprehension - extract key findings from raw tool output + comp_prompt = L1_COMPREHENSION_PROMPT.format( + vuln_id=vuln_id, + target_package=target_package_name, + vulnerable_patterns=vulnerable_patterns, + fix_patterns=fix_patterns, + tool_used=tool_used, + tool_input=tool_input_detail, + last_thought=last_thought_text, + tool_output=tool_output_for_llm[:8000], + ) + code_findings: CodeFindings = await comprehension_llm.ainvoke([SystemMessage(content=comp_prompt)]) + findings_text = "\n".join(f"- {f}" for f in code_findings.findings) + + # Step 2: Memory update - merge findings into cumulative 
memory + mem_prompt = L1_MEMORY_UPDATE_PROMPT.format( + vuln_id=vuln_id, + target_package=target_package_name, + previous_memory="\n".join(f"- {m}" for m in previous_memory) if isinstance(previous_memory, list) else previous_memory, + findings=findings_text, + tool_outcome=code_findings.tool_outcome, + ) + new_observation: Observation = await observation_llm.ainvoke([SystemMessage(content=mem_prompt)]) + + messages = state["messages"] + active_prompt = state.get("runtime_prompt") + estimated = _estimate_tokens(active_prompt, messages, new_observation) + prune_messages = [] + orig_estimated = estimated + + if estimated > config.context_window_token_limit and len(messages) > 3: + prunable = messages[1:-2] + for msg in prunable: + prune_messages.append(RemoveMessage(id=msg.id)) + estimated -= _count_tokens(msg.content) if hasattr(msg, "content") and isinstance(msg.content, str) else 0 + if estimated <= config.context_window_token_limit: + break + logger.info( + "Context pruning: removed %d messages, estimated tokens now ~%d (limit %d)", + len(prune_messages), estimated, config.context_window_token_limit, + ) + span.set_output({ "last_thought_text": last_thought_text, - "tool_output_for_llm": tool_output_for_llm, + "tool_output_for_llm": tool_output_for_llm[:500], + "findings": code_findings.findings, + "tool_outcome": code_findings.tool_outcome, + "new_memory": new_observation.memory, + "amount_of_orig_tokens": orig_estimated, + "amount_of_estimated_tokens": estimated, }) return { - "messages": [AIMessage(content="Observation recorded")], + "messages": prune_messages + [AIMessage(content=f"Observation: {code_findings.tool_outcome}")], + "observation": new_observation, } async def should_continue(state: CodeAgentState) -> str: From e07ab79c8210232ac1c5e7f6f126e559e60b26ca Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Mon, 4 May 2026 11:28:42 +0000 Subject: [PATCH 22/46] observation node part2 --- src/exploit_iq_commons/data_models/common.py | 2 +- 
.../functions/code_agent_graph_defs.py | 141 ++++++++++++++ .../functions/cve_package_code_agent.py | 173 +++++++++++++----- 3 files changed, 271 insertions(+), 45 deletions(-) diff --git a/src/exploit_iq_commons/data_models/common.py b/src/exploit_iq_commons/data_models/common.py index 248e0469c..fd4681109 100644 --- a/src/exploit_iq_commons/data_models/common.py +++ b/src/exploit_iq_commons/data_models/common.py @@ -35,7 +35,7 @@ class PipelineMode(str, Enum): Orthogonal to AnalysisType (input format) -- any combination is valid. """ FULL_PIPELINE = "full_pipeline" - PACKAGE_CHECKER = "package_checker" + PACKAGE_CHECKER = "rpm_package_checker" diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index bbbcc554e..ed06ee666 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -57,6 +57,8 @@ class CodeAgentState(MessagesState): output: NotRequired[str] thought: NotRequired[CheckerThought | None] observation: NotRequired[Observation | None] + patch_search_mode: NotRequired[str | None] # "patch_patterns" or "cve_description" + cve_description: NotRequired[str | None] # CVE description for observation_node # --------------------------------------------------------------------------- @@ -1334,6 +1336,145 @@ async def upstream_search_preprocess( {{""" +# --------------------------------------------------------------------------- +# L1 Observation Node Prompts (CVE-Description Mode - No Patch Available) +# --------------------------------------------------------------------------- + +L1_COMPREHENSION_PROMPT_CVE_DESC = """Analyze the tool output for CVE patch verification using CVE description context. +GOAL: Verify whether {vuln_id} fix is applied to {target_package} + +CVE DESCRIPTION: +{cve_description} + +SPEC CHANGELOG (rebase info): +{spec_log_change} + +NOTE: No patch file available. Extract search terms from CVE description. 
+ +TOOL USED: {tool_used} +TOOL INPUT: {tool_input} +THOUGHT: {last_thought} +NEW OUTPUT: +{tool_output} + +CODE ANALYSIS RULES (CVE-Description Mode): +1. EXTRACT key identifiers from the CVE description: + - Function names, variable names, API calls + - File paths or component names mentioned + - Error conditions or attack vectors + +2. For each code match in NEW OUTPUT: + - Does it relate to the vulnerability described? + - Does it show defensive patterns (bounds checking, null validation, error handling)? + - Record file path and line number as evidence + +3. DEFENSIVE PATTERNS indicating a fix: + - Input validation, bounds checking, null guards + - Resource cleanup, error handling + - Security-related function calls + +TOOL-SPECIFIC RULES: +- If NEW OUTPUT is empty or contains an error: "FAILED: [tool] [input] - [reason]" +- Source Grep: Check if matches show vulnerable behavior or fixed/defensive code +- Code Keyword Search: Use to locate files containing CVE-related symbols + +OUTPUT: +- findings: 2-4 observations about code relative to CVE +- tool_outcome: "Source Grep [pattern] -> found in file.c:123" +RESPONSE: +{{""" + +L1_MEMORY_UPDATE_PROMPT_CVE_DESC = """Merge findings into CVE patch investigation memory. +GOAL: Verify whether {vuln_id} fix is applied to {target_package} +MODE: CVE-description based (no patch patterns) + +PREVIOUS MEMORY: {previous_memory} +NEW FINDINGS: {findings} +TOOL CALL RECORD: {tool_outcome} + +MEMORY RULES: +1. Append new facts from NEW FINDINGS to PREVIOUS MEMORY. No duplicates. +2. Add TOOL CALL RECORD verbatim. 
+ +CVE-BASED TRACKING: +- CVE-related code FOUND: "CVE_CODE_FOUND: [symbol] in [file:line]" +- Defensive pattern FOUND: "DEFENSIVE_CODE_FOUND: [pattern] in [file:line]" +- Search no match: "SEARCH_NO_MATCH: [pattern]" + +VERDICT (CVE-description mode): +- LIKELY_PATCHED: defensive code found, no vulnerability indicators +- LIKELY_VULNERABLE: vulnerability patterns found, no defensive code +- INCONCLUSIVE: insufficient evidence + +- results: copy the NEW FINDINGS as-is. +- memory: updated cumulative findings with evidence tags. +RESPONSE: +{{""" + + +# --------------------------------------------------------------------------- +# L1 Agent Prompt Template (No Patch - CVE Description Mode) +# --------------------------------------------------------------------------- + +L1_AGENT_SYS_PROMPT_REBASE_NO_PATCH = """You are a security analyst verifying that a CVE fix is PRESENT in a rebased package. +The TARGET package was REBASED to a newer upstream version that claims to fix this CVE. +NO PATCH FILE IS AVAILABLE - you must use the CVE description to guide your search. + +YOUR TASK: Verify the TARGET package contains the fix by searching for: +1. Code patterns mentioned in the CVE description +2. Defensive code that would mitigate the vulnerability +3. Function/symbol names related to the CVE + +VERIFICATION STRATEGY (No Patch Mode): +1. EXTRACT key identifiers from the CVE description: + - Function names, API calls, variable names + - Vulnerable code constructs described + - Fixed/secure code patterns described +2. SEARCH for these patterns in the target source code +3. ANALYZE the code to determine if it shows the fix behavior +4. 
CONCLUDE based on presence of defensive code and absence of vulnerability indicators""" + +L1_AGENT_PROMPT_TEMPLATE_NO_PATCH = """{sys_prompt} + +CVE ID: {vuln_id} +TARGET PACKAGE: {target_package} + +CVE DESCRIPTION: +{cve_description} + +SPEC CHANGELOG (indicates rebase fixed this CVE): +{spec_log_change} + + +{tools} + + +{tool_selection_strategy} + +{tool_instructions}""" + +L1_AGENT_THOUGHT_CVE_DESC_INSTRUCTIONS = """RESPONSE FORMAT (JSON): +You must respond with a JSON object with these fields: +- thought: Your reasoning about what to search for based on CVE description +- mode: "act" (to use a tool) or "finish" (to provide final answer) +- actions: (only if mode="act") {{"tool": "Tool Name", "query": "search term", "reason": "why this search"}} +- final_answer: (only if mode="finish") Your conclusion about patch status + +SEARCH STRATEGY: +1. Extract function names, API calls, or code patterns from CVE description +2. Search for these patterns in the source code +3. Look for defensive/secure coding patterns +4. Conclude based on evidence found + + +{{"thought": "CVE mentions SSL_OP_NO_TICKET as mitigation. Search for this option in source.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "SSL_OP_NO_TICKET", "reason": "Find CVE-related mitigation code"}}, "final_answer": null}} + + + +{{"thought": "Found defensive code: SSL_OP_NO_TICKET is used in ssl_lib.c. This matches the CVE fix pattern.", "mode": "finish", "actions": null, "final_answer": "The package is LIKELY PATCHED. 
Found CVE-related defensive code at ssl_lib.c:4190 showing SSL_OP_NO_TICKET usage, which matches the described fix for CVE-2024-2511."}} +""" + + # --------------------------------------------------------------------------- # L1 Agent Helper Functions # --------------------------------------------------------------------------- diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index ce033d6a1..2771def65 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -50,12 +50,17 @@ L1_AGENT_SYS_PROMPT_PATCH_AVAILABLE, L1_AGENT_SYS_PROMPT_UPSTREAM_PATCH, L1_AGENT_SYS_PROMPT_REBASE_FIX, + L1_AGENT_SYS_PROMPT_REBASE_NO_PATCH, L1_AGENT_PROMPT_TEMPLATE, + L1_AGENT_PROMPT_TEMPLATE_NO_PATCH, L1_AGENT_THOUGHT_INSTRUCTIONS, L1_AGENT_THOUGHT_UPSTREAM_INSTRUCTIONS, L1_AGENT_THOUGHT_REBASE_INSTRUCTIONS, + L1_AGENT_THOUGHT_CVE_DESC_INSTRUCTIONS, L1_COMPREHENSION_PROMPT, + L1_COMPREHENSION_PROMPT_CVE_DESC, L1_MEMORY_UPDATE_PROMPT, + L1_MEMORY_UPDATE_PROMPT_CVE_DESC, format_patch_files_summary, format_patch_hunks_summary, ) @@ -307,6 +312,7 @@ async def L1_agent(state: CodeAgentState) -> dict: upstream_report = state.get("upstream_report") with tracer.push_active_function("L1_agent", input_data={}) as span: + # Use case 1: Downstream patch file is available if downstream_report and downstream_report.is_patch_file_available: parsed_patch = downstream_report.parsed_patch @@ -315,6 +321,7 @@ async def L1_agent(state: CodeAgentState) -> dict: cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions) tools_str = "\n".join(tool_descriptions_list) tool_strategy = _build_tool_strategy(enabled_tool_names) + patch_search_mode = "patch_patterns" runtime_prompt = L1_AGENT_PROMPT_TEMPLATE.format( sys_prompt=L1_AGENT_SYS_PROMPT_PATCH_AVAILABLE, @@ -334,33 +341,57 @@ async def L1_agent(state: CodeAgentState) -> dict: "patch_filename": 
downstream_report.patch_file_name, "patch_files_count": len(parsed_patch.files) if parsed_patch else 0, }) + # Use case 2: code is fixed by rebase elif upstream_report and upstream_report.is_code_fixed_by_rebase == "yes": - parsed_patch = upstream_report.fixed_parsed_patch - - patch_files_summary = format_patch_files_summary(parsed_patch) if parsed_patch else "No patch details available" - patch_hunks_summary = format_patch_hunks_summary(parsed_patch) if parsed_patch else "No patch content available" cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions) tools_str = "\n".join(tool_descriptions_list) tool_strategy = _build_tool_strategy(enabled_tool_names) - runtime_prompt = L1_AGENT_PROMPT_TEMPLATE.format( - sys_prompt=L1_AGENT_SYS_PROMPT_REBASE_FIX, - vuln_id=vuln_id, - target_package=target_package.name, - cve_description=cve_description, - patch_filename=upstream_report.spec_file_log_change[:100] if upstream_report.spec_file_log_change else "Rebase fix", - patch_files_summary=patch_files_summary, - patch_hunks_summary=patch_hunks_summary, - tools=tools_str, - tool_selection_strategy=tool_strategy, - tool_instructions=L1_AGENT_THOUGHT_REBASE_INSTRUCTIONS, - ) - - span.set_output({ - "mode": "rebase_fix_verification", - "spec_log_change": upstream_report.spec_file_log_change[:200] if upstream_report.spec_file_log_change else "", - "patch_files_count": len(parsed_patch.files) if parsed_patch else 0, - }) + if upstream_report.is_fixed_srpm_is_needed and upstream_report.fixed_parsed_patch: + # Has patch context - use patch-based verification + parsed_patch = upstream_report.fixed_parsed_patch + patch_files_summary = format_patch_files_summary(parsed_patch) + patch_hunks_summary = format_patch_hunks_summary(parsed_patch) + patch_search_mode = "patch_patterns" + + runtime_prompt = L1_AGENT_PROMPT_TEMPLATE.format( + sys_prompt=L1_AGENT_SYS_PROMPT_REBASE_FIX, + vuln_id=vuln_id, + target_package=target_package.name, + cve_description=cve_description, + 
patch_filename=upstream_report.spec_file_log_change[:100] if upstream_report.spec_file_log_change else "Rebase fix", + patch_files_summary=patch_files_summary, + patch_hunks_summary=patch_hunks_summary, + tools=tools_str, + tool_selection_strategy=tool_strategy, + tool_instructions=L1_AGENT_THOUGHT_REBASE_INSTRUCTIONS, + ) + + span.set_output({ + "mode": "rebase_fix_verification", + "spec_log_change": upstream_report.spec_file_log_change[:200] if upstream_report.spec_file_log_change else "", + "patch_files_count": len(parsed_patch.files) if parsed_patch else 0, + }) + else: + # No patch context - use CVE description-based verification + patch_search_mode = "cve_description" + + runtime_prompt = L1_AGENT_PROMPT_TEMPLATE_NO_PATCH.format( + sys_prompt=L1_AGENT_SYS_PROMPT_REBASE_NO_PATCH, + vuln_id=vuln_id, + target_package=target_package.name, + cve_description=cve_description, + spec_log_change=upstream_report.spec_file_log_change or "", + tools=tools_str, + tool_selection_strategy=tool_strategy, + tool_instructions=L1_AGENT_THOUGHT_CVE_DESC_INSTRUCTIONS, + ) + + span.set_output({ + "mode": "rebase_fix_cve_description", + "spec_log_change": upstream_report.spec_file_log_change[:200] if upstream_report.spec_file_log_change else "", + }) + # use case 3: in target patch was not found but patch is found in the rpm that was mention in cve that is fixed elif upstream_report and upstream_report.fixed_parsed_patch: parsed_patch = upstream_report.fixed_parsed_patch @@ -369,6 +400,7 @@ async def L1_agent(state: CodeAgentState) -> dict: cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions) tools_str = "\n".join(tool_descriptions_list) tool_strategy = _build_tool_strategy(enabled_tool_names) + patch_search_mode = "patch_patterns" runtime_prompt = L1_AGENT_PROMPT_TEMPLATE.format( sys_prompt=L1_AGENT_SYS_PROMPT_UPSTREAM_PATCH, @@ -389,7 +421,9 @@ async def L1_agent(state: CodeAgentState) -> dict: "patch_files_count": len(parsed_patch.files) if parsed_patch 
else 0, }) else: + # Default prompt - no patch context, use CVE description cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions) + patch_search_mode = "cve_description" runtime_prompt = ( "You are a security analyst investigating a CVE.\n\n" f"CVE ID: {vuln_id}\n" @@ -403,6 +437,8 @@ async def L1_agent(state: CodeAgentState) -> dict: return { "runtime_prompt": runtime_prompt, + "patch_search_mode": patch_search_mode, + "cve_description": cve_description, } async def should_continue_downstream(state: CodeAgentState) -> str: @@ -440,6 +476,7 @@ async def downstream_search(state: CodeAgentState) -> dict: "messages": [AIMessage(content="Downstream flow preprocess completed")], } + async def gather_more_info(state: CodeAgentState) -> dict: logger.info("gather_more_info: starting") with tracer.push_active_function("gather_more_info", input_data={}) as span: @@ -451,8 +488,12 @@ async def gather_more_info(state: CodeAgentState) -> dict: source_path=Path(source_dir), tracer=tracer, ) + + + span.set_output({ "is_fixed_srpm_is_needed": report.is_fixed_srpm_is_needed, + "is_rebase_fix": report.is_code_fixed_by_rebase == "yes", }) return { "messages": [AIMessage(content="Gathering more information...")], @@ -466,6 +507,14 @@ async def thought_node(state: CodeAgentState) -> dict: runtime_prompt = state.get("runtime_prompt") or "You are a security analyst investigating a CVE." 
messages = [SystemMessage(content=runtime_prompt)] + state["messages"] with tracer.push_active_function("thought_node", input_data=messages) as span: + obs = state.get("observation", None) + if obs is not None: + memory_list = obs.memory if obs.memory else ["No prior knowledge."] + recent_findings = obs.results if obs.results else ["No recent findings."] + memory_context = "\n".join(f"- {m}" for m in memory_list) + findings_context = "\n".join(f"- {f}" for f in recent_findings) + context_block = f"KNOWLEDGE:\n{memory_context}\nLATEST FINDINGS:\n{findings_context}" + messages.append(SystemMessage(content=context_block)) response: CheckerThought = await thought_llm.ainvoke(messages) if response.mode == "finish": ai_message = AIMessage(content=response.final_answer) @@ -539,33 +588,69 @@ async def observation_node(state: CodeAgentState) -> dict: vulnerable_patterns, fix_patterns = _extract_patch_patterns(state) target_package_name = target_package.name if target_package else "unknown" + patch_search_mode = state.get("patch_search_mode", "patch_patterns") with tracer.push_active_function("observation node", input_data=f"tool used:{tool_used} + {tool_input_detail}") as span: tool_output_for_llm = tool_message.content - # Step 1: Comprehension - extract key findings from raw tool output - comp_prompt = L1_COMPREHENSION_PROMPT.format( - vuln_id=vuln_id, - target_package=target_package_name, - vulnerable_patterns=vulnerable_patterns, - fix_patterns=fix_patterns, - tool_used=tool_used, - tool_input=tool_input_detail, - last_thought=last_thought_text, - tool_output=tool_output_for_llm[:8000], - ) - code_findings: CodeFindings = await comprehension_llm.ainvoke([SystemMessage(content=comp_prompt)]) - findings_text = "\n".join(f"- {f}" for f in code_findings.findings) + if patch_search_mode == "cve_description": + # CVE-description based prompts (no patch patterns available) + cve_description = state.get("cve_description", "") + spec_log_change = "" + upstream_report = 
state.get("upstream_report") + if upstream_report: + spec_log_change = upstream_report.spec_file_log_change or "" + + # Step 1: Comprehension - extract findings using CVE description context + comp_prompt = L1_COMPREHENSION_PROMPT_CVE_DESC.format( + vuln_id=vuln_id, + target_package=target_package_name, + cve_description=cve_description, + spec_log_change=spec_log_change, + tool_used=tool_used, + tool_input=tool_input_detail, + last_thought=last_thought_text, + tool_output=tool_output_for_llm[:8000], + ) + code_findings: CodeFindings = await comprehension_llm.ainvoke([SystemMessage(content=comp_prompt)]) + findings_text = "\n".join(f"- {f}" for f in code_findings.findings) - # Step 2: Memory update - merge findings into cumulative memory - mem_prompt = L1_MEMORY_UPDATE_PROMPT.format( - vuln_id=vuln_id, - target_package=target_package_name, - previous_memory="\n".join(f"- {m}" for m in previous_memory) if isinstance(previous_memory, list) else previous_memory, - findings=findings_text, - tool_outcome=code_findings.tool_outcome, - ) - new_observation: Observation = await observation_llm.ainvoke([SystemMessage(content=mem_prompt)]) + # Step 2: Memory update - merge findings with CVE-based tracking + mem_prompt = L1_MEMORY_UPDATE_PROMPT_CVE_DESC.format( + vuln_id=vuln_id, + target_package=target_package_name, + previous_memory="\n".join(f"- {m}" for m in previous_memory) if isinstance(previous_memory, list) else previous_memory, + findings=findings_text, + tool_outcome=code_findings.tool_outcome, + ) + new_observation: Observation = await observation_llm.ainvoke([SystemMessage(content=mem_prompt)]) + else: + # Patch-based prompts (default) + vulnerable_patterns, fix_patterns = _extract_patch_patterns(state) + + # Step 1: Comprehension - extract key findings from raw tool output + comp_prompt = L1_COMPREHENSION_PROMPT.format( + vuln_id=vuln_id, + target_package=target_package_name, + vulnerable_patterns=vulnerable_patterns, + fix_patterns=fix_patterns, + 
tool_used=tool_used, + tool_input=tool_input_detail, + last_thought=last_thought_text, + tool_output=tool_output_for_llm[:8000], + ) + code_findings: CodeFindings = await comprehension_llm.ainvoke([SystemMessage(content=comp_prompt)]) + findings_text = "\n".join(f"- {f}" for f in code_findings.findings) + + # Step 2: Memory update - merge findings into cumulative memory + mem_prompt = L1_MEMORY_UPDATE_PROMPT.format( + vuln_id=vuln_id, + target_package=target_package_name, + previous_memory="\n".join(f"- {m}" for m in previous_memory) if isinstance(previous_memory, list) else previous_memory, + findings=findings_text, + tool_outcome=code_findings.tool_outcome, + ) + new_observation: Observation = await observation_llm.ainvoke([SystemMessage(content=mem_prompt)]) messages = state["messages"] active_prompt = state.get("runtime_prompt") From 0070e579516d31982d640e454808613476f9fb17 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Mon, 4 May 2026 13:08:30 +0000 Subject: [PATCH 23/46] Integration L2 with flow --- .../data_models/checker_status.py | 73 ++++ .../configs/config-http-openai.yml | 5 + .../functions/code_agent_graph_defs.py | 66 ++- .../functions/cve_checker_report.py | 386 ++++++++++++++++++ .../functions/cve_package_code_agent.py | 257 +++--------- .../functions/react_internals.py | 20 + src/vuln_analysis/register.py | 61 ++- 7 files changed, 671 insertions(+), 197 deletions(-) create mode 100644 src/vuln_analysis/functions/cve_checker_report.py diff --git a/src/exploit_iq_commons/data_models/checker_status.py b/src/exploit_iq_commons/data_models/checker_status.py index 1d9dcfe5e..2dbfc6f93 100644 --- a/src/exploit_iq_commons/data_models/checker_status.py +++ b/src/exploit_iq_commons/data_models/checker_status.py @@ -15,6 +15,7 @@ from enum import Enum from enum import IntEnum from pathlib import Path +from typing import Any, Literal from pydantic import BaseModel, Field @@ -64,9 +65,81 @@ class AcquiredArtifacts(BaseModel): patch_diff_path: Path | None = 
None +class L1InvestigationResult(BaseModel): + """Intermediate result from L1 investigation, input to L2 or report generation.""" + downstream_report: dict[str, Any] | None = Field( + default=None, + description="Serialized DownstreamSearchReport from L1 investigation", + ) + upstream_report: dict[str, Any] | None = Field( + default=None, + description="Serialized UpstreamSearchReport from L1 investigation", + ) + l1_agent_answer: str | None = Field( + default=None, + description="Final answer from the L1 ReAct agent", + ) + affected_files: list[str] = Field( + default_factory=list, + description="Source files identified as CVE-relevant by L1", + ) + preliminary_verdict: Literal["vulnerable", "protected", "not_present", "uncertain"] = Field( + default="uncertain", + description="L1 verdict before L2 refinement", + ) + confidence: float = Field( + default=0.0, + ge=0.0, + le=1.0, + description="Confidence in the preliminary verdict", + ) + + +class L2BuildResult(BaseModel): + """Result from L2 Build Agent (BuildCompilationCheck + HardeningCheck).""" + compilation_status: Literal["compiled", "not_compiled", "unknown"] = Field( + default="unknown", + description="Whether vulnerable code is compiled into the binary", + ) + compilation_confidence: float = Field( + default=0.0, + ge=0.0, + le=1.0, + description="Confidence in compilation status", + ) + compilation_evidence: str | None = Field( + default=None, + description="Evidence supporting compilation status", + ) + hardening_relevant: bool | None = Field( + default=None, + description="Whether detected hardening flags are relevant to the CVE", + ) + hardening_flags: list[str] = Field( + default_factory=list, + description="Hardening flags detected in build log or binary", + ) + hardening_rationale: str | None = Field( + default=None, + description="Rationale for hardening relevance judgment", + ) + l2_override_verdict: Literal["not_vulnerable", "vulnerable_mitigated", None] = Field( + default=None, + 
description="L2 verdict override (if any)", + ) + + class PackageCheckerContext(BaseModel): """Consolidates all checker-specific state on AgentMorpheusInfo.""" status: PackageCheckerStatus | None = None source_key: str | None = None artifacts: AcquiredArtifacts = Field(default_factory=AcquiredArtifacts) identify_result: PackageIdentifyResult = Field(default_factory=PackageIdentifyResult) + l1_result: L1InvestigationResult | None = Field( + default=None, + description="Result from L1 Code Agent investigation", + ) + l2_result: L2BuildResult | None = Field( + default=None, + description="Result from L2 Build Agent (optional)", + ) diff --git a/src/vuln_analysis/configs/config-http-openai.yml b/src/vuln_analysis/configs/config-http-openai.yml index 6326cbbcb..f2961c600 100644 --- a/src/vuln_analysis/configs/config-http-openai.yml +++ b/src/vuln_analysis/configs/config-http-openai.yml @@ -170,6 +170,10 @@ functions: tool_names: - Source Grep - Code Keyword Search + cve_checker_report: + _type: cve_checker_report + llm_name: cve_agent_executor_llm + base_checker_dir: .cache/am_cache/checker health_check: _type: health_check @@ -264,6 +268,7 @@ workflow: cve_source_acquisition_name: cve_source_acquisition cve_checker_segmentation_name: cve_checker_segmentation cve_package_code_agent_name: cve_package_code_agent + cve_checker_report_name: cve_checker_report eval: general: diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index ed06ee666..b85988bff 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -40,7 +40,7 @@ logger = logging.getLogger(__name__) -from vuln_analysis.functions.react_internals import CheckerThought,Observation +from vuln_analysis.functions.react_internals import CheckerThought, Observation, L1VerdictExtraction # --------------------------------------------------------------------------- # Graph state # 
--------------------------------------------------------------------------- @@ -283,6 +283,24 @@ def to_markdown( # Prompt templates # --------------------------------------------------------------------------- +L1_VERDICT_EXTRACTION_PROMPT = """\ +Extract the security verdict from this L1 agent investigation conclusion. + +CVE: {vuln_id} +Package: {target_package} + +L1 Agent Final Answer: +{final_answer} + +Classify the conclusion into one of these categories: +- "protected": The package is protected (patch applied, fix backported, or mitigating control present) +- "not_present": The vulnerable code/function is not present in this version +- "vulnerable": The vulnerable code is confirmed present and unpatched +- "uncertain": Insufficient evidence or conflicting findings + +Provide your confidence level (0.0-1.0) based on the strength of evidence in the answer. +""" + CODE_AGENT_REPORT_PROMPT = """\ You are a security analyst generating the final L1 Code Agent investigation report. @@ -686,6 +704,52 @@ def _extract_code_snippets( return snippets +async def extract_l1_verdict( + llm, + vuln_id: str, + target_package: str, + final_answer: str, + tracer, +) -> L1VerdictExtraction: + """Use LLM to extract structured verdict from L1 agent's final answer. + + Parameters + ---------- + llm: + LangChain LLM for verdict extraction. + vuln_id: + CVE identifier (e.g. "CVE-2026-5121"). + target_package: + Name of the package being investigated. + final_answer: + The L1 agent's final answer text. + tracer: + Request-scoped tracing context. + + Returns + ------- + L1VerdictExtraction + Structured verdict with confidence and reasoning. 
+ """ + verdict_llm = llm.with_structured_output(L1VerdictExtraction) + prompt = L1_VERDICT_EXTRACTION_PROMPT.format( + vuln_id=vuln_id, + target_package=target_package, + final_answer=final_answer, + ) + with tracer.push_active_function("extract_l1_verdict", input_data={"vuln_id": vuln_id}) as span: + result = await verdict_llm.ainvoke([SystemMessage(content=prompt)]) + span.set_output({ + "preliminary_verdict": result.preliminary_verdict, + "confidence": result.confidence, + }) + logger.info( + "extract_l1_verdict: verdict=%s confidence=%.2f", + result.preliminary_verdict, result.confidence, + ) + return result + + async def generate_code_agent_report( *, llm, diff --git a/src/vuln_analysis/functions/cve_checker_report.py b/src/vuln_analysis/functions/cve_checker_report.py new file mode 100644 index 000000000..f49b40789 --- /dev/null +++ b/src/vuln_analysis/functions/cve_checker_report.py @@ -0,0 +1,386 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +CVE Checker Report Generation Function. + +This module provides the report generation node for the L1/L2 pipeline. +It consumes L1InvestigationResult (and optionally L2BuildResult) from +checker_context and produces the final AgentMorpheusOutput. 
+""" + +from pathlib import Path +from typing import Literal + +from aiq.builder.builder import Builder +from aiq.builder.framework_enum import LLMFrameworkEnum +from aiq.builder.function_info import FunctionInfo +from aiq.cli.register_workflow import register_function +from aiq.data_models.function import FunctionBaseConfig +from pydantic import Field + +from exploit_iq_commons.logging.loggers_factory import LoggingFactory, trace_id +from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput +from exploit_iq_commons.data_models.checker_status import L1InvestigationResult, L2BuildResult + +from nat.builder.context import Context +from vuln_analysis.data_models.output import ( + AgentMorpheusEngineOutput, + AgentMorpheusOutput, + ChecklistItemOutput, + JustificationOutput, + OutputPayload, +) +from vuln_analysis.functions.code_agent_graph_defs import ( + CodeAgentReport, + DownstreamSearchReport, + UpstreamSearchReport, + generate_code_agent_report, +) + +logger = LoggingFactory.get_agent_logger(__name__) + + +_StatusLiteral = Literal["TRUE", "FALSE", "UNKNOWN"] + +_JUSTIFICATION_LABEL_TO_STATUS: dict[str, _StatusLiteral] = { + "code_not_present": "FALSE", + "code_not_reachable": "FALSE", + "protected_by_mitigating_control": "FALSE", + "protected_by_compiler": "FALSE", + "vulnerable": "TRUE", + "uncertain": "UNKNOWN", +} + + +_POLICY_MAX_RPM_LIST_ITEMS = 5 +_POLICY_RHSA_STATEMENT_CAP = 400 +_POLICY_MAX_PACKAGE_STATE_ITEMS = 8 + + +def _format_policy_context_for_report( + *, + target_nvr: str, + identify_result, + intel, +) -> str: + """Build a context block for the LLM prompt covering NVR posture and RHSA excerpts.""" + lines: list[str] = [] + + if target_nvr: + lines.append(f"**Scanned target NVR:** `{target_nvr}`") + + if identify_result: + affected = identify_result.affected_rpm_list or [] + fixed = identify_result.fixed_rpm_list or [] + + if affected: + shown = affected[:_POLICY_MAX_RPM_LIST_ITEMS] + suffix = f" (+ {len(affected) - len(shown)} 
more)" if len(affected) > len(shown) else "" + lines.append(f"**Affected NVRs from identify:** {', '.join(f'`{n}`' for n in shown)}{suffix}") + lines.append(f" - is_target_package_affected: `{identify_result.is_target_package_affected.value}`") + + if fixed: + shown = fixed[:_POLICY_MAX_RPM_LIST_ITEMS] + suffix = f" (+ {len(fixed) - len(shown)} more)" if len(fixed) > len(shown) else "" + lines.append(f"**Fixed NVRs from identify:** {', '.join(f'`{n}`' for n in shown)}{suffix}") + lines.append(f" - is_target_package_fixed: `{identify_result.is_target_package_fixed.value}`") + + rhsa = None + if intel and len(intel) > 0: + rhsa = intel[0].rhsa + + if rhsa: + if rhsa.statement: + stmt = rhsa.statement + if len(stmt) > _POLICY_RHSA_STATEMENT_CAP: + stmt = stmt[:_POLICY_RHSA_STATEMENT_CAP] + " …" + lines.append(f"**RHSA statement excerpt:** {stmt}") + + if rhsa.upstream_fix: + lines.append(f"**RHSA upstream_fix:** `{rhsa.upstream_fix}`") + + pkg_states = rhsa.package_state or [] + if pkg_states: + lines.append("**RHSA package_state:**") + for ps in pkg_states[:_POLICY_MAX_PACKAGE_STATE_ITEMS]: + parts = [] + if ps.product_name: + parts.append(ps.product_name) + if ps.package_name: + parts.append(f"pkg={ps.package_name}") + if ps.fix_state: + parts.append(f"fix_state={ps.fix_state}") + if parts: + lines.append(f" - {' | '.join(parts)}") + if len(pkg_states) > _POLICY_MAX_PACKAGE_STATE_ITEMS: + lines.append(f" - (+ {len(pkg_states) - _POLICY_MAX_PACKAGE_STATE_ITEMS} more)") + + return "\n".join(lines) + + +def _apply_l2_verdict( + report: CodeAgentReport, + l2_result: L2BuildResult, +) -> CodeAgentReport: + """Apply L2 Build Agent verdict overrides to the CodeAgentReport.""" + if l2_result.l2_override_verdict is None: + return report + + updated_fields = {} + + if l2_result.l2_override_verdict == "not_vulnerable": + if l2_result.compilation_status == "not_compiled": + updated_fields["justification_label"] = "code_not_present" + updated_fields["executive_summary"] = ( + 
f"{report.executive_summary}\n\n" + f"**L2 Override:** Vulnerable code is NOT compiled into the binary. " + f"Evidence: {l2_result.compilation_evidence or 'Build analysis confirmed exclusion.'}" + ) + else: + updated_fields["justification_label"] = "code_not_reachable" + updated_fields["executive_summary"] = ( + f"{report.executive_summary}\n\n" + f"**L2 Override:** Code determined not vulnerable by L2 analysis." + ) + + elif l2_result.l2_override_verdict == "vulnerable_mitigated": + if l2_result.hardening_relevant and l2_result.hardening_flags: + updated_fields["justification_label"] = "protected_by_compiler" + flags_str = ", ".join(l2_result.hardening_flags[:5]) + updated_fields["executive_summary"] = ( + f"{report.executive_summary}\n\n" + f"**L2 Override:** Vulnerability mitigated by compiler hardening flags: {flags_str}. " + f"Rationale: {l2_result.hardening_rationale or 'Hardening flags provide protection.'}" + ) + else: + updated_fields["justification_label"] = "protected_by_mitigating_control" + updated_fields["executive_summary"] = ( + f"{report.executive_summary}\n\n" + f"**L2 Override:** Vulnerability mitigated by build-time controls." 
+ ) + + if updated_fields: + evidence = list(report.evidence_chain) + evidence.append(f"L2 Build Agent: {l2_result.l2_override_verdict}") + if l2_result.compilation_evidence: + evidence.append(f"L2 compilation evidence: {l2_result.compilation_evidence}") + if l2_result.hardening_rationale: + evidence.append(f"L2 hardening rationale: {l2_result.hardening_rationale}") + updated_fields["evidence_chain"] = evidence + + return report.model_copy(update=updated_fields) + + return report + + +def _build_analysis( + message: AgentMorpheusEngineInput, + code_agent_report: CodeAgentReport, + l1_result: L1InvestigationResult, +) -> list[AgentMorpheusEngineOutput]: + """Build the final analysis output from the code agent report.""" + label = code_agent_report.justification_label + status: _StatusLiteral = _JUSTIFICATION_LABEL_TO_STATUS.get(label, "UNKNOWN") + + reason_parts = [code_agent_report.executive_summary] + if code_agent_report.evidence_chain: + reason_parts.append("\n\nEvidence chain:") + reason_parts.extend(f"- {ev}" for ev in code_agent_report.evidence_chain[:5]) + if code_agent_report.patch_analysis: + reason_parts.append(f"\n\nPatch analysis: {code_agent_report.patch_analysis}") + if code_agent_report.code_snippets: + reason_parts.append("\n\nCode snippets:") + for snippet in code_agent_report.code_snippets[:3]: + reason_parts.append(f"- [{snippet.snippet_type}] {snippet.file_path}:{snippet.line_number or 'N/A'}") + if code_agent_report.limitations: + reason_parts.append("\n\nLimitations:") + reason_parts.extend(f"- {lim}" for lim in code_agent_report.limitations) + reason_parts.append(f"\n\nRecommendation: {code_agent_report.recommendation}") + reason = "\n".join(reason_parts) + summary = code_agent_report.executive_summary + + response_text = l1_result.l1_agent_answer or "L1 agent completed investigation." 
+ + return [ + AgentMorpheusEngineOutput( + vuln_id=intel.vuln_id, + checklist=[ + ChecklistItemOutput( + input="L1 Package Code Agent analysis", + response=response_text, + ), + ], + summary=summary, + justification=JustificationOutput( + label=label, + reason=reason, + status=status, + ), + intel_score=0, + cvss=None, + ) + for intel in (message.info.intel if message.info and message.info.intel else []) + ] + + +class CVECheckerReportConfig(FunctionBaseConfig, name="cve_checker_report"): + """Configuration for the CVE Checker Report generation function.""" + base_checker_dir: str = Field( + default=".cache/am_cache/checker", + description="Root directory for checker-specific artifacts.", + ) + llm_name: str = Field(description="The LLM model to use for report generation.") + + +@register_function(config_type=CVECheckerReportConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) +async def cve_checker_report(config: CVECheckerReportConfig, builder: Builder): + """Report generation function for the L1/L2 checker pipeline.""" + + async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: + """Generate the final checker report from L1 (and optionally L2) results.""" + trace_id.set(message.input.scan.id) + tracer = Context.get() + + logger.info("cve_checker_report: starting report generation") + + ctx = message.info.checker_context + if ctx is None or ctx.l1_result is None: + logger.error("cve_checker_report: no L1 result available") + return AgentMorpheusOutput( + input=message.input, + info=message.info, + output=OutputPayload( + analysis=[ + AgentMorpheusEngineOutput( + vuln_id=intel.vuln_id, + checklist=[], + summary="L1 investigation did not produce results.", + justification=JustificationOutput( + label="uncertain", + reason="L1 result not available for report generation.", + status="UNKNOWN", + ), + intel_score=0, + cvss=None, + ) + for intel in (message.info.intel if message.info and message.info.intel else []) + ], + vex=None, + ), + ) + + 
l1_result = ctx.l1_result + l2_result = ctx.l2_result + + downstream_report: DownstreamSearchReport | None = None + upstream_report: UpstreamSearchReport | None = None + + if l1_result.downstream_report: + downstream_report = DownstreamSearchReport.model_validate(l1_result.downstream_report) + if l1_result.upstream_report: + upstream_report = UpstreamSearchReport.model_validate(l1_result.upstream_report) + + vuln_id = message.input.scan.vulns[0].vuln_id + target_package = message.input.image.target_package + target_package_name = target_package.name if target_package else "unknown" + intel = message.info.intel + + descriptions: list[tuple[str, str]] = [] + if intel: + a_intel = intel[0] + if a_intel.ghsa: + cve_text = a_intel.ghsa.description or a_intel.ghsa.summary or "" + if cve_text: + descriptions.append(("ghsa", cve_text)) + if a_intel.ubuntu and a_intel.ubuntu.description: + descriptions.append(("ubuntu", a_intel.ubuntu.description)) + + version = (target_package.version or "") if target_package else "" + release = (target_package.release or "") if target_package else "" + target_nvr = f"{target_package_name}-{version}-{release}" if target_package_name else "" + + policy_context = _format_policy_context_for_report( + target_nvr=target_nvr, + identify_result=ctx.identify_result, + intel=intel, + ) + + llm = await builder.get_llm(llm_name=config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN) + + with tracer.push_active_function("generate_code_agent_report", input_data={"vuln_id": vuln_id}): + code_agent_report: CodeAgentReport = await generate_code_agent_report( + llm=llm, + vuln_id=vuln_id, + target_package=target_package_name, + descriptions=descriptions, + downstream_report=downstream_report, + upstream_report=upstream_report, + l1_agent_answer=l1_result.l1_agent_answer, + tracer=tracer, + policy_context=policy_context, + ) + + if l2_result: + logger.info("cve_checker_report: applying L2 verdict override") + code_agent_report = 
_apply_l2_verdict(code_agent_report, l2_result) + + source_key = ctx.source_key + if source_key: + report_dir = Path(config.base_checker_dir) / source_key / "report" + report_dir.mkdir(parents=True, exist_ok=True) + suffix = f"-{target_package_name}" if target_package_name else "" + if version: + suffix += f"-{version}" + if release: + suffix += f"-{release}" + report_path = report_dir / f"L1_report_{vuln_id}{suffix}.md" + report_path.write_text(code_agent_report.to_markdown( + vuln_id=vuln_id, + target_package=target_package_name, + version=version, + release=release, + downstream_report=downstream_report, + policy_context=policy_context, + )) + logger.info("cve_checker_report: wrote report to %s", report_path) + + with tracer.push_active_function( + "report_finish", + input_data={ + "confidence": code_agent_report.confidence, + "justification_label": code_agent_report.justification_label, + "has_l2_override": l2_result is not None and l2_result.l2_override_verdict is not None, + }, + ) as span: + span.set_output({ + "executive_summary": code_agent_report.executive_summary, + "affected_files": code_agent_report.affected_files, + "recommendation": code_agent_report.recommendation, + }) + + return AgentMorpheusOutput( + input=message.input, + info=message.info, + output=OutputPayload( + analysis=_build_analysis(message, code_agent_report, l1_result), + vex=None, + ), + ) + + yield FunctionInfo.from_fn( + _arun, + description="Generate final checker report from L1/L2 investigation results", + ) diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index 2771def65..56617f2f9 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -25,6 +25,7 @@ from pydantic import Field from exploit_iq_commons.logging.loggers_factory import LoggingFactory, trace_id +from exploit_iq_commons.data_models.checker_status import 
L1InvestigationResult from langgraph.graph import StateGraph, START, END from langgraph.prebuilt import ToolNode @@ -32,21 +33,13 @@ from nat.builder.context import Context from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput -from vuln_analysis.data_models.output import ( - AgentMorpheusEngineOutput, - AgentMorpheusOutput, - ChecklistItemOutput, - JustificationOutput, - OutputPayload, -) from vuln_analysis.functions.code_agent_graph_defs import ( CodeAgentState, - CodeAgentReport, DownstreamSearchReport, UpstreamSearchReport, - generate_code_agent_report, downstream_search_preprocss, upstream_search_preprocess, + extract_l1_verdict, L1_AGENT_SYS_PROMPT_PATCH_AVAILABLE, L1_AGENT_SYS_PROMPT_UPSTREAM_PATCH, L1_AGENT_SYS_PROMPT_REBASE_FIX, @@ -65,7 +58,6 @@ format_patch_hunks_summary, ) from vuln_analysis.tools.brew_downloader import BrewDownloader, BrewProfileType, BrewDownloaderError -from vuln_analysis.utils.full_text_search import FullTextSearch from vuln_analysis.functions.react_internals import CheckerThought, CodeFindings, Observation from vuln_analysis.runtime_context import ctx_state @@ -244,20 +236,14 @@ async def create_graph_code_agent(config: CVEPackageCodeAgentConfig, builder: Bu enabled_tool_names = [tool.name for tool in tools] tool_descriptions_list = [t.name + ": " + t.description for t in tools] tools_node = ToolNode(tools, handle_tool_errors=True) - + tool_strategy = _build_tool_strategy(enabled_tool_names) + tools_str = "\n".join(tool_descriptions_list) vuln_id = state.input.scan.vulns[0].vuln_id ctx = state.info.checker_context intel = state.info.intel target_package = state.input.image.target_package source_key = ctx.source_key - index_path = FullTextSearch.get_index_directory(config.base_code_index_dir, source_key) - fts = FullTextSearch(cache_path=str(index_path)) - if fts.is_empty(): - raise ValueError(f"Invalid code index at: {index_path}, index is empty") - - async def lexical_search_fn(query: str) -> list: - return 
fts.search_index(query, top_k=5) _tiktoken_enc = tiktoken.get_encoding("cl100k_base") @@ -305,6 +291,8 @@ def _estimate_tokens(runtime_prompt: str, messages: list, observation: Observati descriptions.append(("ghsa", cve_text)) if aIntel.ubuntu and aIntel.ubuntu.description: descriptions.append(("ubuntu", aIntel.ubuntu.description)) + + cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions) async def L1_agent(state: CodeAgentState) -> dict: logger.info("L1_agent: starting") @@ -318,9 +306,8 @@ async def L1_agent(state: CodeAgentState) -> dict: patch_files_summary = format_patch_files_summary(parsed_patch) patch_hunks_summary = format_patch_hunks_summary(parsed_patch) - cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions) - tools_str = "\n".join(tool_descriptions_list) - tool_strategy = _build_tool_strategy(enabled_tool_names) + + patch_search_mode = "patch_patterns" runtime_prompt = L1_AGENT_PROMPT_TEMPLATE.format( @@ -343,9 +330,6 @@ async def L1_agent(state: CodeAgentState) -> dict: }) # Use case 2: code is fixed by rebase elif upstream_report and upstream_report.is_code_fixed_by_rebase == "yes": - cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions) - tools_str = "\n".join(tool_descriptions_list) - tool_strategy = _build_tool_strategy(enabled_tool_names) if upstream_report.is_fixed_srpm_is_needed and upstream_report.fixed_parsed_patch: # Has patch context - use patch-based verification @@ -397,9 +381,7 @@ async def L1_agent(state: CodeAgentState) -> dict: patch_files_summary = format_patch_files_summary(parsed_patch) patch_hunks_summary = format_patch_hunks_summary(parsed_patch) - cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions) - tools_str = "\n".join(tool_descriptions_list) - tool_strategy = _build_tool_strategy(enabled_tool_names) + patch_search_mode = "patch_patterns" runtime_prompt = L1_AGENT_PROMPT_TEMPLATE.format( @@ -422,7 +404,6 @@ async def L1_agent(state: 
CodeAgentState) -> dict: }) else: # Default prompt - no patch context, use CVE description - cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions) patch_search_mode = "cve_description" runtime_prompt = ( "You are a security analyst investigating a CVE.\n\n" @@ -723,105 +704,34 @@ async def should_continue(state: CodeAgentState) -> str: return app -_StatusLiteral = Literal["TRUE", "FALSE", "UNKNOWN"] - -_VERDICT_TO_JUSTIFICATION: dict[str, tuple[str, _StatusLiteral]] = { - "PATCHED": ("protected_by_mitigating_control", "FALSE"), - "VULNERABLE": ("vulnerable", "TRUE"), - "INCONCLUSIVE": ("uncertain", "UNKNOWN"), -} - -_JUSTIFICATION_LABEL_TO_STATUS: dict[str, _StatusLiteral] = { - "code_not_present": "FALSE", - "code_not_reachable": "FALSE", - "protected_by_mitigating_control": "FALSE", - "protected_by_compiler": "FALSE", - "vulnerable": "TRUE", - "uncertain": "UNKNOWN", -} - - -def _build_analysis( - message: AgentMorpheusEngineInput, - result: dict, - code_agent_report: CodeAgentReport | None = None, -) -> list[AgentMorpheusEngineOutput]: - downstream_report: DownstreamSearchReport | None = result.get("downstream_report") - - if code_agent_report is not None: - label = code_agent_report.justification_label - status: _StatusLiteral = _JUSTIFICATION_LABEL_TO_STATUS.get(label, "UNKNOWN") - - reason_parts = [code_agent_report.executive_summary] - if code_agent_report.evidence_chain: - reason_parts.append("\n\nEvidence chain:") - reason_parts.extend(f"- {ev}" for ev in code_agent_report.evidence_chain[:5]) - if code_agent_report.patch_analysis: - reason_parts.append(f"\n\nPatch analysis: {code_agent_report.patch_analysis}") - if code_agent_report.code_snippets: - reason_parts.append("\n\nCode snippets:") - for snippet in code_agent_report.code_snippets[:3]: - reason_parts.append(f"- [{snippet.snippet_type}] {snippet.file_path}:{snippet.line_number or 'N/A'}") - if code_agent_report.limitations: - reason_parts.append("\n\nLimitations:") - 
reason_parts.extend(f"- {lim}" for lim in code_agent_report.limitations) - reason_parts.append(f"\n\nRecommendation: {code_agent_report.recommendation}") - reason = "\n".join(reason_parts) - summary = code_agent_report.executive_summary - elif downstream_report is not None: - if downstream_report.is_patch_file_available and downstream_report.is_patch_applied_in_build: - label = "protected_by_mitigating_control" - status = "FALSE" - summary = f"CVE patch {downstream_report.patch_file_name} found and applied in build." - elif downstream_report.is_patch_file_available: - label = "protected_by_mitigating_control" - status = "FALSE" - summary = f"CVE patch {downstream_report.patch_file_name} found in package." - else: - label = "uncertain" - status = "UNKNOWN" - summary = "No CVE-specific patch file found." - - reason_parts = [summary] - if downstream_report.spec_file_log_change: - reason_parts.append(f"\n\nSpec changelog: {downstream_report.spec_file_log_change[:200]}") - if downstream_report.build_log_patch_applied: - reason_parts.append(f"\n\nBuild log: {downstream_report.build_log_patch_applied[:200]}") - reason = "\n".join(reason_parts) - else: - label = "uncertain" - status = "UNKNOWN" - reason = "No downstream search results available." - summary = "Investigation incomplete." 
- - last_msg = result["messages"][-1].content if result.get("messages") else "no result" - - return [ - AgentMorpheusEngineOutput( - vuln_id=intel.vuln_id, - checklist=[ - ChecklistItemOutput( - input="L1 Package Code Agent analysis", - response=last_msg, - ), - ], - summary=summary, - justification=JustificationOutput( - label=label, - reason=reason, - status=status, - ), - intel_score=0, - cvss=None, - ) - for intel in (message.info.intel if message.info and message.info.intel else []) - ] +def _extract_affected_files(result: dict) -> list[str]: + """Extract affected files from downstream and upstream reports.""" + affected = set() + downstream: DownstreamSearchReport | None = result.get("downstream_report") + upstream: UpstreamSearchReport | None = result.get("upstream_report") + + if downstream and downstream.parsed_patch: + for pf in downstream.parsed_patch.files: + if pf.new_file: + affected.add(pf.new_file) + elif pf.old_file: + affected.add(pf.old_file) + + if upstream and upstream.fixed_parsed_patch: + for pf in upstream.fixed_parsed_patch.files: + if pf.new_file: + affected.add(pf.new_file) + elif pf.old_file: + affected.add(pf.old_file) + + return sorted(affected) @register_function(config_type=CVEPackageCodeAgentConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) async def cve_package_code_agent(config: CVEPackageCodeAgentConfig, builder: Builder): - async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: + async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + """Run L1 investigation and return intermediate result for routing to L2 or report generation.""" trace_id.set(message.input.scan.id) tracer = Context.get() @@ -847,93 +757,60 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: logger.info("package_code_agent: L1 investigation finished") - vuln_id = message.input.scan.vulns[0].vuln_id - target_package = message.input.image.target_package - target_package_name = 
target_package.name if target_package else "unknown" - intel = message.info.intel - - descriptions: list[tuple[str, str]] = [] - if intel: - a_intel = intel[0] - if a_intel.ghsa: - cve_text = a_intel.ghsa.description or a_intel.ghsa.summary or "" - if cve_text: - descriptions.append(("ghsa", cve_text)) - if a_intel.ubuntu and a_intel.ubuntu.description: - descriptions.append(("ubuntu", a_intel.ubuntu.description)) - - llm = await builder.get_llm(llm_name=config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN) - final_answer = None thought = result.get("thought") if thought and thought.mode == "finish": final_answer = thought.final_answer - # Build policy context for Feedback-2 gap coverage (NVR posture, RHSA excerpts) - ctx = message.info.checker_context - version = (target_package.version or "") if target_package else "" - release = (target_package.release or "") if target_package else "" - target_nvr = f"{target_package_name}-{version}-{release}" if target_package_name else "" - policy_context = _format_policy_context_for_l1_report( - target_nvr=target_nvr, - identify_result=ctx.identify_result if ctx else None, - intel=intel, - ) + vuln_id = message.input.scan.vulns[0].vuln_id + target_package = message.input.image.target_package + target_package_name = target_package.name if target_package else "unknown" - code_agent_report: CodeAgentReport = await generate_code_agent_report( + llm = await builder.get_llm(llm_name=config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN) + verdict_extraction = await extract_l1_verdict( llm=llm, vuln_id=vuln_id, target_package=target_package_name, - descriptions=descriptions, - downstream_report=result.get("downstream_report"), - upstream_report=result.get("upstream_report"), - l1_agent_answer=final_answer, + final_answer=final_answer or "No final answer produced.", tracer=tracer, - policy_context=policy_context, ) + preliminary_verdict = verdict_extraction.preliminary_verdict + affected_files = _extract_affected_files(result) 
+ confidence = verdict_extraction.confidence - # Write markdown report for debug/dev - source_key = ctx.source_key - report_dir = Path(config.base_checker_dir) / source_key / "report" - report_dir.mkdir(parents=True, exist_ok=True) - # Build filename: L1_report_CVE-XXXX-package-version-release.md (version/release already extracted above) - suffix = f"-{target_package_name}" if target_package_name else "" - if version: - suffix += f"-{version}" - if release: - suffix += f"-{release}" - report_path = report_dir / f"L1_report_{vuln_id}{suffix}.md" - report_path.write_text(code_agent_report.to_markdown( - vuln_id=vuln_id, - target_package=target_package_name, - version=version, - release=release, - downstream_report=result.get("downstream_report"), - policy_context=policy_context, - )) - logger.info("package_code_agent: wrote report to %s", report_path) + downstream_report: DownstreamSearchReport | None = result.get("downstream_report") + upstream_report: UpstreamSearchReport | None = result.get("upstream_report") + + l1_result = L1InvestigationResult( + downstream_report=downstream_report.model_dump() if downstream_report else None, + upstream_report=upstream_report.model_dump() if upstream_report else None, + l1_agent_answer=final_answer, + affected_files=affected_files, + preliminary_verdict=preliminary_verdict, + confidence=confidence, + ) with tracer.push_active_function( - "agent_finish", - input_data={ - "confidence": code_agent_report.confidence, - "justification_label": code_agent_report.justification_label, - }, + "l1_agent_finish", + input_data={"preliminary_verdict": preliminary_verdict}, ) as span: span.set_output({ - "executive_summary": code_agent_report.executive_summary, - "affected_files": code_agent_report.affected_files, - "recommendation": code_agent_report.recommendation, + "l1_agent_answer": final_answer[:500] if final_answer else None, + "affected_files": affected_files, + "confidence": l1_result.confidence, }) - return AgentMorpheusOutput( - 
input=message.input, - info=message.info, - output=OutputPayload( - analysis=_build_analysis(message, result, code_agent_report), - vex=None, - ), + if message.info.checker_context is not None: + message.info.checker_context.l1_result = l1_result + else: + logger.warning("package_code_agent: checker_context is None, cannot store l1_result") + logger.info( + "package_code_agent: L1 result - verdict=%s, confidence=%.2f, affected_files=%d", + preliminary_verdict, + l1_result.confidence, + len(affected_files), ) + return message yield FunctionInfo.from_fn( _arun, diff --git a/src/vuln_analysis/functions/react_internals.py b/src/vuln_analysis/functions/react_internals.py index 5de50a13e..834fffd86 100644 --- a/src/vuln_analysis/functions/react_internals.py +++ b/src/vuln_analysis/functions/react_internals.py @@ -98,6 +98,26 @@ class Classification(BaseModel): ) +class L1VerdictExtraction(BaseModel): + """Lightweight structured output for extracting verdict from L1 final answer.""" + preliminary_verdict: Literal["vulnerable", "protected", "not_present", "uncertain"] = Field( + description=( + "Classify the L1 agent's conclusion: " + "'protected' if fix/patch applied or code mitigated, " + "'not_present' if vulnerable code not found in this version, " + "'vulnerable' if vulnerable code confirmed present, " + "'uncertain' if evidence is insufficient or conflicting" + ) + ) + confidence: float = Field( + ge=0.0, le=1.0, + description="Confidence in the verdict based on evidence strength in the answer" + ) + reasoning: str = Field( + description="Brief explanation of why this verdict was chosen" + ) + + class PackageSelection(BaseModel): """Structured output for selecting the most relevant package from multiple candidates.""" selected_package: str = Field( diff --git a/src/vuln_analysis/register.py b/src/vuln_analysis/register.py index 826aa1f81..241112e0d 100644 --- a/src/vuln_analysis/register.py +++ b/src/vuln_analysis/register.py @@ -44,6 +44,7 @@ from 
vuln_analysis.functions import cve_source_acquisition from vuln_analysis.functions import cve_process_sbom from vuln_analysis.functions import cve_summarize +from vuln_analysis.functions import cve_checker_report from vuln_analysis.functions import cve_generate_cvss from vuln_analysis.functions import cve_generate_vex from vuln_analysis.functions import health_endpoint @@ -94,6 +95,10 @@ class CVEAgentWorkflowConfig(FunctionBaseConfig, name="cve_agent"): default=None, description="Function name for the Level 1 Package Code Agent (source-level CVE investigation)", ) + cve_checker_report_name: str | None = Field( + default=None, + description="Function name for the checker report generation (L1/L2 report synthesis)", + ) description: str = Field(default="Vulnerability analysis for container security workflow", description="Workflow function description") @@ -130,6 +135,10 @@ async def cve_agent_workflow(config: CVEAgentWorkflowConfig, builder: Builder): builder.get_function(name=config.cve_package_code_agent_name) if config.cve_package_code_agent_name else None ) + cve_checker_report_fn = ( + builder.get_function(name=config.cve_checker_report_name) + if config.cve_checker_report_name else None + ) # Define langgraph node functions @catch_pipeline_errors_async @@ -267,11 +276,40 @@ async def checker_segmentation_node(state: AgentMorpheusEngineInput) -> AgentMor return state @catch_pipeline_errors_async - async def code_agent_node(state: AgentMorpheusEngineInput) -> AgentMorpheusOutput: - """Level 1 Package Code Agent: investigates CVEs using extracted source and Tantivy code index.""" + async def l1_code_agent_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + """Level 1 Package Code Agent: investigates CVEs using extracted source and Tantivy code index. + + Returns AgentMorpheusEngineInput with l1_result populated on checker_context. 
+ """ if cve_package_code_agent_fn: return await cve_package_code_agent_fn.ainvoke(state.model_dump()) - logger.warning("Package code agent function not configured, producing empty output") + logger.warning("Package code agent function not configured, passing state through") + return state + + def route_after_l1(state: AgentMorpheusEngineInput) -> str: + """Route to L2 Build Agent if vulnerable or uncertain, else to report generation.""" + ctx = state.info.checker_context + if ctx and ctx.l1_result: + verdict = ctx.l1_result.preliminary_verdict + if verdict in ("vulnerable", "uncertain"): + return "l2_build_agent" + return "generate_report" + + @catch_pipeline_errors_async + async def l2_build_agent_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + """Level 2 Build Agent: BuildCompilationCheck + HardeningCheck. + + TODO: Implement L2 Build Agent. Currently a pass-through placeholder. + """ + logger.info("l2_build_agent: placeholder - passing through state") + return state + + @catch_pipeline_errors_async + async def generate_report_node(state: AgentMorpheusEngineInput) -> AgentMorpheusOutput: + """Generate the final checker report from L1/L2 investigation results.""" + if cve_checker_report_fn: + return await cve_checker_report_fn.ainvoke(state.model_dump()) + logger.warning("Checker report function not configured, producing empty output") return AgentMorpheusOutput( input=state.input, info=state.info, @@ -371,7 +409,9 @@ async def call_llm_engine_subgraph_node(message: AgentMorpheusEngineInput): graph_builder.add_node("source_acquisition", source_acquisition_node) graph_builder.add_node("checker_early_exit", checker_early_exit_node) graph_builder.add_node("checker_segmentation", checker_segmentation_node) - graph_builder.add_node("code_agent", code_agent_node) + graph_builder.add_node("l1_code_agent", l1_code_agent_node) + graph_builder.add_node("l2_build_agent", l2_build_agent_node) + graph_builder.add_node("generate_report", 
generate_report_node) graph_builder.add_edge(START, "add_start_time") # Conditional: route to full pipeline or package checker after add_start_time @@ -406,8 +446,17 @@ async def call_llm_engine_subgraph_node(message: AgentMorpheusEngineInput): }, ) graph_builder.add_edge("checker_early_exit", "add_completed_time") - graph_builder.add_edge("checker_segmentation", "code_agent") - graph_builder.add_edge("code_agent", "add_completed_time") + graph_builder.add_edge("checker_segmentation", "l1_code_agent") + graph_builder.add_conditional_edges( + "l1_code_agent", + route_after_l1, + { + "l2_build_agent": "l2_build_agent", + "generate_report": "generate_report", + }, + ) + graph_builder.add_edge("l2_build_agent", "generate_report") + graph_builder.add_edge("generate_report", "add_completed_time") # Shared tail graph_builder.add_edge("add_completed_time", "output_results") From c5b1149d27976b97fe3085dcd1615cae4a82b268 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Tue, 5 May 2026 08:50:00 +0300 Subject: [PATCH 24/46] skeleton L2 agent --- .../configs/config-http-openai.yml | 9 + .../functions/build_agent_graph_defs.py | 202 ++++++++++ .../functions/cve_build_agent.py | 361 ++++++++++++++++++ src/vuln_analysis/register.py | 17 +- src/vuln_analysis/tools/source_grep.py | 4 +- 5 files changed, 589 insertions(+), 4 deletions(-) create mode 100644 src/vuln_analysis/functions/build_agent_graph_defs.py create mode 100644 src/vuln_analysis/functions/cve_build_agent.py diff --git a/src/vuln_analysis/configs/config-http-openai.yml b/src/vuln_analysis/configs/config-http-openai.yml index f2961c600..9681002fd 100644 --- a/src/vuln_analysis/configs/config-http-openai.yml +++ b/src/vuln_analysis/configs/config-http-openai.yml @@ -174,6 +174,14 @@ functions: _type: cve_checker_report llm_name: cve_agent_executor_llm base_checker_dir: .cache/am_cache/checker + cve_build_agent: + _type: cve_build_agent + llm_name: cve_agent_executor_llm + base_checker_dir: .cache/am_cache/checker + 
max_iterations: 10 + tool_names: + - Source Grep + - Code Keyword Search health_check: _type: health_check @@ -269,6 +277,7 @@ workflow: cve_checker_segmentation_name: cve_checker_segmentation cve_package_code_agent_name: cve_package_code_agent cve_checker_report_name: cve_checker_report + cve_build_agent_name: cve_build_agent eval: general: diff --git a/src/vuln_analysis/functions/build_agent_graph_defs.py b/src/vuln_analysis/functions/build_agent_graph_defs.py new file mode 100644 index 000000000..35c0729e5 --- /dev/null +++ b/src/vuln_analysis/functions/build_agent_graph_defs.py @@ -0,0 +1,202 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Graph definitions for the L2 Build Agent (BuildCompilationCheck). + +Houses the LangGraph state schema, structured-output schemas for +BuildHarvestReport, and L2 agent prompt templates. 
+""" + +from __future__ import annotations + +import logging +from pathlib import Path +from typing import Literal, NotRequired + +from langgraph.graph import MessagesState +from pydantic import BaseModel, Field + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Data Models +# --------------------------------------------------------------------------- + + +class CompiledFile(BaseModel): + """Source -> object file mapping extracted from build log.""" + + source_file: str = Field(description="Source file path (e.g., 'libarchive/archive_write.c')") + object_file: str = Field(description="Compiled object file path (e.g., 'libarchive/.libs/archive_write.o')") + + +class BuildHarvestReport(BaseModel): + """Deterministic data harvested from build artifacts. + + Extracted during the data_harvest_node before the ReAct loop begins. + """ + + compiled_files: list[CompiledFile] = Field( + default_factory=list, + description="Source -> object file mappings from build log compilation commands", + ) + disabled_features: list[str] = Field( + default_factory=list, + description="Feature-disabling defines (e.g., '-DOPENSSL_NO_SM2', '-DNO_GZIP')", + ) + hardening_flags: list[str] = Field( + default_factory=list, + description="Hardening flags detected (e.g., '-fstack-protector-strong', '-D_FORTIFY_SOURCE=2')", + ) + arch_flags: list[str] = Field( + default_factory=list, + description="Architecture flags (e.g., '-m64', '-mtune=generic')", + ) + build_log_available: bool = Field( + default=False, + description="Whether a build log was available for parsing", + ) + spec_build_section: str = Field( + default="", + description="Raw %build section from spec file (if available)", + ) + + +class BuilderThought(BaseModel): + """Structured output for L2 agent ReAct reasoning.""" + + thought: str = Field(description="Current reasoning about compilation status") + mode: Literal["act", "finish"] = 
Field(description="Whether to use a tool or finish") + action: str | None = Field(default=None, description="Tool to call if mode='act'") + action_input: str | None = Field(default=None, description="Input for the tool") + final_answer: str | None = Field(default=None, description="Final answer if mode='finish'") + + +class BuildObservation(BaseModel): + """Observation from tool execution in L2 agent.""" + + findings: list[str] = Field(default_factory=list, description="Key findings from tool output") + compilation_evidence: str | None = Field(default=None, description="Evidence about compilation status") + + +class L2VerdictExtraction(BaseModel): + """LLM-extracted verdict from L2 agent final answer.""" + + compilation_status: Literal["compiled", "not_compiled", "unknown"] = Field( + description="Whether vulnerable code is compiled into the binary" + ) + confidence: float = Field(ge=0.0, le=1.0, description="Confidence in the verdict") + reasoning: str = Field(description="Brief explanation of the verdict") + override_verdict: Literal["not_vulnerable", "vulnerable_mitigated", None] = Field( + default=None, + description="L2 verdict override if applicable", + ) + + +# --------------------------------------------------------------------------- +# Graph State +# --------------------------------------------------------------------------- + + +class BuildAgentState(MessagesState): + """LangGraph state for the L2 Build Agent.""" + + harvest_report: NotRequired[BuildHarvestReport | None] + l1_affected_files: NotRequired[list[str]] + l1_preliminary_verdict: NotRequired[str | None] + runtime_prompt: NotRequired[str | None] + thought: NotRequired[BuilderThought | None] + observation: NotRequired[BuildObservation | None] + step: NotRequired[int] + max_steps: NotRequired[int] + + +# --------------------------------------------------------------------------- +# Placeholder Functions (to be implemented) +# 
--------------------------------------------------------------------------- + + +async def harvest_build_data( + build_log_path: Path | None, + spec_path: Path | None, +) -> BuildHarvestReport: + """Extract structured data from build log and spec file. + + PLACEHOLDER - Implementation pending. + + Will parse: + - Compiled files from gcc/libtool commands + - Feature-disabling -D defines + - Hardening flags (-fstack-protector, -D_FORTIFY_SOURCE, etc.) + - Architecture flags (-m64, -mtune, etc.) + - %build section from spec file + """ + # TODO: Implement build log parsing with regex patterns + return BuildHarvestReport(build_log_available=build_log_path is not None and build_log_path.exists()) + + +def format_l2_runtime_prompt( + vuln_id: str, + target_package: str, + l1_affected_files: list[str], + harvest_report: BuildHarvestReport, +) -> str: + """Build runtime prompt for L2 agent thought node. + + PLACEHOLDER - Implementation pending. + """ + # TODO: Implement prompt formatting + return f"L2 Build Agent analyzing {vuln_id} for {target_package}. Affected files: {l1_affected_files}" + + +async def extract_l2_verdict( + llm, + final_answer: str, + harvest_report: BuildHarvestReport, + tracer, +) -> L2VerdictExtraction: + """Extract structured verdict from L2 agent final answer. + + PLACEHOLDER - Implementation pending. + """ + # TODO: Implement LLM-based verdict extraction + return L2VerdictExtraction( + compilation_status="unknown", + confidence=0.0, + reasoning="Placeholder - not implemented", + override_verdict=None, + ) + + +# --------------------------------------------------------------------------- +# Prompt Templates (placeholders) +# --------------------------------------------------------------------------- + +L2_AGENT_SYS_PROMPT = """You are an L2 Build Agent analyzing whether vulnerable code is compiled into a binary. + +PLACEHOLDER - Full prompt to be implemented. 
+""" + +L2_THOUGHT_INSTRUCTIONS = """Based on the build evidence, determine if the affected files are compiled. + +PLACEHOLDER - Full instructions to be implemented. +""" + +L2_VERDICT_EXTRACTION_PROMPT = """Extract the compilation verdict from the analysis. + +PLACEHOLDER - Full prompt to be implemented. +""" diff --git a/src/vuln_analysis/functions/cve_build_agent.py b/src/vuln_analysis/functions/cve_build_agent.py new file mode 100644 index 000000000..485931695 --- /dev/null +++ b/src/vuln_analysis/functions/cve_build_agent.py @@ -0,0 +1,361 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Level 2 Build Agent for Package Vulnerability Checker. + +Performs BuildCompilationCheck (Phase 1) and HardeningCheck (Phase 2) to +determine if vulnerable code identified by L1 is actually compiled into +the binary and whether hardening flags provide mitigation. 
+""" + +from pathlib import Path + +from aiq.builder.builder import Builder +from aiq.builder.framework_enum import LLMFrameworkEnum +from aiq.builder.function_info import FunctionInfo +from aiq.cli.register_workflow import register_function +from aiq.data_models.function import FunctionBaseConfig +from pydantic import Field + +from exploit_iq_commons.logging.loggers_factory import LoggingFactory, trace_id +from exploit_iq_commons.data_models.checker_status import L2BuildResult + +from langgraph.graph import StateGraph, START, END +from langgraph.prebuilt import ToolNode +from langchain_core.messages import HumanMessage, AIMessage, SystemMessage + +from nat.builder.context import Context +from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput + +from vuln_analysis.functions.build_agent_graph_defs import ( + BuildAgentState, + BuildHarvestReport, + BuilderThought, + BuildObservation, + harvest_build_data, + format_l2_runtime_prompt, + extract_l2_verdict, +) +from vuln_analysis.runtime_context import ctx_state + +logger = LoggingFactory.get_agent_logger(__name__) + + +class CVEBuildAgentConfig(FunctionBaseConfig, name="cve_build_agent"): + """ + Level 2 Build Agent. Analyzes build artifacts to determine if vulnerable + code is compiled into the binary and whether hardening flags mitigate. + + Phase 1: BuildCompilationCheck - Is vulnerable code compiled? + Phase 2: HardeningCheck - Do hardening flags mitigate the CVE? + """ + + base_checker_dir: str = Field( + default=".cache/am_cache/checker", + description="Root directory for checker-specific artifacts.", + ) + max_iterations: int = Field( + default=10, + description="The maximum number of iterations for the agent.", + ) + + +async def create_graph_build_agent( + config: CVEBuildAgentConfig, + builder: Builder, + state: AgentMorpheusEngineInput, + tracer, +): + """Build the L2 Build Agent LangGraph. 
+ + Graph structure: + START -> data_harvest_node -> thought_node -+-> END (finish) + | + +-> tool_node -> observation_node -> thought_node + """ + # Node name constants + DATA_HARVEST_NODE = "data_harvest" + THOUGHT_NODE = "thought_node" + TOOL_NODE = "tool_node" + OBSERVATION_NODE = "observation_node" + FORCED_FINISH_NODE = "forced_finish" + + llm = await builder.get_llm(llm_name=config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN) + tools = builder.get_tools(tool_names=config.tool_names, wrapper_type=LLMFrameworkEnum.LANGCHAIN) + + thought_llm = llm.with_structured_output(BuilderThought) # Reserved for implementation + tools_node = ToolNode(tools, handle_tool_errors=True) if tools else None + + # Extract context from state (guaranteed by route_after_l1 in register.py) + ctx = state.info.checker_context + l1_result = ctx.l1_result + artifacts = ctx.artifacts + target_package = state.input.image.target_package + vuln_id = state.input.scan.vulns[0].vuln_id + + # Paths + source_key = ctx.source_key + checker_dir = Path(config.base_checker_dir) / source_key if source_key else None + build_log_path = Path(artifacts.build_log_path) if artifacts.build_log_path else None + + # L1 results + l1_affected_files = l1_result.affected_files + l1_preliminary_verdict = l1_result.preliminary_verdict + + # ------------------------------------------------------------------------- + # Node definitions (PLACEHOLDER implementations) + # ------------------------------------------------------------------------- + + async def data_harvest_node(state: BuildAgentState) -> dict: + """Harvest build data from artifacts. + + PLACEHOLDER - Extracts structured data from build log before ReAct loop. 
+ """ + logger.info("data_harvest_node: starting") + + with tracer.push_active_function("data_harvest", input_data={}) as span: + # Find spec file if available + spec_path = None + if checker_dir and checker_dir.exists(): + spec_files = list((checker_dir / "source").glob("*.spec")) + spec_path = spec_files[0] if spec_files else None + + harvest_report = await harvest_build_data( + build_log_path=build_log_path, + spec_path=spec_path, + ) + + runtime_prompt = format_l2_runtime_prompt( + vuln_id=vuln_id, + target_package=target_package.name if target_package else "unknown", + l1_affected_files=l1_affected_files, + harvest_report=harvest_report, + ) + + span.set_output({ + "build_log_available": harvest_report.build_log_available, + "compiled_files_count": len(harvest_report.compiled_files), + "l1_affected_files_count": len(l1_affected_files), + }) + + return { + "harvest_report": harvest_report, + "l1_affected_files": l1_affected_files, + "l1_preliminary_verdict": l1_preliminary_verdict, + "runtime_prompt": runtime_prompt, + "messages": [AIMessage(content="Build data harvested, beginning analysis.")], + } + + async def thought_node(state: BuildAgentState) -> dict: + """Generate next thought/action using the LLM. + + PLACEHOLDER - ReAct reasoning for build analysis. + """ + step_num = state.get("step", 0) + logger.info("thought_node: starting step %d", step_num) + + runtime_prompt = state.get("runtime_prompt") or "Analyze build compilation status." 
+ _messages = [SystemMessage(content=runtime_prompt)] + state["messages"] # Reserved for LLM call + + with tracer.push_active_function("thought_node", input_data={}) as span: + # TODO: Implement actual LLM call + # For skeleton, create a placeholder finish response + response = BuilderThought( + thought="Placeholder: Build analysis not yet implemented.", + mode="finish", + action=None, + action_input=None, + final_answer="L2 Build Agent skeleton - implementation pending.", + ) + + if response.mode == "finish": + ai_message = AIMessage(content=response.final_answer or "Analysis complete.") + else: + ai_message = AIMessage(content=response.thought) + + span.set_output({ + "thought": response.thought, + "mode": response.mode, + "final_answer": response.final_answer, + }) + + return { + "messages": [ai_message], + "thought": response, + "step": step_num + 1, + "max_steps": config.max_iterations, + } + + async def observation_node(state: BuildAgentState) -> dict: + """Process tool output. + + PLACEHOLDER - Process tool results and update observations. 
+ """ + logger.info("observation_node: starting") + + with tracer.push_active_function("observation_node", input_data={}) as span: + # TODO: Implement tool output processing + observation = BuildObservation( + findings=["Placeholder observation"], + compilation_evidence=None, + ) + span.set_output({"findings": observation.findings}) + + return { + "messages": [AIMessage(content="Observation processed.")], + "observation": observation, + } + + async def forced_finish_node(state: BuildAgentState) -> dict: + """Force finish when max iterations reached.""" + logger.info("forced_finish_node: max iterations reached") + return { + "messages": [AIMessage(content="Max iterations reached, forcing finish.")], + "thought": BuilderThought( + thought="Max iterations reached.", + mode="finish", + final_answer="Unable to determine compilation status within iteration limit.", + ), + } + + async def should_continue(state: BuildAgentState) -> str: + """Route based on thought mode.""" + thought = state.get("thought") + if thought is not None and thought.mode == "finish": + return END + if state.get("step", 0) >= state.get("max_steps", config.max_iterations): + return FORCED_FINISH_NODE + return TOOL_NODE + + + # ------------------------------------------------------------------------- + # Build graph + # ------------------------------------------------------------------------- + + flow = StateGraph(BuildAgentState) + + flow.add_node(DATA_HARVEST_NODE, data_harvest_node) + flow.add_node(THOUGHT_NODE, thought_node) + flow.add_node(FORCED_FINISH_NODE, forced_finish_node) + flow.add_node(OBSERVATION_NODE, observation_node) + flow.add_node(TOOL_NODE, tools_node) + + flow.add_edge(START, DATA_HARVEST_NODE) + flow.add_edge(DATA_HARVEST_NODE, THOUGHT_NODE) + edge_map = {END: END, FORCED_FINISH_NODE: FORCED_FINISH_NODE, TOOL_NODE: TOOL_NODE} + flow.add_conditional_edges(THOUGHT_NODE, should_continue, edge_map) + flow.add_edge(TOOL_NODE, OBSERVATION_NODE) + flow.add_edge(OBSERVATION_NODE, 
THOUGHT_NODE) + flow.add_edge(FORCED_FINISH_NODE, END) + + app = flow.compile() + return app + + +@register_function(config_type=CVEBuildAgentConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) +async def cve_build_agent(config: CVEBuildAgentConfig, builder: Builder): + """Level 2 Build Agent entry point.""" + + async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + """Run L2 build analysis and populate l2_result on checker_context.""" + trace_id.set(message.input.scan.id) + tracer = Context.get() + + # Set ctx_state for tools + from types import SimpleNamespace + workflow_state = SimpleNamespace(original_input=message, info=message.info) + ctx_state.set(workflow_state) + + logger.info("build_agent: starting L2 investigation") + + ctx = message.info.checker_context + if not ctx or not ctx.l1_result: + logger.warning("build_agent: no L1 result available, skipping L2") + return message + + # Build and run the graph + build_agent_graph = await create_graph_build_agent(config, builder, message, tracer) + initial_state: BuildAgentState = { + "messages": [HumanMessage(content="Begin L2 build analysis")], + "step": 0, + "max_steps": config.max_iterations, + } + + with tracer.push_active_function("l2_build_agent_graph", input_data=initial_state["messages"][0].content): + result = await build_agent_graph.ainvoke( + initial_state, + config={"recursion_limit": config.max_iterations * 4}, + ) + + logger.info("build_agent: L2 investigation finished") + + # Extract verdict from result + final_answer = None + thought = result.get("thought") + if thought and thought.mode == "finish": + final_answer = thought.final_answer + + harvest_report = result.get("harvest_report") or BuildHarvestReport() + + # Extract L2 verdict (placeholder) + llm = await builder.get_llm(llm_name=config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN) + verdict_extraction = await extract_l2_verdict( + llm=llm, + final_answer=final_answer or "No analysis produced.", + 
harvest_report=harvest_report, + tracer=tracer, + ) + + # Build L2 result + l2_result = L2BuildResult( + compilation_status=verdict_extraction.compilation_status, + compilation_confidence=verdict_extraction.confidence, + compilation_evidence=verdict_extraction.reasoning, + hardening_relevant=None, # Phase 2 - not implemented yet + hardening_flags=harvest_report.hardening_flags, + hardening_rationale=None, # Phase 2 - not implemented yet + l2_override_verdict=verdict_extraction.override_verdict, + ) + + with tracer.push_active_function( + "l2_agent_finish", + input_data={"compilation_status": l2_result.compilation_status}, + ) as span: + span.set_output({ + "compilation_status": l2_result.compilation_status, + "compilation_confidence": l2_result.compilation_confidence, + "l2_override_verdict": l2_result.l2_override_verdict, + }) + + # Store result on checker_context + if message.info.checker_context is not None: + message.info.checker_context.l2_result = l2_result + else: + logger.warning("build_agent: checker_context is None, cannot store l2_result") + + logger.info( + "build_agent: L2 result - status=%s, confidence=%.2f, override=%s", + l2_result.compilation_status, + l2_result.compilation_confidence, + l2_result.l2_override_verdict, + ) + + return message + + yield FunctionInfo.from_fn( + _arun, + description="Level 2 Build Agent: analyzes build artifacts for compilation status and hardening", + ) diff --git a/src/vuln_analysis/register.py b/src/vuln_analysis/register.py index 241112e0d..6ee21f603 100644 --- a/src/vuln_analysis/register.py +++ b/src/vuln_analysis/register.py @@ -45,6 +45,7 @@ from vuln_analysis.functions import cve_process_sbom from vuln_analysis.functions import cve_summarize from vuln_analysis.functions import cve_checker_report +from vuln_analysis.functions import cve_build_agent from vuln_analysis.functions import cve_generate_cvss from vuln_analysis.functions import cve_generate_vex from vuln_analysis.functions import health_endpoint @@ 
-99,6 +100,10 @@ class CVEAgentWorkflowConfig(FunctionBaseConfig, name="cve_agent"): default=None, description="Function name for the checker report generation (L1/L2 report synthesis)", ) + cve_build_agent_name: str | None = Field( + default=None, + description="Function name for the Level 2 Build Agent (build compilation and hardening check)", + ) description: str = Field(default="Vulnerability analysis for container security workflow", description="Workflow function description") @@ -139,6 +144,10 @@ async def cve_agent_workflow(config: CVEAgentWorkflowConfig, builder: Builder): builder.get_function(name=config.cve_checker_report_name) if config.cve_checker_report_name else None ) + cve_build_agent_fn = ( + builder.get_function(name=config.cve_build_agent_name) + if config.cve_build_agent_name else None + ) # Define langgraph node functions @catch_pipeline_errors_async @@ -298,10 +307,12 @@ def route_after_l1(state: AgentMorpheusEngineInput) -> str: @catch_pipeline_errors_async async def l2_build_agent_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: """Level 2 Build Agent: BuildCompilationCheck + HardeningCheck. - - TODO: Implement L2 Build Agent. Currently a pass-through placeholder. + + Returns AgentMorpheusEngineInput with l2_result populated on checker_context. 
""" - logger.info("l2_build_agent: placeholder - passing through state") + if cve_build_agent_fn: + return await cve_build_agent_fn.ainvoke(state.model_dump()) + logger.warning("Build agent function not configured, passing state through") return state @catch_pipeline_errors_async diff --git a/src/vuln_analysis/tools/source_grep.py b/src/vuln_analysis/tools/source_grep.py index 508560b35..4ffe34aa3 100644 --- a/src/vuln_analysis/tools/source_grep.py +++ b/src/vuln_analysis/tools/source_grep.py @@ -102,6 +102,7 @@ async def _arun(query: str) -> str: - 'GENERAL_NAME_cmp' - search all source files - 'GENERAL_NAME_cmp,*.c' - search only .c files - 'archive_read_open,*.h' - search only headers + - 'archive_read_open,archive.c' - search only in files named archive.c """ workflow_state = ctx_state.get() @@ -140,6 +141,7 @@ async def _arun(query: str) -> str: "Fast grep search in source code using native Unix grep. " "Input: 'pattern' or 'pattern,file_glob'. " "Examples: 'GENERAL_NAME_cmp' searches all source files, " - "'GENERAL_NAME_cmp,*.c' searches only C files." + "'GENERAL_NAME_cmp,*.c' searches only C files, " + "'archive_read_open,archive.c' searches only in files named archive.c." 
), ) From 74cdaa4d4c495793f3a0daed2607cf031b31c364 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Tue, 5 May 2026 09:38:55 +0300 Subject: [PATCH 25/46] save cwe-id and add hardening db file --- .../data/hardening_kb/__init__.py | 14 + .../data/hardening_kb/hardening_kb.json | 279 ++++++++++++++++++ .../data_models/cve_intel.py | 1 + src/vuln_analysis/utils/clients/nvd_client.py | 19 +- 4 files changed, 306 insertions(+), 7 deletions(-) create mode 100644 src/exploit_iq_commons/data/hardening_kb/__init__.py create mode 100644 src/exploit_iq_commons/data/hardening_kb/hardening_kb.json diff --git a/src/exploit_iq_commons/data/hardening_kb/__init__.py b/src/exploit_iq_commons/data/hardening_kb/__init__.py new file mode 100644 index 000000000..cf7c586a5 --- /dev/null +++ b/src/exploit_iq_commons/data/hardening_kb/__init__.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/src/exploit_iq_commons/data/hardening_kb/hardening_kb.json b/src/exploit_iq_commons/data/hardening_kb/hardening_kb.json new file mode 100644 index 000000000..ab91b941e --- /dev/null +++ b/src/exploit_iq_commons/data/hardening_kb/hardening_kb.json @@ -0,0 +1,279 @@ +{ + "kb_version": "1.0", + "last_updated": "2026-05-05", + "mappings": [ + { + "flag": "-Wall -Wextra", + "description": "Enable warnings for constructs often associated with defects.", + "vulnerability_category": "Defensive Coding", + "cwe_ids": [ + "CWE-563", + "CWE-457", + "CWE-480" + ], + "requires": {} + }, + { + "flag": "-Wformat -Wformat=2", + "description": "Enable additional format function warnings.", + "vulnerability_category": "Input Validation", + "cwe_ids": [ + "CWE-134" + ], + "requires": {} + }, + { + "flag": "-Wconversion -Wsign-conversion", + "description": "Enable implicit conversion warnings.", + "vulnerability_category": "Arithmetic Safety", + "cwe_ids": [ + "CWE-190", + "CWE-681" + ], + "requires": {} + }, + { + "flag": "-Wtrampolines", + "description": "Enable warnings about trampolines that require executable stacks.", + "vulnerability_category": "Control Flow Integrity", + "cwe_ids": [ + "CWE-693" + ], + "requires": {} + }, + { + "flag": "-Wimplicit-fallthrough", + "description": "Warn when a switch case falls through.", + "vulnerability_category": "Defensive Coding", + "cwe_ids": [ + "CWE-484" + ], + "requires": {} + }, + { + "flag": "-Wbidi-chars=any", + "description": "Enable warnings for possibly misleading Unicode bidirectional control characters.", + "vulnerability_category": "Code Integrity", + "cwe_ids": [ + "CWE-1301" + ], + "requires": {} + }, + { + "flag": "-Werror ", + "description": "Treat all or selected compiler warnings as errors.", + "vulnerability_category": "Policy Enforcement", + "cwe_ids": [ + "N/A" + ], + "requires": {} + }, + { + "flag": "-Werror=format-security", + "description": "Treat format strings that are not string literals and used 
without arguments as errors.", + "vulnerability_category": "Input Validation", + "cwe_ids": [ + "CWE-134" + ], + "requires": {} + }, + { + "flag": "-Werror=implicit -Werror=incompatible-pointer-types -Werror=int-conversion ", + "description": "Treat obsolete C constructs as errors.", + "vulnerability_category": "Type Safety", + "cwe_ids": [ + "CWE-704", + "CWE-843" + ], + "requires": {} + }, + { + "flag": "-D_FORTIFY_SOURCE=3", + "description": "Fortify sources with compile- and run-time checks for unsafe libc usage and buffer overflows.", + "vulnerability_category": "Memory Safety", + "cwe_ids": [ + "CWE-119", + "CWE-120", + "CWE-121", + "CWE-122" + ], + "requires": {} + }, + { + "flag": "-D_GLIBCXX_ASSERTIONS", + "description": "Precondition checks for C++ standard library calls.", + "vulnerability_category": "Memory Safety", + "cwe_ids": [ + "CWE-119", + "CWE-125", + "CWE-787" + ], + "requires": {} + }, + { + "flag": "-fstrict-flex-arrays=3", + "description": "Consider a trailing array in a struct as a flexible array if declared as [].", + "vulnerability_category": "Memory Safety", + "cwe_ids": [ + "CWE-119", + "CWE-125", + "CWE-787" + ], + "requires": {} + }, + { + "flag": "-fstack-clash-protection", + "description": "Enable run-time checks for variable-size stack allocation validity.", + "vulnerability_category": "Memory Safety", + "cwe_ids": [ + "CWE-785" + ], + "requires": {} + }, + { + "flag": "-fstack-protector-strong", + "description": "Enable run-time checks for stack-based buffer overflows.", + "vulnerability_category": "Memory Safety", + "cwe_ids": [ + "CWE-121" + ], + "requires": {} + }, + { + "flag": "-fcf-protection=full", + "description": "Enable control-flow protection against return-oriented programming (ROP) and jump-oriented programming (JOP) attacks on x86_64.", + "vulnerability_category": "Control Flow Integrity", + "cwe_ids": [ + "CWE-693" + ], + "requires": {} + }, + { + "flag": "-mbranch-protection=standard", + "description": "Enable 
branch protection against ROP and JOP attacks on AArch64.", + "vulnerability_category": "Control Flow Integrity", + "cwe_ids": [ + "CWE-693" + ], + "requires": {} + }, + { + "flag": "-Wl,-z,nodlopen", + "description": "Restrict dlopen(3) calls to shared objects.", + "vulnerability_category": "Policy Enforcement", + "cwe_ids": [ + "CWE-269" + ], + "requires": {} + }, + { + "flag": "-Wl,-z,noexecstack", + "description": "Enable data execution prevention by marking stack memory as non-executable.", + "vulnerability_category": "Control Flow Integrity", + "cwe_ids": [ + "CWE-693", + "CWE-94" + ], + "requires": {} + }, + { + "flag": "-Wl,-z,relro -Wl,-z,now", + "description": "Mark relocation table entries resolved at load-time as read-only.", + "vulnerability_category": "Code Integrity", + "cwe_ids": [ + "CWE-123" + ], + "requires": {} + }, + { + "flag": "-fPIE -pie", + "description": "Build as position-independent executable.", + "vulnerability_category": "Control Flow Integrity", + "cwe_ids": [ + "CWE-693" + ], + "requires": {} + }, + { + "flag": "-fPIC -shared", + "description": "Build as position-independent code.", + "vulnerability_category": "Control Flow Integrity", + "cwe_ids": [ + "CWE-693" + ], + "requires": {} + }, + { + "flag": "-fno-delete-null-pointer-checks", + "description": "Force retention of null pointer checks.", + "vulnerability_category": "Memory Safety", + "cwe_ids": [ + "CWE-476" + ], + "requires": {} + }, + { + "flag": "-fno-strict-overflow", + "description": "Define behavior for signed integer and pointer arithmetic overflows", + "vulnerability_category": "Arithmetic Safety", + "cwe_ids": [ + "CWE-190" + ], + "requires": {} + }, + { + "flag": "-fno-strict-aliasing", + "description": "Do not assume strict aliasing.", + "vulnerability_category": "Memory Safety", + "cwe_ids": [ + "CWE-416" + ], + "requires": {} + }, + { + "flag": "-ftrivial-auto-var-init", + "description": "Initialize automatic variables that lack explicit initializers.", + 
"vulnerability_category": "Information Leakage", + "cwe_ids": [ + "CWE-457" + ], + "requires": {} + }, + { + "flag": "-fexceptions", + "description": "Enable exception propagation to harden multi-threaded C code.", + "vulnerability_category": "Error Handling", + "cwe_ids": [ + "CWE-391" + ], + "requires": {} + }, + { + "flag": "-fhardened", + "description": "Enable pre-determined set of hardening options in GCC.", + "vulnerability_category": "Full Hardening", + "cwe_ids": [ + "Multi" + ], + "requires": {} + }, + { + "flag": "-Wl,--as-needed -Wl,--no-copy-dt-needed-entries", + "description": "Allow linker to omit libraries specified on the command line to link against if they are not used.", + "vulnerability_category": "Supply Chain Safety", + "cwe_ids": [ + "N/A" + ], + "requires": {} + }, + { + "flag": "-fzero-init-padding-bits=all", + "description": "Guarantee zero initialization of padding bits in all automatic variable initializers.", + "vulnerability_category": "Information Leakage", + "cwe_ids": [ + "CWE-200" + ], + "requires": {} + } + ] +} \ No newline at end of file diff --git a/src/exploit_iq_commons/data_models/cve_intel.py b/src/exploit_iq_commons/data_models/cve_intel.py index 5ebdf23f6..36495c30b 100644 --- a/src/exploit_iq_commons/data_models/cve_intel.py +++ b/src/exploit_iq_commons/data_models/cve_intel.py @@ -110,6 +110,7 @@ class Configuration(BaseModel): cvss_vector: str | None = None cvss_base_score: float | None = None cvss_severity: str | None = None + cwe_id: str | None = None cwe_name: str | None = None cwe_description: str | None = None cwe_extended_description: str | None = None diff --git a/src/vuln_analysis/utils/clients/nvd_client.py b/src/vuln_analysis/utils/clients/nvd_client.py index 1b13ff51c..fd6a43d54 100644 --- a/src/vuln_analysis/utils/clients/nvd_client.py +++ b/src/vuln_analysis/utils/clients/nvd_client.py @@ -125,19 +125,21 @@ async def _get_cwe_elements(self, cve_obj: dict) -> dict: those CWEs. 
""" # Get CWE name - cwe_id = None + raw_cwe_id = None weaknesses = cve_obj.get('weaknesses', []) - cwe_id = self._get_cwe(weaknesses) + raw_cwe_id = self._get_cwe(weaknesses) cwe_link = None cwe_name = None cwe_description = None cwe_extended_description = None - if cwe_id is not None: - if cwe_id.startswith('CWE-'): - cwe_id = cwe_id.replace('CWE-', '', 1) + cwe_id_numeric = None + if raw_cwe_id is not None: + cwe_id_numeric = raw_cwe_id + if cwe_id_numeric.startswith('CWE-'): + cwe_id_numeric = cwe_id_numeric.replace('CWE-', '', 1) - if cwe_id.isnumeric(): - cwe_link = self._cwe_details_url_template.format(CWE_ID=cwe_id) + if cwe_id_numeric.isnumeric(): + cwe_link = self._cwe_details_url_template.format(CWE_ID=cwe_id_numeric) if cwe_link is not None: soup = await self._get_soup(cwe_link) @@ -155,7 +157,9 @@ async def _get_cwe_elements(self, cve_obj: dict) -> dict: if extended_description_div: cwe_extended_description = extended_description_div.find('div', class_='indent').text.strip() + cwe_id = f"CWE-{cwe_id_numeric}" if cwe_id_numeric and cwe_id_numeric.isnumeric() else raw_cwe_id return { + "cwe_id": cwe_id, "cwe_name": cwe_name, "cwe_description": cwe_description, "cwe_extended_description": cwe_extended_description, @@ -330,6 +334,7 @@ async def get_intel(self, cve_id: str) -> CveIntelNvd: cvss_vector=cvss_vector, cvss_base_score=cvss_base_score, cvss_severity=cvss_severity, + cwe_id=cwe_elements["cwe_id"], cwe_name=cwe_elements["cwe_name"], cwe_description=cwe_elements["cwe_description"], cwe_extended_description=cwe_elements["cwe_extended_description"], From 16b31468fc2dcd4cf12058dd960780e8cbc3c2ff Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Tue, 5 May 2026 11:11:51 +0300 Subject: [PATCH 26/46] Imp preprocess data --- .../functions/build_agent_graph_defs.py | 199 +++++++++++++++--- .../functions/cve_build_agent.py | 28 ++- 2 files changed, 188 insertions(+), 39 deletions(-) diff --git a/src/vuln_analysis/functions/build_agent_graph_defs.py 
b/src/vuln_analysis/functions/build_agent_graph_defs.py index 35c0729e5..72e6e6e76 100644 --- a/src/vuln_analysis/functions/build_agent_graph_defs.py +++ b/src/vuln_analysis/functions/build_agent_graph_defs.py @@ -23,12 +23,15 @@ from __future__ import annotations import logging +import re from pathlib import Path from typing import Literal, NotRequired from langgraph.graph import MessagesState from pydantic import BaseModel, Field +from exploit_iq_commons.utils.hardening_kb import HardeningEntry + logger = logging.getLogger(__name__) @@ -37,42 +40,34 @@ # --------------------------------------------------------------------------- -class CompiledFile(BaseModel): - """Source -> object file mapping extracted from build log.""" - - source_file: str = Field(description="Source file path (e.g., 'libarchive/archive_write.c')") - object_file: str = Field(description="Compiled object file path (e.g., 'libarchive/.libs/archive_write.o')") - - class BuildHarvestReport(BaseModel): """Deterministic data harvested from build artifacts. Extracted during the data_harvest_node before the ReAct loop begins. + + Key vulnerability-relevant data: + - Feature disable flags that prevent vulnerable code from being compiled: + - OpenSSL style: no-sm2, no-ssl3, no-md5, no-asm + - Autoconf style: --disable-feature, --without-feature + - CMake style: -DENABLE_FEATURE=OFF + - Architecture flags to understand target platform + - Hardening flags relevant to the CVE's CWE class + + Note: Compiled files are NOT pre-extracted. The LLM searches the build log + for affected files from l1_result.affected_files during the ReAct loop. 
""" - compiled_files: list[CompiledFile] = Field( - default_factory=list, - description="Source -> object file mappings from build log compilation commands", - ) disabled_features: list[str] = Field( default_factory=list, - description="Feature-disabling defines (e.g., '-DOPENSSL_NO_SM2', '-DNO_GZIP')", + description="Feature-disabling flags from build log (e.g., '-DOPENSSL_NO_SM2', '-DNO_GZIP')", ) - hardening_flags: list[str] = Field( + spec_disabled_features: list[str] = Field( default_factory=list, - description="Hardening flags detected (e.g., '-fstack-protector-strong', '-D_FORTIFY_SOURCE=2')", + description="Feature-disabling flags from spec %build section (e.g., 'no-sm2', '--disable-ssl3', '--without-openssl')", ) - arch_flags: list[str] = Field( + expected_hardening: list[HardeningEntry] = Field( default_factory=list, - description="Architecture flags (e.g., '-m64', '-mtune=generic')", - ) - build_log_available: bool = Field( - default=False, - description="Whether a build log was available for parsing", - ) - spec_build_section: str = Field( - default="", - description="Raw %build section from spec file (if available)", + description="Hardening flags relevant to the CVE's CWE, with descriptions for LLM context", ) @@ -126,27 +121,159 @@ class BuildAgentState(MessagesState): # --------------------------------------------------------------------------- -# Placeholder Functions (to be implemented) +# Spec File Parsing Helpers +# --------------------------------------------------------------------------- + + +def _extract_spec_build_section(spec_path: Path) -> str: + """Extract the %build section from an RPM spec file. + + The %build section contains configure/Configure commands with feature flags + that determine what code is compiled. 
+ + Args: + spec_path: Path to the RPM spec file + + Returns: + The raw %build section content, or empty string if not found + """ + try: + content = spec_path.read_text(encoding="utf-8", errors="replace") + except OSError as e: + logger.warning("Failed to read spec file %s: %s", spec_path, e) + return "" + + # Find %build section (ends at next %section like %install, %check, or EOF) + match = re.search( + r"^%build\s*\n(.*?)(?=^%\w+|\Z)", + content, + re.MULTILINE | re.DOTALL, + ) + return match.group(1).strip() if match else "" + + +def _extract_spec_disabled_features(build_section: str) -> list[str]: + """Extract feature-disable flags from spec %build section. + + Recognizes patterns from common build systems: + - OpenSSL style: no-sm2, no-ssl3, no-asm + - Autoconf style: --disable-feature, --without-feature + - CMake style: -DENABLE_FEATURE=OFF + + Args: + build_section: The raw %build section content + + Returns: + Sorted list of disabled feature names (without prefix) + """ + disabled: set[str] = set() + + # OpenSSL style: no-feature (e.g., no-sm2, no-ssl3, no-asm) + disabled.update(re.findall(r"\bno-(\w+)", build_section)) + + # Autoconf style: --disable-feature (e.g., --disable-static) + disabled.update(re.findall(r"--disable-(\w+)", build_section)) + + # Autoconf style: --without-feature (e.g., --without-openssl) + disabled.update(re.findall(r"--without-(\w+)", build_section)) + + # CMake style: -DENABLE_FEATURE=OFF or =0 or =FALSE + disabled.update( + re.findall(r"-DENABLE_(\w+)=(?:OFF|0|FALSE)", build_section, re.IGNORECASE) + ) + + return sorted(disabled) + + +# --------------------------------------------------------------------------- +# Data Harvesting Functions # --------------------------------------------------------------------------- async def harvest_build_data( build_log_path: Path | None, spec_path: Path | None, + cwe_id: str | None = None, ) -> BuildHarvestReport: """Extract structured data from build log and spec file. 
- PLACEHOLDER - Implementation pending. + Parses: + - Feature-disabling -D defines (e.g., -DOPENSSL_NO_SM2, -DNO_GZIP) - Will parse: - - Compiled files from gcc/libtool commands - - Feature-disabling -D defines - - Hardening flags (-fstack-protector, -D_FORTIFY_SOURCE, etc.) - - Architecture flags (-m64, -mtune, etc.) - - %build section from spec file + Args: + build_log_path: Path to the build log file + spec_path: Path to the RPM spec file + cwe_id: CWE identifier to look up expected hardening flags (e.g., 'CWE-121') + + Returns: + BuildHarvestReport with harvested data and expected hardening flags """ - # TODO: Implement build log parsing with regex patterns - return BuildHarvestReport(build_log_available=build_log_path is not None and build_log_path.exists()) + from exploit_iq_commons.utils.hardening_kb import HardeningKB + from vuln_analysis.tools.source_inspector import SourceInspector + + # Lookup expected hardening flags from KB based on CWE + expected_hardening = [] + if cwe_id: + kb = HardeningKB.get_instance() + expected_hardening = kb.lookup_by_cwe(cwe_id) + logger.info( + "harvest_build_data: CWE %s maps to %d hardening flags", + cwe_id, + len(expected_hardening), + ) + + # Extract feature-disabling defines from build log + disabled_features: list[str] = [] + if build_log_path: + inspector = SourceInspector(build_log_path.parent) + + # Grep for lines containing -D defines + matches = inspector.grep_content(r"-D\w+", file_path=build_log_path) + + # Extract unique defines from matched lines + all_defines: set[str] = set() + for match in matches: + defines = re.findall(r"-D(\w+)", match.line_content) + all_defines.update(defines) + + # Filter for feature-disabling patterns: + # - NO_* prefix (e.g., NO_GZIP) + # - DISABLE_* prefix (e.g., DISABLE_SSL) + # - WITHOUT_* prefix (e.g., WITHOUT_FEATURE) + # - *_NO_* infix (e.g., OPENSSL_NO_SM2) + # - *_DISABLE_* infix + # - *_DISABLED suffix + disable_pattern = re.compile( + 
r"^(NO_|DISABLE_|WITHOUT_)|(_NO_|_DISABLE_)|(_DISABLED$)" + ) + disabled_features = sorted( + d for d in all_defines if disable_pattern.search(d) + ) + + if disabled_features: + logger.info( + "harvest_build_data: found %d disabled features in build log", + len(disabled_features), + ) + + # Extract %build section and features from spec file + spec_build_section = "" + spec_disabled_features: list[str] = [] + if spec_path and spec_path.exists(): + spec_build_section = _extract_spec_build_section(spec_path) + spec_disabled_features = _extract_spec_disabled_features(spec_build_section) + + if spec_disabled_features: + logger.info( + "harvest_build_data: found %d disabled features in spec", + len(spec_disabled_features), + ) + + return BuildHarvestReport( + disabled_features=disabled_features, + spec_disabled_features=spec_disabled_features, + expected_hardening=expected_hardening, + ) def format_l2_runtime_prompt( @@ -185,6 +312,10 @@ async def extract_l2_verdict( # --------------------------------------------------------------------------- # Prompt Templates (placeholders) # --------------------------------------------------------------------------- +# TODO: Add prompt instruction for LLM to search build log for affected files +# from l1_result.affected_files to verify they were actually compiled. +# TODO: Add prompt instruction for LLM to check architecture flags (-m32/-m64, +# -march, -mtune) when CVE is architecture-specific. L2_AGENT_SYS_PROMPT = """You are an L2 Build Agent analyzing whether vulnerable code is compiled into a binary. 
diff --git a/src/vuln_analysis/functions/cve_build_agent.py b/src/vuln_analysis/functions/cve_build_agent.py index 485931695..df8b21270 100644 --- a/src/vuln_analysis/functions/cve_build_agent.py +++ b/src/vuln_analysis/functions/cve_build_agent.py @@ -99,8 +99,13 @@ async def create_graph_build_agent( thought_llm = llm.with_structured_output(BuilderThought) # Reserved for implementation tools_node = ToolNode(tools, handle_tool_errors=True) if tools else None - # Extract context from state (guaranteed by route_after_l1 in register.py) + # Extract context from state (guaranteed by early exit checks in _arun) ctx = state.info.checker_context + assert ctx is not None, "checker_context must exist (checked in _arun)" + assert ctx.l1_result is not None, "l1_result must exist (checked in _arun)" + assert ctx.artifacts.build_log_path, "build_log_path must exist (checked in _arun)" + assert ctx.source_key, "source_key must exist when artifacts exist" + l1_result = ctx.l1_result artifacts = ctx.artifacts target_package = state.input.image.target_package @@ -108,13 +113,22 @@ async def create_graph_build_agent( # Paths source_key = ctx.source_key - checker_dir = Path(config.base_checker_dir) / source_key if source_key else None - build_log_path = Path(artifacts.build_log_path) if artifacts.build_log_path else None + checker_dir = Path(config.base_checker_dir) / source_key + build_log_path = Path(artifacts.build_log_path) # L1 results l1_affected_files = l1_result.affected_files l1_preliminary_verdict = l1_result.preliminary_verdict + # Extract CWE ID from intel (if available) + cwe_id = None + intel_list = state.info.intel + if intel_list and len(intel_list) > 0: + intel = intel_list[0] + if intel.nvd and intel.nvd.cwe_id: + cwe_id = intel.nvd.cwe_id + logger.info("build_agent: CWE ID from intel: %s", cwe_id) + # ------------------------------------------------------------------------- # Node definitions (PLACEHOLDER implementations) # 
------------------------------------------------------------------------- @@ -136,6 +150,7 @@ async def data_harvest_node(state: BuildAgentState) -> dict: harvest_report = await harvest_build_data( build_log_path=build_log_path, spec_path=spec_path, + cwe_id=cwe_id, ) runtime_prompt = format_l2_runtime_prompt( @@ -146,9 +161,9 @@ async def data_harvest_node(state: BuildAgentState) -> dict: ) span.set_output({ - "build_log_available": harvest_report.build_log_available, "compiled_files_count": len(harvest_report.compiled_files), "l1_affected_files_count": len(l1_affected_files), + "expected_hardening_count": len(harvest_report.expected_hardening), }) return { @@ -286,6 +301,10 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: logger.warning("build_agent: no L1 result available, skipping L2") return message + if not ctx.artifacts or not ctx.artifacts.build_log_path: + logger.warning("build_agent: no build log available, skipping L2") + return message + # Build and run the graph build_agent_graph = await create_graph_build_agent(config, builder, message, tracer) initial_state: BuildAgentState = { @@ -325,7 +344,6 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: compilation_confidence=verdict_extraction.confidence, compilation_evidence=verdict_extraction.reasoning, hardening_relevant=None, # Phase 2 - not implemented yet - hardening_flags=harvest_report.hardening_flags, hardening_rationale=None, # Phase 2 - not implemented yet l2_override_verdict=verdict_extraction.override_verdict, ) From f04a0d82eefbec45e8f4c608c13a9409d7d14521 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Tue, 5 May 2026 15:07:50 +0300 Subject: [PATCH 27/46] force node to code agent --- .../functions/cve_package_code_agent.py | 55 ++++++++++++++++--- 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index 
56617f2f9..a74b26570 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -59,7 +59,7 @@ ) from vuln_analysis.tools.brew_downloader import BrewDownloader, BrewProfileType, BrewDownloaderError -from vuln_analysis.functions.react_internals import CheckerThought, CodeFindings, Observation +from vuln_analysis.functions.react_internals import CheckerThought, CodeFindings, Observation, FORCED_FINISH_PROMPT from vuln_analysis.runtime_context import ctx_state logger = LoggingFactory.get_agent_logger(__name__) @@ -487,7 +487,7 @@ async def thought_node(state: CodeAgentState) -> dict: logger.info("thought_node: starting step %d", step_num) runtime_prompt = state.get("runtime_prompt") or "You are a security analyst investigating a CVE." messages = [SystemMessage(content=runtime_prompt)] + state["messages"] - with tracer.push_active_function("thought_node", input_data=messages) as span: + with tracer.push_active_function("thought_node", input_data={}) as span: obs = state.get("observation", None) if obs is not None: memory_list = obs.memory if obs.memory else ["No prior knowledge."] @@ -521,12 +521,51 @@ async def thought_node(state: CodeAgentState) -> dict: } async def forced_finish_node(state: CodeAgentState) -> dict: - """Force finish when max iterations reached.""" - logger.info("forced_finish_node: max iterations reached") - # TODO: Implement forced completion logic - return { - "messages": [AIMessage(content="Max iterations reached, forcing finish")], - } + """Force finish when max iterations reached. + + Invokes the LLM with FORCED_FINISH_PROMPT to generate a final answer + based on evidence gathered so far. 
+ """ + step_num = state.get("step", 0) + with tracer.push_active_function("forced_finish_node", input_data=f"step:{step_num}") as span: + try: + active_prompt = state.get("runtime_prompt") + messages = [SystemMessage(content=active_prompt)] + state["messages"] + messages.append(HumanMessage(content=FORCED_FINISH_PROMPT)) + + obs = state.get("observation") + if obs is not None and obs.memory: + memory_context = "\n".join(f"- {m}" for m in obs.memory) + messages.append(SystemMessage(content=f"KNOWLEDGE:\n{memory_context}")) + + response: CheckerThought = await thought_llm.ainvoke(messages) + + if response.mode == "finish" and response.final_answer: + ai_message = AIMessage(content=response.final_answer) + final_answer = response.final_answer + else: + final_answer = "Failed to generate a final answer within the maximum allowed steps." + ai_message = AIMessage(content=final_answer) + response = CheckerThought( + thought=response.thought or "Max steps exceeded", + mode="finish", + actions=None, + final_answer=final_answer, + ) + + span.set_output({"final_answer_length": len(final_answer), "step": step_num}) + return { + "messages": [ai_message], + "thought": response, + "step": step_num, + "max_steps": state.get("max_steps", config.max_iterations), + "observation": state.get("observation"), + "output": final_answer, + } + except Exception as e: + logger.exception("forced_finish_node failed at step %d", step_num) + span.set_output({"error": str(e), "exception_type": type(e).__name__, "step": step_num}) + raise def _extract_patch_patterns(state: CodeAgentState) -> tuple[str, str]: """Extract vulnerable and fix patterns from parsed patch in state.""" From aea087ab8a62b57ba13a077559431d749faf857b Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Tue, 5 May 2026 17:35:33 +0300 Subject: [PATCH 28/46] L2 Agent build ,before loop --- .../functions/build_agent_graph_defs.py | 383 ++++++++++++++++-- .../functions/cve_build_agent.py | 311 +++++++++++--- 
.../functions/cve_package_code_agent.py | 2 +- 3 files changed, 597 insertions(+), 99 deletions(-) diff --git a/src/vuln_analysis/functions/build_agent_graph_defs.py b/src/vuln_analysis/functions/build_agent_graph_defs.py index 72e6e6e76..fd4c5e962 100644 --- a/src/vuln_analysis/functions/build_agent_graph_defs.py +++ b/src/vuln_analysis/functions/build_agent_graph_defs.py @@ -31,7 +31,7 @@ from pydantic import BaseModel, Field from exploit_iq_commons.utils.hardening_kb import HardeningEntry - +from vuln_analysis.functions.react_internals import CheckerThought, Observation logger = logging.getLogger(__name__) @@ -71,21 +71,6 @@ class BuildHarvestReport(BaseModel): ) -class BuilderThought(BaseModel): - """Structured output for L2 agent ReAct reasoning.""" - - thought: str = Field(description="Current reasoning about compilation status") - mode: Literal["act", "finish"] = Field(description="Whether to use a tool or finish") - action: str | None = Field(default=None, description="Tool to call if mode='act'") - action_input: str | None = Field(default=None, description="Input for the tool") - final_answer: str | None = Field(default=None, description="Final answer if mode='finish'") - - -class BuildObservation(BaseModel): - """Observation from tool execution in L2 agent.""" - - findings: list[str] = Field(default_factory=list, description="Key findings from tool output") - compilation_evidence: str | None = Field(default=None, description="Evidence about compilation status") class L2VerdictExtraction(BaseModel): @@ -114,8 +99,8 @@ class BuildAgentState(MessagesState): l1_affected_files: NotRequired[list[str]] l1_preliminary_verdict: NotRequired[str | None] runtime_prompt: NotRequired[str | None] - thought: NotRequired[BuilderThought | None] - observation: NotRequired[BuildObservation | None] + thought: NotRequired[CheckerThought | None] + observation: NotRequired[Observation | None] step: NotRequired[int] max_steps: NotRequired[int] @@ -276,18 +261,6 @@ async def 
harvest_build_data( ) -def format_l2_runtime_prompt( - vuln_id: str, - target_package: str, - l1_affected_files: list[str], - harvest_report: BuildHarvestReport, -) -> str: - """Build runtime prompt for L2 agent thought node. - - PLACEHOLDER - Implementation pending. - """ - # TODO: Implement prompt formatting - return f"L2 Build Agent analyzing {vuln_id} for {target_package}. Affected files: {l1_affected_files}" async def extract_l2_verdict( @@ -310,24 +283,346 @@ async def extract_l2_verdict( # --------------------------------------------------------------------------- -# Prompt Templates (placeholders) +# Investigation 1: Configuration Flags Prompts # --------------------------------------------------------------------------- -# TODO: Add prompt instruction for LLM to search build log for affected files -# from l1_result.affected_files to verify they were actually compiled. -# TODO: Add prompt instruction for LLM to check architecture flags (-m32/-m64, -# -march, -mtune) when CVE is architecture-specific. -L2_AGENT_SYS_PROMPT = """You are an L2 Build Agent analyzing whether vulnerable code is compiled into a binary. +L2_CONFIG_SYS_PROMPT = ( + "You are an L2 Build Agent investigating whether VULNERABLE CODE is DISABLED at build time.\n\n" + "GOAL: Determine if the CVE-affected feature/component is compiled into the binary.\n\n" + "EVIDENCE SOURCES:\n" + "1. Pre-harvested disabled features from build log (-D defines like OPENSSL_NO_SM2)\n" + "2. Pre-harvested disabled features from spec file (no-sm2, --disable-*, --without-*)\n" + "3. Build log (searchable) - verify affected source files were compiled\n\n" + "INVESTIGATION STEPS:\n" + "1. Identify the CVE-affected component/feature from the description\n" + "2. Check if that feature appears in disabled_features or spec_disabled_features\n" + "3. 
Search build log to verify affected files from L1 were actually compiled\n\n" + "VERDICTS:\n" + "- NOT_COMPILED: Feature is disabled OR affected files not in build log\n" + "- COMPILED: Feature is enabled AND affected files are compiled\n" + "- UNKNOWN: Cannot determine from available evidence" +) + +L2_CONFIG_PROMPT_TEMPLATE = """{sys_prompt} + + +CVE ID: {vuln_id} +Target Package: {target_package} +CVE Description: {cve_description} + + + +Affected Files (from L1 code analysis): +{l1_affected_files} + +L1 Preliminary Verdict: {l1_preliminary_verdict} + + + +Disabled Features (from build log -D defines): +{disabled_features} + +Disabled Features (from spec configure flags): +{spec_disabled_features} + + + +{tools} + + +{tool_instructions} + +RESPONSE: +{{""" + +L2_CONFIG_THOUGHT_INSTRUCTIONS = """ +1. You MUST select a tool ONLY from . Do NOT invent tool names. +2. Output valid JSON only. thought < 100 words. final_answer < 150 words. +3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer. +4. FIRST check if CVE-affected feature appears in disabled_features or spec_disabled_features. +5. If feature not clearly disabled, search build log for affected files from L1. +6. Do NOT call the same tool with the same input twice. + + + +{{"thought": "CVE affects SM2 crypto. Checking if sm2 appears in disabled features list.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "sm2", "reason": "Search for SM2 in disabled features context"}}, "final_answer": null}} + + + +{{"thought": "SM2 not in disabled features. Now verify affected file crypto/sm2/sm2.c was compiled.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "sm2.c", "reason": "Check if affected source file appears in build log"}}, "final_answer": null}} + + + +{{"thought": "Found no-sm2 in spec_disabled_features. SM2 code is not compiled.", "mode": "finish", "actions": null, "final_answer": "NOT_COMPILED. 
The spec file configures with 'no-sm2' flag, which disables SM2 cryptographic functions. The vulnerable code in crypto/sm2/ is not compiled into the binary."}} + + + +{{"thought": "SM2 not disabled. Found sm2.c compilation in build log.", "mode": "finish", "actions": null, "final_answer": "COMPILED. SM2 is not in disabled features. Build log shows 'gcc -c crypto/sm2/sm2_crypt.c -o sm2_crypt.o', confirming the vulnerable code is compiled into the binary."}} + + + +{{"thought": "Cannot find evidence either way. Affected files not in build log but feature not disabled.", "mode": "finish", "actions": null, "final_answer": "UNKNOWN. The feature is not explicitly disabled, but the affected files do not appear in the build log. Cannot determine compilation status."}} +""" -PLACEHOLDER - Full prompt to be implemented. -""" -L2_THOUGHT_INSTRUCTIONS = """Based on the build evidence, determine if the affected files are compiled. +# --------------------------------------------------------------------------- +# Investigation 2: Hardening Flags Prompts +# --------------------------------------------------------------------------- -PLACEHOLDER - Full instructions to be implemented. -""" +L2_HARDENING_SYS_PROMPT = ( + "You are an L2 Build Agent investigating COMPILER HARDENING mitigations.\n\n" + "GOAL: Determine if hardening flags relevant to this CVE's vulnerability class are present.\n\n" + "CONTEXT: Investigation 1 determined the vulnerable code IS compiled. Now check if\n" + "compiler/linker hardening makes exploitation significantly harder.\n\n" + "EVIDENCE SOURCES:\n" + "1. Expected hardening flags (from CWE-based knowledge base)\n" + "2. Build log (searchable) - verify presence of hardening flags\n\n" + "INVESTIGATION STEPS:\n" + "1. Review expected_hardening flags for this CVE's CWE class\n" + "2. Search build log for each expected hardening flag\n" + "3. 
Determine if critical mitigations are present\n\n" + "VERDICTS:\n" + "- MITIGATED: Key hardening flags present that reduce exploitability\n" + "- NOT_MITIGATED: Hardening flags absent, vulnerability fully exploitable\n" + "- UNKNOWN: Cannot determine hardening status from available evidence" +) + +L2_HARDENING_PROMPT_TEMPLATE = """{sys_prompt} + + +CVE ID: {vuln_id} +Target Package: {target_package} +CWE: {cwe_id} + + + +The following compiler/linker flags mitigate this vulnerability class: + +{expected_hardening_table} + + + +{tools} + + +{tool_instructions} + +RESPONSE: +{{""" + +L2_HARDENING_THOUGHT_INSTRUCTIONS = """ +1. You MUST select a tool ONLY from . Do NOT invent tool names. +2. Output valid JSON only. thought < 100 words. final_answer < 150 words. +3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer. +4. Search build log for EACH expected hardening flag from the table. +5. Focus on flags most relevant to the CWE (e.g., stack protector for CWE-121). +6. Do NOT call the same tool with the same input twice. + + + +{{"thought": "CWE-121 stack overflow. Searching for -fstack-protector in build log.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "-fstack-protector", "reason": "Check for stack protector hardening"}}, "final_answer": null}} + + + +{{"thought": "Found -fstack-protector-strong. Now check for FORTIFY_SOURCE.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "FORTIFY_SOURCE", "reason": "Check for FORTIFY_SOURCE buffer overflow protection"}}, "final_answer": null}} + + + +{{"thought": "Found both stack protector and FORTIFY_SOURCE. Critical mitigations present.", "mode": "finish", "actions": null, "final_answer": "MITIGATED. Build log shows '-fstack-protector-strong' and '-D_FORTIFY_SOURCE=2'. These flags detect stack buffer overflows at runtime, making exploitation of CWE-121 significantly harder."}} + + + +{{"thought": "No stack protector found. 
Vulnerability is fully exploitable.", "mode": "finish", "actions": null, "final_answer": "NOT_MITIGATED. No stack protection flags found in build log. The CWE-121 stack overflow vulnerability can be exploited without runtime detection."}} + + + +{{"thought": "Build log incomplete. Cannot verify hardening status.", "mode": "finish", "actions": null, "final_answer": "UNKNOWN. Build log does not contain compilation commands with flag details. Cannot determine if hardening mitigations are present."}} +""" -L2_VERDICT_EXTRACTION_PROMPT = """Extract the compilation verdict from the analysis. -PLACEHOLDER - Full prompt to be implemented. -""" +# --------------------------------------------------------------------------- +# Verdict Extraction Prompts +# --------------------------------------------------------------------------- + +L2_CONFIG_VERDICT_PROMPT = """Extract the compilation verdict from the L2 Configuration investigation. + + +{final_answer} + + +Extract: +1. compilation_status: "compiled", "not_compiled", or "unknown" +2. confidence: 0.0 to 1.0 based on evidence strength +3. reasoning: Brief explanation (1-2 sentences) +4. override_verdict: "not_vulnerable" if not_compiled, null otherwise + +Output JSON only: +{{"compilation_status": "...", "confidence": 0.X, "reasoning": "...", "override_verdict": ...}}""" + +L2_HARDENING_VERDICT_PROMPT = """Extract the hardening verdict from the L2 Hardening investigation. + + +{final_answer} + + +Extract: +1. hardening_status: "mitigated", "not_mitigated", "not_applicable", or "unknown" + - "not_applicable": This CWE class has no compiler-level mitigations available +2. confidence: 0.0 to 1.0 based on evidence strength +3. reasoning: Brief explanation (1-2 sentences) +4. 
override_verdict: "vulnerable_mitigated" if mitigated, null otherwise (also null for not_applicable) + +Output JSON only: +{{"hardening_status": "...", "confidence": 0.X, "reasoning": "...", "override_verdict": ...}}""" + + +# --------------------------------------------------------------------------- +# L2 Observation Node Prompts (Comprehension & Memory Update) +# --------------------------------------------------------------------------- + +L2_COMPREHENSION_PROMPT = """Analyze the tool output for L2 build/compilation verification. +GOAL: Determine whether {vuln_id} vulnerable code is COMPILED in {target_package} + + +L1 Affected Files: {l1_affected_files} +Disabled Features (build log): {disabled_features} +Disabled Features (spec file): {spec_disabled_features} + + +TOOL USED: {tool_used} +TOOL INPUT: {tool_input} +THOUGHT: {last_thought} +NEW OUTPUT: +{tool_output} + +BUILD ANALYSIS RULES: +1. CHECK if tool output shows: + - Compilation commands for affected files (e.g., gcc -c file.c -o file.o) + - Feature-disable flags that match the CVE-affected component + - Object files or compilation artifacts for affected code + +2. COMPILATION EVIDENCE: + - COMPILED: Found gcc/compile commands for affected files + - NOT_COMPILED: Feature is disabled OR affected files not in build + - UNKNOWN: Insufficient evidence + +3. RECORD specific file paths, compile commands, or flag matches. + +TOOL-SPECIFIC RULES: +- If NEW OUTPUT is empty or error: "FAILED: [tool] [input] - [reason]" +- Source Grep: Check if matches show compilation or feature disabling +- Build log search: Look for compile commands and disabled features + +OUTPUT: +- findings: 2-4 key observations about compilation status +- tool_outcome: "Source Grep [pattern] -> found in build.log:123" +RESPONSE: +{{""" + +L2_MEMORY_UPDATE_PROMPT = """Merge findings into L2 build investigation memory. 
+GOAL: Determine whether {vuln_id} vulnerable code is COMPILED in {target_package} + +PREVIOUS MEMORY: {previous_memory} +NEW FINDINGS: {findings} +TOOL CALL RECORD: {tool_outcome} + +MEMORY RULES: +1. Append NEW FINDINGS to PREVIOUS MEMORY. No duplicates. +2. Add TOOL CALL RECORD verbatim. +3. If NEW FINDINGS report a failure, add the failure to memory. + +COMPILATION TRACKING: +- Affected file COMPILED: "FILE_COMPILED: [file] - evidence: [compile command]" +- Affected file NOT_COMPILED: "FILE_NOT_COMPILED: [file] - evidence: [disabled feature]" +- Feature DISABLED: "FEATURE_DISABLED: [feature] in [build_log/spec]" +- Feature ENABLED: "FEATURE_ENABLED: [feature] - no disable flag found" + +VERDICT EVIDENCE: +- NOT_COMPILED evidence: feature disabled OR affected files not compiled +- COMPILED evidence: affected files appear in compile commands +- UNKNOWN: conflicting evidence or no compilation info found + +- results: copy the NEW FINDINGS as-is. +- memory: updated cumulative findings with evidence tags. +RESPONSE: +{{""" + + +# --------------------------------------------------------------------------- +# L2 Hardening Observation Node Prompts (Comprehension & Memory Update) +# --------------------------------------------------------------------------- + +L2_HARDENING_COMPREHENSION_PROMPT = """Analyze the tool output for L2 hardening flag verification. +GOAL: Determine whether {vuln_id} has HARDENING mitigations in {target_package} + + +CWE: {cwe_id} +Expected Hardening Flags: +{expected_hardening} + + +TOOL USED: {tool_used} +TOOL INPUT: {tool_input} +THOUGHT: {last_thought} +NEW OUTPUT: +{tool_output} + +HARDENING ANALYSIS RULES: +1. FIRST CHECK Expected Hardening Flags above: + - If "None" or empty: This CWE has NO known compiler-level mitigations + - Mark findings as "NO_RELEVANT_HARDENING: {cwe_id} has no compiler mitigations" + - Skip searching for generic flags - they won't help this vulnerability class + +2. 
IF expected hardening flags exist, CHECK tool output for: + - Compiler hardening flags (e.g., -fstack-protector, -fPIE, -fstack-clash-protection) + - Preprocessor defines (e.g., -D_FORTIFY_SOURCE=2, -D_GLIBCXX_ASSERTIONS) + - Linker hardening flags (e.g., -Wl,-z,relro, -Wl,-z,now) + +3. HARDENING EVIDENCE: + - FLAG_PRESENT: Found expected hardening flag in build commands + - FLAG_ABSENT: Searched but did not find expected flag + - NOT_APPLICABLE: No compiler mitigations exist for this CWE class + - UNKNOWN: Insufficient evidence + +4. RECORD specific flags found and their context (compilation line). + +TOOL-SPECIFIC RULES: +- If NEW OUTPUT is empty or error: "FAILED: [tool] [input] - [reason]" +- Source Grep: Check if matches show hardening flags in gcc/clang commands +- Build log search: Look for -f*, -D*, -Wl,* patterns + +OUTPUT: +- findings: 2-4 key observations about hardening flags +- tool_outcome: "Source Grep [pattern] -> found in build.log:123" +RESPONSE: +{{""" + +L2_HARDENING_MEMORY_UPDATE_PROMPT = """Merge findings into L2 hardening investigation memory. +GOAL: Determine whether {vuln_id} has HARDENING mitigations in {target_package} + +CWE: {cwe_id} +PREVIOUS MEMORY: {previous_memory} +NEW FINDINGS: {findings} +TOOL CALL RECORD: {tool_outcome} + +MEMORY RULES: +1. Append NEW FINDINGS to PREVIOUS MEMORY. No duplicates. +2. Add TOOL CALL RECORD verbatim. +3. If NEW FINDINGS report a failure, add the failure to memory. 
+ +HARDENING TRACKING: +- No relevant hardening: "NO_RELEVANT_HARDENING: [CWE] has no compiler mitigations" +- Flag FOUND: "HARDENING_PRESENT: [flag] - evidence: [build command excerpt]" +- Flag NOT FOUND: "HARDENING_ABSENT: [flag] - searched but not found" +- Critical mitigation: "CRITICAL_MITIGATION: [flag] for [CWE] - [present/absent]" + +VERDICT EVIDENCE: +- NOT_APPLICABLE evidence: this CWE class has no compiler-level mitigations +- MITIGATED evidence: key hardening flags present that reduce exploitability +- NOT_MITIGATED evidence: expected hardening flags absent +- UNKNOWN: build log incomplete or no compilation commands found + +- results: copy the NEW FINDINGS as-is. +- memory: updated cumulative findings with hardening evidence tags. +RESPONSE: +{{""" diff --git a/src/vuln_analysis/functions/cve_build_agent.py b/src/vuln_analysis/functions/cve_build_agent.py index df8b21270..06ccf09ae 100644 --- a/src/vuln_analysis/functions/cve_build_agent.py +++ b/src/vuln_analysis/functions/cve_build_agent.py @@ -35,22 +35,32 @@ from langgraph.graph import StateGraph, START, END from langgraph.prebuilt import ToolNode -from langchain_core.messages import HumanMessage, AIMessage, SystemMessage +from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, RemoveMessage from nat.builder.context import Context from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput +from vuln_analysis.functions.react_internals import CheckerThought, CodeFindings, Observation, FORCED_FINISH_PROMPT + from vuln_analysis.functions.build_agent_graph_defs import ( BuildAgentState, BuildHarvestReport, - BuilderThought, BuildObservation, harvest_build_data, format_l2_runtime_prompt, extract_l2_verdict, + L2_CONFIG_PROMPT_TEMPLATE, + L2_CONFIG_SYS_PROMPT, + L2_CONFIG_THOUGHT_INSTRUCTIONS, + L2_COMPREHENSION_PROMPT, + L2_MEMORY_UPDATE_PROMPT, + L2_HARDENING_PROMPT_TEMPLATE, + L2_HARDENING_SYS_PROMPT, + L2_HARDENING_THOUGHT_INSTRUCTIONS, ) from 
vuln_analysis.runtime_context import ctx_state - +import uuid +import tiktoken logger = LoggingFactory.get_agent_logger(__name__) @@ -71,6 +81,26 @@ class CVEBuildAgentConfig(FunctionBaseConfig, name="cve_build_agent"): default=10, description="The maximum number of iterations for the agent.", ) + context_window_token_limit: int = Field(default=5000, description="Token limit for context window before pruning old messages.") + +def _build_tool_strategy(tool_names: list[str]) -> str: + """Generate tool usage guidance based on available tools.""" + strategies = [] + tool_names_lower = [t.lower().replace("_", " ") for t in tool_names] + + if any("grep" in t for t in tool_names_lower): + strategies.append("- Use Source Grep for exact code patterns from patch (function names, variable names, specific code)") + if any("keyword" in t or "search" in t for t in tool_names_lower): + strategies.append("- Use Code Keyword Search for broader concept searches when grep fails") + if any("read" in t for t in tool_names_lower): + strategies.append("- Use Read File to examine full context around matches") + + return "\n".join(strategies) if strategies else "Use available tools to search for vulnerable and fixed code patterns." 
+ + +class L2InvestigationPhase(StrEnum): + CONFIGURATION = "configuration" + HARDENING = "hardening" async def create_graph_build_agent( @@ -96,9 +126,14 @@ async def create_graph_build_agent( llm = await builder.get_llm(llm_name=config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN) tools = builder.get_tools(tool_names=config.tool_names, wrapper_type=LLMFrameworkEnum.LANGCHAIN) - thought_llm = llm.with_structured_output(BuilderThought) # Reserved for implementation + thought_llm = llm.with_structured_output(CheckerThought) + comprehension_llm = llm.with_structured_output(CodeFindings) + observation_llm = llm.with_structured_output(Observation) tools_node = ToolNode(tools, handle_tool_errors=True) if tools else None - + enabled_tool_names = [tool.name for tool in tools] + tool_descriptions_list = [t.name + ": " + t.description for t in tools] + tools_str = "\n".join(tool_descriptions_list) + tool_strategy = _build_tool_strategy(enabled_tool_names) # Extract context from state (guaranteed by early exit checks in _arun) ctx = state.info.checker_context assert ctx is not None, "checker_context must exist (checked in _arun)" @@ -123,15 +158,81 @@ async def create_graph_build_agent( # Extract CWE ID from intel (if available) cwe_id = None intel_list = state.info.intel - if intel_list and len(intel_list) > 0: - intel = intel_list[0] - if intel.nvd and intel.nvd.cwe_id: + intel = intel_list[0] + if intel.nvd and intel.nvd.cwe_id: cwe_id = intel.nvd.cwe_id logger.info("build_agent: CWE ID from intel: %s", cwe_id) - + descriptions: list[tuple[str, str]] = [] + if intel.ghsa: + cve_text = intel.ghsa.description or intel.ghsa.summary or "" + if cve_text: + descriptions.append(("ghsa", cve_text)) + if intel.ubuntu and intel.ubuntu.description: + descriptions.append(("ubuntu", intel.ubuntu.description)) + + cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions) + _tiktoken_enc = tiktoken.get_encoding("cl100k_base") + + investigation_stack: 
list[L2InvestigationPhase] = [] + investigation_stack.append(L2InvestigationPhase.HARDENING) + investigation_stack.append(L2InvestigationPhase.CONFIGURATION) # ------------------------------------------------------------------------- # Node definitions (PLACEHOLDER implementations) # ------------------------------------------------------------------------- + def _count_tokens(text: str) -> int: + """Count tokens using tiktoken cl100k_base encoding (~90-95% accurate for Llama 3.1).""" + try: + return len(_tiktoken_enc.encode(text)) + except Exception: + return len(text) // 4 + + def _estimate_tokens(runtime_prompt: str, messages: list, observation: Observation | None) -> int: + """Estimate the token count thought_node will send to the LLM.""" + parts = [runtime_prompt] + for msg in messages: + if hasattr(msg, "content") and isinstance(msg.content, str): + parts.append(msg.content) + if observation is not None: + for item in (observation.memory or []): + parts.append(item) + for item in (observation.results or []): + parts.append(item) + return _count_tokens("\n".join(parts)) + # ------------------------------------------------------------------------- + # Data Harvest Node + # ------------------------------------------------------------------------- + async def build_runtime_prompt(harvest_report: BuildHarvestReport) -> str: + """Generate the runtime prompt for the current investigation phase.""" + current_phase = investigation_stack[-1] + runtime_prompt = "" + if current_phase == L2InvestigationPhase.CONFIGURATION: + runtime_prompt = L2_CONFIG_PROMPT_TEMPLATE.format( + sys_prompt=L2_CONFIG_SYS_PROMPT, + vuln_id=vuln_id, + target_package=target_package.name, + cve_description=cve_description, + l1_affected_files=l1_affected_files, + l1_preliminary_verdict=l1_preliminary_verdict, + disabled_features=harvest_report.disabled_features, + spec_disabled_features=harvest_report.spec_disabled_features, + tools=tools_str, + tool_selection_strategy=tool_strategy, + 
tool_instructions=L2_CONFIG_THOUGHT_INSTRUCTIONS, + ) + return runtime_prompt + elif current_phase == L2InvestigationPhase.HARDENING: + runtime_prompt = L2_HARDENING_PROMPT_TEMPLATE.format( + sys_prompt=L2_HARDENING_SYS_PROMPT, + vuln_id=vuln_id, + target_package=target_package.name, + cve_description=cve_description, + l1_affected_files=l1_affected_files, + l1_preliminary_verdict=l1_preliminary_verdict, + hardening_report=harvest_report.expected_hardening, + ) + return runtime_prompt + else: + raise ValueError(f"Unknown investigation phase: {current_phase}") async def data_harvest_node(state: BuildAgentState) -> dict: """Harvest build data from artifacts. @@ -153,17 +254,13 @@ async def data_harvest_node(state: BuildAgentState) -> dict: cwe_id=cwe_id, ) - runtime_prompt = format_l2_runtime_prompt( - vuln_id=vuln_id, - target_package=target_package.name if target_package else "unknown", - l1_affected_files=l1_affected_files, - harvest_report=harvest_report, - ) - + runtime_prompt = await build_runtime_prompt(harvest_report) + span.set_output({ - "compiled_files_count": len(harvest_report.compiled_files), - "l1_affected_files_count": len(l1_affected_files), + "disabled_features_count": len(harvest_report.disabled_features), + "spec_disabled_features_count": len(harvest_report.spec_disabled_features), "expected_hardening_count": len(harvest_report.expected_hardening), + "l1_affected_files_count": len(l1_affected_files), }) return { @@ -182,28 +279,35 @@ async def thought_node(state: BuildAgentState) -> dict: step_num = state.get("step", 0) logger.info("thought_node: starting step %d", step_num) - runtime_prompt = state.get("runtime_prompt") or "Analyze build compilation status." 
+ runtime_prompt = state.get("runtime_prompt") _messages = [SystemMessage(content=runtime_prompt)] + state["messages"] # Reserved for LLM call with tracer.push_active_function("thought_node", input_data={}) as span: - # TODO: Implement actual LLM call - # For skeleton, create a placeholder finish response - response = BuilderThought( - thought="Placeholder: Build analysis not yet implemented.", - mode="finish", - action=None, - action_input=None, - final_answer="L2 Build Agent skeleton - implementation pending.", - ) - + obs = state.get("observation", None) + if obs is not None: + memory_list = obs.memory if obs.memory else ["No prior knowledge."] + recent_findings = obs.results if obs.results else ["No recent findings."] + memory_context = "\n".join(f"- {m}" for m in memory_list) + findings_context = "\n".join(f"- {f}" for f in recent_findings) + context_block = f"KNOWLEDGE:\n{memory_context}\nLATEST FINDINGS:\n{findings_context}" + _messages.append(SystemMessage(content=context_block)) + + response: CheckerThought = await thought_llm.ainvoke(_messages) + if response.mode == "finish": ai_message = AIMessage(content=response.final_answer or "Analysis complete.") else: - ai_message = AIMessage(content=response.thought) - + tool_name = response.actions.tool + arguments = response.actions.query + tool_call_id = str(uuid.uuid4()) + ai_message = AIMessage( + content=response.thought, + tool_calls=[{"name": tool_name, "args": {"query": arguments}, "id": tool_call_id}] + ) span.set_output({ "thought": response.thought, "mode": response.mode, + "actions": response.actions, "final_answer": response.final_answer, }) @@ -215,36 +319,135 @@ async def thought_node(state: BuildAgentState) -> dict: } async def observation_node(state: BuildAgentState) -> dict: - """Process tool output. - - PLACEHOLDER - Process tool results and update observations. 
- """ + """Process tool output: comprehension -> memory update for build analysis.""" logger.info("observation_node: starting") + tool_message = state["messages"][-1] + last_thought = state.get("thought") + if not last_thought: + return { + "messages": [AIMessage(content="No thought found")], + } + last_thought_text = last_thought.thought + tool_used = last_thought.actions.tool + tool_input_detail = last_thought.actions.query + previous_memory = state.get("observation").memory if state.get("observation") else ["No data gathered yet."] + + harvest_report = state.get("harvest_report") or BuildHarvestReport() + target_package_name = target_package.name if target_package else "unknown" + + with tracer.push_active_function("observation_node", input_data=f"tool used:{tool_used} + {tool_input_detail}") as span: + tool_output_for_llm = tool_message.content + + # Step 1: Comprehension - extract findings from tool output + comp_prompt = L2_COMPREHENSION_PROMPT.format( + vuln_id=vuln_id, + target_package=target_package_name, + l1_affected_files=", ".join(l1_affected_files) if l1_affected_files else "None", + disabled_features=", ".join(harvest_report.disabled_features) if harvest_report.disabled_features else "None", + spec_disabled_features=", ".join(harvest_report.spec_disabled_features) if harvest_report.spec_disabled_features else "None", + tool_used=tool_used, + tool_input=tool_input_detail, + last_thought=last_thought_text, + tool_output=tool_output_for_llm[:8000], + ) + code_findings: CodeFindings = await comprehension_llm.ainvoke([SystemMessage(content=comp_prompt)]) + findings_text = "\n".join(f"- {f}" for f in code_findings.findings) - with tracer.push_active_function("observation_node", input_data={}) as span: - # TODO: Implement tool output processing - observation = BuildObservation( - findings=["Placeholder observation"], - compilation_evidence=None, + # Step 2: Memory update - merge findings into cumulative memory + mem_prompt = L2_MEMORY_UPDATE_PROMPT.format( 
+ vuln_id=vuln_id, + target_package=target_package_name, + previous_memory="\n".join(f"- {m}" for m in previous_memory) if isinstance(previous_memory, list) else previous_memory, + findings=findings_text, + tool_outcome=code_findings.tool_outcome, ) - span.set_output({"findings": observation.findings}) + new_observation: Observation = await observation_llm.ainvoke([SystemMessage(content=mem_prompt)]) + + messages = state["messages"] + active_prompt = state.get("runtime_prompt") or "" + estimated = _estimate_tokens(active_prompt, messages, new_observation) + orig_estimated = estimated + prune_messages = [] + if estimated > config.context_window_token_limit and len(messages) > 3: + with tracer.push_active_function("context_pruning", input_data={"estimated_tokens": estimated, "limit": config.context_window_token_limit}) as prune_span: + prunable = messages[1:-2] + for msg in prunable: + prune_messages.append(RemoveMessage(id=msg.id)) + estimated -= _count_tokens(msg.content) if hasattr(msg, "content") and isinstance(msg.content, str) else 0 + if estimated <= config.context_window_token_limit: + break + logger.info( + "Context pruning: removed %d messages, estimated tokens now ~%d (limit %d)", + len(prune_messages), estimated, config.context_window_token_limit, + ) + prune_span.set_output({ + "pruning_triggered": len(prune_messages) > 0, + "messages_pruned": len(prune_messages), + "tokens_before": orig_estimated, + "tokens_after": estimated, + }) - return { - "messages": [AIMessage(content="Observation processed.")], - "observation": observation, - } - async def forced_finish_node(state: BuildAgentState) -> dict: - """Force finish when max iterations reached.""" - logger.info("forced_finish_node: max iterations reached") + span.set_output({ + "last_thought_text": last_thought_text, + "tool_output_for_llm": tool_output_for_llm[:500], + "findings": code_findings.findings, + "tool_outcome": code_findings.tool_outcome, + "new_memory": new_observation.memory, + 
"amount_of_orig_tokens": orig_estimated, + "amount_of_estimated_tokens": estimated, + }) return { - "messages": [AIMessage(content="Max iterations reached, forcing finish.")], - "thought": BuilderThought( - thought="Max iterations reached.", - mode="finish", - final_answer="Unable to determine compilation status within iteration limit.", - ), + "messages": prune_messages, + "observation": new_observation, } + + async def forced_finish_node(state: BuildAgentState) -> dict: + """Force finish when max iterations reached. + + Invokes the LLM with FORCED_FINISH_PROMPT to generate a final answer + based on evidence gathered so far. + """ + step_num = state.get("step", 0) + with tracer.push_active_function("forced_finish_node", input_data=f"step:{step_num}") as span: + try: + active_prompt = state.get("runtime_prompt") or "" + messages = [SystemMessage(content=active_prompt)] + state["messages"] + messages.append(HumanMessage(content=FORCED_FINISH_PROMPT)) + + obs = state.get("observation") + if obs is not None and obs.memory: + memory_context = "\n".join(f"- {m}" for m in obs.memory) + messages.append(SystemMessage(content=f"KNOWLEDGE:\n{memory_context}")) + + response: CheckerThought = await thought_llm.ainvoke(messages) + + if response.mode == "finish" and response.final_answer: + ai_message = AIMessage(content=response.final_answer) + final_answer = response.final_answer + else: + final_answer = "Unable to determine compilation status within iteration limit." 
+ ai_message = AIMessage(content=final_answer) + response = CheckerThought( + thought=response.thought or "Max steps exceeded", + mode="finish", + actions=None, + final_answer=final_answer, + ) + + span.set_output({"final_answer_length": len(final_answer), "step": step_num}) + return { + "messages": [ai_message], + "thought": response, + "step": step_num, + "max_steps": state.get("max_steps", config.max_iterations), + "observation": state.get("observation"), + "output": final_answer, + } + except Exception as e: + logger.exception("forced_finish_node failed at step %d", step_num) + span.set_output({"error": str(e), "exception_type": type(e).__name__, "step": step_num}) + raise async def should_continue(state: BuildAgentState) -> str: """Route based on thought mode.""" diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index a74b26570..3a6e25a58 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -700,7 +700,7 @@ async def observation_node(state: CodeAgentState) -> dict: "amount_of_estimated_tokens": estimated, }) return { - "messages": prune_messages + [AIMessage(content=f"Observation: {code_findings.tool_outcome}")], + "messages": prune_messages, "observation": new_observation, } From 1b16ad7eabfc5b3130d5ab6f6ee438a4e9375e84 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Wed, 6 May 2026 10:48:05 +0300 Subject: [PATCH 29/46] not working yet --- .../functions/build_agent_graph_defs.py | 30 ++---------- .../functions/cve_build_agent.py | 48 ++++++++++++++++++- 2 files changed, 50 insertions(+), 28 deletions(-) diff --git a/src/vuln_analysis/functions/build_agent_graph_defs.py b/src/vuln_analysis/functions/build_agent_graph_defs.py index fd4c5e962..be411a779 100644 --- a/src/vuln_analysis/functions/build_agent_graph_defs.py +++ b/src/vuln_analysis/functions/build_agent_graph_defs.py @@ -73,7 +73,7 @@ class 
BuildHarvestReport(BaseModel): -class L2VerdictExtraction(BaseModel): +class L2CompileVerdictExtraction(BaseModel): """LLM-extracted verdict from L2 agent final answer.""" compilation_status: Literal["compiled", "not_compiled", "unknown"] = Field( @@ -81,10 +81,6 @@ class L2VerdictExtraction(BaseModel): ) confidence: float = Field(ge=0.0, le=1.0, description="Confidence in the verdict") reasoning: str = Field(description="Brief explanation of the verdict") - override_verdict: Literal["not_vulnerable", "vulnerable_mitigated", None] = Field( - default=None, - description="L2 verdict override if applicable", - ) # --------------------------------------------------------------------------- @@ -103,7 +99,7 @@ class BuildAgentState(MessagesState): observation: NotRequired[Observation | None] step: NotRequired[int] max_steps: NotRequired[int] - + L2CompileVerdict: NotRequired[L2CompileVerdictExtraction | None] # --------------------------------------------------------------------------- # Spec File Parsing Helpers @@ -263,23 +259,6 @@ async def harvest_build_data( -async def extract_l2_verdict( - llm, - final_answer: str, - harvest_report: BuildHarvestReport, - tracer, -) -> L2VerdictExtraction: - """Extract structured verdict from L2 agent final answer. - - PLACEHOLDER - Implementation pending. - """ - # TODO: Implement LLM-based verdict extraction - return L2VerdictExtraction( - compilation_status="unknown", - confidence=0.0, - reasoning="Placeholder - not implemented", - override_verdict=None, - ) # --------------------------------------------------------------------------- @@ -444,7 +423,7 @@ async def extract_l2_verdict( # Verdict Extraction Prompts # --------------------------------------------------------------------------- -L2_CONFIG_VERDICT_PROMPT = """Extract the compilation verdict from the L2 Configuration investigation. +L2_COMPILATION_VERDICT_PROMPT = """Extract the compilation verdict from the L2 Configuration investigation. 
{final_answer} @@ -454,10 +433,9 @@ async def extract_l2_verdict( 1. compilation_status: "compiled", "not_compiled", or "unknown" 2. confidence: 0.0 to 1.0 based on evidence strength 3. reasoning: Brief explanation (1-2 sentences) -4. override_verdict: "not_vulnerable" if not_compiled, null otherwise Output JSON only: -{{"compilation_status": "...", "confidence": 0.X, "reasoning": "...", "override_verdict": ...}}""" +{{"compilation_status": "...", "confidence": 0.X, "reasoning": "..."}}""" L2_HARDENING_VERDICT_PROMPT = """Extract the hardening verdict from the L2 Hardening investigation. diff --git a/src/vuln_analysis/functions/cve_build_agent.py b/src/vuln_analysis/functions/cve_build_agent.py index 06ccf09ae..c0c427a3d 100644 --- a/src/vuln_analysis/functions/cve_build_agent.py +++ b/src/vuln_analysis/functions/cve_build_agent.py @@ -57,6 +57,8 @@ L2_HARDENING_PROMPT_TEMPLATE, L2_HARDENING_SYS_PROMPT, L2_HARDENING_THOUGHT_INSTRUCTIONS, + L2CompileVerdictExtraction, + L2_COMPILATION_VERDICT_PROMPT ) from vuln_analysis.runtime_context import ctx_state import uuid @@ -122,13 +124,14 @@ async def create_graph_build_agent( TOOL_NODE = "tool_node" OBSERVATION_NODE = "observation_node" FORCED_FINISH_NODE = "forced_finish" - + INVESTIGATION_PHASE_NODE = "investigation_phase" llm = await builder.get_llm(llm_name=config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN) tools = builder.get_tools(tool_names=config.tool_names, wrapper_type=LLMFrameworkEnum.LANGCHAIN) thought_llm = llm.with_structured_output(CheckerThought) comprehension_llm = llm.with_structured_output(CodeFindings) observation_llm = llm.with_structured_output(Observation) + compilation_verdict_llm = llm.with_structured_output(L2CompileVerdictExtraction) tools_node = ToolNode(tools, handle_tool_errors=True) if tools else None enabled_tool_names = [tool.name for tool in tools] tool_descriptions_list = [t.name + ": " + t.description for t in tools] @@ -457,8 +460,48 @@ async def should_continue(state: 
BuildAgentState) -> str: if state.get("step", 0) >= state.get("max_steps", config.max_iterations): return FORCED_FINISH_NODE return TOOL_NODE - + + + async def investigation_phase_node(state: BuildAgentState) -> dict: + """Determine the next investigation phase.""" + if len(investigation_stack) == 0: + raise ValueError("Investigation stack is empty") + current_phase = investigation_stack[-1] + with tracer.push_active_function("investigation_phase_node", input_data=f"phase :{current_phase}") as span: + investigation_stack.pop() + if current_phase == L2InvestigationPhase.CONFIGURATION: + verdict: L2CompileVerdictExtraction = await compilation_verdict_llm.ainvoke([SystemMessage(content=L2_COMPILATION_VERDICT_PROMPT.format(final_answer=state.get("final_answer")))]) + span.set_output({ + "compilation_status": verdict.compilation_status, + "confidence": verdict.confidence, + "reasoning": verdict.reasoning, + }) + if verdict.compilation_status == "not_compiled": + return {"L2CompileVerdict": verdict,} + else: + # next state is the hardening phase + preprocess_data = state.get("harvest_report") or BuildHarvestReport() + runtime_prompt = await build_runtime_prompt(preprocess_data) + span.set_output({ + "runtime_prompt": runtime_prompt,}) + return { + "runtime_prompt": runtime_prompt, + "thought": None, + "observation": None, + "step": 0 + } + else: + #state that run was hardening need to extract the hardening verdict + verdict: L2HardeningVerdictExtraction = await hardening_verdict_llm.ainvoke([SystemMessage(content=L2_HARDENING_VERDICT_PROMPT.format(final_answer=state.get("final_answer")))]) + span.set_output({ + "hardening_status": verdict.hardening_status, + "confidence": verdict.confidence, + "reasoning": verdict.reasoning, + }) + return { + "L2HardeningVerdict": verdict, + } # ------------------------------------------------------------------------- # Build graph # ------------------------------------------------------------------------- @@ -470,6 +513,7 @@ async def 
should_continue(state: BuildAgentState) -> str: flow.add_node(FORCED_FINISH_NODE, forced_finish_node) flow.add_node(OBSERVATION_NODE, observation_node) flow.add_node(TOOL_NODE, tools_node) + flow.add_node(INVESTIGATION_PHASE_NODE, investigation_phase_node) flow.add_edge(START, DATA_HARVEST_NODE) flow.add_edge(DATA_HARVEST_NODE, THOUGHT_NODE) From d7794df4cc90b2824f099d8129cb0797294eb3bf Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Wed, 6 May 2026 20:49:25 +0300 Subject: [PATCH 30/46] E2E - with bugs --- .../functions/build_agent_graph_defs.py | 42 ++- .../functions/code_agent_graph_defs.py | 244 +++++++++++++----- .../functions/cve_build_agent.py | 105 +++++--- .../functions/cve_package_code_agent.py | 22 +- src/vuln_analysis/tools/source_grep.py | 9 +- src/vuln_analysis/tools/source_inspector.py | 22 +- 6 files changed, 304 insertions(+), 140 deletions(-) diff --git a/src/vuln_analysis/functions/build_agent_graph_defs.py b/src/vuln_analysis/functions/build_agent_graph_defs.py index be411a779..dd05b6506 100644 --- a/src/vuln_analysis/functions/build_agent_graph_defs.py +++ b/src/vuln_analysis/functions/build_agent_graph_defs.py @@ -82,6 +82,14 @@ class L2CompileVerdictExtraction(BaseModel): confidence: float = Field(ge=0.0, le=1.0, description="Confidence in the verdict") reasoning: str = Field(description="Brief explanation of the verdict") +class L2HardeningVerdictExtraction(BaseModel): + """LLM-extracted verdict from L2 Hardening investigation.""" + hardening_status: Literal["mitigated", "not_mitigated", "not_applicable", "unknown"] = Field( + description="Whether hardening flags mitigate the vulnerability" + ) + confidence: float = Field(ge=0.0, le=1.0, description="Confidence in the verdict") + reasoning: str = Field(description="Brief explanation of the verdict") + # --------------------------------------------------------------------------- # Graph State @@ -100,6 +108,7 @@ class BuildAgentState(MessagesState): step: NotRequired[int] max_steps: 
NotRequired[int] L2CompileVerdict: NotRequired[L2CompileVerdictExtraction | None] + L2HardeningVerdict: NotRequired[L2HardeningVerdictExtraction | None] # --------------------------------------------------------------------------- # Spec File Parsing Helpers @@ -314,7 +323,16 @@ async def harvest_build_data( RESPONSE: {{""" -L2_CONFIG_THOUGHT_INSTRUCTIONS = """ +L2_CONFIG_THOUGHT_INSTRUCTIONS = """ +You will receive KNOWLEDGE (cumulative findings) and LATEST FINDINGS (most recent tool results). +BEFORE ACTING, you MUST: +1. Review KNOWLEDGE to see what tools were already called (TOOL_CALL_RECORD entries) +2. Review LATEST FINDINGS for the most recent tool output analysis +3. NEVER repeat any action already in TOOL_CALL_RECORD +4. Your next action MUST build on findings - progress the investigation + + + 1. You MUST select a tool ONLY from . Do NOT invent tool names. 2. Output valid JSON only. thought < 100 words. final_answer < 150 words. 3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer. @@ -356,9 +374,15 @@ async def harvest_build_data( "EVIDENCE SOURCES:\n" "1. Expected hardening flags (from CWE-based knowledge base)\n" "2. Build log (searchable) - verify presence of hardening flags\n\n" + "IMPORTANT - RHEL/Fedora Specs Files:\n" + "When you see these specs files in build logs, hardening flags are IMPLICITLY enabled:\n" + "- '-specs=/usr/lib/rpm/redhat/redhat-hardened-cc1' => -fPIE (position-independent code for ASLR)\n" + "- '-specs=/usr/lib/rpm/redhat/redhat-hardened-ld' => -pie + -z now (PIE linking + BIND_NOW/Full RELRO)\n" + "These flags will NOT appear explicitly in the build log - the specs file injects them.\n" + "If you see these specs files, count the corresponding protections as PRESENT.\n\n" "INVESTIGATION STEPS:\n" "1. Review expected_hardening flags for this CVE's CWE class\n" - "2. Search build log for each expected hardening flag\n" + "2. Search build log for each expected hardening flag OR corresponding specs file\n" "3. 
Determine if critical mitigations are present\n\n" "VERDICTS:\n" "- MITIGATED: Key hardening flags present that reduce exploitability\n" @@ -389,7 +413,16 @@ async def harvest_build_data( RESPONSE: {{""" -L2_HARDENING_THOUGHT_INSTRUCTIONS = """ +L2_HARDENING_THOUGHT_INSTRUCTIONS = """ +You will receive KNOWLEDGE (cumulative findings) and LATEST FINDINGS (most recent tool results). +BEFORE ACTING, you MUST: +1. Review KNOWLEDGE to see what tools were already called (TOOL_CALL_RECORD entries) +2. Review LATEST FINDINGS for the most recent tool output analysis +3. NEVER repeat any action already in TOOL_CALL_RECORD +4. Your next action MUST build on findings - progress the investigation + + + 1. You MUST select a tool ONLY from . Do NOT invent tool names. 2. Output valid JSON only. thought < 100 words. final_answer < 150 words. 3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer. @@ -448,10 +481,9 @@ async def harvest_build_data( - "not_applicable": This CWE class has no compiler-level mitigations available 2. confidence: 0.0 to 1.0 based on evidence strength 3. reasoning: Brief explanation (1-2 sentences) -4. 
override_verdict: "vulnerable_mitigated" if mitigated, null otherwise (also null for not_applicable) Output JSON only: -{{"hardening_status": "...", "confidence": 0.X, "reasoning": "...", "override_verdict": ...}}""" +{{"hardening_status": "...", "confidence": 0.X, "reasoning": "..."}}""" # --------------------------------------------------------------------------- diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index b85988bff..dec0bf8c1 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -1260,78 +1260,156 @@ async def upstream_search_preprocess( RESPONSE: {{""" -L1_AGENT_THOUGHT_INSTRUCTIONS = """ +L1_AGENT_THOUGHT_INSTRUCTIONS = """ +You will receive KNOWLEDGE (cumulative findings) and LATEST FINDINGS (most recent tool results). +BEFORE ACTING, you MUST: +1. Review KNOWLEDGE to see what tools were already called (TOOL_CALL_RECORD entries) +2. Review LATEST FINDINGS for the most recent tool output analysis +3. NEVER repeat any action already in TOOL_CALL_RECORD +4. Your next action MUST build on findings - progress the investigation + + + 1. You MUST select a tool ONLY from . Do NOT invent or use any other tool names. 2. Output valid JSON only. thought < 100 words. final_answer < 150 words. 3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer. 4. Source Grep: use query field with pattern from patch (function name, variable, or code snippet). 5. Code Keyword Search: use query field for broader searches. -6. Do NOT call the same tool with the same input twice. +6. Do NOT call the same tool with the same input twice - CHECK KNOWLEDGE for prior calls. 7. ALWAYS search for the vulnerable code pattern FIRST, then the fix pattern. 8. If a pattern contains special regex characters, escape them or use literal substrings. 
+ + +If a search returned results: +- Narrow down by searching within that specific file (e.g., "pattern,filename.c") +- Search for related symbols or variables from the code found +If a pattern wasn't found: +- Try simpler substrings or partial patterns +- Try a different tool (Source Grep <-> Code Keyword Search) +- Search for file paths from PATCH_CONTEXT + + -{{"thought": "Search for the vulnerable function from the patch", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Locate vulnerable code pattern"}}, "final_answer": null}} +{{"thought": "No prior searches in KNOWLEDGE. Search for the vulnerable function from the patch", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Locate vulnerable code pattern"}}, "final_answer": null}} -{{"thought": "Source Grep found no results. Try broader search with Code Keyword Search", "mode": "act", "actions": {{"tool": "Code Keyword Search", "query": "", "reason": "Broader search for the function"}}, "final_answer": null}} +{{"thought": "KNOWLEDGE shows Source Grep found no results for X. Try broader search with Code Keyword Search", "mode": "act", "actions": {{"tool": "Code Keyword Search", "query": "", "reason": "Broader search for the function"}}, "final_answer": null}} -{{"thought": "Found code. Now verify if it matches vulnerable or fixed version", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Check if vulnerable pattern exists"}}, "final_answer": null}} +{{"thought": "KNOWLEDGE shows function found at file.c:123. Search for specific vulnerable pattern in that file", "mode": "act", "actions": {{"tool": "Source Grep", "query": ",file.c", "reason": "Check if vulnerable pattern exists in the found file"}}, "final_answer": null}} + +{{"thought": "KNOWLEDGE shows fix pattern not found. 
Search for the key variable from fix to understand current code", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Find how the vulnerable variable is currently handled"}}, "final_answer": null}} + -{{"thought": "Evidence gathered", "mode": "finish", "actions": null, "final_answer": "The package is [PATCHED/VULNERABLE]. Found [evidence] at [file:line]. The code [matches/differs from] the patch because [reason]."}} +{{"thought": "KNOWLEDGE has sufficient evidence: vulnerable code at X, fix absent", "mode": "finish", "actions": null, "final_answer": "The package is [PATCHED/VULNERABLE]. Found [evidence] at [file:line]. The code [matches/differs from] the patch because [reason]."}} """ -L1_AGENT_THOUGHT_UPSTREAM_INSTRUCTIONS = """ +L1_AGENT_THOUGHT_UPSTREAM_INSTRUCTIONS = """ +You will receive KNOWLEDGE (cumulative findings) and LATEST FINDINGS (most recent tool results). +BEFORE ACTING, you MUST: +1. Review KNOWLEDGE to see what tools were already called (TOOL_CALL_RECORD entries) +2. Review LATEST FINDINGS for the most recent tool output analysis +3. NEVER repeat any action already in TOOL_CALL_RECORD +4. Your next action MUST build on findings - progress the investigation + + + 1. You MUST select a tool ONLY from . Do NOT invent or use any other tool names. 2. Output valid JSON only. thought < 100 words. final_answer < 150 words. 3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer. 4. Source Grep: use query field with pattern from patch (function name, variable, or code snippet). 5. Code Keyword Search: use query field for broader searches. -6. Do NOT call the same tool with the same input twice. +6. Do NOT call the same tool with the same input twice - CHECK KNOWLEDGE for prior calls. 7. FIRST search for VULNERABLE code (removed lines) - it SHOULD exist in target. 8. THEN search for FIX code (added lines) - it should NOT exist in target. 9. 
If a pattern contains special regex characters, escape them or use literal substrings. + + +If a search returned results: +- Narrow down by searching within that specific file (e.g., "pattern,filename.c") +- Search for related symbols or variables from the code found +If a pattern wasn't found: +- Try simpler substrings or partial patterns +- Try a different tool (Source Grep <-> Code Keyword Search) +- Search for file paths from PATCH_CONTEXT +If KNOWLEDGE shows partial evidence: +- Investigate other files mentioned in PATCH_CONTEXT +- Search for key variables from the fix pattern + + -{{"thought": "Search for the vulnerable code pattern from the patch to confirm it exists in target", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Locate vulnerable code that should exist in unpatched target"}}, "final_answer": null}} +{{"thought": "No prior searches in KNOWLEDGE. Search for the vulnerable code pattern from the patch", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Locate vulnerable code that should exist in unpatched target"}}, "final_answer": null}} -{{"thought": "Found vulnerable code. Now verify the fix is NOT present in target", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Check if fix code is absent (confirms vulnerability)"}}, "final_answer": null}} +{{"thought": "KNOWLEDGE shows function found at iso9660.c:2074. Now verify the fix is NOT present", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Check if fix code is absent (confirms vulnerability)"}}, "final_answer": null}} -{{"thought": "Fix pattern not found. Search for file from patch to verify code context", "mode": "act", "actions": {{"tool": "Code Keyword Search", "query": "", "reason": "Verify we are looking at the correct file"}}, "final_answer": null}} +{{"thought": "KNOWLEDGE shows fix pattern not found but need more evidence. 
Search for key variable in the found file", "mode": "act", "actions": {{"tool": "Source Grep", "query": ",", "reason": "Examine how the vulnerable variable is handled"}}, "final_answer": null}} + +{{"thought": "KNOWLEDGE shows Source Grep failed. Try Code Keyword Search for the file from patch", "mode": "act", "actions": {{"tool": "Code Keyword Search", "query": "", "reason": "Verify we are looking at the correct file"}}, "final_answer": null}} + -{{"thought": "Evidence confirms vulnerability: found vulnerable code, fix is absent", "mode": "finish", "actions": null, "final_answer": "The package is VULNERABLE. Found vulnerable code pattern at [file:line]: [quote code]. The fix from the patched version is NOT present - searched for [fix pattern] with no matches. The target package lacks the security fix."}} +{{"thought": "KNOWLEDGE has sufficient evidence: vulnerable code at X:Y, fix pattern absent", "mode": "finish", "actions": null, "final_answer": "The package is VULNERABLE. Found vulnerable code pattern at [file:line]: [quote code]. The fix from the patched version is NOT present - searched for [fix pattern] with no matches. The target package lacks the security fix."}} -{{"thought": "Unexpected: fix code found despite no CVE patch file", "mode": "finish", "actions": null, "final_answer": "The package appears PATCHED via rebase. Found fix code at [file:line]: [quote code]. Although no CVE-specific patch exists, the fix may have been included via upstream version update."}} +{{"thought": "KNOWLEDGE shows fix code found despite no CVE patch file", "mode": "finish", "actions": null, "final_answer": "The package appears PATCHED via rebase. Found fix code at [file:line]: [quote code]. 
Although no CVE-specific patch exists, the fix may have been included via upstream version update."}} """ -L1_AGENT_THOUGHT_REBASE_INSTRUCTIONS = """ +L1_AGENT_THOUGHT_REBASE_INSTRUCTIONS = """ +You will receive KNOWLEDGE (cumulative findings) and LATEST FINDINGS (most recent tool results). +BEFORE ACTING, you MUST: +1. Review KNOWLEDGE to see what tools were already called (TOOL_CALL_RECORD entries) +2. Review LATEST FINDINGS for the most recent tool output analysis +3. NEVER repeat any action already in TOOL_CALL_RECORD +4. Your next action MUST build on findings - progress the investigation + + + 1. You MUST select a tool ONLY from . Do NOT invent or use any other tool names. 2. Output valid JSON only. thought < 100 words. final_answer < 150 words. 3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer. 4. Source Grep: use query field with pattern from patch (function name, variable, or code snippet). 5. Code Keyword Search: use query field for broader searches. -6. Do NOT call the same tool with the same input twice. +6. Do NOT call the same tool with the same input twice - CHECK KNOWLEDGE for prior calls. 7. FIRST search for FIX code (added lines) - it SHOULD exist in rebased target. 8. THEN verify VULNERABLE code (removed lines) is ABSENT from target. 9. If a pattern contains special regex characters, escape them or use literal substrings. 
+ + +If a search returned results: +- Narrow down by searching within that specific file (e.g., "pattern,filename.c") +- Search for related symbols or variables from the code found +If a pattern wasn't found: +- Try simpler substrings or partial patterns +- Try a different tool (Source Grep <-> Code Keyword Search) +- Search for file paths from PATCH_CONTEXT +If KNOWLEDGE shows partial evidence: +- Investigate other files mentioned in PATCH_CONTEXT +- Search for key variables from the fix pattern + + -{{"thought": "Search for the fix code pattern from the patch to confirm it exists in rebased target", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Locate fix code that should exist after rebase"}}, "final_answer": null}} +{{"thought": "No prior searches in KNOWLEDGE. Search for the fix code pattern from the patch", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Locate fix code that should exist after rebase"}}, "final_answer": null}} -{{"thought": "Found fix code. Now verify the vulnerable code is absent", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Check if vulnerable code was removed (confirms fix)"}}, "final_answer": null}} +{{"thought": "KNOWLEDGE shows fix pattern not found. Try searching for key variable from the fix", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Find how the fix-related variable is handled"}}, "final_answer": null}} + +{{"thought": "KNOWLEDGE shows variable found at file.c:100. Search for the full fix pattern in that file", "mode": "act", "actions": {{"tool": "Source Grep", "query": ",file.c", "reason": "Check if fix exists in the located file"}}, "final_answer": null}} + + +{{"thought": "KNOWLEDGE shows fix confirmed. 
Now verify the vulnerable code is absent", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Check if vulnerable code was removed (confirms fix)"}}, "final_answer": null}} + -{{"thought": "Evidence confirms rebase fix: found fix code, vulnerable code is absent", "mode": "finish", "actions": null, "final_answer": "The package is PATCHED via rebase. Found fix code at [file:line]: [quote code]. The vulnerable code pattern is NOT present - the rebase successfully included the security fix."}} +{{"thought": "KNOWLEDGE has sufficient evidence: fix code at X:Y, vulnerable code absent", "mode": "finish", "actions": null, "final_answer": "The package is PATCHED via rebase. Found fix code at [file:line]: [quote code]. The vulnerable code pattern is NOT present - the rebase successfully included the security fix."}} -{{"thought": "Unexpected: vulnerable code still present despite rebase claim", "mode": "finish", "actions": null, "final_answer": "The rebase may be INCOMPLETE. Found vulnerable code still present at [file:line]: [quote code]. The fix code was not found despite spec indicating rebase fixed this CVE. Manual review required."}} +{{"thought": "KNOWLEDGE shows vulnerable code still present despite rebase claim", "mode": "finish", "actions": null, "final_answer": "The rebase may be INCOMPLETE. Found vulnerable code still present at [file:line]: [quote code]. The fix code was not found despite spec indicating rebase fixed this CVE. Manual review required."}} """ @@ -1341,6 +1419,13 @@ async def upstream_search_preprocess( L1_COMPREHENSION_PROMPT = """Analyze the tool output and extract key findings for CVE patch verification. GOAL: Verify whether {vuln_id} fix is applied to {target_package} + +**CRITICAL FIRST CHECK - DO THIS BEFORE ANYTHING ELSE:** +Examine NEW OUTPUT below. 
If it is EMPTY, contains only whitespace, or shows an error: +- findings MUST be: ["FAILED: {tool_used} '{tool_input}' returned empty/no matches"] +- tool_outcome MUST be: "{tool_used} [{tool_input}] -> NO MATCHES" +- DO NOT fabricate, infer, or assume any results. STOP HERE. + PATCH CONTEXT: - Vulnerable code patterns (removed lines): {vulnerable_patterns} - Fix code patterns (added lines): {fix_patterns} @@ -1351,23 +1436,23 @@ async def upstream_search_preprocess( NEW OUTPUT: {tool_output} -CODE ANALYSIS RULES: -1. READ the actual code snippets in NEW OUTPUT. Compare against the PATCH CONTEXT patterns. -2. For each match found, determine: - - Does it match the VULNERABLE pattern (code that should be removed/changed)? - - Does it match the FIX pattern (code that should be present after patching)? - - Is it a partial match, context match, or unrelated? +**ANTI-HALLUCINATION RULES:** +1. You can ONLY report findings based on text that ACTUALLY APPEARS in NEW OUTPUT above. +2. Every finding claiming code was "found" MUST include a direct quote from NEW OUTPUT. +3. If NEW OUTPUT is empty, you CANNOT claim any code was found - report FAILED. +4. The tool_outcome MUST accurately reflect what NEW OUTPUT shows, not what you expect. + +CODE ANALYSIS RULES (only if NEW OUTPUT has content): +1. READ the actual code snippets in NEW OUTPUT. Compare against PATCH CONTEXT patterns. +2. For each match found: + - Quote the actual line from NEW OUTPUT + - State the file:line where it was found + - Determine if it matches VULNERABLE or FIX pattern 3. RECORD file paths and line numbers for all relevant matches. -4. If no matches found, note which patterns were searched and suggest alternative search terms. 
- -TOOL-SPECIFIC RULES: -- If NEW OUTPUT is empty or contains an error, findings must state: "FAILED: [tool] [input] - [reason]" -- Source Grep: Check if matches show vulnerable code (needs fix) or fixed code (already patched) -- Code Keyword Search: Use broader context to locate relevant files for follow-up grep OUTPUT RULES: -- findings: 2-4 key observations about what the code shows -- tool_outcome: Record the search pattern and result (e.g., "Source Grep [pattern] -> found in file.c:123") +- findings: 2-4 observations. Each positive finding MUST quote actual content from NEW OUTPUT. +- tool_outcome: "{tool_used} [pattern] -> found in file.c:123" OR "{tool_used} [pattern] -> NO MATCHES" RESPONSE: {{""" @@ -1378,16 +1463,22 @@ async def upstream_search_preprocess( {findings} TOOL CALL RECORD: {tool_outcome} +**CRITICAL: HANDLE FAILURES CORRECTLY** +If NEW FINDINGS contains "FAILED:" or TOOL CALL RECORD shows "NO MATCHES": +- Add the failure/no-match to memory verbatim +- Do NOT convert a failed search into a positive finding +- "NO MATCHES" for a fix pattern means FIX_CODE_ABSENT, not FIX_CODE_FOUND + MEMORY RULES: 1. Start from PREVIOUS MEMORY. Append new facts from NEW FINDINGS. No duplicates. 2. Add TOOL CALL RECORD verbatim so future steps know what was already searched. -3. If NEW FINDINGS report a failure, add the failure to memory. Do NOT infer positive findings. +3. If NEW FINDINGS report a failure or no matches, record it as-is. Do NOT infer positive findings. 
PATCH VERIFICATION TRACKING: -- If vulnerable code pattern FOUND: add "VULNERABLE_CODE_FOUND: [pattern] in [file:line]" -- If fix code pattern FOUND: add "FIX_CODE_FOUND: [pattern] in [file:line]" -- If vulnerable code NOT FOUND after searching: add "VULNERABLE_CODE_ABSENT: [pattern] not found" -- If fix code NOT FOUND after searching: add "FIX_CODE_ABSENT: [pattern] not found" +- If vulnerable code pattern FOUND (with file:line evidence): add "VULNERABLE_CODE_FOUND: [pattern] in [file:line]" +- If fix code pattern FOUND (with file:line evidence): add "FIX_CODE_FOUND: [pattern] in [file:line]" +- If search returned NO MATCHES for vulnerable code: add "VULNERABLE_CODE_ABSENT: [pattern] not found" +- If search returned NO MATCHES for fix code: add "FIX_CODE_ABSENT: [pattern] not found" VERDICT EVIDENCE: - PATCHED evidence: fix code found AND/OR vulnerable code absent @@ -1407,6 +1498,12 @@ async def upstream_search_preprocess( L1_COMPREHENSION_PROMPT_CVE_DESC = """Analyze the tool output for CVE patch verification using CVE description context. GOAL: Verify whether {vuln_id} fix is applied to {target_package} +**CRITICAL FIRST CHECK - DO THIS BEFORE ANYTHING ELSE:** +Examine NEW OUTPUT below. If it is EMPTY, contains only whitespace, or shows an error: +- findings MUST be: ["FAILED: {tool_used} '{tool_input}' returned empty/no matches"] +- tool_outcome MUST be: "{tool_used} [{tool_input}] -> NO MATCHES" +- DO NOT fabricate, infer, or assume any results. STOP HERE. + CVE DESCRIPTION: {cve_description} @@ -1421,30 +1518,30 @@ async def upstream_search_preprocess( NEW OUTPUT: {tool_output} -CODE ANALYSIS RULES (CVE-Description Mode): +**ANTI-HALLUCINATION RULES:** +1. You can ONLY report findings based on text that ACTUALLY APPEARS in NEW OUTPUT above. +2. Every finding claiming code was "found" MUST include a direct quote from NEW OUTPUT. +3. If NEW OUTPUT is empty, you CANNOT claim any code was found - report FAILED. +4. 
The tool_outcome MUST accurately reflect what NEW OUTPUT shows, not what you expect. + +CODE ANALYSIS RULES (only if NEW OUTPUT has content): 1. EXTRACT key identifiers from the CVE description: - Function names, variable names, API calls - File paths or component names mentioned - - Error conditions or attack vectors 2. For each code match in NEW OUTPUT: + - Quote the actual line from NEW OUTPUT - Does it relate to the vulnerability described? - - Does it show defensive patterns (bounds checking, null validation, error handling)? + - Does it show defensive patterns (bounds checking, null validation)? - Record file path and line number as evidence 3. DEFENSIVE PATTERNS indicating a fix: - Input validation, bounds checking, null guards - Resource cleanup, error handling - - Security-related function calls - -TOOL-SPECIFIC RULES: -- If NEW OUTPUT is empty or contains an error: "FAILED: [tool] [input] - [reason]" -- Source Grep: Check if matches show vulnerable behavior or fixed/defensive code -- Code Keyword Search: Use to locate files containing CVE-related symbols OUTPUT: -- findings: 2-4 observations about code relative to CVE -- tool_outcome: "Source Grep [pattern] -> found in file.c:123" +- findings: 2-4 observations. Each positive finding MUST quote actual content from NEW OUTPUT. +- tool_outcome: "{tool_used} [pattern] -> found in file.c:123" OR "{tool_used} [pattern] -> NO MATCHES" RESPONSE: {{""" @@ -1517,25 +1614,52 @@ async def upstream_search_preprocess( {tool_instructions}""" -L1_AGENT_THOUGHT_CVE_DESC_INSTRUCTIONS = """RESPONSE FORMAT (JSON): +L1_AGENT_THOUGHT_CVE_DESC_INSTRUCTIONS = """ +You will receive KNOWLEDGE (cumulative findings) and LATEST FINDINGS (most recent tool results). +BEFORE ACTING, you MUST: +1. Review KNOWLEDGE to see what tools were already called (TOOL_CALL_RECORD entries) +2. Review LATEST FINDINGS for the most recent tool output analysis +3. NEVER repeat any action already in TOOL_CALL_RECORD +4. 
Your next action MUST build on findings - progress the investigation + + +RESPONSE FORMAT (JSON): You must respond with a JSON object with these fields: -- thought: Your reasoning about what to search for based on CVE description +- thought: Your reasoning based on KNOWLEDGE and CVE description (reference what was already found) - mode: "act" (to use a tool) or "finish" (to provide final answer) - actions: (only if mode="act") {{"tool": "Tool Name", "query": "search term", "reason": "why this search"}} - final_answer: (only if mode="finish") Your conclusion about patch status -SEARCH STRATEGY: -1. Extract function names, API calls, or code patterns from CVE description -2. Search for these patterns in the source code -3. Look for defensive/secure coding patterns -4. Conclude based on evidence found + +1. Do NOT call the same tool with the same input twice - CHECK KNOWLEDGE for prior calls. +2. If KNOWLEDGE shows a search was done, your next action must be DIFFERENT. +3. Output valid JSON only. thought < 100 words. + + + +If a search returned results: +- Narrow down by searching within that specific file (e.g., "pattern,filename.c") +- Search for related symbols or defensive patterns in the found code +If a pattern wasn't found: +- Try simpler substrings or partial patterns +- Try a different tool (Source Grep <-> Code Keyword Search) +- Search for related symbols from CVE description + + + +{{"thought": "No prior searches in KNOWLEDGE. CVE mentions zisofs block pointer overflow. Search for zisofs handling.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "zisofs", "reason": "Find CVE-related code handling zisofs"}}, "final_answer": null}} + - -{{"thought": "CVE mentions SSL_OP_NO_TICKET as mitigation. Search for this option in source.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "SSL_OP_NO_TICKET", "reason": "Find CVE-related mitigation code"}}, "final_answer": null}} - + +{{"thought": "KNOWLEDGE shows zisofs found at iso9660.c. 
CVE mentions bounds check fix. Search for defensive bounds checking in that file.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "pz_log2_bs,iso9660.c", "reason": "Check for bounds validation on the vulnerable variable"}}, "final_answer": null}} + + + +{{"thought": "KNOWLEDGE shows pz_log2_bs used but no bounds check found. Try Code Keyword Search for broader context.", "mode": "act", "actions": {{"tool": "Code Keyword Search", "query": "pz_log2_bs", "reason": "Find all usages to verify no defensive checks exist"}}, "final_answer": null}} + -{{"thought": "Found defensive code: SSL_OP_NO_TICKET is used in ssl_lib.c. This matches the CVE fix pattern.", "mode": "finish", "actions": null, "final_answer": "The package is LIKELY PATCHED. Found CVE-related defensive code at ssl_lib.c:4190 showing SSL_OP_NO_TICKET usage, which matches the described fix for CVE-2024-2511."}} +{{"thought": "KNOWLEDGE shows: zisofs at iso9660.c, pz_log2_bs has no bounds check. Evidence sufficient.", "mode": "finish", "actions": null, "final_answer": "The package is LIKELY VULNERABLE. Found zisofs handling at iso9660.c but no bounds checking on pz_log2_bs variable. 
The CVE describes missing validation on block size which matches the observed code."}} """ diff --git a/src/vuln_analysis/functions/cve_build_agent.py b/src/vuln_analysis/functions/cve_build_agent.py index c0c427a3d..6a1f4cff2 100644 --- a/src/vuln_analysis/functions/cve_build_agent.py +++ b/src/vuln_analysis/functions/cve_build_agent.py @@ -22,6 +22,7 @@ """ from pathlib import Path +from enum import StrEnum from aiq.builder.builder import Builder from aiq.builder.framework_enum import LLMFrameworkEnum @@ -45,10 +46,7 @@ from vuln_analysis.functions.build_agent_graph_defs import ( BuildAgentState, BuildHarvestReport, - BuildObservation, harvest_build_data, - format_l2_runtime_prompt, - extract_l2_verdict, L2_CONFIG_PROMPT_TEMPLATE, L2_CONFIG_SYS_PROMPT, L2_CONFIG_THOUGHT_INSTRUCTIONS, @@ -58,7 +56,9 @@ L2_HARDENING_SYS_PROMPT, L2_HARDENING_THOUGHT_INSTRUCTIONS, L2CompileVerdictExtraction, - L2_COMPILATION_VERDICT_PROMPT + L2_COMPILATION_VERDICT_PROMPT, + L2_HARDENING_VERDICT_PROMPT, + L2HardeningVerdictExtraction, ) from vuln_analysis.runtime_context import ctx_state import uuid @@ -83,6 +83,8 @@ class CVEBuildAgentConfig(FunctionBaseConfig, name="cve_build_agent"): default=10, description="The maximum number of iterations for the agent.", ) + llm_name: str = Field(description="The LLM model to use with the L1 code agent.") + tool_names: list[str] = Field(default=[], description="The list of tools to provide to L1 code agent") context_window_token_limit: int = Field(default=5000, description="Token limit for context window before pruning old messages.") def _build_tool_strategy(tool_names: list[str]) -> str: @@ -132,6 +134,7 @@ async def create_graph_build_agent( comprehension_llm = llm.with_structured_output(CodeFindings) observation_llm = llm.with_structured_output(Observation) compilation_verdict_llm = llm.with_structured_output(L2CompileVerdictExtraction) + hardening_verdict_llm = llm.with_structured_output(L2HardeningVerdictExtraction) tools_node = 
ToolNode(tools, handle_tool_errors=True) if tools else None enabled_tool_names = [tool.name for tool in tools] tool_descriptions_list = [t.name + ": " + t.description for t in tools] @@ -228,10 +231,10 @@ async def build_runtime_prompt(harvest_report: BuildHarvestReport) -> str: sys_prompt=L2_HARDENING_SYS_PROMPT, vuln_id=vuln_id, target_package=target_package.name, - cve_description=cve_description, - l1_affected_files=l1_affected_files, - l1_preliminary_verdict=l1_preliminary_verdict, - hardening_report=harvest_report.expected_hardening, + cwe_id=cwe_id, + expected_hardening_table=harvest_report.expected_hardening, + tools=tools_str, + tool_instructions=L2_HARDENING_THOUGHT_INSTRUCTIONS, ) return runtime_prompt else: @@ -373,8 +376,8 @@ async def observation_node(state: BuildAgentState) -> dict: prune_messages = [] if estimated > config.context_window_token_limit and len(messages) > 3: with tracer.push_active_function("context_pruning", input_data={"estimated_tokens": estimated, "limit": config.context_window_token_limit}) as prune_span: - prunable = messages[1:-2] - for msg in prunable: + + for msg in messages: prune_messages.append(RemoveMessage(id=msg.id)) estimated -= _count_tokens(msg.content) if hasattr(msg, "content") and isinstance(msg.content, str) else 0 if estimated <= config.context_window_token_limit: @@ -383,12 +386,12 @@ async def observation_node(state: BuildAgentState) -> dict: "Context pruning: removed %d messages, estimated tokens now ~%d (limit %d)", len(prune_messages), estimated, config.context_window_token_limit, ) - prune_span.set_output({ - "pruning_triggered": len(prune_messages) > 0, - "messages_pruned": len(prune_messages), - "tokens_before": orig_estimated, - "tokens_after": estimated, - }) + prune_span.set_output({ + "pruning_triggered": len(prune_messages) > 0, + "messages_pruned": len(prune_messages), + "tokens_before": orig_estimated, + "tokens_after": estimated, + }) span.set_output({ @@ -456,22 +459,33 @@ async def 
should_continue(state: BuildAgentState) -> str: """Route based on thought mode.""" thought = state.get("thought") if thought is not None and thought.mode == "finish": - return END + return INVESTIGATION_PHASE_NODE if state.get("step", 0) >= state.get("max_steps", config.max_iterations): return FORCED_FINISH_NODE return TOOL_NODE + async def is_investigation_finished(state: BuildAgentState) -> str: + """Check if the investigation is finished.""" + if len(investigation_stack) == 0: + return END + return THOUGHT_NODE async def investigation_phase_node(state: BuildAgentState) -> dict: """Determine the next investigation phase.""" if len(investigation_stack) == 0: raise ValueError("Investigation stack is empty") + + final_answer = None + thought = state.get("thought") + if thought and thought.mode == "finish": + final_answer = thought.final_answer + current_phase = investigation_stack[-1] with tracer.push_active_function("investigation_phase_node", input_data=f"phase :{current_phase}") as span: investigation_stack.pop() if current_phase == L2InvestigationPhase.CONFIGURATION: - verdict: L2CompileVerdictExtraction = await compilation_verdict_llm.ainvoke([SystemMessage(content=L2_COMPILATION_VERDICT_PROMPT.format(final_answer=state.get("final_answer")))]) + verdict: L2CompileVerdictExtraction = await compilation_verdict_llm.ainvoke([SystemMessage(content=L2_COMPILATION_VERDICT_PROMPT.format(final_answer=final_answer))]) span.set_output({ "compilation_status": verdict.compilation_status, "confidence": verdict.confidence, @@ -483,17 +497,23 @@ async def investigation_phase_node(state: BuildAgentState) -> dict: # next state is the hardening phase preprocess_data = state.get("harvest_report") or BuildHarvestReport() runtime_prompt = await build_runtime_prompt(preprocess_data) + messages = state["messages"] + prune_messages = [] + for msg in messages: + prune_messages.append(RemoveMessage(id=msg.id)) span.set_output({ "runtime_prompt": runtime_prompt,}) return { 
"runtime_prompt": runtime_prompt, "thought": None, "observation": None, - "step": 0 + "step": 0, + "messages": prune_messages, + "L2CompileVerdict": verdict, } else: #state that run was hardening need to extract the hardening verdict - verdict: L2HardeningVerdictExtraction = await hardening_verdict_llm.ainvoke([SystemMessage(content=L2_HARDENING_VERDICT_PROMPT.format(final_answer=state.get("final_answer")))]) + verdict: L2HardeningVerdictExtraction = await hardening_verdict_llm.ainvoke([SystemMessage(content=L2_HARDENING_VERDICT_PROMPT.format(final_answer=final_answer))]) span.set_output({ "hardening_status": verdict.hardening_status, "confidence": verdict.confidence, @@ -517,11 +537,12 @@ async def investigation_phase_node(state: BuildAgentState) -> dict: flow.add_edge(START, DATA_HARVEST_NODE) flow.add_edge(DATA_HARVEST_NODE, THOUGHT_NODE) - edge_map = {END: END, FORCED_FINISH_NODE: FORCED_FINISH_NODE, TOOL_NODE: TOOL_NODE} + edge_map = {INVESTIGATION_PHASE_NODE: INVESTIGATION_PHASE_NODE, FORCED_FINISH_NODE: FORCED_FINISH_NODE, TOOL_NODE: TOOL_NODE} flow.add_conditional_edges(THOUGHT_NODE, should_continue, edge_map) flow.add_edge(TOOL_NODE, OBSERVATION_NODE) flow.add_edge(OBSERVATION_NODE, THOUGHT_NODE) - flow.add_edge(FORCED_FINISH_NODE, END) + flow.add_edge(FORCED_FINISH_NODE, INVESTIGATION_PHASE_NODE) + flow.add_conditional_edges(INVESTIGATION_PHASE_NODE, is_investigation_finished, {END: END, THOUGHT_NODE: THOUGHT_NODE}) app = flow.compile() return app @@ -569,30 +590,28 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: logger.info("build_agent: L2 investigation finished") # Extract verdict from result - final_answer = None - thought = result.get("thought") - if thought and thought.mode == "finish": - final_answer = thought.final_answer - - harvest_report = result.get("harvest_report") or BuildHarvestReport() - - # Extract L2 verdict (placeholder) - llm = await builder.get_llm(llm_name=config.llm_name, 
wrapper_type=LLMFrameworkEnum.LANGCHAIN) - verdict_extraction = await extract_l2_verdict( - llm=llm, - final_answer=final_answer or "No analysis produced.", - harvest_report=harvest_report, - tracer=tracer, - ) + compile_verdict = result.get("L2CompileVerdict") or None + hardening_verdict = result.get("L2HardeningVerdict") or None + hardening_reason = None + if compile_verdict.compilation_status == "not_compiled": + hardening_relevant = False + l2_override_verdict = "not_vulnerable" + else: + hardening_relevant = True + hardening_reason = hardening_verdict.reasoning + if hardening_verdict.hardening_status == "mitigated": + l2_override_verdict = "vulnerable_mitigated" + else: + l2_override_verdict = None # Build L2 result l2_result = L2BuildResult( - compilation_status=verdict_extraction.compilation_status, - compilation_confidence=verdict_extraction.confidence, - compilation_evidence=verdict_extraction.reasoning, - hardening_relevant=None, # Phase 2 - not implemented yet - hardening_rationale=None, # Phase 2 - not implemented yet - l2_override_verdict=verdict_extraction.override_verdict, + compilation_status=compile_verdict.compilation_status, + compilation_confidence=compile_verdict.confidence, + compilation_evidence=compile_verdict.reasoning, + hardening_relevant=hardening_relevant, + hardening_rationale=hardening_reason, + l2_override_verdict=l2_override_verdict, ) with tracer.push_active_function( diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index 3a6e25a58..ceef0bd1b 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -678,9 +678,9 @@ async def observation_node(state: CodeAgentState) -> dict: prune_messages = [] orig_estimated = estimated - if estimated > config.context_window_token_limit and len(messages) > 3: - prunable = messages[1:-2] - for msg in prunable: + if estimated > 
config.context_window_token_limit: + + for msg in messages: prune_messages.append(RemoveMessage(id=msg.id)) estimated -= _count_tokens(msg.content) if hasattr(msg, "content") and isinstance(msg.content, str) else 0 if estimated <= config.context_window_token_limit: @@ -751,17 +751,17 @@ def _extract_affected_files(result: dict) -> list[str]: if downstream and downstream.parsed_patch: for pf in downstream.parsed_patch.files: - if pf.new_file: - affected.add(pf.new_file) - elif pf.old_file: - affected.add(pf.old_file) + if pf.is_new_file: + affected.add(pf.target_path) + else: + affected.add(pf.source_path) if upstream and upstream.fixed_parsed_patch: for pf in upstream.fixed_parsed_patch.files: - if pf.new_file: - affected.add(pf.new_file) - elif pf.old_file: - affected.add(pf.old_file) + if pf.is_new_file: + affected.add(pf.target_path) + else: + affected.add(pf.source_path) return sorted(affected) diff --git a/src/vuln_analysis/tools/source_grep.py b/src/vuln_analysis/tools/source_grep.py index 4ffe34aa3..8d8c832aa 100644 --- a/src/vuln_analysis/tools/source_grep.py +++ b/src/vuln_analysis/tools/source_grep.py @@ -49,7 +49,7 @@ class SourceGrepToolConfig(FunctionBaseConfig, name=SOURCE_GREP): description="Maximum number of grep results to return.", ) context_lines: int = Field( - default=2, + default=3, description="Number of context lines around each match.", ) @@ -82,8 +82,8 @@ def _format_results(pattern: str, matches: list, root: Path) -> str: rel_path = match.file_path.relative_to(root) except ValueError: rel_path = match.file_path - lines.append(f"{i}. {rel_path}:{match.line_number}") - lines.append(f" {match.line_content.strip()}") + lines.append(f"{i}. 
{rel_path}:{match.match_line_number}") + lines.append(f" {match.full_text.strip()}") lines.append("") return "\n".join(lines) @@ -133,7 +133,8 @@ async def _arun(query: str) -> str: ) logger.info("Source grep: found %d matches for '%s'", len(matches), pattern) - return _format_results(pattern, matches, source_dir) + # return _format_results(pattern, matches, source_dir) + return matches yield FunctionInfo.from_fn( _arun, diff --git a/src/vuln_analysis/tools/source_inspector.py b/src/vuln_analysis/tools/source_inspector.py index fb921086e..6bc8b78ea 100644 --- a/src/vuln_analysis/tools/source_inspector.py +++ b/src/vuln_analysis/tools/source_inspector.py @@ -30,10 +30,9 @@ @dataclass class GrepMatch: - """A single regex match inside a file.""" file_path: Path line_number: int - line_content: str + line_content: str class SourceInspector: @@ -123,7 +122,7 @@ async def grep_native( case_insensitive: bool = False, context_lines: int = 0, max_results: int = 50, - ) -> list[GrepMatch]: + ) -> str: """Fast grep using native Unix grep subprocess. 
Parameters @@ -172,20 +171,9 @@ def _run_grep() -> str: stdout = await asyncio.to_thread(_run_grep) - results: list[GrepMatch] = [] - for line in stdout.splitlines(): - parts = line.split(":", 2) - if len(parts) >= 3: - try: - line_num = int(parts[1]) - except ValueError: - continue - results.append(GrepMatch( - file_path=Path(parts[0]), - line_number=line_num, - line_content=parts[2], - )) - return results + #results: list[GrepMatch] = [] + #results = parse_grep_to_blocks(stdout) + return stdout def read_file( self, From cb3ff4bc043cafe51ba4f04f7c33e200ff333c55 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Thu, 7 May 2026 08:50:07 +0000 Subject: [PATCH 31/46] new api schema for rpm checker --- src/exploit_iq_commons/data_models/common.py | 1 - src/exploit_iq_commons/data_models/input.py | 2 +- src/vuln_analysis/configs/openapi/openapi.json | 18 ++++++++---------- src/vuln_analysis/utils/output_formatter.py | 2 +- 4 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/exploit_iq_commons/data_models/common.py b/src/exploit_iq_commons/data_models/common.py index fd4681109..db848f4ad 100644 --- a/src/exploit_iq_commons/data_models/common.py +++ b/src/exploit_iq_commons/data_models/common.py @@ -68,7 +68,6 @@ class TargetPackage(HashableModel): name: str version: str | None = None release: str | None = None # e.g. "1.el8_2.3" (needed for Brew NVR lookup) - ecosystem: str | None = None arch: str = "x86_64" # e.g. 
"x86_64", "aarch64", "s390x", "noarch" class TypedBaseModel(BaseModel, typing.Generic[_LT]): diff --git a/src/exploit_iq_commons/data_models/input.py b/src/exploit_iq_commons/data_models/input.py index 72c5382c3..31db324e1 100644 --- a/src/exploit_iq_commons/data_models/input.py +++ b/src/exploit_iq_commons/data_models/input.py @@ -178,7 +178,7 @@ class ImageInfoInput(HashableModel): """ target_package: TargetPackage | None = None - source_info: list[SourceDocumentsInfo] + source_info: list[SourceDocumentsInfo] = [] sbom_info: SBOMInfoInput | None = None @model_validator(mode="after") diff --git a/src/vuln_analysis/configs/openapi/openapi.json b/src/vuln_analysis/configs/openapi/openapi.json index 79feca4cc..f83b2a494 100644 --- a/src/vuln_analysis/configs/openapi/openapi.json +++ b/src/vuln_analysis/configs/openapi/openapi.json @@ -2190,7 +2190,7 @@ "python", "javascript", "java", - "c", + "c" ], "title": "Ecosystem" }, @@ -2210,7 +2210,8 @@ "$ref": "#/components/schemas/SourceDocumentsInfo" }, "type": "array", - "title": "Source Info" + "title": "Source Info", + "default": [] }, "sbom_info": { "oneOf": [ @@ -2229,9 +2230,7 @@ }, "type": "object", "required": [ - "analysis_type", - "source_info", - "sbom_info" + "analysis_type" ], "title": "ImageInfoInput", "description": "Information about a container image, including the source information and sbom information." @@ -2308,7 +2307,7 @@ "python", "javascript", "java", - "c", + "c" ], "title": "Ecosystem" }, @@ -2328,7 +2327,8 @@ "$ref": "#/components/schemas/SourceDocumentsInfo" }, "type": "array", - "title": "Source Info" + "title": "Source Info", + "default": [] }, "sbom_info": { "oneOf": [ @@ -2347,9 +2347,7 @@ }, "type": "object", "required": [ - "analysis_type", - "source_info", - "sbom_info" + "analysis_type" ], "title": "ImageInfoInput", "description": "Information about a container image, including the source information and sbom information." 
diff --git a/src/vuln_analysis/utils/output_formatter.py b/src/vuln_analysis/utils/output_formatter.py index 5bbbe5e60..6d3acf591 100644 --- a/src/vuln_analysis/utils/output_formatter.py +++ b/src/vuln_analysis/utils/output_formatter.py @@ -109,7 +109,7 @@ def _add_header(markdown_content, model_dict: AgentMorpheusOutput): markdown_content[cve_id].append(f"# Vulnerability Analysis Report for {cve_id}") markdown_content[cve_id].append(f"> **Container Analyzed:** `{input_image.name}:{input_image.tag}`\n\n") # Only add SBOM info if it is a file location - if input_image.sbom_info.type == "file": + if input_image.sbom_info and input_image.sbom_info.type == "file": markdown_content[cve_id].append(f"> **SBOM Info:** `{input_image.sbom_info}`\n\n") markdown_content[cve_id].append(f"> **Status:** {_get_expoiltability_text(output.justification.status)}") From 8e7310ad4e223cfd8129933603bd64bde862f075 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Thu, 7 May 2026 14:51:47 +0300 Subject: [PATCH 32/46] save before big change --- .../functions/code_agent_graph_defs.py | 127 ++++++++++++++++++ .../functions/cve_package_code_agent.py | 39 +++++- src/vuln_analysis/tools/source_grep.py | 103 ++++++++++---- src/vuln_analysis/tools/source_inspector.py | 25 +++- 4 files changed, 256 insertions(+), 38 deletions(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index dec0bf8c1..3b9600b47 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -163,6 +163,51 @@ class ParsedPatch(BaseModel): files: list[PatchFile] +# --------------------------------------------------------------------------- +# Vulnerability Intelligence schema +# --------------------------------------------------------------------------- + + +class VulnerabilityIntel(BaseModel): + """Structured intelligence extracted from CVE advisories and patches. 
+ + Used to provide grep-ready patterns and context for L1 agent source searches. + """ + + affected_files: list[str] = Field( + default_factory=list, + description="Source file paths likely to contain vulnerable code" + ) + vulnerable_functions: list[str] = Field( + default_factory=list, + description="Function names that contain or handle the vulnerability" + ) + vulnerable_variables: list[str] = Field( + default_factory=list, + description="Variable names involved in the vulnerability" + ) + vulnerable_patterns: list[str] = Field( + default_factory=list, + description="Code patterns/snippets indicating vulnerable code (from - lines)" + ) + fix_patterns: list[str] = Field( + default_factory=list, + description="Code patterns/snippets indicating fixed code (from + lines)" + ) + root_cause: str = Field( + default="", + description="Technical explanation of why the code is vulnerable" + ) + vulnerability_type: str = Field( + default="", + description="Category: buffer_overflow, integer_overflow, use_after_free, null_deref, etc." + ) + search_keywords: list[str] = Field( + default_factory=list, + description="Recommended grep patterns ordered by specificity (most specific first)" + ) + + # --------------------------------------------------------------------------- # Code Agent Report schema # --------------------------------------------------------------------------- @@ -301,6 +346,88 @@ def to_markdown( Provide your confidence level (0.0-1.0) based on the strength of evidence in the answer. """ +VULNERABILITY_INTEL_EXTRACTION_PROMPT = """\ +Extract structured vulnerability intelligence from the CVE data and patch content. +Your output will be used to guide source code searches, so focus on grep-able patterns. + + +CVE ID: {vuln_id} +Package: {target_package} +CVE Description: {cve_description} + + + +{patch_data} + + + +1. affected_files: Extract file paths from patch headers (strip a/ b/ prefixes) +2. 
vulnerable_functions: Extract function names from: + - Removed lines (- lines) in patch + - Function names mentioned in CVE description +3. vulnerable_variables: Extract variable names from removed lines that are key to the vulnerability +4. vulnerable_patterns: Extract distinctive code snippets from removed lines (- lines) + - Focus on patterns that can be grepped + - Include enough context to be unique +5. fix_patterns: Extract distinctive code snippets from added lines (+ lines) + - These indicate the fix is present +6. root_cause: Explain WHY the code is vulnerable in 1-2 sentences +7. vulnerability_type: Classify as one of: buffer_overflow, integer_overflow, use_after_free, + null_deref, format_string, race_condition, path_traversal, injection, other +8. search_keywords: List 3-5 grep patterns ordered by specificity: + - Start with most specific (unique variable/function names) + - End with broader patterns (file names, component names) + + + +- If no patch is provided, extract what you can from the CVE description +- For search_keywords, prefer identifiers over natural language +- Patterns should be grep-friendly (avoid regex special chars unless escaped) + +""" + + +def format_patch_data_for_intel( + parsed_patch: ParsedPatch | None +) -> str: + """Format patch and CVE data for intelligence extraction. + + Parameters + ---------- + parsed_patch: + Parsed patch file structure (may be None if no patch available). + cve_description: + CVE description text from advisories. + + Returns + ------- + str + Formatted string suitable for the VULNERABILITY_INTEL_EXTRACTION_PROMPT. + """ + if not parsed_patch: + return "" + + lines = [f"Patch: {parsed_patch.patch_filename}", ""] + for pf in parsed_patch.files: + lines.append(f"File: {pf.target_path}") + for hunk in pf.hunks: + if hunk.removed_lines: + lines.append(" Removed (vulnerable):") + for line in hunk.removed_lines[:10]: + lines.append(f" - {line}") + if len(hunk.removed_lines) > 10: + lines.append(f" ... 
(+{len(hunk.removed_lines) - 10} more lines)") + if hunk.added_lines: + lines.append(" Added (fix):") + for line in hunk.added_lines[:10]: + lines.append(f" + {line}") + if len(hunk.added_lines) > 10: + lines.append(f" ... (+{len(hunk.added_lines) - 10} more lines)") + lines.append("") + + return "\n".join(lines) + + CODE_AGENT_REPORT_PROMPT = """\ You are a security analyst generating the final L1 Code Agent investigation report. diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index ceef0bd1b..c81dfd248 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -56,6 +56,9 @@ L1_MEMORY_UPDATE_PROMPT_CVE_DESC, format_patch_files_summary, format_patch_hunks_summary, + VulnerabilityIntel, + VULNERABILITY_INTEL_EXTRACTION_PROMPT, + format_patch_data_for_intel, ) from vuln_analysis.tools.brew_downloader import BrewDownloader, BrewProfileType, BrewDownloaderError @@ -212,7 +215,7 @@ class CVEPackageCodeAgentConfig(FunctionBaseConfig, name="cve_package_code_agent ) llm_name: str = Field(description="The LLM model to use with the L1 code agent.") tool_names: list[str] = Field(default=[], description="The list of tools to provide to L1 code agent") - max_iterations: int = Field(default=10, description="The maximum number of iterations for the agent.") + max_iterations: int = Field(default=5, description="The maximum number of iterations for the agent.") context_window_token_limit: int = Field(default=5000, description="Token limit for context window before pruning old messages.") @@ -232,6 +235,7 @@ async def create_graph_code_agent(config: CVEPackageCodeAgentConfig, builder: Bu thought_llm = llm.with_structured_output(CheckerThought) comprehension_llm = llm.with_structured_output(CodeFindings) observation_llm = llm.with_structured_output(Observation) + vulnerability_intel_llm = llm.with_structured_output(VulnerabilityIntel) # 
Get tool names after filtering for dynamic guidance enabled_tool_names = [tool.name for tool in tools] tool_descriptions_list = [t.name + ": " + t.description for t in tools] @@ -299,7 +303,30 @@ async def L1_agent(state: CodeAgentState) -> dict: downstream_report = state.get("downstream_report") upstream_report = state.get("upstream_report") - with tracer.push_active_function("L1_agent", input_data={}) as span: + with tracer.push_active_function("Initial_Intelligence_Gathering", input_data={}) as span: + + if downstream_report and downstream_report.is_patch_file_available: + parsed_patch = downstream_report.parsed_patch + patch_data = format_patch_data_for_intel(parsed_patch) + elif upstream_report and upstream_report.is_fixed_srpm_is_needed: + parsed_patch = upstream_report.fixed_parsed_patch + patch_data = format_patch_data_for_intel(parsed_patch) + else: + patch_data = "" + + vul_prompt = VULNERABILITY_INTEL_EXTRACTION_PROMPT.format( + vuln_id=vuln_id, + target_package=target_package.name, + cve_description=cve_description, + patch_data=patch_data, + ) + vulnerability_intel: VulnerabilityIntel = await vulnerability_intel_llm.ainvoke( + [SystemMessage(content=vul_prompt)], + ) + span.set_output({ + "vulnerability_intel": vulnerability_intel.model_dump(), + }) + # Use case 1: Downstream patch file is available if downstream_report and downstream_report.is_patch_file_available: parsed_patch = downstream_report.parsed_patch @@ -415,11 +442,15 @@ async def L1_agent(state: CodeAgentState) -> dict: span.set_output({ "mode": "no_patch", }) - + + messages = state.get("messages", []) + remove_messages = [RemoveMessage(id=msg.id) for msg in messages if msg.id] + return { "runtime_prompt": runtime_prompt, "patch_search_mode": patch_search_mode, "cve_description": cve_description, + "messages": remove_messages, } async def should_continue_downstream(state: CodeAgentState) -> str: @@ -679,7 +710,7 @@ async def observation_node(state: CodeAgentState) -> dict: orig_estimated = 
estimated if estimated > config.context_window_token_limit: - + l_tool_count = _count_tokens(tool_output_for_llm) for msg in messages: prune_messages.append(RemoveMessage(id=msg.id)) estimated -= _count_tokens(msg.content) if hasattr(msg, "content") and isinstance(msg.content, str) else 0 diff --git a/src/vuln_analysis/tools/source_grep.py b/src/vuln_analysis/tools/source_grep.py index 8d8c832aa..37c1fa0af 100644 --- a/src/vuln_analysis/tools/source_grep.py +++ b/src/vuln_analysis/tools/source_grep.py @@ -54,21 +54,49 @@ class SourceGrepToolConfig(FunctionBaseConfig, name=SOURCE_GREP): ) -def _parse_query(query: str) -> tuple[str, str | None]: - """Parse query string into (pattern, file_glob). +VALID_TARGETS = ("source", "logs", "patch") + +TARGET_EXTENSIONS: dict[str, list[str]] = { + "source": ["*.c", "*.h", "*.cpp", "*.hpp", "*.py", "*.go", "*.java", "*.spec", "*.cmake", "Makefile", "*.mk"], + "logs": [], # empty = search all files + "patch": ["*.patch", "*.diff"], +} + + +def _parse_query(query: str) -> tuple[str, str | None, str, bool]: + """Parse query string into (pattern, file_glob, target, word_boundary). 
Supports formats: - - "pattern" -> search all source files - - "pattern,*.c" -> search only .c files - - "pattern,*.h" -> search only headers + - "pattern" -> search source (default) + - "pattern,*.c" -> search source, only .c files + - "target:pattern" -> search specific target + - "target:pattern,file_glob" -> search target with file filter + - "pattern -w" -> search with word boundary (whole words only) + - "target:pattern,file_glob -w" -> full format with word boundary + + Valid targets: source, logs, patch """ query = query.strip().strip('"').strip("'") + + word_boundary = False + if query.endswith(" -w"): + word_boundary = True + query = query[:-3].strip() + + target = "source" + if ":" in query: + prefix, rest = query.split(":", 1) + if prefix in VALID_TARGETS: + target = prefix + query = rest + if "," in query: parts = query.split(",", 1) pattern = parts[0].strip() file_glob = parts[1].strip() if len(parts) > 1 else None - return pattern, file_glob - return query, None + return pattern, file_glob, target, word_boundary + + return query, None, target, word_boundary def _format_results(pattern: str, matches: list, root: Path) -> str: @@ -95,14 +123,26 @@ async def source_grep(config: SourceGrepToolConfig, builder: Builder): # pylint @catch_tool_errors(SOURCE_GREP) async def _arun(query: str) -> str: - """Search source code using native Unix grep. + """Search source code, build logs, or patches using native Unix grep. 
+ + Query format: '[target:]pattern[,file_glob][ -w]' + + Targets: + - source (default): Package source code + - logs: Build compilation logs + - patch: Fixed patches from newer RPM version + + Options: + - -w: Match whole words only (word boundary) - Query format: 'pattern' or 'pattern,file_glob' Examples: - - 'GENERAL_NAME_cmp' - search all source files - - 'GENERAL_NAME_cmp,*.c' - search only .c files - - 'archive_read_open,*.h' - search only headers - - 'archive_read_open,archive.c' - search only in files named archive.c + - 'archive_read_open' - search source files + - 'archive_read_open,*.c' - search only .c source files + - 'archive_read_open -w' - search for whole word only + - 'logs:undefined reference' - search build logs for link errors + - 'logs:error:' - search build logs for error messages + - 'patch:CVE-2026-5121' - find patch for specific CVE + - 'patch:archive_read,*.patch' - search in patch files """ workflow_state = ctx_state.get() @@ -114,35 +154,44 @@ async def _arun(query: str) -> str: raise ValueError("Checker context or source_key not available in workflow state") source_key = checker_context.source_key - source_dir = (Path(config.base_checker_dir) / source_key / "source").resolve() + pattern, file_glob, target, word_boundary = _parse_query(query) + + target_dir = (Path(config.base_checker_dir) / source_key / target).resolve() - if not source_dir.is_dir(): - raise ValueError(f"Source directory does not exist: {source_dir}") + if not target_dir.is_dir(): + raise ValueError(f"Target directory does not exist: {target_dir}") - inspector = SourceInspector(source_dir) - pattern, file_glob = _parse_query(query) + inspector = SourceInspector(target_dir) + default_extensions = TARGET_EXTENSIONS.get(target, []) - logger.info("Source grep: searching for '%s' in %s (glob: %s)", - pattern, source_dir, file_glob or "all source files") + logger.info("Source grep: searching for '%s' in %s (target: %s, glob: %s, word_boundary: %s)", + pattern, target_dir, 
target, file_glob or "default extensions", word_boundary) matches = await inspector.grep_native( pattern=pattern, file_glob=file_glob, + word_boundary=word_boundary, context_lines=config.context_lines, max_results=config.max_results, + default_extensions=default_extensions, ) - logger.info("Source grep: found %d matches for '%s'", len(matches), pattern) - # return _format_results(pattern, matches, source_dir) + logger.info("Source grep: found matches for '%s' in target '%s'", pattern, target) return matches yield FunctionInfo.from_fn( _arun, description=( - "Fast grep search in source code using native Unix grep. " - "Input: 'pattern' or 'pattern,file_glob'. " - "Examples: 'GENERAL_NAME_cmp' searches all source files, " - "'GENERAL_NAME_cmp,*.c' searches only C files, " - "'archive_read_open,archive.c' searches only in files named archive.c." + "Fast grep search using native Unix grep. " + "Query format: '[target:]pattern[,file_glob][ -w]'. " + "Targets: 'source' (default) for package source code, " + "'logs' for build compilation logs, " + "'patch' for fixed patches from newer RPM. " + "Add ' -w' suffix for whole-word matching. " + "Examples: 'archive_read_open' searches source, " + "'archive_read_open,*.c' searches only C source files, " + "'archive_read_open -w' searches for whole word only, " + "'logs:undefined reference' searches build logs, " + "'patch:CVE-2026-5121' searches patch files." ), ) diff --git a/src/vuln_analysis/tools/source_inspector.py b/src/vuln_analysis/tools/source_inspector.py index 6bc8b78ea..825e5b55b 100644 --- a/src/vuln_analysis/tools/source_inspector.py +++ b/src/vuln_analysis/tools/source_inspector.py @@ -120,8 +120,10 @@ async def grep_native( file_glob: str | None = None, *, case_insensitive: bool = False, + word_boundary: bool = False, context_lines: int = 0, max_results: int = 50, + default_extensions: list[str] | None = None, ) -> str: """Fast grep using native Unix grep subprocess. 
@@ -130,32 +132,41 @@ async def grep_native( pattern: Search pattern (passed to grep as-is, supports basic regex). file_glob: - Optional file pattern (e.g., ``"*.c"``, ``"*.h"``). If *None*, searches - common source extensions: .c, .h, .cpp, .hpp, .py, .go, .java + Optional file pattern (e.g., ``"*.c"``, ``"*.h"``). If provided, + overrides default_extensions. case_insensitive: If *True*, perform case-insensitive matching (``-i`` flag). + word_boundary: + If *True*, match whole words only (``-w`` flag). context_lines: Lines of context around match (``-C`` flag). Default 0. max_results: Stop after this many matches (``-m`` flag). Default 50. + default_extensions: + List of file extensions to search when file_glob is not provided. + If *None*, searches ALL files (no --include filter). + If empty list ``[]``, searches ALL files (no --include filter). Returns ------- - list[GrepMatch] - Matches found, with file paths relative to source root removed - from line_content for cleaner output. + str + Raw grep output with matches found. 
""" cmd = ["grep", "-rn", "-I"] if case_insensitive: cmd.append("-i") + if word_boundary: + cmd.append("-w") if context_lines > 0: cmd.extend(["-C", str(context_lines)]) if file_glob: cmd.extend(["--include", file_glob]) - else: - for ext in ["*.c", "*.h", "*.cpp", "*.hpp", "*.py", "*.go", "*.java"]: + elif default_extensions is None: + pass # No filtering - search all files (caller should pass extensions explicitly) + elif default_extensions: + for ext in default_extensions: cmd.extend(["--include", ext]) cmd.extend(["-m", str(max_results), "--", pattern, str(self._root)]) From a2b7bd8768999949ef2c5422d4967b156d4e5f2f Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Thu, 7 May 2026 17:12:47 +0300 Subject: [PATCH 33/46] add vulnerability intel --- .../functions/build_agent_graph_defs.py | 13 +- .../functions/code_agent_graph_defs.py | 54 ++++-- .../functions/cve_build_agent.py | 3 + .../functions/cve_package_code_agent.py | 180 +++++------------- 4 files changed, 96 insertions(+), 154 deletions(-) diff --git a/src/vuln_analysis/functions/build_agent_graph_defs.py b/src/vuln_analysis/functions/build_agent_graph_defs.py index dd05b6506..2e350e76f 100644 --- a/src/vuln_analysis/functions/build_agent_graph_defs.py +++ b/src/vuln_analysis/functions/build_agent_graph_defs.py @@ -87,6 +87,10 @@ class L2HardeningVerdictExtraction(BaseModel): hardening_status: Literal["mitigated", "not_mitigated", "not_applicable", "unknown"] = Field( description="Whether hardening flags mitigate the vulnerability" ) + hardening_flags: list[str] = Field( + default_factory=list, + description="List of specific hardening flags found (e.g., -fstack-protector-strong, -D_FORTIFY_SOURCE=2)", + ) confidence: float = Field(ge=0.0, le=1.0, description="Confidence in the verdict") reasoning: str = Field(description="Brief explanation of the verdict") @@ -479,11 +483,14 @@ async def harvest_build_data( Extract: 1. 
hardening_status: "mitigated", "not_mitigated", "not_applicable", or "unknown" - "not_applicable": This CWE class has no compiler-level mitigations available -2. confidence: 0.0 to 1.0 based on evidence strength -3. reasoning: Brief explanation (1-2 sentences) +2. hardening_flags: List of specific compiler/linker flags that provide protection (e.g., ["-fstack-protector-strong", "-D_FORTIFY_SOURCE=2", "RELRO", "PIE"]) + - Extract the actual flag names mentioned in the investigation + - Empty list if no relevant flags found +3. confidence: 0.0 to 1.0 based on evidence strength +4. reasoning: Brief explanation (1-2 sentences) Output JSON only: -{{"hardening_status": "...", "confidence": 0.X, "reasoning": "..."}}""" +{{"hardening_status": "...", "hardening_flags": ["..."], "confidence": 0.X, "reasoning": "..."}}""" # --------------------------------------------------------------------------- diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index 3b9600b47..4aebbf1bf 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -57,8 +57,7 @@ class CodeAgentState(MessagesState): output: NotRequired[str] thought: NotRequired[CheckerThought | None] observation: NotRequired[Observation | None] - patch_search_mode: NotRequired[str | None] # "patch_patterns" or "cve_description" - cve_description: NotRequired[str | None] # CVE description for observation_node + vulnerability_intel: NotRequired["VulnerabilityIntel | None"] # --------------------------------------------------------------------------- @@ -428,6 +427,33 @@ def format_patch_data_for_intel( return "\n".join(lines) +def format_vulnerability_intel_for_prompt(intel: VulnerabilityIntel) -> str: + """Format VulnerabilityIntel for injection into L1 agent runtime prompt. + + Uses UPPERCASE labels so they can be referenced as anchors in thought prompts. 
+ """ + lines = [] + if intel.affected_files: + lines.append(f"AFFECTED_FILES: {', '.join(intel.affected_files)}") + if intel.vulnerable_functions: + lines.append(f"VULNERABLE_FUNCTIONS: {', '.join(intel.vulnerable_functions)}") + if intel.vulnerable_variables: + lines.append(f"VULNERABLE_VARIABLES: {', '.join(intel.vulnerable_variables)}") + if intel.vulnerable_patterns: + lines.append("VULNERABLE_PATTERNS:") + for p in intel.vulnerable_patterns: + lines.append(f" - {p}") + if intel.fix_patterns: + lines.append("FIX_PATTERNS:") + for p in intel.fix_patterns: + lines.append(f" - {p}") + if intel.search_keywords: + lines.append(f"SEARCH_KEYWORDS: {', '.join(intel.search_keywords)}") + if intel.root_cause: + lines.append(f"ROOT_CAUSE: {intel.root_cause}") + return "\n".join(lines) + + CODE_AGENT_REPORT_PROMPT = """\ You are a security analyst generating the final L1 Code Agent investigation report. @@ -1362,17 +1388,11 @@ async def upstream_search_preprocess( CVE ID: {vuln_id} Target Package: {target_package} -CVE Description: {cve_description} - -Patch File: {patch_filename} -Files Modified: -{patch_files_summary} - -Key Changes: -{patch_hunks_summary} - + +{vulnerability_intel} + {tools} @@ -1724,14 +1744,14 @@ async def upstream_search_preprocess( L1_AGENT_PROMPT_TEMPLATE_NO_PATCH = """{sys_prompt} + CVE ID: {vuln_id} -TARGET PACKAGE: {target_package} - -CVE DESCRIPTION: -{cve_description} +Target Package: {target_package} + -SPEC CHANGELOG (indicates rebase fixed this CVE): -{spec_log_change} + +{vulnerability_intel} + {tools} diff --git a/src/vuln_analysis/functions/cve_build_agent.py b/src/vuln_analysis/functions/cve_build_agent.py index 6a1f4cff2..167918996 100644 --- a/src/vuln_analysis/functions/cve_build_agent.py +++ b/src/vuln_analysis/functions/cve_build_agent.py @@ -593,12 +593,14 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: compile_verdict = result.get("L2CompileVerdict") or None hardening_verdict = 
result.get("L2HardeningVerdict") or None hardening_reason = None + hardening_flags = [] if compile_verdict.compilation_status == "not_compiled": hardening_relevant = False l2_override_verdict = "not_vulnerable" else: hardening_relevant = True hardening_reason = hardening_verdict.reasoning + hardening_flags = hardening_verdict.hardening_flags or [] if hardening_verdict.hardening_status == "mitigated": l2_override_verdict = "vulnerable_mitigated" else: @@ -610,6 +612,7 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: compilation_confidence=compile_verdict.confidence, compilation_evidence=compile_verdict.reasoning, hardening_relevant=hardening_relevant, + hardening_flags=hardening_flags, hardening_rationale=hardening_reason, l2_override_verdict=l2_override_verdict, ) diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index c81dfd248..6eaf19692 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -51,14 +51,11 @@ L1_AGENT_THOUGHT_REBASE_INSTRUCTIONS, L1_AGENT_THOUGHT_CVE_DESC_INSTRUCTIONS, L1_COMPREHENSION_PROMPT, - L1_COMPREHENSION_PROMPT_CVE_DESC, L1_MEMORY_UPDATE_PROMPT, - L1_MEMORY_UPDATE_PROMPT_CVE_DESC, - format_patch_files_summary, - format_patch_hunks_summary, VulnerabilityIntel, VULNERABILITY_INTEL_EXTRACTION_PROMPT, format_patch_data_for_intel, + format_vulnerability_intel_for_prompt, ) from vuln_analysis.tools.brew_downloader import BrewDownloader, BrewProfileType, BrewDownloaderError @@ -329,22 +326,11 @@ async def L1_agent(state: CodeAgentState) -> dict: # Use case 1: Downstream patch file is available if downstream_report and downstream_report.is_patch_file_available: - parsed_patch = downstream_report.parsed_patch - - patch_files_summary = format_patch_files_summary(parsed_patch) - patch_hunks_summary = format_patch_hunks_summary(parsed_patch) - - - patch_search_mode = 
"patch_patterns" - runtime_prompt = L1_AGENT_PROMPT_TEMPLATE.format( sys_prompt=L1_AGENT_SYS_PROMPT_PATCH_AVAILABLE, vuln_id=vuln_id, target_package=target_package.name, - cve_description=cve_description, - patch_filename=downstream_report.patch_file_name, - patch_files_summary=patch_files_summary, - patch_hunks_summary=patch_hunks_summary, + vulnerability_intel=format_vulnerability_intel_for_prompt(vulnerability_intel), tools=tools_str, tool_selection_strategy=tool_strategy, tool_instructions=L1_AGENT_THOUGHT_INSTRUCTIONS, @@ -353,26 +339,17 @@ async def L1_agent(state: CodeAgentState) -> dict: span.set_output({ "mode": "patch_available", "patch_filename": downstream_report.patch_file_name, - "patch_files_count": len(parsed_patch.files) if parsed_patch else 0, }) # Use case 2: code is fixed by rebase elif upstream_report and upstream_report.is_code_fixed_by_rebase == "yes": if upstream_report.is_fixed_srpm_is_needed and upstream_report.fixed_parsed_patch: # Has patch context - use patch-based verification - parsed_patch = upstream_report.fixed_parsed_patch - patch_files_summary = format_patch_files_summary(parsed_patch) - patch_hunks_summary = format_patch_hunks_summary(parsed_patch) - patch_search_mode = "patch_patterns" - runtime_prompt = L1_AGENT_PROMPT_TEMPLATE.format( sys_prompt=L1_AGENT_SYS_PROMPT_REBASE_FIX, vuln_id=vuln_id, target_package=target_package.name, - cve_description=cve_description, - patch_filename=upstream_report.spec_file_log_change[:100] if upstream_report.spec_file_log_change else "Rebase fix", - patch_files_summary=patch_files_summary, - patch_hunks_summary=patch_hunks_summary, + vulnerability_intel=format_vulnerability_intel_for_prompt(vulnerability_intel), tools=tools_str, tool_selection_strategy=tool_strategy, tool_instructions=L1_AGENT_THOUGHT_REBASE_INSTRUCTIONS, @@ -381,18 +358,14 @@ async def L1_agent(state: CodeAgentState) -> dict: span.set_output({ "mode": "rebase_fix_verification", "spec_log_change": 
upstream_report.spec_file_log_change[:200] if upstream_report.spec_file_log_change else "", - "patch_files_count": len(parsed_patch.files) if parsed_patch else 0, }) else: # No patch context - use CVE description-based verification - patch_search_mode = "cve_description" - runtime_prompt = L1_AGENT_PROMPT_TEMPLATE_NO_PATCH.format( sys_prompt=L1_AGENT_SYS_PROMPT_REBASE_NO_PATCH, vuln_id=vuln_id, target_package=target_package.name, - cve_description=cve_description, - spec_log_change=upstream_report.spec_file_log_change or "", + vulnerability_intel=format_vulnerability_intel_for_prompt(vulnerability_intel), tools=tools_str, tool_selection_strategy=tool_strategy, tool_instructions=L1_AGENT_THOUGHT_CVE_DESC_INSTRUCTIONS, @@ -404,21 +377,11 @@ async def L1_agent(state: CodeAgentState) -> dict: }) # use case 3: in target patch was not found but patch is found in the rpm that was mention in cve that is fixed elif upstream_report and upstream_report.fixed_parsed_patch: - parsed_patch = upstream_report.fixed_parsed_patch - - patch_files_summary = format_patch_files_summary(parsed_patch) - patch_hunks_summary = format_patch_hunks_summary(parsed_patch) - - patch_search_mode = "patch_patterns" - runtime_prompt = L1_AGENT_PROMPT_TEMPLATE.format( sys_prompt=L1_AGENT_SYS_PROMPT_UPSTREAM_PATCH, vuln_id=vuln_id, target_package=target_package.name, - cve_description=cve_description, - patch_filename=upstream_report.fixed_srpm_file_name, - patch_files_summary=patch_files_summary, - patch_hunks_summary=patch_hunks_summary, + vulnerability_intel=format_vulnerability_intel_for_prompt(vulnerability_intel), tools=tools_str, tool_selection_strategy=tool_strategy, tool_instructions=L1_AGENT_THOUGHT_UPSTREAM_INSTRUCTIONS, @@ -427,17 +390,17 @@ async def L1_agent(state: CodeAgentState) -> dict: span.set_output({ "mode": "upstream_patch_verification", "patch_filename": upstream_report.fixed_srpm_file_name, - "patch_files_count": len(parsed_patch.files) if parsed_patch else 0, }) else: - # 
Default prompt - no patch context, use CVE description - patch_search_mode = "cve_description" - runtime_prompt = ( - "You are a security analyst investigating a CVE.\n\n" - f"CVE ID: {vuln_id}\n" - f"Target Package: {target_package.name}\n" - f"CVE Description: {cve_description}\n\n" - "No downstream patch file is available. Use upstream analysis to determine vulnerability status." + # Use case 4: Default prompt - no patch context, use VulnerabilityIntel from CVE description + runtime_prompt = L1_AGENT_PROMPT_TEMPLATE_NO_PATCH.format( + sys_prompt=L1_AGENT_SYS_PROMPT_REBASE_NO_PATCH, + vuln_id=vuln_id, + target_package=target_package.name, + vulnerability_intel=format_vulnerability_intel_for_prompt(vulnerability_intel), + tools=tools_str, + tool_selection_strategy=tool_strategy, + tool_instructions=L1_AGENT_THOUGHT_CVE_DESC_INSTRUCTIONS, ) span.set_output({ "mode": "no_patch", @@ -448,8 +411,7 @@ async def L1_agent(state: CodeAgentState) -> dict: return { "runtime_prompt": runtime_prompt, - "patch_search_mode": patch_search_mode, - "cve_description": cve_description, + "vulnerability_intel": vulnerability_intel, "messages": remove_messages, } @@ -598,33 +560,19 @@ async def forced_finish_node(state: CodeAgentState) -> dict: span.set_output({"error": str(e), "exception_type": type(e).__name__, "step": step_num}) raise - def _extract_patch_patterns(state: CodeAgentState) -> tuple[str, str]: - """Extract vulnerable and fix patterns from parsed patch in state.""" - downstream_report = state.get("downstream_report") - upstream_report = state.get("upstream_report") - - parsed_patch = None - if downstream_report and downstream_report.parsed_patch: - parsed_patch = downstream_report.parsed_patch - elif upstream_report and upstream_report.fixed_parsed_patch: - parsed_patch = upstream_report.fixed_parsed_patch + def _extract_patterns_from_intel(state: CodeAgentState) -> tuple[str, str]: + """Extract vulnerable and fix patterns from VulnerabilityIntel in state.""" + 
vulnerability_intel = state.get("vulnerability_intel") - if not parsed_patch or not parsed_patch.files: - return "No patch available", "No patch available" + if not vulnerability_intel: + return "No patterns available", "No patterns available" - vulnerable_lines = [] - fix_lines = [] - for pf in parsed_patch.files: - for hunk in pf.hunks: - vulnerable_lines.extend(line.strip() for line in hunk.removed_lines[:3] if line.strip()) - fix_lines.extend(line.strip() for line in hunk.added_lines[:3] if line.strip()) - - vulnerable_patterns = "\n".join(vulnerable_lines[:10]) if vulnerable_lines else "No vulnerable code patterns identified" - fix_patterns = "\n".join(fix_lines[:10]) if fix_lines else "No fix code patterns identified" + vulnerable_patterns = "\n".join(vulnerability_intel.vulnerable_patterns) if vulnerability_intel.vulnerable_patterns else "No vulnerable code patterns identified" + fix_patterns = "\n".join(vulnerability_intel.fix_patterns) if vulnerability_intel.fix_patterns else "No fix code patterns identified" return vulnerable_patterns, fix_patterns async def observation_node(state: CodeAgentState) -> dict: - """Process tool output: comprehension -> memory update with patch context.""" + """Process tool output: comprehension -> memory update with VulnerabilityIntel context.""" logger.info("observation_node: starting") tool_message = state["messages"][-1] last_thought = state.get("thought") @@ -637,71 +585,35 @@ async def observation_node(state: CodeAgentState) -> dict: tool_input_detail = last_thought.actions.query previous_memory = state.get("observation").memory if state.get("observation") else ["No data gathered yet."] - vulnerable_patterns, fix_patterns = _extract_patch_patterns(state) + vulnerable_patterns, fix_patterns = _extract_patterns_from_intel(state) target_package_name = target_package.name if target_package else "unknown" - patch_search_mode = state.get("patch_search_mode", "patch_patterns") with tracer.push_active_function("observation 
node", input_data=f"tool used:{tool_used} + {tool_input_detail}") as span: tool_output_for_llm = tool_message.content - if patch_search_mode == "cve_description": - # CVE-description based prompts (no patch patterns available) - cve_description = state.get("cve_description", "") - spec_log_change = "" - upstream_report = state.get("upstream_report") - if upstream_report: - spec_log_change = upstream_report.spec_file_log_change or "" - - # Step 1: Comprehension - extract findings using CVE description context - comp_prompt = L1_COMPREHENSION_PROMPT_CVE_DESC.format( - vuln_id=vuln_id, - target_package=target_package_name, - cve_description=cve_description, - spec_log_change=spec_log_change, - tool_used=tool_used, - tool_input=tool_input_detail, - last_thought=last_thought_text, - tool_output=tool_output_for_llm[:8000], - ) - code_findings: CodeFindings = await comprehension_llm.ainvoke([SystemMessage(content=comp_prompt)]) - findings_text = "\n".join(f"- {f}" for f in code_findings.findings) - - # Step 2: Memory update - merge findings with CVE-based tracking - mem_prompt = L1_MEMORY_UPDATE_PROMPT_CVE_DESC.format( - vuln_id=vuln_id, - target_package=target_package_name, - previous_memory="\n".join(f"- {m}" for m in previous_memory) if isinstance(previous_memory, list) else previous_memory, - findings=findings_text, - tool_outcome=code_findings.tool_outcome, - ) - new_observation: Observation = await observation_llm.ainvoke([SystemMessage(content=mem_prompt)]) - else: - # Patch-based prompts (default) - vulnerable_patterns, fix_patterns = _extract_patch_patterns(state) - - # Step 1: Comprehension - extract key findings from raw tool output - comp_prompt = L1_COMPREHENSION_PROMPT.format( - vuln_id=vuln_id, - target_package=target_package_name, - vulnerable_patterns=vulnerable_patterns, - fix_patterns=fix_patterns, - tool_used=tool_used, - tool_input=tool_input_detail, - last_thought=last_thought_text, - tool_output=tool_output_for_llm[:8000], - ) - code_findings: 
CodeFindings = await comprehension_llm.ainvoke([SystemMessage(content=comp_prompt)]) - findings_text = "\n".join(f"- {f}" for f in code_findings.findings) + # Step 1: Comprehension - extract key findings from raw tool output + comp_prompt = L1_COMPREHENSION_PROMPT.format( + vuln_id=vuln_id, + target_package=target_package_name, + vulnerable_patterns=vulnerable_patterns, + fix_patterns=fix_patterns, + tool_used=tool_used, + tool_input=tool_input_detail, + last_thought=last_thought_text, + tool_output=tool_output_for_llm[:8000], + ) + code_findings: CodeFindings = await comprehension_llm.ainvoke([SystemMessage(content=comp_prompt)]) + findings_text = "\n".join(f"- {f}" for f in code_findings.findings) - # Step 2: Memory update - merge findings into cumulative memory - mem_prompt = L1_MEMORY_UPDATE_PROMPT.format( - vuln_id=vuln_id, - target_package=target_package_name, - previous_memory="\n".join(f"- {m}" for m in previous_memory) if isinstance(previous_memory, list) else previous_memory, - findings=findings_text, - tool_outcome=code_findings.tool_outcome, - ) - new_observation: Observation = await observation_llm.ainvoke([SystemMessage(content=mem_prompt)]) + # Step 2: Memory update - merge findings into cumulative memory + mem_prompt = L1_MEMORY_UPDATE_PROMPT.format( + vuln_id=vuln_id, + target_package=target_package_name, + previous_memory="\n".join(f"- {m}" for m in previous_memory) if isinstance(previous_memory, list) else previous_memory, + findings=findings_text, + tool_outcome=code_findings.tool_outcome, + ) + new_observation: Observation = await observation_llm.ainvoke([SystemMessage(content=mem_prompt)]) messages = state["messages"] active_prompt = state.get("runtime_prompt") From d62dff69b456c222c2523dd1244d09389202ed00 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Sat, 9 May 2026 15:02:47 +0000 Subject: [PATCH 34/46] L2 build agent improvments --- .../data/hardening_kb/hardening_kb.json | 128 +++++++++- .../data_models/checker_status.py | 89 ++++++- 
src/exploit_iq_commons/utils/hardening_kb.py | 188 ++++++++++++++ .../functions/build_agent_graph_defs.py | 167 +++++++++---- .../functions/code_agent_graph_defs.py | 231 +++++++++++------- .../functions/cve_build_agent.py | 93 ++++--- .../functions/cve_checker_report.py | 13 +- .../functions/cve_package_code_agent.py | 55 +---- 8 files changed, 738 insertions(+), 226 deletions(-) create mode 100644 src/exploit_iq_commons/utils/hardening_kb.py diff --git a/src/exploit_iq_commons/data/hardening_kb/hardening_kb.json b/src/exploit_iq_commons/data/hardening_kb/hardening_kb.json index ab91b941e..92ab9a8f9 100644 --- a/src/exploit_iq_commons/data/hardening_kb/hardening_kb.json +++ b/src/exploit_iq_commons/data/hardening_kb/hardening_kb.json @@ -1,9 +1,17 @@ { - "kb_version": "1.0", - "last_updated": "2026-05-05", + "kb_version": "1.1", + "last_updated": "2026-05-09", + "flag_type_definitions": { + "warning": "Compile-time warnings only. Does not add runtime protection. Use for 'Best Practices' audits, NOT for mitigation claims.", + "runtime": "Actual runtime protection or detection. Valid for 'Mitigated' status if maps to the specific CWE.", + "optimization": "Changes compiler optimization behavior but does not add runtime detection/prevention. Not valid for mitigation.", + "linker": "Linker-level hardening that affects runtime binary layout/behavior. Valid for mitigation if maps to CWE.", + "architecture": "Platform-specific build flag. Valid for mitigation ONLY if CVE advisory states the architecture is not affected." 
+ }, "mappings": [ { "flag": "-Wall -Wextra", + "flag_type": "warning", "description": "Enable warnings for constructs often associated with defects.", "vulnerability_category": "Defensive Coding", "cwe_ids": [ @@ -15,6 +23,7 @@ }, { "flag": "-Wformat -Wformat=2", + "flag_type": "warning", "description": "Enable additional format function warnings.", "vulnerability_category": "Input Validation", "cwe_ids": [ @@ -24,6 +33,7 @@ }, { "flag": "-Wconversion -Wsign-conversion", + "flag_type": "warning", "description": "Enable implicit conversion warnings.", "vulnerability_category": "Arithmetic Safety", "cwe_ids": [ @@ -34,6 +44,7 @@ }, { "flag": "-Wtrampolines", + "flag_type": "warning", "description": "Enable warnings about trampolines that require executable stacks.", "vulnerability_category": "Control Flow Integrity", "cwe_ids": [ @@ -43,6 +54,7 @@ }, { "flag": "-Wimplicit-fallthrough", + "flag_type": "warning", "description": "Warn when a switch case falls through.", "vulnerability_category": "Defensive Coding", "cwe_ids": [ @@ -52,6 +64,7 @@ }, { "flag": "-Wbidi-chars=any", + "flag_type": "warning", "description": "Enable warnings for possibly misleading Unicode bidirectional control characters.", "vulnerability_category": "Code Integrity", "cwe_ids": [ @@ -60,7 +73,8 @@ "requires": {} }, { - "flag": "-Werror ", + "flag": "-Werror", + "flag_type": "warning", "description": "Treat all or selected compiler warnings as errors.", "vulnerability_category": "Policy Enforcement", "cwe_ids": [ @@ -70,6 +84,7 @@ }, { "flag": "-Werror=format-security", + "flag_type": "warning", "description": "Treat format strings that are not string literals and used without arguments as errors.", "vulnerability_category": "Input Validation", "cwe_ids": [ @@ -78,7 +93,8 @@ "requires": {} }, { - "flag": "-Werror=implicit -Werror=incompatible-pointer-types -Werror=int-conversion ", + "flag": "-Werror=implicit -Werror=incompatible-pointer-types -Werror=int-conversion", + "flag_type": 
"warning", "description": "Treat obsolete C constructs as errors.", "vulnerability_category": "Type Safety", "cwe_ids": [ @@ -89,6 +105,7 @@ }, { "flag": "-D_FORTIFY_SOURCE=3", + "flag_type": "runtime", "description": "Fortify sources with compile- and run-time checks for unsafe libc usage and buffer overflows.", "vulnerability_category": "Memory Safety", "cwe_ids": [ @@ -99,8 +116,22 @@ ], "requires": {} }, + { + "flag": "-D_FORTIFY_SOURCE=2", + "flag_type": "runtime", + "description": "Fortify sources with compile- and run-time checks for unsafe libc usage and buffer overflows (legacy level).", + "vulnerability_category": "Memory Safety", + "cwe_ids": [ + "CWE-119", + "CWE-120", + "CWE-121", + "CWE-122" + ], + "requires": {} + }, { "flag": "-D_GLIBCXX_ASSERTIONS", + "flag_type": "runtime", "description": "Precondition checks for C++ standard library calls.", "vulnerability_category": "Memory Safety", "cwe_ids": [ @@ -112,6 +143,7 @@ }, { "flag": "-fstrict-flex-arrays=3", + "flag_type": "runtime", "description": "Consider a trailing array in a struct as a flexible array if declared as [].", "vulnerability_category": "Memory Safety", "cwe_ids": [ @@ -123,6 +155,7 @@ }, { "flag": "-fstack-clash-protection", + "flag_type": "runtime", "description": "Enable run-time checks for variable-size stack allocation validity.", "vulnerability_category": "Memory Safety", "cwe_ids": [ @@ -132,6 +165,7 @@ }, { "flag": "-fstack-protector-strong", + "flag_type": "runtime", "description": "Enable run-time checks for stack-based buffer overflows.", "vulnerability_category": "Memory Safety", "cwe_ids": [ @@ -141,6 +175,7 @@ }, { "flag": "-fcf-protection=full", + "flag_type": "runtime", "description": "Enable control-flow protection against return-oriented programming (ROP) and jump-oriented programming (JOP) attacks on x86_64.", "vulnerability_category": "Control Flow Integrity", "cwe_ids": [ @@ -150,6 +185,7 @@ }, { "flag": "-mbranch-protection=standard", + "flag_type": "runtime", 
"description": "Enable branch protection against ROP and JOP attacks on AArch64.", "vulnerability_category": "Control Flow Integrity", "cwe_ids": [ @@ -157,8 +193,42 @@ ], "requires": {} }, + { + "flag": "-ftrapv", + "flag_type": "runtime", + "description": "Generate traps for signed arithmetic overflow on addition, subtraction, multiplication.", + "vulnerability_category": "Arithmetic Safety", + "cwe_ids": [ + "CWE-190", + "CWE-191" + ], + "requires": {} + }, + { + "flag": "-fsanitize=signed-integer-overflow", + "flag_type": "runtime", + "description": "Enable undefined behavior sanitizer for signed integer overflow detection.", + "vulnerability_category": "Arithmetic Safety", + "cwe_ids": [ + "CWE-190", + "CWE-191" + ], + "requires": {} + }, + { + "flag": "-fsanitize=unsigned-integer-overflow", + "flag_type": "runtime", + "description": "Enable undefined behavior sanitizer for unsigned integer overflow detection.", + "vulnerability_category": "Arithmetic Safety", + "cwe_ids": [ + "CWE-190", + "CWE-191" + ], + "requires": {} + }, { "flag": "-Wl,-z,nodlopen", + "flag_type": "linker", "description": "Restrict dlopen(3) calls to shared objects.", "vulnerability_category": "Policy Enforcement", "cwe_ids": [ @@ -168,6 +238,7 @@ }, { "flag": "-Wl,-z,noexecstack", + "flag_type": "linker", "description": "Enable data execution prevention by marking stack memory as non-executable.", "vulnerability_category": "Control Flow Integrity", "cwe_ids": [ @@ -178,6 +249,7 @@ }, { "flag": "-Wl,-z,relro -Wl,-z,now", + "flag_type": "linker", "description": "Mark relocation table entries resolved at load-time as read-only.", "vulnerability_category": "Code Integrity", "cwe_ids": [ @@ -187,6 +259,7 @@ }, { "flag": "-fPIE -pie", + "flag_type": "linker", "description": "Build as position-independent executable.", "vulnerability_category": "Control Flow Integrity", "cwe_ids": [ @@ -196,6 +269,7 @@ }, { "flag": "-fPIC -shared", + "flag_type": "linker", "description": "Build as 
position-independent code.", "vulnerability_category": "Control Flow Integrity", "cwe_ids": [ @@ -205,6 +279,7 @@ }, { "flag": "-fno-delete-null-pointer-checks", + "flag_type": "optimization", "description": "Force retention of null pointer checks.", "vulnerability_category": "Memory Safety", "cwe_ids": [ @@ -214,7 +289,8 @@ }, { "flag": "-fno-strict-overflow", - "description": "Define behavior for signed integer and pointer arithmetic overflows", + "flag_type": "optimization", + "description": "Do not assume signed integer overflow is undefined behavior. Prevents aggressive optimizations but does NOT add runtime detection.", "vulnerability_category": "Arithmetic Safety", "cwe_ids": [ "CWE-190" @@ -223,6 +299,7 @@ }, { "flag": "-fno-strict-aliasing", + "flag_type": "optimization", "description": "Do not assume strict aliasing.", "vulnerability_category": "Memory Safety", "cwe_ids": [ @@ -232,6 +309,7 @@ }, { "flag": "-ftrivial-auto-var-init", + "flag_type": "runtime", "description": "Initialize automatic variables that lack explicit initializers.", "vulnerability_category": "Information Leakage", "cwe_ids": [ @@ -241,6 +319,7 @@ }, { "flag": "-fexceptions", + "flag_type": "runtime", "description": "Enable exception propagation to harden multi-threaded C code.", "vulnerability_category": "Error Handling", "cwe_ids": [ @@ -250,6 +329,7 @@ }, { "flag": "-fhardened", + "flag_type": "runtime", "description": "Enable pre-determined set of hardening options in GCC.", "vulnerability_category": "Full Hardening", "cwe_ids": [ @@ -259,6 +339,7 @@ }, { "flag": "-Wl,--as-needed -Wl,--no-copy-dt-needed-entries", + "flag_type": "linker", "description": "Allow linker to omit libraries specified on the command line to link against if they are not used.", "vulnerability_category": "Supply Chain Safety", "cwe_ids": [ @@ -268,12 +349,47 @@ }, { "flag": "-fzero-init-padding-bits=all", + "flag_type": "runtime", "description": "Guarantee zero initialization of padding bits in all 
automatic variable initializers.", "vulnerability_category": "Information Leakage", "cwe_ids": [ "CWE-200" ], "requires": {} + }, + { + "flag": "-m64", + "flag_type": "architecture", + "description": "Compile for 64-bit x86_64 architecture. Many integer overflow vulnerabilities only affect 32-bit systems.", + "vulnerability_category": "Architecture", + "cwe_ids": [ + "CWE-190", + "CWE-680", + "CWE-681" + ], + "requires": { + "advisory_states": "Mitigation valid ONLY if CVE advisory explicitly states 64-bit systems are not affected." + } + }, + { + "flag": "-m32", + "flag_type": "architecture", + "description": "Compile for 32-bit i686 architecture.", + "vulnerability_category": "Architecture", + "cwe_ids": [], + "requires": { + "advisory_states": "Check CVE advisory for 32-bit specific vulnerabilities." + } + }, + { + "flag": "-march=", + "flag_type": "architecture", + "description": "Target specific CPU architecture. May affect vulnerability applicability.", + "vulnerability_category": "Architecture", + "cwe_ids": [], + "requires": { + "advisory_states": "Check CVE advisory for architecture-specific conditions." + } } ] -} \ No newline at end of file +} diff --git a/src/exploit_iq_commons/data_models/checker_status.py b/src/exploit_iq_commons/data_models/checker_status.py index 2dbfc6f93..19b74f53c 100644 --- a/src/exploit_iq_commons/data_models/checker_status.py +++ b/src/exploit_iq_commons/data_models/checker_status.py @@ -65,6 +65,89 @@ class AcquiredArtifacts(BaseModel): patch_diff_path: Path | None = None +class VulnerabilityIntel(BaseModel): + """Structured intelligence extracted from CVE advisories and patches. + + Used to provide grep-ready patterns and context for L1 agent source searches. 
+ """ + + affected_files: list[str] = Field( + default_factory=list, + description="Source file paths likely to contain vulnerable code" + ) + vulnerable_functions: list[str] = Field( + default_factory=list, + description="Function names that contain or handle the vulnerability" + ) + vulnerable_variables: list[str] = Field( + default_factory=list, + description="Variable names involved in the vulnerability" + ) + vulnerable_patterns: list[str] = Field( + default_factory=list, + description="Code patterns/snippets indicating vulnerable code (from - lines)" + ) + fix_patterns: list[str] = Field( + default_factory=list, + description="Code patterns/snippets indicating fixed code (from + lines)" + ) + root_cause: str = Field( + default="", + description="Technical explanation of why the code is vulnerable" + ) + vulnerability_type: str = Field( + default="", + description="Category: buffer_overflow, integer_overflow, use_after_free, null_deref, etc." + ) + search_keywords: list[str] = Field( + default_factory=list, + description="Recommended grep patterns ordered by specificity (most specific first)" + ) + affected_architectures: Literal["32-bit", "64-bit", "both"] = Field( + default="both", + description="Which CPU architectures are affected: 32-bit only, 64-bit only, or both (default)" + ) + + def format_for_prompt(self) -> str: + """Format VulnerabilityIntel for injection into L1 agent runtime prompt. + + Uses UPPERCASE labels so they can be referenced as anchors in thought prompts. 
+ """ + lines = [] + if self.affected_files: + lines.append(f"AFFECTED_FILES: {', '.join(self.affected_files)}") + if self.vulnerable_functions: + lines.append(f"VULNERABLE_FUNCTIONS: {', '.join(self.vulnerable_functions)}") + if self.vulnerable_variables: + lines.append(f"VULNERABLE_VARIABLES: {', '.join(self.vulnerable_variables)}") + if self.vulnerable_patterns: + lines.append("VULNERABLE_PATTERNS:") + for p in self.vulnerable_patterns: + lines.append(f" - {p}") + if self.fix_patterns: + lines.append("FIX_PATTERNS:") + for p in self.fix_patterns: + lines.append(f" - {p}") + if self.search_keywords: + lines.append(f"SEARCH_KEYWORDS: {', '.join(self.search_keywords)}") + if self.root_cause: + lines.append(f"ROOT_CAUSE: {self.root_cause}") + if self.affected_architectures and self.affected_architectures != "both": + lines.append(f"AFFECTED_ARCHITECTURES: {self.affected_architectures}") + return "\n".join(lines) + + +def format_vulnerability_intel_for_prompt(intel: "VulnerabilityIntel") -> str: + """Format VulnerabilityIntel for injection into L1 agent runtime prompt. + + Uses UPPERCASE labels so they can be referenced as anchors in thought prompts. + + Note: This is a standalone function for backward compatibility. + Prefer using intel.format_for_prompt() directly. 
+ """ + return intel.format_for_prompt() + + class L1InvestigationResult(BaseModel): """Intermediate result from L1 investigation, input to L2 or report generation.""" downstream_report: dict[str, Any] | None = Field( @@ -79,9 +162,9 @@ class L1InvestigationResult(BaseModel): default=None, description="Final answer from the L1 ReAct agent", ) - affected_files: list[str] = Field( - default_factory=list, - description="Source files identified as CVE-relevant by L1", + vulnerability_intel: VulnerabilityIntel | None = Field( + default=None, + description="Structured vulnerability intelligence extracted from CVE advisories and patches", ) preliminary_verdict: Literal["vulnerable", "protected", "not_present", "uncertain"] = Field( default="uncertain", diff --git a/src/exploit_iq_commons/utils/hardening_kb.py b/src/exploit_iq_commons/utils/hardening_kb.py new file mode 100644 index 000000000..16b609b18 --- /dev/null +++ b/src/exploit_iq_commons/utils/hardening_kb.py @@ -0,0 +1,188 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Hardening Knowledge Base utilities. + +Loads the hardening_kb.json file containing compiler/linker flags that mitigate +specific CWE vulnerability categories. Provides lookup by CWE ID to retrieve +relevant hardening flags and their descriptions for LLM context. 
+""" + +from __future__ import annotations + +import json +import threading +from pathlib import Path + +from pydantic import BaseModel, Field + +from exploit_iq_commons.logging.loggers_factory import LoggingFactory + +logger = LoggingFactory.get_agent_logger(__name__) + + +class HardeningEntry(BaseModel): + """A single hardening flag entry from the knowledge base.""" + + flag: str = Field(description="Compiler/linker flag(s) for hardening") + flag_type: str = Field(description="Type: runtime, linker, warning, optimization, architecture") + description: str = Field(description="Description of what the flag does") + vulnerability_category: str = Field(description="Category of vulnerability this mitigates") + cwe_ids: list[str] = Field(default_factory=list, description="CWE IDs this flag helps mitigate") + + +# Flag types that provide actual runtime mitigation (not just warnings or optimization changes) +MITIGATING_FLAG_TYPES = frozenset({"runtime", "linker"}) + + +class HardeningKB: + """In-memory cache for hardening flags knowledge base. + + Implements singleton pattern to ensure single instance across the application. + Provides lookup by CWE ID to find relevant hardening flags. 
+ """ + + _instance = None + _lock = threading.Lock() + + def __new__(cls): + if cls._instance is None: + with cls._lock: + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __init__(self, json_path: str | Path | None = None) -> None: + if not hasattr(self, '_initialized'): + base_path = Path(__file__).resolve().parents[1] + default_json = base_path / "data" / "hardening_kb" / "hardening_kb.json" + self.json_path = Path(json_path) if json_path else default_json + + self._entries: list[HardeningEntry] = [] + self._cwe_index: dict[str, list[HardeningEntry]] = {} + self._initialized = True + self._load() + + @classmethod + def get_instance(cls) -> "HardeningKB": + """Get the singleton instance of HardeningKB.""" + return cls() + + def _load(self) -> None: + """Load the hardening KB JSON and build the CWE index.""" + try: + data = json.loads(self.json_path.read_text(encoding="utf-8")) + except FileNotFoundError: + logger.warning("Hardening KB JSON not found at %s", self.json_path) + return + except json.JSONDecodeError as exc: + logger.error("Failed to parse hardening KB JSON: %s", exc) + return + + mappings = data.get("mappings", []) + for mapping in mappings: + try: + entry = HardeningEntry( + flag=mapping.get("flag", "").strip(), + flag_type=mapping.get("flag_type", "unknown"), + description=mapping.get("description", ""), + vulnerability_category=mapping.get("vulnerability_category", ""), + cwe_ids=mapping.get("cwe_ids", []), + ) + self._entries.append(entry) + + for cwe_id in entry.cwe_ids: + normalized = self._normalize_cwe_id(cwe_id) + if normalized: + if normalized not in self._cwe_index: + self._cwe_index[normalized] = [] + self._cwe_index[normalized].append(entry) + + except Exception as exc: + logger.warning("Failed to parse hardening entry: %s - %s", mapping, exc) + + logger.info( + "Loaded hardening KB: %d entries, %d unique CWE mappings", + len(self._entries), + len(self._cwe_index), + ) + + @staticmethod + 
def _normalize_cwe_id(cwe_id: str) -> str | None: + """Normalize CWE ID to uppercase format (e.g., 'CWE-121'). + + Returns None for special values like 'N/A' or 'Multi'. + """ + if not cwe_id: + return None + cwe_id = cwe_id.strip().upper() + if cwe_id in ("N/A", "MULTI"): + return None + if not cwe_id.startswith("CWE-"): + cwe_id = f"CWE-{cwe_id}" + return cwe_id + + def lookup_by_cwe( + self, + cwe_id: str | None, + include_non_mitigating: bool = False, + ) -> list[HardeningEntry]: + """Return hardening entries that match the given CWE ID. + + By default, only returns flags that provide actual runtime mitigation + (flag_type: runtime, linker). Warning-only and optimization flags are + excluded since they don't mitigate vulnerabilities at runtime. + + Args: + cwe_id: The CWE identifier (e.g., 'CWE-121' or '121') + include_non_mitigating: If True, include warning/optimization flags + that don't provide runtime mitigation (for auditing purposes) + + Returns: + List of HardeningEntry objects that help mitigate this CWE + """ + if not cwe_id: + return [] + + normalized = self._normalize_cwe_id(cwe_id) + if not normalized: + return [] + + entries = self._cwe_index.get(normalized, []) + + if not include_non_mitigating: + entries = [e for e in entries if e.flag_type in MITIGATING_FLAG_TYPES] + + logger.debug( + "HardeningKB lookup for %s: found %d entries (include_non_mitigating=%s)", + normalized, + len(entries), + include_non_mitigating, + ) + return list(entries) + + def get_all_entries(self) -> list[HardeningEntry]: + """Return all hardening entries in the knowledge base.""" + return list(self._entries) + + @property + def kb_version(self) -> str | None: + """Return the version of the loaded knowledge base.""" + try: + data = json.loads(self.json_path.read_text(encoding="utf-8")) + return data.get("kb_version") + except Exception: + return None diff --git a/src/vuln_analysis/functions/build_agent_graph_defs.py b/src/vuln_analysis/functions/build_agent_graph_defs.py 
index 2e350e76f..0a41a5645 100644 --- a/src/vuln_analysis/functions/build_agent_graph_defs.py +++ b/src/vuln_analysis/functions/build_agent_graph_defs.py @@ -54,7 +54,7 @@ class BuildHarvestReport(BaseModel): - Hardening flags relevant to the CVE's CWE class Note: Compiled files are NOT pre-extracted. The LLM searches the build log - for affected files from l1_result.affected_files during the ReAct loop. + for affected files from l1_result.vulnerability_intel.affected_files during the ReAct loop. """ disabled_features: list[str] = Field( @@ -69,6 +69,10 @@ class BuildHarvestReport(BaseModel): default_factory=list, description="Hardening flags relevant to the CVE's CWE, with descriptions for LLM context", ) + build_architecture: Literal["32-bit", "64-bit", "unknown"] = Field( + default="unknown", + description="Target architecture from -m64/-m32 flags or build target (x86_64/i686)", + ) @@ -104,7 +108,7 @@ class BuildAgentState(MessagesState): """LangGraph state for the L2 Build Agent.""" harvest_report: NotRequired[BuildHarvestReport | None] - l1_affected_files: NotRequired[list[str]] + vulnerability_intel_str: NotRequired[str | None] l1_preliminary_verdict: NotRequired[str | None] runtime_prompt: NotRequired[str | None] thought: NotRequired[CheckerThought | None] @@ -205,6 +209,16 @@ async def harvest_build_data( from exploit_iq_commons.utils.hardening_kb import HardeningKB from vuln_analysis.tools.source_inspector import SourceInspector + # Handle case where build_log_path is a directory instead of a file + if build_log_path and build_log_path.is_dir(): + log_files = list(build_log_path.glob("*-build.log")) or list(build_log_path.glob("*.log")) + if log_files: + build_log_path = log_files[0] + logger.info("harvest_build_data: resolved build log directory to file: %s", build_log_path) + else: + logger.warning("harvest_build_data: build_log_path is a directory but no .log files found") + build_log_path = None + # Lookup expected hardening flags from KB based on CWE 
expected_hardening = [] if cwe_id: @@ -250,6 +264,31 @@ async def harvest_build_data( len(disabled_features), ) + # Detect build architecture from build log + build_architecture: Literal["32-bit", "64-bit", "unknown"] = "unknown" + if build_log_path: + try: + build_log_content = build_log_path.read_text(encoding="utf-8", errors="replace") + + # Check for explicit -m64 or -m32 flags (most reliable) + if re.search(r"\s-m64\b", build_log_content): + build_architecture = "64-bit" + elif re.search(r"\s-m32\b", build_log_content): + build_architecture = "32-bit" + # Check for build target patterns (e.g., "Building for target x86_64") + elif re.search(r"target\s+x86_64|x86_64-\w+-linux", build_log_content, re.IGNORECASE): + build_architecture = "64-bit" + elif re.search(r"target\s+i[3-6]86|i[3-6]86-\w+-linux", build_log_content, re.IGNORECASE): + build_architecture = "32-bit" + + if build_architecture != "unknown": + logger.info( + "harvest_build_data: detected build architecture: %s", + build_architecture, + ) + except OSError as e: + logger.warning("harvest_build_data: failed to read build log for arch detection: %s", e) + # Extract %build section and features from spec file spec_build_section = "" spec_disabled_features: list[str] = [] @@ -267,6 +306,7 @@ async def harvest_build_data( disabled_features=disabled_features, spec_disabled_features=spec_disabled_features, expected_hardening=expected_hardening, + build_architecture=build_architecture, ) @@ -282,13 +322,12 @@ async def harvest_build_data( "You are an L2 Build Agent investigating whether VULNERABLE CODE is DISABLED at build time.\n\n" "GOAL: Determine if the CVE-affected feature/component is compiled into the binary.\n\n" "EVIDENCE SOURCES:\n" - "1. Pre-harvested disabled features from build log (-D defines like OPENSSL_NO_SM2)\n" - "2. Pre-harvested disabled features from spec file (no-sm2, --disable-*, --without-*)\n" - "3. Build log (searchable) - verify affected source files were compiled\n\n" + "1. 
BUILD_HARVEST section below - disabled features ALREADY extracted (no tool call needed)\n" + "2. Build log (searchable with 'logs:' prefix) - verify affected source files were compiled\n\n" "INVESTIGATION STEPS:\n" - "1. Identify the CVE-affected component/feature from the description\n" - "2. Check if that feature appears in disabled_features or spec_disabled_features\n" - "3. Search build log to verify affected files from L1 were actually compiled\n\n" + "1. FIRST: Check BUILD_HARVEST below - disabled features are ALREADY extracted (no tool needed)\n" + "2. If feature IS in disabled_features or spec_disabled_features -> verdict NOT_COMPILED immediately\n" + "3. If feature NOT disabled (or lists empty), search build log with 'logs:' prefix to verify compilation\n\n" "VERDICTS:\n" "- NOT_COMPILED: Feature is disabled OR affected files not in build log\n" "- COMPILED: Feature is enabled AND affected files are compiled\n" @@ -300,22 +339,26 @@ async def harvest_build_data( CVE ID: {vuln_id} Target Package: {target_package} -CVE Description: {cve_description} - -Affected Files (from L1 code analysis): -{l1_affected_files} + +{vulnerability_intel} L1 Preliminary Verdict: {l1_preliminary_verdict} - + +** CHECK THESE FIRST - No tool call needed! ** + Disabled Features (from build log -D defines): {disabled_features} Disabled Features (from spec configure flags): {spec_disabled_features} + +DECISION GUIDE: +- If CVE-affected feature appears above -> verdict NOT_COMPILED (no tool needed) +- If lists are empty or feature not listed -> search build log with 'logs:' prefix @@ -340,18 +383,24 @@ async def harvest_build_data( 1. You MUST select a tool ONLY from . Do NOT invent tool names. 2. Output valid JSON only. thought < 100 words. final_answer < 150 words. 3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer. -4. FIRST check if CVE-affected feature appears in disabled_features or spec_disabled_features. -5. 
If feature not clearly disabled, search build log for affected files from L1. -6. Do NOT call the same tool with the same input twice. +4. BUILD_HARVEST contains pre-extracted disabled features - CHECK IT FIRST (no grep needed). +5. If feature in BUILD_HARVEST -> finish with NOT_COMPILED verdict immediately. +6. If feature NOT in BUILD_HARVEST, search build log using 'logs:' prefix (e.g., 'logs:filename.c'). +7. NEVER grep source code - use 'logs:' prefix to search build logs for compilation evidence. +8. Do NOT call the same tool with the same input twice. - -{{"thought": "CVE affects SM2 crypto. Checking if sm2 appears in disabled features list.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "sm2", "reason": "Search for SM2 in disabled features context"}}, "final_answer": null}} - + +{{"thought": "CVE affects SM2 crypto. BUILD_HARVEST shows 'sm2' in spec_disabled_features. SM2 is disabled at build time.", "mode": "finish", "actions": null, "final_answer": "NOT_COMPILED. The spec file configures with 'no-sm2' flag, which disables SM2 cryptographic functions. The vulnerable code in crypto/sm2/ is not compiled into the binary."}} + - -{{"thought": "SM2 not in disabled features. Now verify affected file crypto/sm2/sm2.c was compiled.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "sm2.c", "reason": "Check if affected source file appears in build log"}}, "final_answer": null}} - + +{{"thought": "CVE affects zisofs. BUILD_HARVEST disabled features are empty - zisofs not disabled. Need to verify affected file was compiled.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "logs:archive_read_support_format_iso9660", "reason": "Check if affected file appears in build compilation log"}}, "final_answer": null}} + + + +{{"thought": "Feature not disabled. 
Now verify affected file crypto/sm2/sm2.c was compiled.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "logs:sm2.c", "reason": "Check if affected source file appears in build log"}}, "final_answer": null}} + {{"thought": "Found no-sm2 in spec_disabled_features. SM2 code is not compiled.", "mode": "finish", "actions": null, "final_answer": "NOT_COMPILED. The spec file configures with 'no-sm2' flag, which disables SM2 cryptographic functions. The vulnerable code in crypto/sm2/ is not compiled into the binary."}} @@ -375,9 +424,18 @@ async def harvest_build_data( "GOAL: Determine if hardening flags relevant to this CVE's vulnerability class are present.\n\n" "CONTEXT: Investigation 1 determined the vulnerable code IS compiled. Now check if\n" "compiler/linker hardening makes exploitation significantly harder.\n\n" + "CRITICAL - CWE-SPECIFIC MATCHING:\n" + "- ONLY flags listed in EXPECTED_HARDENING can justify a MITIGATED verdict\n" + "- General hardening flags (stack protector, FORTIFY_SOURCE) do NOT mitigate all CWEs\n" + "- Example: -fstack-protector helps CWE-121 (stack overflow), NOT CWE-190 (integer overflow)\n" + "- You MUST match the EXACT flags from EXPECTED_HARDENING table to the build output\n\n" "EVIDENCE SOURCES:\n" - "1. Expected hardening flags (from CWE-based knowledge base)\n" - "2. Build log (searchable) - verify presence of hardening flags\n\n" + "1. EXPECTED_HARDENING table (CWE-specific flags from knowledge base) - THIS IS YOUR CHECKLIST\n" + "2. 
Build log (searchable with 'logs:' prefix) - contains CFLAGS/CXXFLAGS/LDFLAGS definitions\n\n" + "EFFICIENT SEARCH STRATEGY:\n" + "- Search 'logs:FLAGS=' to get ALL compiler/linker flags in ONE call (matches CFLAGS=, LDFLAGS=, etc.)\n" + "- Grep supports regex OR: 'logs:CFLAGS\\|LDFLAGS' combines patterns\n" + "- Analyze the output to check for expected hardening flags - avoid multiple individual searches\n\n" "IMPORTANT - RHEL/Fedora Specs Files:\n" "When you see these specs files in build logs, hardening flags are IMPLICITLY enabled:\n" "- '-specs=/usr/lib/rpm/redhat/redhat-hardened-cc1' => -fPIE (position-independent code for ASLR)\n" @@ -385,13 +443,14 @@ async def harvest_build_data( "These flags will NOT appear explicitly in the build log - the specs file injects them.\n" "If you see these specs files, count the corresponding protections as PRESENT.\n\n" "INVESTIGATION STEPS:\n" - "1. Review expected_hardening flags for this CVE's CWE class\n" - "2. Search build log for each expected hardening flag OR corresponding specs file\n" - "3. Determine if critical mitigations are present\n\n" + "1. Review EXPECTED_HARDENING table - these are the ONLY flags that matter for this CWE\n" + "2. Search 'logs:FLAGS=' to get all compiler/linker flag definitions at once\n" + "3. For EACH flag in EXPECTED_HARDENING, check if present in build output\n" + "4. 
Verdict based ONLY on EXPECTED_HARDENING flags (ignore unrelated hardening)\n\n" "VERDICTS:\n" - "- MITIGATED: Key hardening flags present that reduce exploitability\n" - "- NOT_MITIGATED: Hardening flags absent, vulnerability fully exploitable\n" - "- UNKNOWN: Cannot determine hardening status from available evidence" + "- MITIGATED: One or more flags from EXPECTED_HARDENING are present in build\n" + "- NOT_MITIGATED: NONE of the EXPECTED_HARDENING flags are present (even if other hardening exists)\n" + "- UNKNOWN: Cannot determine from available evidence" ) L2_HARDENING_PROMPT_TEMPLATE = """{sys_prompt} @@ -430,29 +489,36 @@ async def harvest_build_data( 1. You MUST select a tool ONLY from . Do NOT invent tool names. 2. Output valid JSON only. thought < 100 words. final_answer < 150 words. 3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer. -4. Search build log for EACH expected hardening flag from the table. -5. Focus on flags most relevant to the CWE (e.g., stack protector for CWE-121). -6. Do NOT call the same tool with the same input twice. +4. EFFICIENT SEARCH: First search for 'logs:FLAGS=' to get ALL compiler/linker flags in one call. +5. Grep supports regex OR: use 'logs:CFLAGS\\|LDFLAGS' to combine patterns. +6. Analyze the FLAGS output to check for expected hardening flags - avoid multiple individual searches. +7. Do NOT call the same tool with the same input twice. +8. STRICT MATCHING: Only flags from EXPECTED_HARDENING justify MITIGATED verdict. +9. IGNORE UNRELATED HARDENING: -fstack-protector, -D_FORTIFY_SOURCE do NOT mitigate all CWEs. Check the table! - -{{"thought": "CWE-121 stack overflow. Searching for -fstack-protector in build log.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "-fstack-protector", "reason": "Check for stack protector hardening"}}, "final_answer": null}} - + +{{"thought": "Need to check hardening flags for this CWE. 
Search for all compiler/linker flag definitions at once.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "logs:FLAGS=", "reason": "Get all CFLAGS/CXXFLAGS/LDFLAGS definitions in one search"}}, "final_answer": null}} + + + +{{"thought": "CWE-121 stack overflow. EXPECTED_HARDENING lists -fstack-protector-strong. Found -fstack-protector-strong in CFLAGS. This matches EXPECTED_HARDENING.", "mode": "finish", "actions": null, "final_answer": "MITIGATED. Found -fstack-protector-strong which is listed in EXPECTED_HARDENING for CWE-121. Stack buffer overflows will be detected at runtime."}} + - -{{"thought": "Found -fstack-protector-strong. Now check for FORTIFY_SOURCE.", "mode": "act", "actions": {{"tool": "Source Grep", "query": "FORTIFY_SOURCE", "reason": "Check for FORTIFY_SOURCE buffer overflow protection"}}, "final_answer": null}} - + +{{"thought": "CWE-190 integer overflow. EXPECTED_HARDENING lists -ftrapv, -fsanitize=signed-integer-overflow. Found -fstack-protector-strong and -D_FORTIFY_SOURCE=2, but these are for memory safety NOT integer overflow. Checking for expected flags: -ftrapv MISSING, -fsanitize=signed-integer-overflow MISSING.", "mode": "finish", "actions": null, "final_answer": "NOT_MITIGATED. Build has general hardening (-fstack-protector-strong, -D_FORTIFY_SOURCE=2) but NONE of the CWE-190 specific flags from EXPECTED_HARDENING (-ftrapv, -fsanitize=signed-integer-overflow) are present. Integer overflow is not mitigated."}} + - -{{"thought": "Found both stack protector and FORTIFY_SOURCE. Critical mitigations present.", "mode": "finish", "actions": null, "final_answer": "MITIGATED. Build log shows '-fstack-protector-strong' and '-D_FORTIFY_SOURCE=2'. These flags detect stack buffer overflows at runtime, making exploitation of CWE-121 significantly harder."}} - + +{{"thought": "CWE-693 control flow. EXPECTED_HARDENING lists PIE. Found -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 which implies -fPIE. 
This matches EXPECTED_HARDENING.", "mode": "finish", "actions": null, "final_answer": "MITIGATED. Build uses RHEL hardened specs files which implicitly enable PIE (listed in EXPECTED_HARDENING for CWE-693). Control flow exploitation is significantly harder."}} + -{{"thought": "No stack protector found. Vulnerability is fully exploitable.", "mode": "finish", "actions": null, "final_answer": "NOT_MITIGATED. No stack protection flags found in build log. The CWE-121 stack overflow vulnerability can be exploited without runtime detection."}} +{{"thought": "CWE-190 integer overflow. EXPECTED_HARDENING lists -ftrapv, -fsanitize=*-integer-overflow. None found in FLAGS output.", "mode": "finish", "actions": null, "final_answer": "NOT_MITIGATED. None of the EXPECTED_HARDENING flags for CWE-190 (-ftrapv, -fsanitize=signed-integer-overflow, -fsanitize=unsigned-integer-overflow) are present in the build."}} -{{"thought": "Build log incomplete. Cannot verify hardening status.", "mode": "finish", "actions": null, "final_answer": "UNKNOWN. Build log does not contain compilation commands with flag details. Cannot determine if hardening mitigations are present."}} +{{"thought": "Build log does not contain FLAGS= definitions. Cannot determine if EXPECTED_HARDENING flags are present.", "mode": "finish", "actions": null, "final_answer": "UNKNOWN. Build log does not contain CFLAGS/LDFLAGS definitions. Cannot determine if EXPECTED_HARDENING mitigations are present."}} """ @@ -500,11 +566,14 @@ async def harvest_build_data( L2_COMPREHENSION_PROMPT = """Analyze the tool output for L2 build/compilation verification. 
GOAL: Determine whether {vuln_id} vulnerable code is COMPILED in {target_package} - -L1 Affected Files: {l1_affected_files} + +{vulnerability_intel} + + + Disabled Features (build log): {disabled_features} Disabled Features (spec file): {spec_disabled_features} - + TOOL USED: {tool_used} TOOL INPUT: {tool_input} @@ -514,9 +583,9 @@ async def harvest_build_data( BUILD ANALYSIS RULES: 1. CHECK if tool output shows: - - Compilation commands for affected files (e.g., gcc -c file.c -o file.o) + - Compilation commands for AFFECTED_FILES (e.g., gcc -c file.c -o file.o) - Feature-disable flags that match the CVE-affected component - - Object files or compilation artifacts for affected code + - Object files or compilation artifacts for VULNERABLE_FUNCTIONS 2. COMPILATION EVIDENCE: - COMPILED: Found gcc/compile commands for affected files diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index 4aebbf1bf..76d92bc6d 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -40,6 +40,7 @@ logger = logging.getLogger(__name__) +from exploit_iq_commons.data_models.checker_status import VulnerabilityIntel from vuln_analysis.functions.react_internals import CheckerThought, Observation, L1VerdictExtraction # --------------------------------------------------------------------------- # Graph state @@ -162,51 +163,6 @@ class ParsedPatch(BaseModel): files: list[PatchFile] -# --------------------------------------------------------------------------- -# Vulnerability Intelligence schema -# --------------------------------------------------------------------------- - - -class VulnerabilityIntel(BaseModel): - """Structured intelligence extracted from CVE advisories and patches. - - Used to provide grep-ready patterns and context for L1 agent source searches. 
- """ - - affected_files: list[str] = Field( - default_factory=list, - description="Source file paths likely to contain vulnerable code" - ) - vulnerable_functions: list[str] = Field( - default_factory=list, - description="Function names that contain or handle the vulnerability" - ) - vulnerable_variables: list[str] = Field( - default_factory=list, - description="Variable names involved in the vulnerability" - ) - vulnerable_patterns: list[str] = Field( - default_factory=list, - description="Code patterns/snippets indicating vulnerable code (from - lines)" - ) - fix_patterns: list[str] = Field( - default_factory=list, - description="Code patterns/snippets indicating fixed code (from + lines)" - ) - root_cause: str = Field( - default="", - description="Technical explanation of why the code is vulnerable" - ) - vulnerability_type: str = Field( - default="", - description="Category: buffer_overflow, integer_overflow, use_after_free, null_deref, etc." - ) - search_keywords: list[str] = Field( - default_factory=list, - description="Recommended grep patterns ordered by specificity (most specific first)" - ) - - # --------------------------------------------------------------------------- # Code Agent Report schema # --------------------------------------------------------------------------- @@ -376,6 +332,11 @@ def to_markdown( 8. search_keywords: List 3-5 grep patterns ordered by specificity: - Start with most specific (unique variable/function names) - End with broader patterns (file names, component names) +9. 
affected_architectures: Determine which CPU architectures are affected: + - "32-bit": Only 32-bit systems affected (look for phrases like "32-bit systems", "i386", "i686", "on 32-bit", "64-bit systems are not affected") + - "64-bit": Only 64-bit systems affected (rare, look for "64-bit only", "x86_64 only") + - "both": Both architectures affected (DEFAULT - use when not explicitly stated otherwise) + NOTE: Integer overflow vulnerabilities (CWE-190, CWE-680, CWE-681) often only affect 32-bit due to smaller integer sizes. @@ -427,33 +388,6 @@ def format_patch_data_for_intel( return "\n".join(lines) -def format_vulnerability_intel_for_prompt(intel: VulnerabilityIntel) -> str: - """Format VulnerabilityIntel for injection into L1 agent runtime prompt. - - Uses UPPERCASE labels so they can be referenced as anchors in thought prompts. - """ - lines = [] - if intel.affected_files: - lines.append(f"AFFECTED_FILES: {', '.join(intel.affected_files)}") - if intel.vulnerable_functions: - lines.append(f"VULNERABLE_FUNCTIONS: {', '.join(intel.vulnerable_functions)}") - if intel.vulnerable_variables: - lines.append(f"VULNERABLE_VARIABLES: {', '.join(intel.vulnerable_variables)}") - if intel.vulnerable_patterns: - lines.append("VULNERABLE_PATTERNS:") - for p in intel.vulnerable_patterns: - lines.append(f" - {p}") - if intel.fix_patterns: - lines.append("FIX_PATTERNS:") - for p in intel.fix_patterns: - lines.append(f" - {p}") - if intel.search_keywords: - lines.append(f"SEARCH_KEYWORDS: {', '.join(intel.search_keywords)}") - if intel.root_cause: - lines.append(f"ROOT_CAUSE: {intel.root_cause}") - return "\n".join(lines) - - CODE_AGENT_REPORT_PROMPT = """\ You are a security analyst generating the final L1 Code Agent investigation report. 
@@ -1078,6 +1012,24 @@ def _extract_spec_changelog(inspector, spec_path: Path) -> str | None: return content[idx + len("%changelog"):] +_BINARY_FILE_EXTENSIONS = frozenset({ + '.iso', '.bin', '.gz', '.bz2', '.xz', '.zip', '.tar', '.tgz', '.tbz2', + '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.ico', '.webp', + '.pdf', '.doc', '.docx', '.xls', '.xlsx', + '.exe', '.dll', '.so', '.dylib', '.a', '.o', '.obj', + '.pyc', '.pyo', '.class', '.jar', '.war', + '.woff', '.woff2', '.ttf', '.otf', '.eot', + '.mp3', '.mp4', '.wav', '.avi', '.mov', '.mkv', + '.db', '.sqlite', '.sqlite3', +}) + + +def _is_binary_file_path(path: str) -> bool: + """Check if file path has a binary file extension.""" + path_lower = path.lower() + return any(path_lower.endswith(ext) for ext in _BINARY_FILE_EXTENSIONS) + + def parse_patch_file(patch_path: Path) -> ParsedPatch | None: """Parse a downstream .patch file into structured data. @@ -1094,6 +1046,8 @@ def parse_patch_file(patch_path: Path) -> ParsedPatch | None: for patched_file in patch_set: if patched_file.is_binary_file: continue + if _is_binary_file_path(patched_file.target_file): + continue hunks: list[PatchHunk] = [] for hunk in patched_file: @@ -1416,11 +1370,33 @@ async def upstream_search_preprocess( 4. Your next action MUST build on findings - progress the investigation + +PHASE 1 - INTELLIGENCE (PRE-COMPLETED): + Review VULNERABILITY_INTEL above. It contains: + - AFFECTED_FILES: Files to verify + - VULNERABLE_FUNCTIONS: Functions to search for + - VULNERABLE_PATTERNS: Code patterns indicating vulnerability + - FIX_PATTERNS: Code patterns indicating the fix + - SEARCH_KEYWORDS: Terms to grep for + +PHASE 2 - SOURCE CODE INSPECTION (YOUR TASK): + For EACH item in VULNERABLE_FUNCTIONS and AFFECTED_FILES: + 1. Search for vulnerable pattern - it should exist if unpatched + 2. Search for fix pattern - it should be absent if unpatched + IMPORTANT: Do NOT stop after finding the first file. Check ALL AFFECTED_FILES. 
+ +PHASE 3 - VERDICT: + Only conclude when: + - ALL AFFECTED_FILES have been searched + - ALL VULNERABLE_FUNCTIONS have been located + - Evidence is sufficient for confident verdict + + 1. You MUST select a tool ONLY from . Do NOT invent or use any other tool names. 2. Output valid JSON only. thought < 100 words. final_answer < 150 words. 3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer. -4. Source Grep: use query field with pattern from patch (function name, variable, or code snippet). +4. Source Grep: use query field with pattern from VULNERABILITY_INTEL (function name, variable, or code snippet). 5. Code Keyword Search: use query field for broader searches. 6. Do NOT call the same tool with the same input twice - CHECK KNOWLEDGE for prior calls. 7. ALWAYS search for the vulnerable code pattern FIRST, then the fix pattern. @@ -1434,7 +1410,7 @@ async def upstream_search_preprocess( If a pattern wasn't found: - Try simpler substrings or partial patterns - Try a different tool (Source Grep <-> Code Keyword Search) -- Search for file paths from PATCH_CONTEXT +- Search for file paths from VULNERABILITY_INTEL AFFECTED_FILES @@ -1462,15 +1438,37 @@ async def upstream_search_preprocess( 4. Your next action MUST build on findings - progress the investigation + +PHASE 1 - INTELLIGENCE (PRE-COMPLETED): + Review VULNERABILITY_INTEL above. It contains: + - AFFECTED_FILES: Files to verify + - VULNERABLE_FUNCTIONS: Functions to search for + - VULNERABLE_PATTERNS: Code patterns indicating vulnerability + - FIX_PATTERNS: Code patterns indicating the fix + - SEARCH_KEYWORDS: Terms to grep for + +PHASE 2 - SOURCE CODE INSPECTION (YOUR TASK): + For EACH item in VULNERABLE_FUNCTIONS and AFFECTED_FILES: + 1. Search for vulnerable pattern - it SHOULD exist in unpatched target + 2. Search for fix pattern - it should NOT exist in unpatched target + IMPORTANT: Do NOT stop after finding the first file. Check ALL AFFECTED_FILES. 
+ +PHASE 3 - VERDICT: + Only conclude when: + - ALL AFFECTED_FILES have been searched + - ALL VULNERABLE_FUNCTIONS have been located + - Evidence is sufficient for confident verdict + + 1. You MUST select a tool ONLY from . Do NOT invent or use any other tool names. 2. Output valid JSON only. thought < 100 words. final_answer < 150 words. 3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer. -4. Source Grep: use query field with pattern from patch (function name, variable, or code snippet). +4. Source Grep: use query field with pattern from VULNERABILITY_INTEL (function name, variable, or code snippet). 5. Code Keyword Search: use query field for broader searches. 6. Do NOT call the same tool with the same input twice - CHECK KNOWLEDGE for prior calls. -7. FIRST search for VULNERABLE code (removed lines) - it SHOULD exist in target. -8. THEN search for FIX code (added lines) - it should NOT exist in target. +7. FIRST search for VULNERABLE code - it SHOULD exist in target. +8. THEN search for FIX code - it should NOT exist in target. 9. If a pattern contains special regex characters, escape them or use literal substrings. @@ -1481,9 +1479,9 @@ async def upstream_search_preprocess( If a pattern wasn't found: - Try simpler substrings or partial patterns - Try a different tool (Source Grep <-> Code Keyword Search) -- Search for file paths from PATCH_CONTEXT +- Search for file paths from VULNERABILITY_INTEL AFFECTED_FILES If KNOWLEDGE shows partial evidence: -- Investigate other files mentioned in PATCH_CONTEXT +- Investigate other files mentioned in VULNERABILITY_INTEL AFFECTED_FILES - Search for key variables from the fix pattern @@ -1515,15 +1513,37 @@ async def upstream_search_preprocess( 4. Your next action MUST build on findings - progress the investigation + +PHASE 1 - INTELLIGENCE (PRE-COMPLETED): + Review VULNERABILITY_INTEL above. 
It contains: + - AFFECTED_FILES: Files to verify + - VULNERABLE_FUNCTIONS: Functions to search for + - VULNERABLE_PATTERNS: Code patterns indicating vulnerability + - FIX_PATTERNS: Code patterns indicating the fix + - SEARCH_KEYWORDS: Terms to grep for + +PHASE 2 - SOURCE CODE INSPECTION (YOUR TASK): + For EACH item in VULNERABLE_FUNCTIONS and AFFECTED_FILES: + 1. Search for fix pattern - it SHOULD exist in rebased target + 2. Verify vulnerable pattern is ABSENT from target + IMPORTANT: Do NOT stop after finding the first file. Check ALL AFFECTED_FILES. + +PHASE 3 - VERDICT: + Only conclude when: + - ALL AFFECTED_FILES have been searched + - ALL VULNERABLE_FUNCTIONS have been located + - Evidence is sufficient for confident verdict + + 1. You MUST select a tool ONLY from . Do NOT invent or use any other tool names. 2. Output valid JSON only. thought < 100 words. final_answer < 150 words. 3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer. -4. Source Grep: use query field with pattern from patch (function name, variable, or code snippet). +4. Source Grep: use query field with pattern from VULNERABILITY_INTEL (function name, variable, or code snippet). 5. Code Keyword Search: use query field for broader searches. 6. Do NOT call the same tool with the same input twice - CHECK KNOWLEDGE for prior calls. -7. FIRST search for FIX code (added lines) - it SHOULD exist in rebased target. -8. THEN verify VULNERABLE code (removed lines) is ABSENT from target. +7. FIRST search for FIX code - it SHOULD exist in rebased target. +8. THEN verify VULNERABLE code is ABSENT from target. 9. If a pattern contains special regex characters, escape them or use literal substrings. 
@@ -1534,9 +1554,9 @@ async def upstream_search_preprocess( If a pattern wasn't found: - Try simpler substrings or partial patterns - Try a different tool (Source Grep <-> Code Keyword Search) -- Search for file paths from PATCH_CONTEXT +- Search for file paths from VULNERABILITY_INTEL AFFECTED_FILES If KNOWLEDGE shows partial evidence: -- Investigate other files mentioned in PATCH_CONTEXT +- Investigate other files mentioned in VULNERABILITY_INTEL AFFECTED_FILES - Search for key variables from the fix pattern @@ -1573,9 +1593,9 @@ async def upstream_search_preprocess( - tool_outcome MUST be: "{tool_used} [{tool_input}] -> NO MATCHES" - DO NOT fabricate, infer, or assume any results. STOP HERE. -PATCH CONTEXT: -- Vulnerable code patterns (removed lines): {vulnerable_patterns} -- Fix code patterns (added lines): {fix_patterns} + +{vulnerability_intel} + TOOL USED: {tool_used} TOOL INPUT: {tool_input} @@ -1590,13 +1610,21 @@ async def upstream_search_preprocess( 4. The tool_outcome MUST accurately reflect what NEW OUTPUT shows, not what you expect. CODE ANALYSIS RULES (only if NEW OUTPUT has content): -1. READ the actual code snippets in NEW OUTPUT. Compare against PATCH CONTEXT patterns. +1. READ the actual code snippets in NEW OUTPUT. Compare against VULNERABLE_PATTERNS and FIX_PATTERNS. 2. For each match found: - Quote the actual line from NEW OUTPUT - State the file:line where it was found - Determine if it matches VULNERABLE or FIX pattern 3. RECORD file paths and line numbers for all relevant matches. + +Based on VULNERABILITY_INTEL above, assess investigation completeness: +- Have you searched in ALL files listed in AFFECTED_FILES? +- Have you found ALL instances of VULNERABLE_FUNCTIONS? +- Are there OTHER files containing the same vulnerable pattern? +If coverage is incomplete, note which files/functions remain unchecked. + + OUTPUT RULES: - findings: 2-4 observations. Each positive finding MUST quote actual content from NEW OUTPUT. 
- tool_outcome: "{tool_used} [pattern] -> found in file.c:123" OR "{tool_used} [pattern] -> NO MATCHES" @@ -1770,9 +1798,32 @@ async def upstream_search_preprocess( 4. Your next action MUST build on findings - progress the investigation + +PHASE 1 - INTELLIGENCE (PRE-COMPLETED): + Review VULNERABILITY_INTEL above. It contains: + - AFFECTED_FILES: Files to verify (may be inferred from CVE description) + - VULNERABLE_FUNCTIONS: Functions to search for + - VULNERABLE_PATTERNS: Code patterns indicating vulnerability + - FIX_PATTERNS: Code patterns indicating the fix + - SEARCH_KEYWORDS: Terms to grep for + - ROOT_CAUSE: Description of the vulnerability mechanism + +PHASE 2 - SOURCE CODE INSPECTION (YOUR TASK): + For EACH item in VULNERABLE_FUNCTIONS and SEARCH_KEYWORDS: + 1. Search for vulnerable code patterns + 2. Search for defensive/fix patterns (bounds checks, validation, etc.) + IMPORTANT: Do NOT stop after finding the first file. Check ALL potential locations. + +PHASE 3 - VERDICT: + Only conclude when: + - Key files have been searched + - Vulnerable functions have been located + - Evidence is sufficient for confident verdict + + RESPONSE FORMAT (JSON): You must respond with a JSON object with these fields: -- thought: Your reasoning based on KNOWLEDGE and CVE description (reference what was already found) +- thought: Your reasoning based on KNOWLEDGE and VULNERABILITY_INTEL (reference what was already found) - mode: "act" (to use a tool) or "finish" (to provide final answer) - actions: (only if mode="act") {{"tool": "Tool Name", "query": "search term", "reason": "why this search"}} - final_answer: (only if mode="finish") Your conclusion about patch status @@ -1790,7 +1841,7 @@ async def upstream_search_preprocess( If a pattern wasn't found: - Try simpler substrings or partial patterns - Try a different tool (Source Grep <-> Code Keyword Search) -- Search for related symbols from CVE description +- Search for SEARCH_KEYWORDS from VULNERABILITY_INTEL diff --git 
a/src/vuln_analysis/functions/cve_build_agent.py b/src/vuln_analysis/functions/cve_build_agent.py index 167918996..8f8c7e709 100644 --- a/src/vuln_analysis/functions/cve_build_agent.py +++ b/src/vuln_analysis/functions/cve_build_agent.py @@ -32,7 +32,10 @@ from pydantic import Field from exploit_iq_commons.logging.loggers_factory import LoggingFactory, trace_id -from exploit_iq_commons.data_models.checker_status import L2BuildResult +from exploit_iq_commons.data_models.checker_status import ( + L2BuildResult, + format_vulnerability_intel_for_prompt, +) from langgraph.graph import StateGraph, START, END from langgraph.prebuilt import ToolNode @@ -80,7 +83,7 @@ class CVEBuildAgentConfig(FunctionBaseConfig, name="cve_build_agent"): description="Root directory for checker-specific artifacts.", ) max_iterations: int = Field( - default=10, + default=5, description="The maximum number of iterations for the agent.", ) llm_name: str = Field(description="The LLM model to use with the L1 code agent.") @@ -157,8 +160,9 @@ async def create_graph_build_agent( checker_dir = Path(config.base_checker_dir) / source_key build_log_path = Path(artifacts.build_log_path) - # L1 results - l1_affected_files = l1_result.affected_files + # L1 results - use full VulnerabilityIntel for richer context + vulnerability_intel = l1_result.vulnerability_intel + vulnerability_intel_str = format_vulnerability_intel_for_prompt(vulnerability_intel) if vulnerability_intel else "No intel available" l1_preliminary_verdict = l1_result.preliminary_verdict # Extract CWE ID from intel (if available) @@ -168,15 +172,7 @@ async def create_graph_build_agent( if intel.nvd and intel.nvd.cwe_id: cwe_id = intel.nvd.cwe_id logger.info("build_agent: CWE ID from intel: %s", cwe_id) - descriptions: list[tuple[str, str]] = [] - if intel.ghsa: - cve_text = intel.ghsa.description or intel.ghsa.summary or "" - if cve_text: - descriptions.append(("ghsa", cve_text)) - if intel.ubuntu and intel.ubuntu.description: - 
descriptions.append(("ubuntu", intel.ubuntu.description)) - - cve_description = "\n".join(f"[{src}] {txt}" for src, txt in descriptions) + _tiktoken_enc = tiktoken.get_encoding("cl100k_base") investigation_stack: list[L2InvestigationPhase] = [] @@ -216,13 +212,11 @@ async def build_runtime_prompt(harvest_report: BuildHarvestReport) -> str: sys_prompt=L2_CONFIG_SYS_PROMPT, vuln_id=vuln_id, target_package=target_package.name, - cve_description=cve_description, - l1_affected_files=l1_affected_files, + vulnerability_intel=vulnerability_intel_str, l1_preliminary_verdict=l1_preliminary_verdict, disabled_features=harvest_report.disabled_features, spec_disabled_features=harvest_report.spec_disabled_features, tools=tools_str, - tool_selection_strategy=tool_strategy, tool_instructions=L2_CONFIG_THOUGHT_INSTRUCTIONS, ) return runtime_prompt @@ -261,17 +255,18 @@ async def data_harvest_node(state: BuildAgentState) -> dict: ) runtime_prompt = await build_runtime_prompt(harvest_report) - + + affected_files_count = len(vulnerability_intel.affected_files) if vulnerability_intel else 0 span.set_output({ "disabled_features_count": len(harvest_report.disabled_features), "spec_disabled_features_count": len(harvest_report.spec_disabled_features), "expected_hardening_count": len(harvest_report.expected_hardening), - "l1_affected_files_count": len(l1_affected_files), + "vulnerability_intel_files_count": affected_files_count, }) return { "harvest_report": harvest_report, - "l1_affected_files": l1_affected_files, + "vulnerability_intel_str": vulnerability_intel_str, "l1_preliminary_verdict": l1_preliminary_verdict, "runtime_prompt": runtime_prompt, "messages": [AIMessage(content="Build data harvested, beginning analysis.")], @@ -348,7 +343,7 @@ async def observation_node(state: BuildAgentState) -> dict: comp_prompt = L2_COMPREHENSION_PROMPT.format( vuln_id=vuln_id, target_package=target_package_name, - l1_affected_files=", ".join(l1_affected_files) if l1_affected_files else "None", + 
vulnerability_intel=vulnerability_intel_str, disabled_features=", ".join(harvest_report.disabled_features) if harvest_report.disabled_features else "None", spec_disabled_features=", ".join(harvest_report.spec_disabled_features) if harvest_report.spec_disabled_features else "None", tool_used=tool_used, @@ -492,18 +487,44 @@ async def investigation_phase_node(state: BuildAgentState) -> dict: "reasoning": verdict.reasoning, }) if verdict.compilation_status == "not_compiled": - return {"L2CompileVerdict": verdict,} + return {"L2CompileVerdict": verdict} else: - # next state is the hardening phase - preprocess_data = state.get("harvest_report") or BuildHarvestReport() - runtime_prompt = await build_runtime_prompt(preprocess_data) - messages = state["messages"] - prune_messages = [] - for msg in messages: - prune_messages.append(RemoveMessage(id=msg.id)) - span.set_output({ - "runtime_prompt": runtime_prompt,}) - return { + # Check for architecture mismatch before proceeding to hardening + harvest_report = state.get("harvest_report") + l1_arch = vulnerability_intel.affected_architectures if vulnerability_intel else "both" + build_arch = harvest_report.build_architecture if harvest_report else "unknown" + + if l1_arch != "both" and build_arch != "unknown" and l1_arch != build_arch: + logger.info( + "investigation_phase_node: Architecture mismatch - CVE affects %s, build is %s. Skipping hardening.", + l1_arch, build_arch + ) + # Override verdict with architecture mismatch reasoning + arch_verdict = L2CompileVerdictExtraction( + compilation_status="compiled", + confidence=1.0, + reasoning=f"Architecture mismatch: CVE affects {l1_arch} only, build is {build_arch}. Vulnerability cannot occur on this architecture." 
+ ) + span.set_output({ + "architecture_mismatch": True, + "l1_arch": l1_arch, + "build_arch": build_arch, + }) + # Clear investigation stack to skip hardening phase + investigation_stack.clear() + return {"L2CompileVerdict": arch_verdict} + + # Normal path: proceed to hardening phase + preprocess_data = state.get("harvest_report") or BuildHarvestReport() + runtime_prompt = await build_runtime_prompt(preprocess_data) + messages = state["messages"] + prune_messages = [] + for msg in messages: + prune_messages.append(RemoveMessage(id=msg.id)) + span.set_output({ + "runtime_prompt": runtime_prompt, + }) + return { "runtime_prompt": runtime_prompt, "thought": None, "observation": None, @@ -582,9 +603,13 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: } with tracer.push_active_function("l2_build_agent_graph", input_data=initial_state["messages"][0].content): + # Each phase: (max_iterations * 3 react nodes) + data_harvest/forced_finish/investigation_phase + # Two phases (CONFIG + HARDENING) when code is compiled + steps_per_phase = (config.max_iterations * 3) + 4 + recursion_limit = steps_per_phase * 2 + 5 # buffer for edge cases result = await build_agent_graph.ainvoke( initial_state, - config={"recursion_limit": config.max_iterations * 4}, + config={"recursion_limit": recursion_limit}, ) logger.info("build_agent: L2 investigation finished") @@ -597,6 +622,10 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: if compile_verdict.compilation_status == "not_compiled": hardening_relevant = False l2_override_verdict = "not_vulnerable" + elif hardening_verdict is None: + # Architecture mismatch case: compiled but hardening was skipped + hardening_relevant = False + l2_override_verdict = "not_vulnerable" else: hardening_relevant = True hardening_reason = hardening_verdict.reasoning diff --git a/src/vuln_analysis/functions/cve_checker_report.py b/src/vuln_analysis/functions/cve_checker_report.py index 
f49b40789..80765f602 100644 --- a/src/vuln_analysis/functions/cve_checker_report.py +++ b/src/vuln_analysis/functions/cve_checker_report.py @@ -142,12 +142,21 @@ def _apply_l2_verdict( updated_fields = {} if l2_result.l2_override_verdict == "not_vulnerable": - if l2_result.compilation_status == "not_compiled": + evidence = l2_result.compilation_evidence or "" + if "Architecture mismatch" in evidence: + # Architecture-based not affected - vulnerability cannot occur on this platform + updated_fields["justification_label"] = "requires_environment" + updated_fields["executive_summary"] = ( + f"{report.executive_summary}\n\n" + f"**L2 Override:** {evidence} " + f"Vulnerability condition cannot occur on this architecture." + ) + elif l2_result.compilation_status == "not_compiled": updated_fields["justification_label"] = "code_not_present" updated_fields["executive_summary"] = ( f"{report.executive_summary}\n\n" f"**L2 Override:** Vulnerable code is NOT compiled into the binary. " - f"Evidence: {l2_result.compilation_evidence or 'Build analysis confirmed exclusion.'}" + f"Evidence: {evidence or 'Build analysis confirmed exclusion.'}" ) else: updated_fields["justification_label"] = "code_not_reachable" diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index 6eaf19692..e8e4c5df5 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -25,7 +25,10 @@ from pydantic import Field from exploit_iq_commons.logging.loggers_factory import LoggingFactory, trace_id -from exploit_iq_commons.data_models.checker_status import L1InvestigationResult +from exploit_iq_commons.data_models.checker_status import ( + L1InvestigationResult, + format_vulnerability_intel_for_prompt, +) from langgraph.graph import StateGraph, START, END from langgraph.prebuilt import ToolNode @@ -55,7 +58,6 @@ VulnerabilityIntel, VULNERABILITY_INTEL_EXTRACTION_PROMPT, 
format_patch_data_for_intel, - format_vulnerability_intel_for_prompt, ) from vuln_analysis.tools.brew_downloader import BrewDownloader, BrewProfileType, BrewDownloaderError @@ -560,17 +562,6 @@ async def forced_finish_node(state: CodeAgentState) -> dict: span.set_output({"error": str(e), "exception_type": type(e).__name__, "step": step_num}) raise - def _extract_patterns_from_intel(state: CodeAgentState) -> tuple[str, str]: - """Extract vulnerable and fix patterns from VulnerabilityIntel in state.""" - vulnerability_intel = state.get("vulnerability_intel") - - if not vulnerability_intel: - return "No patterns available", "No patterns available" - - vulnerable_patterns = "\n".join(vulnerability_intel.vulnerable_patterns) if vulnerability_intel.vulnerable_patterns else "No vulnerable code patterns identified" - fix_patterns = "\n".join(vulnerability_intel.fix_patterns) if vulnerability_intel.fix_patterns else "No fix code patterns identified" - return vulnerable_patterns, fix_patterns - async def observation_node(state: CodeAgentState) -> dict: """Process tool output: comprehension -> memory update with VulnerabilityIntel context.""" logger.info("observation_node: starting") @@ -585,7 +576,8 @@ async def observation_node(state: CodeAgentState) -> dict: tool_input_detail = last_thought.actions.query previous_memory = state.get("observation").memory if state.get("observation") else ["No data gathered yet."] - vulnerable_patterns, fix_patterns = _extract_patterns_from_intel(state) + vulnerability_intel = state.get("vulnerability_intel") + intel_formatted = format_vulnerability_intel_for_prompt(vulnerability_intel) if vulnerability_intel else "No intel available" target_package_name = target_package.name if target_package else "unknown" with tracer.push_active_function("observation node", input_data=f"tool used:{tool_used} + {tool_input_detail}") as span: @@ -595,8 +587,7 @@ async def observation_node(state: CodeAgentState) -> dict: comp_prompt = 
L1_COMPREHENSION_PROMPT.format( vuln_id=vuln_id, target_package=target_package_name, - vulnerable_patterns=vulnerable_patterns, - fix_patterns=fix_patterns, + vulnerability_intel=intel_formatted, tool_used=tool_used, tool_input=tool_input_detail, last_thought=last_thought_text, @@ -686,29 +677,6 @@ async def should_continue(state: CodeAgentState) -> str: return app -def _extract_affected_files(result: dict) -> list[str]: - """Extract affected files from downstream and upstream reports.""" - affected = set() - downstream: DownstreamSearchReport | None = result.get("downstream_report") - upstream: UpstreamSearchReport | None = result.get("upstream_report") - - if downstream and downstream.parsed_patch: - for pf in downstream.parsed_patch.files: - if pf.is_new_file: - affected.add(pf.target_path) - else: - affected.add(pf.source_path) - - if upstream and upstream.fixed_parsed_patch: - for pf in upstream.fixed_parsed_patch.files: - if pf.is_new_file: - affected.add(pf.target_path) - else: - affected.add(pf.source_path) - - return sorted(affected) - - @register_function(config_type=CVEPackageCodeAgentConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) async def cve_package_code_agent(config: CVEPackageCodeAgentConfig, builder: Builder): @@ -757,17 +725,17 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: tracer=tracer, ) preliminary_verdict = verdict_extraction.preliminary_verdict - affected_files = _extract_affected_files(result) confidence = verdict_extraction.confidence downstream_report: DownstreamSearchReport | None = result.get("downstream_report") upstream_report: UpstreamSearchReport | None = result.get("upstream_report") + vulnerability_intel: VulnerabilityIntel | None = result.get("vulnerability_intel") l1_result = L1InvestigationResult( downstream_report=downstream_report.model_dump() if downstream_report else None, upstream_report=upstream_report.model_dump() if upstream_report else None, l1_agent_answer=final_answer, - 
affected_files=affected_files, + vulnerability_intel=vulnerability_intel, preliminary_verdict=preliminary_verdict, confidence=confidence, ) @@ -778,7 +746,7 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: ) as span: span.set_output({ "l1_agent_answer": final_answer[:500] if final_answer else None, - "affected_files": affected_files, + "vulnerability_intel": vulnerability_intel, "confidence": l1_result.confidence, }) @@ -787,10 +755,9 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: else: logger.warning("package_code_agent: checker_context is None, cannot store l1_result") logger.info( - "package_code_agent: L1 result - verdict=%s, confidence=%.2f, affected_files=%d", + "package_code_agent: L1 result - verdict=%s, confidence=%.2f", preliminary_verdict, l1_result.confidence, - len(affected_files), ) return message From 38f88fdc6f7a86b00de7b88d61058b3f233d30ec Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Sun, 10 May 2026 06:10:13 +0000 Subject: [PATCH 35/46] filter binary file --- .../functions/code_agent_graph_defs.py | 112 +----------------- 1 file changed, 1 insertion(+), 111 deletions(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index 76d92bc6d..ca795a2ff 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -1013,7 +1013,7 @@ def _extract_spec_changelog(inspector, spec_path: Path) -> str | None: _BINARY_FILE_EXTENSIONS = frozenset({ - '.iso', '.bin', '.gz', '.bz2', '.xz', '.zip', '.tar', '.tgz', '.tbz2', + '.uu','.uue','.iso', '.bin', '.gz', '.bz2', '.xz', '.zip', '.tar', '.tgz', '.tbz2', '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.ico', '.webp', '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.exe', '.dll', '.so', '.dylib', '.a', '.o', '.obj', @@ -1860,113 +1860,3 @@ async def upstream_search_preprocess( {{"thought": "KNOWLEDGE shows: zisofs at 
iso9660.c, pz_log2_bs has no bounds check. Evidence sufficient.", "mode": "finish", "actions": null, "final_answer": "The package is LIKELY VULNERABLE. Found zisofs handling at iso9660.c but no bounds checking on pz_log2_bs variable. The CVE describes missing validation on block size which matches the observed code."}} """ - -# --------------------------------------------------------------------------- -# L1 Agent Helper Functions -# --------------------------------------------------------------------------- - -def format_patch_files_summary(parsed_patch: ParsedPatch | None) -> str: - """Generate a concise summary of files modified by the patch. - - Returns a bullet list showing each file with hunk count and line changes. - - Example output: - - archive_read_support_format_zip.c (3 hunks, +15/-8 lines) - - archive.h (1 hunk, +2/-0 lines, new file) - """ - if not parsed_patch or not parsed_patch.files: - return "No files in patch." - - lines = [] - for pf in parsed_patch.files: - # Extract clean filename from path (strip a/ or b/ prefix) - filename = pf.target_path.lstrip("ab/") - - # Count total added/removed lines across all hunks - added = sum(len(h.added_lines) for h in pf.hunks) - removed = sum(len(h.removed_lines) for h in pf.hunks) - hunk_count = len(pf.hunks) - - # Build descriptor - hunk_word = "hunk" if hunk_count == 1 else "hunks" - desc = f"- {filename} ({hunk_count} {hunk_word}, +{added}/-{removed} lines" - - if pf.is_new_file: - desc += ", new file" - elif pf.is_deleted_file: - desc += ", deleted" - desc += ")" - - lines.append(desc) - - return "\n".join(lines) - - -def format_patch_hunks_summary( - parsed_patch: ParsedPatch | None, - max_hunks: int = 5, - max_lines_per_hunk: int = 5, -) -> str: - """Extract key code changes from patch hunks for LLM context. - - Shows the vulnerable code (removed lines) and fix code (added lines) - in a searchable format. Prioritizes hunks with actual code changes - over pure additions/deletions. 
- - Args: - parsed_patch: The parsed patch structure - max_hunks: Maximum number of hunks to include (default 5) - max_lines_per_hunk: Max lines to show per removed/added section - - Returns: - Formatted string showing file, line numbers, and code changes. - """ - if not parsed_patch or not parsed_patch.files: - return "No patch content available." - - output_lines: list[str] = [] - hunk_count = 0 - - for pf in parsed_patch.files: - filename = pf.target_path.lstrip("ab/") - - for hunk in pf.hunks: - if hunk_count >= max_hunks: - remaining = sum(len(f.hunks) for f in parsed_patch.files) - hunk_count - if remaining > 0: - output_lines.append(f"\n... and {remaining} more hunks") - return "\n".join(output_lines) - - # Skip hunks with no actual changes (just context) - if not hunk.removed_lines and not hunk.added_lines: - continue - - output_lines.append(f"\nFile: {filename} (line {hunk.source_start})") - - # Show removed lines (vulnerable code) - if hunk.removed_lines: - output_lines.append(" VULNERABLE (removed):") - for line in hunk.removed_lines[:max_lines_per_hunk]: - cleaned = line.strip() - if cleaned: - output_lines.append(f" {cleaned}") - if len(hunk.removed_lines) > max_lines_per_hunk: - output_lines.append( - f" ... (+{len(hunk.removed_lines) - max_lines_per_hunk} more lines)" - ) - - # Show added lines (fix code) - if hunk.added_lines: - output_lines.append(" FIX (added):") - for line in hunk.added_lines[:max_lines_per_hunk]: - cleaned = line.strip() - if cleaned: - output_lines.append(f" {cleaned}") - if len(hunk.added_lines) > max_lines_per_hunk: - output_lines.append( - f" ... (+{len(hunk.added_lines) - max_lines_per_hunk} more lines)" - ) - - hunk_count += 1 - - return "\n".join(output_lines) if output_lines else "No code changes in patch." 
\ No newline at end of file From f1d1fac8977f56663e8f4c7efcf263f911348246 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Sun, 10 May 2026 06:22:12 +0000 Subject: [PATCH 36/46] fix status of report --- src/vuln_analysis/functions/cve_checker_report.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/vuln_analysis/functions/cve_checker_report.py b/src/vuln_analysis/functions/cve_checker_report.py index 80765f602..83f7a2334 100644 --- a/src/vuln_analysis/functions/cve_checker_report.py +++ b/src/vuln_analysis/functions/cve_checker_report.py @@ -60,6 +60,7 @@ "code_not_reachable": "FALSE", "protected_by_mitigating_control": "FALSE", "protected_by_compiler": "FALSE", + "requires_environment": "FALSE", "vulnerable": "TRUE", "uncertain": "UNKNOWN", } From 45b9ef853019b238c5d63e2cb349e966654c0d1b Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Sun, 10 May 2026 06:55:37 +0000 Subject: [PATCH 37/46] check for package name in cve matches package target --- .../data_models/checker_status.py | 3 +++ src/vuln_analysis/register.py | 10 ++++++++-- src/vuln_analysis/utils/package_identifier.py | 20 +++++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/exploit_iq_commons/data_models/checker_status.py b/src/exploit_iq_commons/data_models/checker_status.py index 19b74f53c..1221978e5 100644 --- a/src/exploit_iq_commons/data_models/checker_status.py +++ b/src/exploit_iq_commons/data_models/checker_status.py @@ -26,6 +26,7 @@ class PackageCheckerStatus(IntEnum): ERROR_PKG_IDENT_NO_INTEL = 1 PKG_IDENT_NOT_VUL = 2 ERROR_FAILED_TO_DOWNLOAD_SRPM = 3 + PKG_IDENT_CVE_MISMATCH = 4 PACKAGE_CHECKER_STATUS_DESCRIPTIONS: dict[PackageCheckerStatus, str] = { @@ -37,6 +38,8 @@ class PackageCheckerStatus(IntEnum): "Identification state concluded from intel that target package is not vulnerable", PackageCheckerStatus.ERROR_FAILED_TO_DOWNLOAD_SRPM: "Failed to download the patched SRPM", + PackageCheckerStatus.PKG_IDENT_CVE_MISMATCH: + "CVE does not apply to 
target package - RHSA does not list this package", } class EnumIdentifyResult(str, Enum): diff --git a/src/vuln_analysis/register.py b/src/vuln_analysis/register.py index 6ee21f603..d7412d7ae 100644 --- a/src/vuln_analysis/register.py +++ b/src/vuln_analysis/register.py @@ -344,9 +344,15 @@ async def checker_early_exit_node(state: AgentMorpheusEngineInput) -> AgentMorph checklist=[], summary=reason, justification=JustificationOutput( - label="not_vulnerable" if status == PackageCheckerStatus.PKG_IDENT_NOT_VUL else "error", + label="not_vulnerable" if status in ( + PackageCheckerStatus.PKG_IDENT_NOT_VUL, + PackageCheckerStatus.PKG_IDENT_CVE_MISMATCH, + ) else "error", reason=reason, - status="FALSE" if status == PackageCheckerStatus.PKG_IDENT_NOT_VUL else "UNKNOWN", + status="FALSE" if status in ( + PackageCheckerStatus.PKG_IDENT_NOT_VUL, + PackageCheckerStatus.PKG_IDENT_CVE_MISMATCH, + ) else "UNKNOWN", ), intel_score=0, cvss=None, diff --git a/src/vuln_analysis/utils/package_identifier.py b/src/vuln_analysis/utils/package_identifier.py index 0b1f328a3..6c3b8c6a5 100644 --- a/src/vuln_analysis/utils/package_identifier.py +++ b/src/vuln_analysis/utils/package_identifier.py @@ -67,6 +67,11 @@ def identify(self, intel: CveIntel | None) -> tuple[PackageCheckerStatus, Packag status = PackageCheckerStatus.ERROR_PKG_IDENT_NO_INTEL return status, package_identify + if not self._is_cve_for_target_package(intel): + status = PackageCheckerStatus.PKG_IDENT_CVE_MISMATCH + package_identify.is_target_package_affected = EnumIdentifyResult.NO + return status, package_identify + package_identify.is_target_package_affected = self._is_target_package_affected(intel,package_identify) package_identify.is_target_package_fixed = self._is_target_package_fixed(intel,package_identify) @@ -93,6 +98,21 @@ def _find_and_locate_rpm(self, intel: CveIntel) -> list[str]: names.append(name) return names + def _is_cve_for_target_package(self, intel: CveIntel) -> bool: + """Validate CVE applies to 
target package via RHSA package_state. + + Returns True if validation passes or cannot be performed. + Returns False only if RHSA explicitly lists packages and target is NOT among them. + """ + if not intel.rhsa or not intel.rhsa.package_state: + return True # No RHSA data to validate against + + target_name = self._target_package.name + for ps in intel.rhsa.package_state: + if ps.package_name and package_names_match(target_name, ps.package_name): + return True + return False # RHSA has packages but target not found + def _is_target_package_affected( self, intel: CveIntel, package_identify: PackageIdentifyResult, ) -> EnumIdentifyResult: From ecff8160772381cabb17becb615622bb25fdefd7 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Sun, 10 May 2026 07:39:30 +0000 Subject: [PATCH 38/46] add Intel calculation of score and reports --- .../data_models/checker_status.py | 3 +++ .../functions/cve_checker_report.py | 11 +++++---- .../functions/cve_source_acquisition.py | 22 +++++++++++++---- src/vuln_analysis/register.py | 24 +++++++++++++------ 4 files changed, 44 insertions(+), 16 deletions(-) diff --git a/src/exploit_iq_commons/data_models/checker_status.py b/src/exploit_iq_commons/data_models/checker_status.py index 1221978e5..b30d895ce 100644 --- a/src/exploit_iq_commons/data_models/checker_status.py +++ b/src/exploit_iq_commons/data_models/checker_status.py @@ -27,6 +27,7 @@ class PackageCheckerStatus(IntEnum): PKG_IDENT_NOT_VUL = 2 ERROR_FAILED_TO_DOWNLOAD_SRPM = 3 PKG_IDENT_CVE_MISMATCH = 4 + PKG_INTEL_LOW_SCORE = 5 PACKAGE_CHECKER_STATUS_DESCRIPTIONS: dict[PackageCheckerStatus, str] = { @@ -40,6 +41,8 @@ class PackageCheckerStatus(IntEnum): "Failed to download the patched SRPM", PackageCheckerStatus.PKG_IDENT_CVE_MISMATCH: "CVE does not apply to target package - RHSA does not list this package", + PackageCheckerStatus.PKG_INTEL_LOW_SCORE: + "Intel quality score below threshold - insufficient information for reliable analysis", } class EnumIdentifyResult(str, 
Enum): diff --git a/src/vuln_analysis/functions/cve_checker_report.py b/src/vuln_analysis/functions/cve_checker_report.py index 83f7a2334..6720b8f54 100644 --- a/src/vuln_analysis/functions/cve_checker_report.py +++ b/src/vuln_analysis/functions/cve_checker_report.py @@ -200,6 +200,7 @@ def _build_analysis( message: AgentMorpheusEngineInput, code_agent_report: CodeAgentReport, l1_result: L1InvestigationResult, + intel_score: int, ) -> list[AgentMorpheusEngineOutput]: """Build the final analysis output from the code agent report.""" label = code_agent_report.justification_label @@ -239,7 +240,7 @@ def _build_analysis( reason=reason, status=status, ), - intel_score=0, + intel_score=intel_score, cvss=None, ) for intel in (message.info.intel if message.info and message.info.intel else []) @@ -277,10 +278,10 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: AgentMorpheusEngineOutput( vuln_id=intel.vuln_id, checklist=[], - summary="L1 investigation did not produce results.", + summary="Rpm scanning investigation did not produce results.", justification=JustificationOutput( label="uncertain", - reason="L1 result not available for report generation.", + reason="Rpm scanning investigation did not produce results.", status="UNKNOWN", ), intel_score=0, @@ -380,12 +381,12 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: "affected_files": code_agent_report.affected_files, "recommendation": code_agent_report.recommendation, }) - + intel_score = intel[0].intel_score return AgentMorpheusOutput( input=message.input, info=message.info, output=OutputPayload( - analysis=_build_analysis(message, code_agent_report, l1_result), + analysis=_build_analysis(message, code_agent_report, l1_result, intel_score), vex=None, ), ) diff --git a/src/vuln_analysis/functions/cve_source_acquisition.py b/src/vuln_analysis/functions/cve_source_acquisition.py index 60d193347..c4cc42cf6 100644 --- 
a/src/vuln_analysis/functions/cve_source_acquisition.py +++ b/src/vuln_analysis/functions/cve_source_acquisition.py @@ -32,6 +32,7 @@ from exploit_iq_commons.utils.source_rpm_downloader import RPMDependencyManager, SourceRPMDownloader from vuln_analysis.utils.package_identifier import PackageIdentifier from vuln_analysis.tools.brew_downloader import BrewDownloader, BrewProfileType , BrewDownloaderError +from vuln_analysis.functions.cve_calculate_intel_score import CVECalculateIntelScoreConfig logger = LoggingFactory.get_agent_logger(__name__) @@ -82,11 +83,24 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: status = PackageCheckerStatus.OK per_vuln_results: dict[str, PackageIdentifyResult] = {} + + intel_score_config = builder.get_function_config("cve_calculate_intel_score") + assert isinstance(intel_score_config, CVECalculateIntelScoreConfig) + for vuln_info in vulns: - intel = intel_by_vuln.get(vuln_info.vuln_id) - status,result = identifier.identify( intel) - message.info.checker_context.identify_result = result - break + intel = intel_by_vuln.get(vuln_info.vuln_id) + + if intel_score_config.generate_intel_score and intel: + score = intel.get_intel_score() + if score < intel_score_config.intel_low_score and not intel_score_config.insist_analysis: + logger.info("Intel score %d below threshold %d for %s - skipping", + score, intel_score_config.intel_low_score, vuln_info.vuln_id) + status = PackageCheckerStatus.PKG_INTEL_LOW_SCORE + break + + status, result = identifier.identify(intel) + message.info.checker_context.identify_result = result + break message.info.checker_context.status = status diff --git a/src/vuln_analysis/register.py b/src/vuln_analysis/register.py index d7412d7ae..f7cfb2534 100644 --- a/src/vuln_analysis/register.py +++ b/src/vuln_analysis/register.py @@ -244,7 +244,12 @@ async def checker_init_state_node(state: AgentMorpheusInput) -> AgentMorpheusEng async def checker_fetch_intel_node(state: 
AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: """Fetch intel for CVE input (package checker path). Reuses the same fetch_intel function.""" return await cve_fetch_intel_fn.ainvoke(state.model_dump()) - + + @catch_pipeline_errors_async + async def checker_calculate_intel_score_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + """Calculate intel score for CVE input (package checker path).""" + return await cve_calculate_intel_score_fn.ainvoke(state.model_dump()) + async def check_vdbs_success(state: AgentMorpheusInput) -> str: """Checks if the VDBs were successfully generated""" if state.code_index_success: @@ -338,16 +343,20 @@ async def checker_early_exit_node(state: AgentMorpheusEngineInput) -> AgentMorph else f"Checker exited early with status {status}" ) logger.info("checker_early_exit: status=%s reason=%s", status, reason) + def _get_justification_label(s: PackageCheckerStatus | None) -> str: + if s in (PackageCheckerStatus.PKG_IDENT_NOT_VUL, PackageCheckerStatus.PKG_IDENT_CVE_MISMATCH): + return "not_vulnerable" + if s == PackageCheckerStatus.PKG_INTEL_LOW_SCORE: + return "poor_quality_intel" + return "error" + analysis = [ AgentMorpheusEngineOutput( vuln_id=v.vuln_id, checklist=[], summary=reason, justification=JustificationOutput( - label="not_vulnerable" if status in ( - PackageCheckerStatus.PKG_IDENT_NOT_VUL, - PackageCheckerStatus.PKG_IDENT_CVE_MISMATCH, - ) else "error", + label=_get_justification_label(status), reason=reason, status="FALSE" if status in ( PackageCheckerStatus.PKG_IDENT_NOT_VUL, @@ -422,7 +431,7 @@ async def call_llm_engine_subgraph_node(message: AgentMorpheusEngineInput): # -- Package checker nodes -- graph_builder.add_node("checker_init_state", checker_init_state_node) graph_builder.add_node("checker_fetch_intel", checker_fetch_intel_node) - + graph_builder.add_node("checker_calculate_intel_score", checker_calculate_intel_score_node) graph_builder.add_node("source_acquisition", source_acquisition_node) 
graph_builder.add_node("checker_early_exit", checker_early_exit_node) graph_builder.add_node("checker_segmentation", checker_segmentation_node) @@ -452,7 +461,8 @@ async def call_llm_engine_subgraph_node(message: AgentMorpheusEngineInput): # Package checker path graph_builder.add_edge("checker_init_state", "checker_fetch_intel") - graph_builder.add_edge("checker_fetch_intel", "source_acquisition") + graph_builder.add_edge("checker_fetch_intel", "checker_calculate_intel_score") + graph_builder.add_edge("checker_calculate_intel_score", "source_acquisition") graph_builder.add_conditional_edges( "source_acquisition", From d7c77a3163bd4c7c470da589ad28efb6cf349336 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Sun, 10 May 2026 14:03:38 +0000 Subject: [PATCH 39/46] clear summary report --- .../functions/code_agent_graph_defs.py | 74 +++++- .../functions/cve_checker_report.py | 233 +++++++++++++++--- 2 files changed, 269 insertions(+), 38 deletions(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index ca795a2ff..ce331f9af 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -40,7 +40,7 @@ logger = logging.getLogger(__name__) -from exploit_iq_commons.data_models.checker_status import VulnerabilityIntel +from exploit_iq_commons.data_models.checker_status import L2BuildResult, VulnerabilityIntel from vuln_analysis.functions.react_internals import CheckerThought, Observation, L1VerdictExtraction # --------------------------------------------------------------------------- # Graph state @@ -390,9 +390,10 @@ def format_patch_data_for_intel( CODE_AGENT_REPORT_PROMPT = """\ -You are a security analyst generating the final L1 Code Agent investigation report. 
-Synthesize the results from the downstream search, upstream search, and L1 agent analysis -into a comprehensive, auditable report with a clear justification and supporting evidence. +You are a security analyst generating the final Code Agent investigation report. +Synthesize the results from the downstream search, upstream search, L1 agent analysis, +and optionally L2 build analysis into a comprehensive, auditable report with a clear +justification and supporting evidence. @@ -413,16 +414,28 @@ def format_patch_data_for_intel( {l1_agent_section} +{l2_context_section} Generate a structured report following these requirements: 1. JUSTIFICATION LABEL (select the most appropriate): - code_not_present: Vulnerable code/function is absent from this package version + - code_not_reachable: Code exists but cannot be reached/executed in this context - protected_by_mitigating_control: Downstream patch or backport mitigates the vulnerability + - protected_by_compiler: Compiler hardening flags mitigate the vulnerability + - requires_environment: Vulnerability requires specific environment conditions not present - vulnerable: Package is actually vulnerable and needs patching - uncertain: Insufficient information to determine exploitability - PRECEDENCE RULES: + PRECEDENCE RULES (L2 overrides L1 when L2_BUILD_CONTEXT is present): + - L2 analyzes actual compiled binaries; treat L2 findings as ground truth for exploitability. + - If L2 verdict is "not_vulnerable" with architecture mismatch → use "requires_environment" + - If L2 verdict is "not_vulnerable" with compilation_status="not_compiled" → use "code_not_present" + - If L2 verdict is "vulnerable_mitigated" with hardening flags → use "protected_by_compiler" + - Do NOT state "vulnerable" if L2 evidence contradicts it. Instead explain: + "While source contains vulnerable patterns, the build is not affected due to [L2 reason]." 
+ + PRECEDENCE RULES (when no L2 context): - If a CVE-specific patch file exists AND is applied in build, use "protected_by_mitigating_control". - If L1 agent found the fix code in source, use "protected_by_mitigating_control". - If L1 agent found vulnerable code pattern still present, use "vulnerable". @@ -633,6 +646,44 @@ def _format_upstream_for_report(report: UpstreamSearchReport | None) -> str: return "\n".join(lines) +def _format_l2_for_report(l2_result: L2BuildResult | None) -> str: + """Format L2 Build Agent results for prompt injection. + + When L2 results are present and contain an override verdict, this produces + a structured context block that instructs the LLM to treat L2 findings as + ground truth for exploitability (since L2 analyzes actual compiled binaries). + + Returns an empty string if L2 results are None or have no override verdict. + """ + if l2_result is None or l2_result.l2_override_verdict is None: + return "" + + lines = [ + "", + f"**L2 Verdict:** {l2_result.l2_override_verdict}", + f"**Compilation Status:** {l2_result.compilation_status}", + ] + + if l2_result.compilation_evidence: + lines.append(f"**Evidence:** {l2_result.compilation_evidence}") + + if l2_result.hardening_flags: + flags_str = ", ".join(l2_result.hardening_flags[:10]) + if len(l2_result.hardening_flags) > 10: + flags_str += f" (+{len(l2_result.hardening_flags) - 10} more)" + lines.append(f"**Hardening Flags:** {flags_str}") + + if l2_result.hardening_rationale: + lines.append(f"**Hardening Rationale:** {l2_result.hardening_rationale}") + + if l2_result.hardening_relevant is not None: + lines.append(f"**Hardening Relevant to CVE:** {l2_result.hardening_relevant}") + + lines.append("") + + return "\n".join(lines) + + # --------------------------------------------------------------------------- # Report generation pipeline # --------------------------------------------------------------------------- @@ -848,11 +899,13 @@ async def generate_code_agent_report( 
l1_agent_answer: str | None, tracer, policy_context: str = "", + l2_result: L2BuildResult | None = None, ) -> CodeAgentReport: """Generate the final L1 Code Agent investigation report. - Synthesizes results from downstream search, upstream search, and L1 agent analysis - into a comprehensive, auditable report with a clear verdict. + Synthesizes results from downstream search, upstream search, L1 agent analysis, + and optionally L2 build analysis into a comprehensive, auditable report with + a clear verdict. Parameters ---------- @@ -874,6 +927,9 @@ async def generate_code_agent_report( Request-scoped tracing context. policy_context: Pre-formatted NVR posture and RHSA excerpt context for the LLM prompt. + l2_result: + Output of L2 build analysis (may be None). When present, L2 verdicts + override L1 findings as L2 analyzes actual compiled binaries. Returns ------- @@ -887,6 +943,7 @@ async def generate_code_agent_report( downstream_section = _format_downstream_for_report(downstream_report) upstream_section = _format_upstream_for_report(upstream_report) l1_agent_section = l1_agent_answer or "L1 agent did not produce a final answer." 
+ l2_context_section = _format_l2_for_report(l2_result) if policy_context: policy_context_section = ( @@ -905,10 +962,12 @@ async def generate_code_agent_report( downstream_section=downstream_section, upstream_section=upstream_section, l1_agent_section=l1_agent_section, + l2_context_section=l2_context_section, ) report_llm = llm.with_structured_output(CodeAgentReport) + has_l2_override = l2_result is not None and l2_result.l2_override_verdict is not None with tracer.push_active_function( "generate_report", input_data={ @@ -917,6 +976,7 @@ async def generate_code_agent_report( "has_downstream_patch": downstream_report.is_patch_file_available if downstream_report else False, "has_upstream_patch": upstream_report.is_fixed_srpm_is_needed if upstream_report else False, "has_l1_answer": l1_agent_answer is not None, + "has_l2_override": has_l2_override, }, ) as span: messages = [ diff --git a/src/vuln_analysis/functions/cve_checker_report.py b/src/vuln_analysis/functions/cve_checker_report.py index 6720b8f54..8e434777c 100644 --- a/src/vuln_analysis/functions/cve_checker_report.py +++ b/src/vuln_analysis/functions/cve_checker_report.py @@ -21,6 +21,8 @@ checker_context and produces the final AgentMorpheusOutput. 
""" +import warnings +from dataclasses import dataclass from pathlib import Path from typing import Literal @@ -45,6 +47,7 @@ ) from vuln_analysis.functions.code_agent_graph_defs import ( CodeAgentReport, + CodeSnippet, DownstreamSearchReport, UpstreamSearchReport, generate_code_agent_report, @@ -71,6 +74,123 @@ _POLICY_MAX_PACKAGE_STATE_ITEMS = 8 +@dataclass +class ReportBlocks: + """Formatted report blocks - each piece of data formatted once for UI output.""" + + # Package info + package_name: str + package_version: str + package_release: str + package_arch: str + + # CVE info + cve_id: str + cve_description: str + + # Verdict + justification_label: str + executive_summary: str + recommendation: str + + # Evidence + evidence_chain: list[str] + affected_files: list[str] + + # Extracted facts from downstream search + patch_file_name: str + spec_patch_directives: list[str] + build_log_evidence: str + + # Code snippets + vulnerable_snippets: list[CodeSnippet] + fix_snippets: list[CodeSnippet] + + @property + def package_header_md(self) -> str: + """Format package metadata as Markdown header.""" + version_release = f"{self.package_version}-{self.package_release}" if self.package_release else self.package_version + return f"**Package:** `{self.package_name}-{version_release}` ({self.package_arch})" + + @property + def evidence_chain_md(self) -> str: + """Format combined evidence chain with extracted facts as Markdown.""" + lines: list[str] = ["## Evidence Chain"] + + # Add narrative evidence chain + for ev in self.evidence_chain[:5]: + lines.append(f"- {ev}") + + # Add extracted facts if available + if self.spec_patch_directives: + for directive in self.spec_patch_directives[:2]: + lines.append(f"- Spec patch directive: `{directive}`") + + if self.build_log_evidence: + first_line = self.build_log_evidence.split('\n')[0][:100] + lines.append(f"- Build log: `{first_line}`") + + return "\n".join(lines) + + @property + def affected_files_md(self) -> str: + """Format 
affected files as Markdown list.""" + if not self.affected_files: + return "" + lines = ["**Affected Files:**"] + for f in self.affected_files[:10]: + lines.append(f"- `{f}`") + return "\n".join(lines) + + @property + def recommendation_md(self) -> str: + """Format recommendation as Markdown.""" + return f"## Recommendation\n{self.recommendation}" + + +def _build_report_blocks( + message: AgentMorpheusEngineInput, + code_agent_report: CodeAgentReport, + cve_description: str, + downstream_report: DownstreamSearchReport | None, +) -> ReportBlocks: + """Extract and format all report data into blocks.""" + target_package = message.input.image.target_package + + # Extract code snippets by type + vulnerable_snippets = [s for s in code_agent_report.code_snippets if s.snippet_type == "vulnerable"] + fix_snippets = [s for s in code_agent_report.code_snippets if s.snippet_type == "fix"] + + # Extract facts from downstream report + patch_file_name = "" + spec_patch_directives: list[str] = [] + build_log_evidence = "" + + if downstream_report: + patch_file_name = downstream_report.patch_file_name or "" + spec_patch_directives = downstream_report.spec_patch_directives_for_cve or [] + build_log_evidence = downstream_report.build_log_patch_applied or "" + + return ReportBlocks( + package_name=target_package.name if target_package else "unknown", + package_version=target_package.version or "" if target_package else "", + package_release=target_package.release or "" if target_package else "", + package_arch=target_package.arch or "x86_64" if target_package else "x86_64", + cve_id=message.input.scan.vulns[0].vuln_id if message.input.scan.vulns else "", + cve_description=cve_description, + justification_label=code_agent_report.justification_label, + executive_summary=code_agent_report.executive_summary, + recommendation=code_agent_report.recommendation, + evidence_chain=list(code_agent_report.evidence_chain), + affected_files=list(code_agent_report.affected_files), + 
patch_file_name=patch_file_name, + spec_patch_directives=spec_patch_directives, + build_log_evidence=build_log_evidence, + vulnerable_snippets=vulnerable_snippets, + fix_snippets=fix_snippets, + ) + + def _format_policy_context_for_report( *, target_nvr: str, @@ -136,7 +256,20 @@ def _apply_l2_verdict( report: CodeAgentReport, l2_result: L2BuildResult, ) -> CodeAgentReport: - """Apply L2 Build Agent verdict overrides to the CodeAgentReport.""" + """Apply L2 Build Agent verdict overrides to the CodeAgentReport. + + .. deprecated:: + This function is deprecated. L2 results are now passed directly to + `generate_code_agent_report()` so the LLM can synthesize L1 and L2 + findings into a cohesive narrative. This function will be removed + in a future release. + """ + warnings.warn( + "_apply_l2_verdict is deprecated. L2 results are now integrated " + "directly into the LLM prompt via generate_code_agent_report().", + DeprecationWarning, + stacklevel=2, + ) if l2_result.l2_override_verdict is None: return report @@ -199,41 +332,71 @@ def _apply_l2_verdict( def _build_analysis( message: AgentMorpheusEngineInput, code_agent_report: CodeAgentReport, - l1_result: L1InvestigationResult, intel_score: int, + cve_description: str = "", + downstream_report: DownstreamSearchReport | None = None, ) -> list[AgentMorpheusEngineOutput]: - """Build the final analysis output from the code agent report.""" - label = code_agent_report.justification_label + """Build the final analysis output from the code agent report using ReportBlocks. 
+ + Output structure (no duplication): + - summary: Package header + executive summary (brief) + - reason: Evidence chain (with extracted facts) + Recommendation (details) + - checklist: CVE desc, Affected files, Vulnerable code, Fix code (expandable items) + """ + # Build report blocks from inputs + blocks = _build_report_blocks(message, code_agent_report, cve_description, downstream_report) + + label = blocks.justification_label status: _StatusLiteral = _JUSTIFICATION_LABEL_TO_STATUS.get(label, "UNKNOWN") - reason_parts = [code_agent_report.executive_summary] - if code_agent_report.evidence_chain: - reason_parts.append("\n\nEvidence chain:") - reason_parts.extend(f"- {ev}" for ev in code_agent_report.evidence_chain[:5]) - if code_agent_report.patch_analysis: - reason_parts.append(f"\n\nPatch analysis: {code_agent_report.patch_analysis}") - if code_agent_report.code_snippets: - reason_parts.append("\n\nCode snippets:") - for snippet in code_agent_report.code_snippets[:3]: - reason_parts.append(f"- [{snippet.snippet_type}] {snippet.file_path}:{snippet.line_number or 'N/A'}") - if code_agent_report.limitations: - reason_parts.append("\n\nLimitations:") - reason_parts.extend(f"- {lim}" for lim in code_agent_report.limitations) - reason_parts.append(f"\n\nRecommendation: {code_agent_report.recommendation}") + # Build summary: Package header + executive summary + summary = f"{blocks.package_header_md}\n\n{blocks.executive_summary}" + + # Build reason: Evidence chain (with extracted facts) + Recommendation + # NO code snippets, NO affected files, NO limitations (per plan) + reason_parts: list[str] = [ + blocks.evidence_chain_md, + ] reason = "\n".join(reason_parts) - summary = code_agent_report.executive_summary - response_text = l1_result.l1_agent_answer or "L1 agent completed investigation." 
+ # Build checklist items in order: CVE desc, Affected files, Vulnerable code, Fix code + checklist_items: list[ChecklistItemOutput] = [] + + # [0] CVE Description + if blocks.cve_description: + checklist_items.append(ChecklistItemOutput( + input="CVE Description", + response=blocks.cve_description, + )) + + # [1] Affected Files + if blocks.affected_files: + files_md = "\n".join(f"- `{f}`" for f in blocks.affected_files[:10]) + checklist_items.append(ChecklistItemOutput( + input="Affected Files", + response=files_md, + )) + + # [2] Vulnerable Code + if blocks.vulnerable_snippets: + snippet = blocks.vulnerable_snippets[0] + checklist_items.append(ChecklistItemOutput( + input=f"Vulnerable Code (`{snippet.file_path}`)", + response=f"Line {snippet.line_number or 'N/A'}:\n```\n{snippet.code}\n```", + )) + + # [3] Fix Code + if blocks.fix_snippets: + snippet = blocks.fix_snippets[0] + checklist_items.append(ChecklistItemOutput( + input=f"Fix Code (`{snippet.file_path}`)", + response=f"Line {snippet.line_number or 'N/A'}:\n```\n{snippet.code}\n```", + )) return [ AgentMorpheusEngineOutput( vuln_id=intel.vuln_id, - checklist=[ - ChecklistItemOutput( - input="L1 Package Code Agent analysis", - response=response_text, - ), - ], + checklist=checklist_items, summary=summary, justification=JustificationOutput( label=label, @@ -342,12 +505,9 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: l1_agent_answer=l1_result.l1_agent_answer, tracer=tracer, policy_context=policy_context, + l2_result=l2_result, ) - if l2_result: - logger.info("cve_checker_report: applying L2 verdict override") - code_agent_report = _apply_l2_verdict(code_agent_report, l2_result) - source_key = ctx.source_key if source_key: report_dir = Path(config.base_checker_dir) / source_key / "report" @@ -382,11 +542,22 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: "recommendation": code_agent_report.recommendation, }) intel_score = intel[0].intel_score + + 
cve_description = "" + if descriptions: + cve_description = descriptions[0][1] + return AgentMorpheusOutput( input=message.input, info=message.info, output=OutputPayload( - analysis=_build_analysis(message, code_agent_report, l1_result, intel_score), + analysis=_build_analysis( + message, + code_agent_report, + intel_score, + cve_description=cve_description, + downstream_report=downstream_report, + ), vex=None, ), ) From a9fa52efc7480e2d3aa544e572ecd18f406cbf5a Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Mon, 11 May 2026 10:30:26 +0000 Subject: [PATCH 40/46] clean report summary executive --- .../data_models/checker_status.py | 17 ++ .../functions/code_agent_graph_defs.py | 214 ++++++++---------- .../functions/cve_checker_report.py | 11 - .../functions/cve_package_code_agent.py | 6 + 4 files changed, 122 insertions(+), 126 deletions(-) diff --git a/src/exploit_iq_commons/data_models/checker_status.py b/src/exploit_iq_commons/data_models/checker_status.py index b30d895ce..2be0e68d8 100644 --- a/src/exploit_iq_commons/data_models/checker_status.py +++ b/src/exploit_iq_commons/data_models/checker_status.py @@ -113,6 +113,18 @@ class VulnerabilityIntel(BaseModel): default="both", description="Which CPU architectures are affected: 32-bit only, 64-bit only, or both (default)" ) + is_downstream_patch_available: bool = Field( + default=False, + description="True if a CVE-specific patch file exists in the downstream package" + ) + is_patch_applied_in_build: bool = Field( + default=False, + description="True if the patch was confirmed applied in build logs" + ) + patch_file_name: str = Field( + default="", + description="Name of the CVE-specific patch file (if available)" + ) def format_for_prompt(self) -> str: """Format VulnerabilityIntel for injection into L1 agent runtime prompt. @@ -120,6 +132,11 @@ def format_for_prompt(self) -> str: Uses UPPERCASE labels so they can be referenced as anchors in thought prompts. 
""" lines = [] + if self.is_downstream_patch_available: + status = "APPLIED" if self.is_patch_applied_in_build else "AVAILABLE" + lines.append(f"DOWNSTREAM_PATCH_STATUS: {status}") + if self.patch_file_name: + lines.append(f"PATCH_FILE: {self.patch_file_name}") if self.affected_files: lines.append(f"AFFECTED_FILES: {', '.join(self.affected_files)}") if self.vulnerable_functions: diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index ce331f9af..51dfe9a62 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -181,16 +181,16 @@ class CodeSnippet(BaseModel): class CodeAgentReport(BaseModel): """Final L1 Code Agent investigation report synthesizing all phases.""" - confidence: float = Field( - ge=0.0, le=1.0, - description="Overall confidence in the verdict (0.0-1.0)") justification_label: str = Field( description=( "Justification category aligned with VEX: one of " "code_not_present, protected_by_mitigating_control, vulnerable, uncertain" )) executive_summary: str = Field( - description="3-5 sentence summary of the investigation findings and verdict") + description=( + "3-4 sentence synthesis. Must include: 1) Final verdict, " + "2) Technical nature of flaw, 3) Why L2 context overrides L1 (if applicable)." 
+ )) evidence_chain: list[str] = Field( description="Ordered list of evidence items tracing the vulnerability through phases") affected_files: list[str] = Field( @@ -198,14 +198,12 @@ class CodeAgentReport(BaseModel): patch_analysis: str | None = Field( default=None, description="Analysis of downstream patches if any were found") - limitations: list[str] = Field( - default_factory=list, - description="Gaps or uncertainties in the investigation") - recommendation: str = Field( - description="Actionable recommendation based on the verdict") code_snippets: list[CodeSnippet] = Field( default_factory=list, description="Structured code snippets showing vulnerable and fix code") + caveats: list[str] = Field( + default_factory=list, + description="Investigation gaps or uncertainties that may need manual review") def to_markdown( self, @@ -214,7 +212,6 @@ def to_markdown( version: str = "", release: str = "", downstream_report: DownstreamSearchReport | None = None, - policy_context: str | None = None, ) -> str: """Render the report as a formatted markdown string.""" lines = [ @@ -231,8 +228,7 @@ def to_markdown( lines.append("") lines.extend([ - f"**Justification:** `{self.justification_label}` ", - f"**Confidence:** {self.confidence:.0%}", + f"**Justification:** `{self.justification_label}`", "", "## Executive Summary", self.executive_summary, @@ -241,14 +237,6 @@ def to_markdown( ]) lines.extend(f"- {ev}" for ev in self.evidence_chain) - if policy_context: - lines.extend([ - "", - "## Red Hat / package identification context", - "", - policy_context, - ]) - if downstream_report is not None: lines.append("") lines.extend(_format_extracted_facts_section(downstream_report)) @@ -260,12 +248,6 @@ def to_markdown( if self.patch_analysis: lines.extend(["", "## Patch Analysis", self.patch_analysis]) - if self.limitations: - lines.extend(["", "## Limitations"]) - lines.extend(f"- {lim}" for lim in self.limitations) - - lines.extend(["", "## Recommendation", self.recommendation]) - 
if self.code_snippets: lines.extend(["", "## Code Snippets"]) for snippet in self.code_snippets: @@ -276,6 +258,10 @@ def to_markdown( lines.append(f"(source: {snippet.source})") lines.append(f"```\n{snippet.code}\n```") + if self.caveats: + lines.extend(["", "## Caveats"]) + lines.extend(f"- {caveat}" for caveat in self.caveats) + return "\n".join(lines) @@ -455,45 +441,43 @@ def format_patch_data_for_intel( - When downstream investigation includes a parsed patch, code_snippets may be filled programmatically from that patch; use an empty code_snippets list if you do not have verbatim lines to copy. - Always populate affected_files with CVE-relevant source paths so patch hunks can be prioritized. -4. LIMITATIONS: - - Note any missing data (no patch file, no build log, etc.) - - Flag low-confidence findings that need manual review +4. EXECUTIVE SUMMARY (3-4 sentences, scenario-aware): + + When L2_BUILD_CONTEXT is present (L2 override scenario): + - Sentence 1 (Verdict): State final posture clearly (e.g., "NOT vulnerable due to environmental constraints") + - Sentence 2 (Technical Context): Describe the nature of the flaw from CVE description (e.g., "integer overflow in zisofs allocation") and why L2 negates it (e.g., "64-bit addressing prevents the overflow condition") + - Sentence 3 (Reconciliation): Explain why L1 found code but L2 says safe (e.g., "Vulnerable patterns exist in source but are inert on this architecture") -5. RECOMMENDATION: - - Provide actionable guidance based on the justification - - For protected_by_mitigating_control: confirm the fix is deployed - - For vulnerable: recommend patching or mitigation - - For uncertain: suggest next investigation steps + When NO L2 context (standard L1 scenarios): + - Sentence 1: State verdict (protected/vulnerable/uncertain) + - Sentence 2: Technical nature of flaw and how it was addressed (patch) or why it's exploitable + - Sentence 3: Any additional context from investigation results -6. 
EXECUTIVE SUMMARY (tie to NVR posture): - - When RED_HAT_PACKAGE_CONTEXT lists fixed/affected NVRs or RHSA excerpts, relate the **scanned NVR** to that posture in plain language (e.g. "The scanned build matches the fixed NVR…" or "Scanned NVR is listed as affected; verify fix deployment"). - - Do NOT invent RHSA IDs or advisory references not present in the context above. - - If no identify context is provided, base the summary solely on downstream/upstream investigation results. + Do NOT invent RHSA IDs, function names, or technical details not present in the context. -7. PATCH ANALYSIS (semantic fix narrative): +5. PATCH ANALYSIS (semantic fix narrative): - When downstream patch evidence exists, briefly describe **what** the fix does: name the function(s) or file(s) and the nature of the change (e.g. "adds range validation 15–17 in parse_rockridge_ZF1"). - Derive this from Downstream Search summary, patch file names, or L1 agent code excerpts—do NOT invent code or function names absent from investigation results. -8. DELIVERY MODEL: +6. DELIVERY MODEL: - When a CVE-named patch file is present, explicitly note that the fix is carried as a separate `%patch` directive while the upstream tarball (`Source0`) version may remain unchanged. - Encourage citing "Extracted facts" for exact spec `PatchN:` and `Source0`/`Version` lines when shown below. + +7. CAVEATS (optional): + - Note any missing data (no patch file, no build log, etc.) 
+ - Flag low-confidence findings that may need manual review + - Leave empty if no significant gaps exist Provide a structured JSON response with: -- confidence: 0.0-1.0 overall confidence - * >= 0.8: High certainty (definitive patch match or clear vulnerability) - * 0.5-0.79: Moderate certainty (semantic match, needs verification) - * < 0.5: Low certainty (flag for manual review) - justification_label: one of the labels above -- executive_summary: 2-3 sentence summary +- executive_summary: 3-4 sentence summary (see Instruction #4 for structure) - evidence_chain: list of evidence items in logical order - affected_files: list of source files involved - patch_analysis: analysis of patches (or null if none) -- limitations: list of investigation gaps -- recommendation: actionable next step - code_snippets: list of code snippets with file_path, line_number, code, snippet_type, source (may be overwritten from the downstream patch when one is parsed) -- affected_files: required for prioritizing patch excerpts when a downstream patch exists +- caveats: list of investigation gaps or uncertainties (empty list if none) Ensure all code snippets and special characters within JSON string values are properly escaped (e.g., quotes as \", backslashes as \\, newlines as \\n) to maintain valid JSON format. 
@@ -1002,18 +986,17 @@ async def generate_code_agent_report( report.code_snippets = _extract_code_snippets(downstream_report, upstream_report) span.set_output({ - "confidence": report.confidence, "justification_label": report.justification_label, "affected_files_count": len(report.affected_files), - "limitations_count": len(report.limitations), + "caveats_count": len(report.caveats), "code_snippets_count": len(report.code_snippets), "snippet_source": snippet_source, "downstream_patch_snippet_count_pre_cap": downstream_patch_snippet_count_pre_cap, }) logger.info( - "generate_code_agent_report: confidence=%.2f justification=%s", - report.confidence, report.justification_label, + "generate_code_agent_report: justification=%s", + report.justification_label, ) return report @@ -1316,83 +1299,76 @@ async def upstream_search_preprocess( L1_AGENT_SYS_PROMPT_PATCH_AVAILABLE = ( "You are a security analyst investigating whether a CVE fix has been applied to a package.\n" - "You have access to a downstream patch file that contains the fix for this vulnerability.\n\n" - "The patch content is provided below in PATCH_CONTEXT. 
Use it to understand:\n" - "- VULNERABLE code (- lines that were removed)\n" - "- FIX code (+ lines that were added)\n\n" - "YOUR FIRST ACTION must be to search for the vulnerable code pattern using Source Grep or Code Keyword Search.\n" - "Then verify whether the code matches the vulnerable or fixed version.\n\n" + "A downstream patch file exists and has been analyzed.\n\n" + "VULNERABILITY_INTEL contains DOWNSTREAM_PATCH_STATUS and extracted patterns from the patch.\n" + "The source code index contains the UNPATCHED tarball; the patch is applied at BUILD time.\n\n" + "YOUR TASK: Verify (1) vulnerable code exists in source, (2) fix pattern is absent.\n" + "Both outcomes are EXPECTED when DOWNSTREAM_PATCH_STATUS is APPLIED.\n\n" "CRITICAL RULES:\n" - "- The patch shows WHAT was vulnerable (- lines) and HOW it was fixed (+ lines).\n" - "- If you find the vulnerable code pattern in the source, the package is VULNERABLE.\n" - "- If you find the fix pattern instead, the package is PATCHED.\n" - "- If neither pattern is found, search for related function/variable names.\n" - "- Base conclusions ONLY on tool results, not assumptions.\n\n" + "- If DOWNSTREAM_PATCH_STATUS is APPLIED, the package is PATCHED (patch applied at build time).\n" + "- Finding vulnerable code in source is EXPECTED (source is unpatched tarball).\n" + "- NOT finding fix pattern in source is EXPECTED (fix is in patch file, not tarball).\n" + "- Both findings together confirm the patch will correctly fix the code at build time.\n\n" "ANSWER QUALITY:\n" "- Cite specific file paths and line numbers from tool results.\n" "- Quote the actual code found, not just describe it.\n" - "- Explain WHY the code matches or differs from the patch.\n" + "- Confirm the patch addresses the vulnerable code found.\n" "- State confidence level based on evidence quality." 
) L1_AGENT_SYS_PROMPT_UPSTREAM_PATCH = ( "You are a security analyst verifying that a package is VULNERABLE to a CVE.\n" "The TARGET package does NOT contain a CVE-specific patch file.\n" - "However, you have access to the patch from a FIXED RPM version (upstream/patched build).\n\n" - "The patch content is provided below in PATCH_CONTEXT. Use it to understand:\n" - "- VULNERABLE code (- lines that were removed in the fix)\n" - "- FIX code (+ lines that were added in the fix)\n\n" + "However, patterns have been extracted from the patch in a FIXED RPM version.\n\n" + "VULNERABILITY_INTEL contains patterns extracted from the fixed version's patch.\n\n" "YOUR TASK: Verify the TARGET package contains the vulnerable code and LACKS the fix.\n\n" "VERIFICATION STRATEGY:\n" - "1. FIRST search for the VULNERABLE code pattern (removed lines from the patch).\n" - " - Use function names, variable names, or unique code snippets from the '- lines'.\n" + "1. FIRST search for the VULNERABLE code pattern (from VULNERABLE_PATTERNS).\n" + " - Use function names, variable names, or unique code snippets.\n" " - The vulnerable code SHOULD exist in the target package.\n" - "2. If vulnerable code is found, search for the FIX code pattern (added lines).\n" + "2. If vulnerable code is found, search for the FIX code pattern (from FIX_PATTERNS).\n" " - The fix code should NOT exist in the target package.\n" "3. 
CONCLUSION:\n" " - If vulnerable code EXISTS and fix is ABSENT → Package is VULNERABLE.\n" " - If fix code IS found → Package may be patched via rebase (investigate further).\n" - " - If neither is found → Use file paths from patch to locate relevant code.\n\n" + " - If neither is found → Use file paths from AFFECTED_FILES to locate relevant code.\n\n" "CRITICAL RULES:\n" "- The patch is from a FIXED version - expect the target to have vulnerable code.\n" - "- Use file paths and function names from the patch to locate code.\n" + "- Use file paths and function names from VULNERABILITY_INTEL to locate code.\n" "- Search for distinctive code patterns, not generic keywords.\n" "- Base conclusions ONLY on tool results, not assumptions.\n\n" "ANSWER QUALITY:\n" "- Cite specific file paths and line numbers from tool results.\n" "- Quote the actual code found, not just describe it.\n" - "- Compare found code against both vulnerable and fix patterns from the patch.\n" + "- Compare found code against both vulnerable and fix patterns.\n" "- Clearly state whether vulnerable code exists and whether fix is absent.\n" "- State confidence level based on evidence quality." ) L1_AGENT_SYS_PROMPT_REBASE_FIX = ( "You are a security analyst verifying that a CVE fix is PRESENT in a rebased package.\n" - "The TARGET package was REBASED to a newer upstream version that claims to fix this CVE.\n" - "You have access to the upstream patch that shows what the fix looks like.\n\n" - "The patch content is provided below in PATCH_CONTEXT. Use it to understand:\n" - "- VULNERABLE code (- lines that were removed in the fix)\n" - "- FIX code (+ lines that were added in the fix)\n\n" + "The TARGET package was REBASED to a newer upstream version that claims to fix this CVE.\n\n" + "VULNERABILITY_INTEL contains patterns extracted from the upstream fix.\n\n" "YOUR TASK: Verify the TARGET package contains the FIX code (proving rebase was effective).\n\n" "VERIFICATION STRATEGY:\n" - "1. 
FIRST search for the FIX code pattern (added lines from the patch).\n" - " - Use function names, variable names, or unique code snippets from the '+ lines'.\n" + "1. FIRST search for the FIX code pattern (from FIX_PATTERNS).\n" + " - Use function names, variable names, or unique code snippets.\n" " - The fix code SHOULD exist in the target package (proving rebase worked).\n" "2. If fix code is found, optionally confirm VULNERABLE code is ABSENT.\n" " - The vulnerable code should NOT exist (was replaced by the fix).\n" "3. CONCLUSION:\n" " - If fix code EXISTS → Package is PATCHED via rebase.\n" " - If vulnerable code still EXISTS and fix is ABSENT → Rebase may be incomplete.\n" - " - If neither is found → Use file paths from patch to locate relevant code.\n\n" + " - If neither is found → Use file paths from AFFECTED_FILES to locate relevant code.\n\n" "CRITICAL RULES:\n" "- The patch is from a FIXED version - expect the target to have the fix code.\n" - "- Use file paths and function names from the patch to locate code.\n" + "- Use file paths and function names from VULNERABILITY_INTEL to locate code.\n" "- Search for distinctive code patterns, not generic keywords.\n" "- Base conclusions ONLY on tool results, not assumptions.\n\n" "ANSWER QUALITY:\n" "- Cite specific file paths and line numbers from tool results.\n" "- Quote the actual code found, not just describe it.\n" - "- Compare found code against both vulnerable and fix patterns from the patch.\n" + "- Compare found code against both vulnerable and fix patterns.\n" "- Clearly state whether fix code exists, confirming the rebase.\n" "- State confidence level based on evidence quality." ) @@ -1431,60 +1407,68 @@ async def upstream_search_preprocess( +PHASE 0 - CHECK PATCH STATUS (PRIORITY): + FIRST check VULNERABILITY_INTEL for DOWNSTREAM_PATCH_STATUS. 
+ If DOWNSTREAM_PATCH_STATUS is APPLIED: + - The source code index contains the UNPATCHED tarball + - The patch file is applied at BUILD time, not in the indexed source + - Do 2 verification searches, then FINISH with verdict PATCHED + PHASE 1 - INTELLIGENCE (PRE-COMPLETED): Review VULNERABILITY_INTEL above. It contains: + - DOWNSTREAM_PATCH_STATUS: APPLIED means package is patched at build time + - PATCH_FILE: Name of the patch file - AFFECTED_FILES: Files to verify - VULNERABLE_FUNCTIONS: Functions to search for - VULNERABLE_PATTERNS: Code patterns indicating vulnerability - - FIX_PATTERNS: Code patterns indicating the fix - - SEARCH_KEYWORDS: Terms to grep for + - FIX_PATTERNS: Code patterns indicating the fix (will be ABSENT in source) -PHASE 2 - SOURCE CODE INSPECTION (YOUR TASK): - For EACH item in VULNERABLE_FUNCTIONS and AFFECTED_FILES: - 1. Search for vulnerable pattern - it should exist if unpatched - 2. Search for fix pattern - it should be absent if unpatched - IMPORTANT: Do NOT stop after finding the first file. Check ALL AFFECTED_FILES. +PHASE 2 - SOURCE CODE INSPECTION (when DOWNSTREAM_PATCH_STATUS is APPLIED): + Do exactly 2 verification searches: + 1. Search for vulnerable function/pattern → should FIND it (source is unpatched) + 2. Search for fix pattern → should NOT find it (fix is in separate patch file) + Both outcomes are EXPECTED and confirm the patch is correct. + After both searches, FINISH immediately with PATCHED verdict. PHASE 3 - VERDICT: - Only conclude when: - - ALL AFFECTED_FILES have been searched - - ALL VULNERABLE_FUNCTIONS have been located - - Evidence is sufficient for confident verdict + If DOWNSTREAM_PATCH_STATUS is APPLIED: + - Found vulnerable code + fix absent = PATCHED (patch will fix it at build time) + - This is the EXPECTED outcome, not a failure + Conclude after 2 searches - do NOT keep searching. 1. You MUST select a tool ONLY from . Do NOT invent or use any other tool names. 2. Output valid JSON only. 
thought < 100 words. final_answer < 150 words. 3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer. -4. Source Grep: use query field with pattern from VULNERABILITY_INTEL (function name, variable, or code snippet). -5. Code Keyword Search: use query field for broader searches. -6. Do NOT call the same tool with the same input twice - CHECK KNOWLEDGE for prior calls. -7. ALWAYS search for the vulnerable code pattern FIRST, then the fix pattern. -8. If a pattern contains special regex characters, escape them or use literal substrings. +4. If DOWNSTREAM_PATCH_STATUS is APPLIED, do max 2 searches then conclude PATCHED. +5. Do NOT call the same tool with the same input twice - CHECK KNOWLEDGE for prior calls. +6. When patch is APPLIED: finding vulnerable code = GOOD, not finding fix = GOOD (expected). +7. If a pattern contains special regex characters, escape them or use literal substrings. +If DOWNSTREAM_PATCH_STATUS is APPLIED: +- Search 1: Find vulnerable function → EXPECTED to find (source is unpatched) +- Search 2: Check fix pattern → EXPECTED to NOT find (fix is in patch file) +- After both: FINISH with PATCHED verdict If a search returned results: -- Narrow down by searching within that specific file (e.g., "pattern,filename.c") -- Search for related symbols or variables from the code found +- If vulnerable code found and patch is APPLIED, proceed to verify fix is absent +- After both checks complete, FINISH If a pattern wasn't found: - Try simpler substrings or partial patterns - Try a different tool (Source Grep <-> Code Keyword Search) -- Search for file paths from VULNERABILITY_INTEL AFFECTED_FILES - -{{"thought": "No prior searches in KNOWLEDGE. Search for the vulnerable function from the patch", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Locate vulnerable code pattern"}}, "final_answer": null}} - - -{{"thought": "KNOWLEDGE shows Source Grep found no results for X. 
Try broader search with Code Keyword Search", "mode": "act", "actions": {{"tool": "Code Keyword Search", "query": "", "reason": "Broader search for the function"}}, "final_answer": null}} - - -{{"thought": "KNOWLEDGE shows function found at file.c:123. Search for specific vulnerable pattern in that file", "mode": "act", "actions": {{"tool": "Source Grep", "query": ",file.c", "reason": "Check if vulnerable pattern exists in the found file"}}, "final_answer": null}} - - -{{"thought": "KNOWLEDGE shows fix pattern not found. Search for the key variable from fix to understand current code", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Find how the vulnerable variable is currently handled"}}, "final_answer": null}} - + +{{"thought": "DOWNSTREAM_PATCH_STATUS is APPLIED. Search for vulnerable function first", "mode": "act", "actions": {{"tool": "Source Grep", "query": "parse_rockridge", "reason": "Verify vulnerable function exists in unpatched source"}}, "final_answer": null}} + + +{{"thought": "Found vulnerable function. Now verify fix pattern is absent (expected since fix is in patch file)", "mode": "act", "actions": {{"tool": "Source Grep", "query": "if (file->pz_log2_bs < 15", "reason": "Confirm fix pattern is absent from source"}}, "final_answer": null}} + + +{{"thought": "Vulnerable code found, fix absent as expected. DOWNSTREAM_PATCH_STATUS is APPLIED so package is PATCHED.", "mode": "finish", "actions": null, "final_answer": "The package is PATCHED. Found vulnerable function at file.c:123. Fix pattern absent from source (expected - fix is in patch file applied at build time). DOWNSTREAM_PATCH_STATUS confirms patch is applied."}} + {{"thought": "KNOWLEDGE has sufficient evidence: vulnerable code at X, fix absent", "mode": "finish", "actions": null, "final_answer": "The package is [PATCHED/VULNERABLE]. Found [evidence] at [file:line]. 
The code [matches/differs from] the patch because [reason]."}} """ diff --git a/src/vuln_analysis/functions/cve_checker_report.py b/src/vuln_analysis/functions/cve_checker_report.py index 8e434777c..09f06c581 100644 --- a/src/vuln_analysis/functions/cve_checker_report.py +++ b/src/vuln_analysis/functions/cve_checker_report.py @@ -91,7 +91,6 @@ class ReportBlocks: # Verdict justification_label: str executive_summary: str - recommendation: str # Evidence evidence_chain: list[str] @@ -142,12 +141,6 @@ def affected_files_md(self) -> str: lines.append(f"- `{f}`") return "\n".join(lines) - @property - def recommendation_md(self) -> str: - """Format recommendation as Markdown.""" - return f"## Recommendation\n{self.recommendation}" - - def _build_report_blocks( message: AgentMorpheusEngineInput, code_agent_report: CodeAgentReport, @@ -180,7 +173,6 @@ def _build_report_blocks( cve_description=cve_description, justification_label=code_agent_report.justification_label, executive_summary=code_agent_report.executive_summary, - recommendation=code_agent_report.recommendation, evidence_chain=list(code_agent_report.evidence_chain), affected_files=list(code_agent_report.affected_files), patch_file_name=patch_file_name, @@ -524,14 +516,12 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: version=version, release=release, downstream_report=downstream_report, - policy_context=policy_context, )) logger.info("cve_checker_report: wrote report to %s", report_path) with tracer.push_active_function( "report_finish", input_data={ - "confidence": code_agent_report.confidence, "justification_label": code_agent_report.justification_label, "has_l2_override": l2_result is not None and l2_result.l2_override_verdict is not None, }, @@ -539,7 +529,6 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: span.set_output({ "executive_summary": code_agent_report.executive_summary, "affected_files": code_agent_report.affected_files, - "recommendation": 
code_agent_report.recommendation, }) intel_score = intel[0].intel_score diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index e8e4c5df5..d06e7e1c9 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -322,6 +322,12 @@ async def L1_agent(state: CodeAgentState) -> dict: vulnerability_intel: VulnerabilityIntel = await vulnerability_intel_llm.ainvoke( [SystemMessage(content=vul_prompt)], ) + + if downstream_report: + vulnerability_intel.is_downstream_patch_available = downstream_report.is_patch_file_available + vulnerability_intel.is_patch_applied_in_build = downstream_report.is_patch_applied_in_build + vulnerability_intel.patch_file_name = downstream_report.patch_file_name or "" + span.set_output({ "vulnerability_intel": vulnerability_intel.model_dump(), }) From cac3af5c149abb82bedd28705a24ed9a712e1a76 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Mon, 11 May 2026 12:02:10 +0000 Subject: [PATCH 41/46] update report structure --- .../functions/code_agent_graph_defs.py | 386 ++++++++++++++++-- .../functions/cve_checker_report.py | 163 +++++++- 2 files changed, 492 insertions(+), 57 deletions(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index 51dfe9a62..3aaf6fa83 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -27,6 +27,7 @@ import re import shutil import subprocess +import warnings from pathlib import Path from typing import Literal, NotRequired, TYPE_CHECKING @@ -214,54 +215,172 @@ def to_markdown( downstream_report: DownstreamSearchReport | None = None, ) -> str: """Render the report as a formatted markdown string.""" - lines = [ - "# L1 Code Agent Investigation Report", - "", - ] - if vuln_id or target_package: - lines.append(f"**CVE:** {vuln_id} ") - 
lines.append(f"**Package:** {target_package} ") - if version: - lines.append(f"**Version:** {version} ") - if release: - lines.append(f"**Release:** {release}") + lines: list[str] = [] + + # Header with title + lines.append("# L1 Code Agent Investigation Report") + lines.append("") + + # Verdict banner based on justification label + verdict_map = { + "protected_by_mitigating_control": ("NOT VULNERABLE", "Protected by downstream patch"), + "protected_by_compiler": ("NOT VULNERABLE", "Protected by compiler hardening"), + "code_not_present": ("NOT VULNERABLE", "Vulnerable code not present"), + "code_not_reachable": ("NOT VULNERABLE", "Vulnerable code not reachable"), + "requires_environment": ("NOT VULNERABLE", "Requires specific environment"), + "vulnerable": ("VULNERABLE", "Package requires patching"), + "uncertain": ("UNCERTAIN", "Requires manual review"), + } + verdict_status, verdict_desc = verdict_map.get( + self.justification_label, + ("UNKNOWN", "Unknown status") + ) + + lines.append(f"> **Verdict: {verdict_status}** - {verdict_desc}") + lines.append("") + + # Package information table + lines.append("## Package Information") + lines.append("") + lines.append("| Field | Value |") + lines.append("|-------|-------|") + if vuln_id: + lines.append(f"| **CVE ID** | `{vuln_id}` |") + if target_package: + lines.append(f"| **Package** | `{target_package}` |") + if version: + version_str = f"{version}-{release}" if release else version + lines.append(f"| **Version** | `{version_str}` |") + lines.append(f"| **Justification** | `{self.justification_label}` |") + lines.append("") + + # Executive Summary + lines.append("---") + lines.append("") + lines.append("## Executive Summary") + lines.append("") + lines.append(self.executive_summary) + lines.append("") + + # Evidence Chain + lines.append("---") + lines.append("") + lines.append("## Evidence Chain") + lines.append("") + lines.extend(_format_interleaved_evidence( + self.evidence_chain, + downstream_report, + )) + + # 
Affected Files + if self.affected_files: + lines.append("---") lines.append("") - - lines.extend([ - f"**Justification:** `{self.justification_label}`", - "", - "## Executive Summary", - self.executive_summary, - "", - "## Evidence Chain", - ]) - lines.extend(f"- {ev}" for ev in self.evidence_chain) - - if downstream_report is not None: + lines.append("## Affected Files") lines.append("") - lines.extend(_format_extracted_facts_section(downstream_report)) - - if self.affected_files: - lines.extend(["", "## Affected Files"]) - lines.extend(f"- `{f}`" for f in self.affected_files) - + # Separate source files from test files + source_files = [f for f in self.affected_files if "/test/" not in f and "test_" not in f] + test_files = [f for f in self.affected_files if "/test/" in f or "test_" in f] + + if source_files: + lines.append("**Source files:**") + for f in source_files: + lines.append(f"- `{f}`") + lines.append("") + + if test_files: + lines.append("**Test files:**") + for f in test_files: + lines.append(f"- `{f}`") + lines.append("") + + # Patch Analysis if self.patch_analysis: - lines.extend(["", "## Patch Analysis", self.patch_analysis]) - + lines.append("---") + lines.append("") + lines.append("## Patch Analysis") + lines.append("") + lines.append(self.patch_analysis) + lines.append("") + + # Code Snippets - separate vulnerable from fix, prioritize main source files if self.code_snippets: - lines.extend(["", "## Code Snippets"]) - for snippet in self.code_snippets: - lines.append(f"\n### {snippet.snippet_type.title()} - `{snippet.file_path}`") - if snippet.line_number: - lines.append(f"Line {snippet.line_number} (source: {snippet.source}):") - else: - lines.append(f"(source: {snippet.source})") - lines.append(f"```\n{snippet.code}\n```") - + lines.append("---") + lines.append("") + lines.append("## Code Comparison") + lines.append("") + + # Filter and organize snippets + vuln_snippets = [s for s in self.code_snippets if s.snippet_type == "vulnerable"] + 
fix_snippets = [s for s in self.code_snippets if s.snippet_type == "fix"] + + # Prioritize main source files (not test/build files) + def is_main_source(path: str) -> bool: + return "/test/" not in path and "test_" not in path and "Makefile" not in path and "CMakeLists" not in path + + main_vuln = [s for s in vuln_snippets if is_main_source(s.file_path)] + main_fix = [s for s in fix_snippets if is_main_source(s.file_path)] + + # Show main vulnerability code + if main_vuln: + lines.append("### Vulnerable Code") + lines.append("") + for snippet in main_vuln[:2]: + file_name = snippet.file_path.split("/")[-1] + lines.append(f"**File:** `{file_name}` (Line {snippet.line_number or 'N/A'})") + lines.append("") + lines.append("```c") + lines.append(snippet.code.strip()) + lines.append("```") + lines.append("") + + # Show fix code + if main_fix: + lines.append("### Fix Code") + lines.append("") + for snippet in main_fix[:2]: + file_name = snippet.file_path.split("/")[-1] + lines.append(f"**File:** `{file_name}` (Line {snippet.line_number or 'N/A'})") + lines.append("") + lines.append("```c") + lines.append(snippet.code.strip()) + lines.append("```") + lines.append("") + + # Show other snippets (test/build files) in collapsible section if any + other_vuln = [s for s in vuln_snippets if not is_main_source(s.file_path)] + other_fix = [s for s in fix_snippets if not is_main_source(s.file_path)] + + if other_vuln or other_fix: + lines.append("
") + lines.append("Additional Changes (Test/Build Files)") + lines.append("") + for snippet in other_vuln + other_fix: + file_name = snippet.file_path.split("/")[-1] + lines.append(f"**{snippet.snippet_type.title()}** - `{file_name}`") + lines.append("") + lines.append("```") + lines.append(snippet.code.strip()) + lines.append("```") + lines.append("") + lines.append("
") + lines.append("") + + # Caveats if self.caveats: - lines.extend(["", "## Caveats"]) - lines.extend(f"- {caveat}" for caveat in self.caveats) - + lines.append("---") + lines.append("") + lines.append("## Caveats") + lines.append("") + for caveat in self.caveats: + lines.append(f"- {caveat}") + lines.append("") + + # Footer + lines.append("---") + lines.append("") + lines.append("*Report generated by L1 Code Agent*") + return "\n".join(lines) @@ -430,9 +549,16 @@ def format_patch_data_for_intel( 2. EVIDENCE CHAIN: - Start with downstream patch availability - - Include L1 agent's code search findings (vulnerable vs fix patterns) + - Include code analysis findings (patch targets, vulnerable vs fix patterns) - Reference specific files, line numbers, and code snippets - Summarize findings; the rendered report places an "Extracted facts" section **after** the Evidence chain with verbatim spec Patch lines, changelog hits, and build log lines (when available)—do not invent `PatchN:` numbers or spec quotes; only state patch indices you could derive from the investigation text below, or point readers to *Extracted facts* for exact lines + + PHRASING GUIDANCE for code analysis findings: + - GOOD: "Code analysis verified that the patch modifies `filename.c` to address the vulnerability" + - GOOD: "Patch targets the `function_name()` function in `filename.c`" + - BAD: "L1 agent found the fix code in the source" (ambiguous - implies fix already exists) + - BAD: "Found fix in source" (unclear what was found) + - Use active voice: "The patch adds validation..." not "Validation was found..." 3. 
CODE SNIPPETS: - Extract key code snippets from patches showing vulnerable and fix code @@ -501,12 +627,182 @@ def _cap_text_excerpt(text: str, max_chars: int) -> tuple[str, bool]: return t[: max_chars] + "\n[… truncated …]", True +def _format_interleaved_evidence( + evidence_chain: list[str], + downstream_report: DownstreamSearchReport | None, + *, + max_excerpt: int = L1_EXTRACTED_FACTS_EXCERPT_CHARS, +) -> list[str]: + """Build audit-ready markdown for the Evidence Chain section. + + Structure follows the 3-pillar model: + - Status Summary table for at-a-glance verification + - Patch Metadata (the "What") + - Integration Evidence (the "Plan" - spec file directives) + - Execution Evidence (the "Action" - build logs) + - Source Validation (the "Result" - L1 agent findings) + """ + lines: list[str] = [] + + if downstream_report is None: + for ev in evidence_chain: + lines.append(f"- {ev}") + return lines + + d = downstream_report + + # Categorize evidence items by keywords + patch_evidence: list[str] = [] + build_evidence: list[str] = [] + code_evidence: list[str] = [] + other_evidence: list[str] = [] + + patch_keywords = ("patch", "spec", "patchn", "directive") + build_keywords = ("build", "applied", "log") + code_keywords = ("code", "function", "vulnerable", "fix", "found", "source", "l1", "agent") + + for ev in evidence_chain: + ev_lower = ev.lower() + if any(kw in ev_lower for kw in patch_keywords): + patch_evidence.append(ev) + elif any(kw in ev_lower for kw in build_keywords): + build_evidence.append(ev) + elif any(kw in ev_lower for kw in code_keywords): + code_evidence.append(ev) + else: + other_evidence.append(ev) + + # Status Summary - at-a-glance verification (using bullets for UI compatibility) + lines.append("### Status Summary") + lines.append("") + patch_check = "PASS" if d.is_patch_file_available else "FAIL" + spec_check = "PASS" if d.is_patch_in_spec_file else "FAIL" + build_check = "PASS" if d.is_patch_applied_in_build else "FAIL" + 
lines.append(f"- **Patch file exists:** {patch_check}") + lines.append(f"- **Referenced in spec:** {spec_check}") + lines.append(f"- **Applied in build:** {build_check}") + lines.append("") + + # Section 1: Patch Metadata + if d.patch_file_name or patch_evidence: + lines.append("### 1. Patch Metadata") + lines.append("") + if d.patch_file_name: + lines.append(f"- **Patch file:** `{d.patch_file_name}`") + for ev in patch_evidence: + lines.append(f"- {ev}") + lines.append("") + + # Section 2: Integration Evidence (Spec File) - the "Plan" + has_integration = d.spec_patch_directives_for_cve or d.spec_changelog_cve_lines.strip() + if has_integration: + lines.append("### 2. Integration Evidence (Spec File)") + lines.append("") + + if d.spec_patch_directives_for_cve: + # Split directives into declaration and application + declarations = [line for line in d.spec_patch_directives_for_cve + if line.strip().startswith("Patch")] + applications = [line for line in d.spec_patch_directives_for_cve + if line.strip().startswith("%patch")] + + if declarations: + lines.append("**Patch declaration:**") + lines.append("") + lines.append("```ini") + lines.append("\n".join(declarations)) + lines.append("```") + lines.append("") + + if applications: + lines.append("**Patch application directive:**") + lines.append("") + lines.append("```ini") + lines.append("\n".join(applications)) + lines.append("```") + lines.append("") + + if d.spec_changelog_cve_lines.strip(): + ex, trunc = _cap_text_excerpt(d.spec_changelog_cve_lines, max_excerpt) + hdr = "**Changelog entry:**" + if trunc: + hdr += " *(truncated)*" + lines.append(hdr) + lines.append("") + lines.append("```ini") + lines.append(ex) + lines.append("```") + lines.append("") + + # Section 3: Execution Evidence (Build Log) - the "Action" + if d.build_log_patch_applied.strip() or build_evidence: + lines.append("### 3. 
Execution Evidence (Build Log)") + lines.append("") + + for ev in build_evidence: + lines.append(f"- {ev}") + if build_evidence: + lines.append("") + + if d.build_log_patch_applied.strip(): + ex, trunc = _cap_text_excerpt(d.build_log_patch_applied, max_excerpt) + if trunc: + lines.append("**Build output:** *(truncated)*") + else: + lines.append("**Build output:**") + lines.append("") + lines.append("```bash") + lines.append(ex) + lines.append("```") + lines.append("") + + # Section 4: Source Validation - the "Result" + if code_evidence: + lines.append("### 4. Source Validation") + lines.append("") + for ev in code_evidence: + lines.append(f"- {ev}") + lines.append("") + + # Section 5: Tarball Reference + if d.spec_version_line or d.spec_source0_line: + lines.append("### 5. Tarball Reference") + lines.append("") + if d.spec_version_line: + lines.append(f"- `{d.spec_version_line}`") + if d.spec_source0_line: + lines.append(f"- `{d.spec_source0_line}`") + lines.append("") + + # Additional evidence (uncategorized) + if other_evidence: + lines.append("### Additional Evidence") + lines.append("") + for ev in other_evidence: + lines.append(f"- {ev}") + lines.append("") + + return lines + + def _format_extracted_facts_section( d: DownstreamSearchReport, *, max_excerpt: int = L1_EXTRACTED_FACTS_EXCERPT_CHARS, ) -> list[str]: - """Build markdown lines for the deterministic *Extracted facts* block.""" + """Build markdown lines for the deterministic *Extracted facts* block. + + .. deprecated:: + This function is deprecated. Use `_format_interleaved_evidence()` instead, + which merges Evidence Chain and Extracted facts into a single interleaved + section for better readability. + """ + warnings.warn( + "_format_extracted_facts_section is deprecated. 
" + "Use _format_interleaved_evidence() instead.", + DeprecationWarning, + stacklevel=2, + ) lines: list[str] = [ "## Extracted facts", "", diff --git a/src/vuln_analysis/functions/cve_checker_report.py b/src/vuln_analysis/functions/cve_checker_report.py index 09f06c581..9734c9137 100644 --- a/src/vuln_analysis/functions/cve_checker_report.py +++ b/src/vuln_analysis/functions/cve_checker_report.py @@ -100,6 +100,12 @@ class ReportBlocks: patch_file_name: str spec_patch_directives: list[str] build_log_evidence: str + spec_changelog_cve_lines: str + spec_version_line: str + spec_source0_line: str + is_patch_file_available: bool + is_patch_in_spec_file: bool + is_patch_applied_in_build: bool # Code snippets vulnerable_snippets: list[CodeSnippet] @@ -113,21 +119,136 @@ def package_header_md(self) -> str: @property def evidence_chain_md(self) -> str: - """Format combined evidence chain with extracted facts as Markdown.""" - lines: list[str] = ["## Evidence Chain"] + """Format audit-ready evidence chain as Markdown. 
- # Add narrative evidence chain - for ev in self.evidence_chain[:5]: - lines.append(f"- {ev}") + Structure follows the 3-pillar model for audit readability: + - Status Summary table for at-a-glance verification + - Patch Metadata (the "What") + - Integration Evidence (the "Plan" - spec file directives) + - Execution Evidence (the "Action" - build logs) + - Source Validation (the "Result" - L1 agent findings) + """ + lines: list[str] = ["## Evidence Chain", ""] - # Add extracted facts if available - if self.spec_patch_directives: - for directive in self.spec_patch_directives[:2]: - lines.append(f"- Spec patch directive: `{directive}`") + # Categorize evidence items + patch_keywords = ("patch", "spec", "patchn", "directive") + build_keywords = ("build", "applied", "log") + code_keywords = ("code", "function", "vulnerable", "fix", "found", "source", "l1", "agent") - if self.build_log_evidence: - first_line = self.build_log_evidence.split('\n')[0][:100] - lines.append(f"- Build log: `{first_line}`") + patch_evidence: list[str] = [] + build_evidence: list[str] = [] + code_evidence: list[str] = [] + other_evidence: list[str] = [] + + for ev in self.evidence_chain[:8]: + ev_lower = ev.lower() + if any(kw in ev_lower for kw in patch_keywords): + patch_evidence.append(ev) + elif any(kw in ev_lower for kw in build_keywords): + build_evidence.append(ev) + elif any(kw in ev_lower for kw in code_keywords): + code_evidence.append(ev) + else: + other_evidence.append(ev) + + # Status Summary - at-a-glance verification (using bullets for UI compatibility) + lines.append("### Status Summary") + lines.append("") + patch_check = "PASS" if self.is_patch_file_available else "FAIL" + spec_check = "PASS" if self.is_patch_in_spec_file else "FAIL" + build_check = "PASS" if self.is_patch_applied_in_build else "FAIL" + lines.append(f"- **Patch file exists:** {patch_check}") + lines.append(f"- **Referenced in spec:** {spec_check}") + lines.append(f"- **Applied in build:** {build_check}") + 
lines.append("") + + # Section 1: Patch Metadata + if self.patch_file_name or patch_evidence: + lines.append("### 1. Patch Metadata") + lines.append("") + if self.patch_file_name: + lines.append(f"- **Patch file:** `{self.patch_file_name}`") + for ev in patch_evidence: + lines.append(f"- {ev}") + lines.append("") + + # Section 2: Integration Evidence (Spec File) - the "Plan" + has_integration = self.spec_patch_directives or self.spec_changelog_cve_lines.strip() + if has_integration: + lines.append("### 2. Integration Evidence (Spec File)") + lines.append("") + + if self.spec_patch_directives: + declarations = [d for d in self.spec_patch_directives if d.strip().startswith("Patch")] + applications = [d for d in self.spec_patch_directives if d.strip().startswith("%patch")] + + if declarations: + lines.append("**Patch declaration:**") + lines.append("") + lines.append("```ini") + lines.append("\n".join(declarations)) + lines.append("```") + lines.append("") + + if applications: + lines.append("**Patch application directive:**") + lines.append("") + lines.append("```ini") + lines.append("\n".join(applications)) + lines.append("```") + lines.append("") + + if self.spec_changelog_cve_lines.strip(): + lines.append("**Changelog entry:**") + lines.append("") + lines.append("```ini") + lines.append(self.spec_changelog_cve_lines.strip()) + lines.append("```") + lines.append("") + + # Section 3: Execution Evidence (Build Log) - the "Action" + if self.build_log_evidence.strip() or build_evidence: + lines.append("### 3. Execution Evidence (Build Log)") + lines.append("") + + for ev in build_evidence: + lines.append(f"- {ev}") + if build_evidence: + lines.append("") + + if self.build_log_evidence.strip(): + lines.append("**Build output:**") + lines.append("") + lines.append("```bash") + lines.append(self.build_log_evidence.strip()) + lines.append("```") + lines.append("") + + # Section 4: Source Validation - the "Result" + if code_evidence: + lines.append("### 4. 
Source Validation") + lines.append("") + for ev in code_evidence: + lines.append(f"- {ev}") + lines.append("") + + # Section 5: Tarball Reference + if self.spec_version_line or self.spec_source0_line: + lines.append("### 5. Tarball Reference") + lines.append("") + if self.spec_version_line: + lines.append(f"- `{self.spec_version_line}`") + if self.spec_source0_line: + lines.append(f"- `{self.spec_source0_line}`") + lines.append("") + + # Additional evidence (uncategorized) + if other_evidence: + lines.append("### Additional Evidence") + lines.append("") + for ev in other_evidence: + lines.append(f"- {ev}") + lines.append("") return "\n".join(lines) @@ -158,11 +279,23 @@ def _build_report_blocks( patch_file_name = "" spec_patch_directives: list[str] = [] build_log_evidence = "" + spec_changelog_cve_lines = "" + spec_version_line = "" + spec_source0_line = "" + is_patch_file_available = False + is_patch_in_spec_file = False + is_patch_applied_in_build = False if downstream_report: patch_file_name = downstream_report.patch_file_name or "" spec_patch_directives = downstream_report.spec_patch_directives_for_cve or [] build_log_evidence = downstream_report.build_log_patch_applied or "" + spec_changelog_cve_lines = downstream_report.spec_changelog_cve_lines or "" + spec_version_line = downstream_report.spec_version_line or "" + spec_source0_line = downstream_report.spec_source0_line or "" + is_patch_file_available = downstream_report.is_patch_file_available + is_patch_in_spec_file = downstream_report.is_patch_in_spec_file + is_patch_applied_in_build = downstream_report.is_patch_applied_in_build return ReportBlocks( package_name=target_package.name if target_package else "unknown", @@ -178,6 +311,12 @@ def _build_report_blocks( patch_file_name=patch_file_name, spec_patch_directives=spec_patch_directives, build_log_evidence=build_log_evidence, + spec_changelog_cve_lines=spec_changelog_cve_lines, + spec_version_line=spec_version_line, + spec_source0_line=spec_source0_line, + 
is_patch_file_available=is_patch_file_available, + is_patch_in_spec_file=is_patch_in_spec_file, + is_patch_applied_in_build=is_patch_applied_in_build, vulnerable_snippets=vulnerable_snippets, fix_snippets=fix_snippets, ) From 4fa7f93e70849076428ec867b689d29d0284496b Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Mon, 11 May 2026 13:20:09 +0000 Subject: [PATCH 42/46] report use case upstream find patch to use --- .../functions/code_agent_graph_defs.py | 99 ++++++++++++------- .../functions/cve_checker_report.py | 20 ++-- 2 files changed, 76 insertions(+), 43 deletions(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index 3aaf6fa83..1b6d4ce2d 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -100,6 +100,10 @@ class UpstreamSearchReport(BaseModel): is_fixed_srpm_is_needed: bool = Field(default=False, description="True if a fixed SRPM is needed downstream style patch files") fixed_srpm_file_name: str = Field(default="", description="The name of the fixed SRPM file") fixed_parsed_patch: ParsedPatch | None = Field(default=None, description="The parsed fixed SRPM patch file") + reference_package_nvr: str = Field( + default="", + description="NVR (name-version-release) of the reference fixed package from intel", + ) reason_cve_code: str = Field( default="", description="Does the CVE description match the code which is vulnerable", @@ -496,7 +500,7 @@ def format_patch_data_for_intel( CODE_AGENT_REPORT_PROMPT = """\ You are a security analyst generating the final Code Agent investigation report. -Synthesize the results from the downstream search, upstream search, L1 agent analysis, +Synthesize the results from the target package analysis, reference intel gathering, L1 agent analysis, and optionally L2 build analysis into a comprehensive, auditable report with a clear justification and supporting evidence. 
@@ -509,10 +513,12 @@ def format_patch_data_for_intel( {policy_context_section} -## Downstream Search +## Target Package Analysis +(Checked the target package being scanned for CVE-specific patch files) {downstream_section} -## Upstream Search +## Reference Intel Gathering +(Checked target spec for rebase indicators; obtained patch patterns from a known-fixed reference package for comparison) {upstream_section} ## L1 Agent Analysis @@ -548,7 +554,8 @@ def format_patch_data_for_intel( - Only use "uncertain" when evidence is conflicting or insufficient. 2. EVIDENCE CHAIN: - - Start with downstream patch availability + - Start with target package patch availability + - Include reference intel findings (rebase indicator, reference patch patterns) - Include code analysis findings (patch targets, vulnerable vs fix patterns) - Reference specific files, line numbers, and code snippets - Summarize findings; the rendered report places an "Extracted facts" section **after** the Evidence chain with verbatim spec Patch lines, changelog hits, and build log lines (when available)—do not invent `PatchN:` numbers or spec quotes; only state patch indices you could derive from the investigation text below, or point readers to *Extracted facts* for exact lines @@ -564,7 +571,7 @@ def format_patch_data_for_intel( - Extract key code snippets from patches showing vulnerable and fix code - Include file paths and line numbers - Mark each snippet as "vulnerable", "fix", or "context" - - When downstream investigation includes a parsed patch, code_snippets may be filled programmatically from that patch; use an empty code_snippets list if you do not have verbatim lines to copy. + - When target package or reference package investigation includes a parsed patch, code_snippets may be filled programmatically from that patch; use an empty code_snippets list if you do not have verbatim lines to copy. - Always populate affected_files with CVE-relevant source paths so patch hunks can be prioritized. 
4. EXECUTIVE SUMMARY (3-4 sentences, scenario-aware): @@ -582,8 +589,8 @@ def format_patch_data_for_intel( Do NOT invent RHSA IDs, function names, or technical details not present in the context. 5. PATCH ANALYSIS (semantic fix narrative): - - When downstream patch evidence exists, briefly describe **what** the fix does: name the function(s) or file(s) and the nature of the change (e.g. "adds range validation 15–17 in parse_rockridge_ZF1"). - - Derive this from Downstream Search summary, patch file names, or L1 agent code excerpts—do NOT invent code or function names absent from investigation results. + - When target package patch or reference patch evidence exists, briefly describe **what** the fix does: name the function(s) or file(s) and the nature of the change (e.g. "adds range validation 15–17 in parse_rockridge_ZF1"). + - Derive this from Target Package Analysis, Reference Intel, patch file names, or L1 agent code excerpts—do NOT invent code or function names absent from investigation results. 6. DELIVERY MODEL: - When a CVE-named patch file is present, explicitly note that the fix is carried as a separate `%patch` directive while the upstream tarball (`Source0`) version may remain unchanged. @@ -635,9 +642,9 @@ def _format_interleaved_evidence( ) -> list[str]: """Build audit-ready markdown for the Evidence Chain section. 
- Structure follows the 3-pillar model: + Structure follows the 3-pillar model for TARGET package verification: - Status Summary table for at-a-glance verification - - Patch Metadata (the "What") + - Target Patch Metadata (the "What") - Integration Evidence (the "Plan" - spec file directives) - Execution Evidence (the "Action" - build logs) - Source Validation (the "Result" - L1 agent findings) @@ -657,7 +664,7 @@ def _format_interleaved_evidence( code_evidence: list[str] = [] other_evidence: list[str] = [] - patch_keywords = ("patch", "spec", "patchn", "directive") + patch_keywords = ("patch", "spec", "patchn", "directive", "target", "reference") build_keywords = ("build", "applied", "log") code_keywords = ("code", "function", "vulnerable", "fix", "found", "source", "l1", "agent") @@ -672,23 +679,23 @@ def _format_interleaved_evidence( else: other_evidence.append(ev) - # Status Summary - at-a-glance verification (using bullets for UI compatibility) - lines.append("### Status Summary") + # Status Summary - at-a-glance verification of TARGET package (using bullets for UI compatibility) + lines.append("### Status Summary (Target Package)") lines.append("") patch_check = "PASS" if d.is_patch_file_available else "FAIL" spec_check = "PASS" if d.is_patch_in_spec_file else "FAIL" build_check = "PASS" if d.is_patch_applied_in_build else "FAIL" - lines.append(f"- **Patch file exists:** {patch_check}") - lines.append(f"- **Referenced in spec:** {spec_check}") - lines.append(f"- **Applied in build:** {build_check}") + lines.append(f"- **Target patch file exists:** {patch_check}") + lines.append(f"- **Referenced in target spec:** {spec_check}") + lines.append(f"- **Applied in target build:** {build_check}") lines.append("") - # Section 1: Patch Metadata + # Section 1: Target Patch Metadata if d.patch_file_name or patch_evidence: lines.append("### 1. 
Patch Metadata") lines.append("") if d.patch_file_name: - lines.append(f"- **Patch file:** `{d.patch_file_name}`") + lines.append(f"- **Target patch file:** `{d.patch_file_name}`") for ev in patch_evidence: lines.append(f"- {ev}") lines.append("") @@ -864,18 +871,22 @@ def _format_extracted_facts_section( def _format_downstream_for_report(report: DownstreamSearchReport | None) -> str: - """Format Downstream search results for prompt injection.""" + """Format target package analysis results for prompt injection. + + This section reports whether the TARGET package (the one being scanned) + contains a CVE-specific patch file. + """ if report is None: - return "Downstream search did not produce results." + return "Target package analysis did not produce results." lines = [] - lines.append(f"**Patch File Available:** {report.is_patch_file_available}") + lines.append(f"**Target Package Patch Available:** {report.is_patch_file_available}") if report.is_patch_file_available: - lines.append(f"**Patch File:** `{report.patch_file_name}`") - lines.append(f"**In Spec File:** {report.is_patch_in_spec_file}") + lines.append(f"**Target Patch File:** `{report.patch_file_name}`") + lines.append(f"**Referenced in Spec:** {report.is_patch_in_spec_file}") if report.spec_file_log_change: - lines.append(f"**Spec Changelog:**\n```\n{report.spec_file_log_change[:500]}\n```") + lines.append(f"**Target Spec Changelog:**\n```\n{report.spec_file_log_change[:500]}\n```") lines.append(f"**Applied in Build:** {report.is_patch_applied_in_build}") if report.build_log_patch_applied: lines.append(f"**Build Log Evidence:**\n```\n{report.build_log_patch_applied[:500]}\n```") @@ -889,36 +900,53 @@ def _format_downstream_for_report(report: DownstreamSearchReport | None) -> str: if len(report.parsed_patch.files) > 5: lines.append(f" (+{len(report.parsed_patch.files) - 5} more files)") else: - lines.append("No CVE-specific patch file found in downstream package.") + lines.append("No CVE-specific patch file 
found in target package.") return "\n".join(lines) def _format_upstream_for_report(report: UpstreamSearchReport | None) -> str: - """Format Upstream search results for prompt injection.""" + """Format reference intel gathering results for prompt injection. + + This section reports TWO distinct pieces of information: + 1. Rebase indicator: Checked TARGET's spec file for CVE mention + 2. Reference package: Downloaded a known-fixed package from intel to extract patch patterns + """ if report is None: - return "Upstream search did not produce results." + return "Reference intel gathering did not produce results." lines = [] - lines.append(f"**Fixed by Rebase:** {report.is_code_fixed_by_rebase}") + + # Part 1: Rebase indicator (checked in TARGET's spec file) + rebase_status = report.is_code_fixed_by_rebase + if rebase_status == "unknown": + lines.append("**Target Rebase Indicator:** not found (no CVE mention in target's spec file)") + elif rebase_status == "yes": + lines.append("**Target Rebase Indicator:** found (CVE mentioned in target's spec changelog)") + else: + lines.append(f"**Target Rebase Indicator:** {rebase_status}") if report.spec_file_log_change: - lines.append(f"**Spec Changelog:**\n```\n{report.spec_file_log_change[:500]}\n```") + lines.append(f"**Target Spec Changelog Match:**\n```\n{report.spec_file_log_change[:500]}\n```") + # Part 2: Reference package (downloaded from intel for comparison) if report.is_fixed_srpm_is_needed: - lines.append(f"**Fixed SRPM Available:** Yes") - lines.append(f"**Fixed SRPM File:** `{report.fixed_srpm_file_name}`") + if report.reference_package_nvr: + lines.append(f"**Reference Fixed Package:** `{report.reference_package_nvr}` (from intel)") + else: + lines.append(f"**Reference Fixed Package:** Available (from intel)") + lines.append(f"**Reference Patch File:** `{report.fixed_srpm_file_name}`") if report.fixed_parsed_patch: - lines.append(f"\n**Fixed Patch ({len(report.fixed_parsed_patch.files)} files):**") + 
lines.append(f"\n**Reference Patch ({len(report.fixed_parsed_patch.files)} files):**") for pf in report.fixed_parsed_patch.files[:5]: added = sum(len(h.added_lines) for h in pf.hunks) removed = sum(len(h.removed_lines) for h in pf.hunks) lines.append(f"- `{pf.target_path}` (+{added}/-{removed} lines)") if report.spec_fixed_srpm_rebase: - lines.append(f"**SRPM Rebased:** Yes") + lines.append(f"**Reference Package Rebased:** Yes") if report.spec_fixed_srpm_change: - lines.append(f"**Rebase Changes:**\n```\n{report.spec_fixed_srpm_change[:500]}\n```") + lines.append(f"**Reference Rebase Changes:**\n```\n{report.spec_fixed_srpm_change[:500]}\n```") if report.reason_code_fixed_by_rebase: lines.append(f"\n**Rebase Reasoning:** {report.reason_code_fixed_by_rebase}") @@ -1523,6 +1551,11 @@ async def upstream_search_preprocess( inspector = SourceInspector(source_path) report = UpstreamSearchReport() cve_pattern = re.escape(vuln_id) + + # Store reference package NVR from fix_info if available + if fix_info and fix_info.get("nevra"): + report.reference_package_nvr = fix_info["nevra"] + with tracer.push_active_function("Is_upstream_fixed_by_rebase", input_data={"vuln_id": vuln_id}) as span: spec_files = inspector.find_files("*.spec", recursive=False) spec_path = spec_files[0] if spec_files else None diff --git a/src/vuln_analysis/functions/cve_checker_report.py b/src/vuln_analysis/functions/cve_checker_report.py index 9734c9137..80f020ff4 100644 --- a/src/vuln_analysis/functions/cve_checker_report.py +++ b/src/vuln_analysis/functions/cve_checker_report.py @@ -121,9 +121,9 @@ def package_header_md(self) -> str: def evidence_chain_md(self) -> str: """Format audit-ready evidence chain as Markdown. 
- Structure follows the 3-pillar model for audit readability: + Structure follows the 3-pillar model for TARGET package audit readability: - Status Summary table for at-a-glance verification - - Patch Metadata (the "What") + - Target Patch Metadata (the "What") - Integration Evidence (the "Plan" - spec file directives) - Execution Evidence (the "Action" - build logs) - Source Validation (the "Result" - L1 agent findings) @@ -131,7 +131,7 @@ def evidence_chain_md(self) -> str: lines: list[str] = ["## Evidence Chain", ""] # Categorize evidence items - patch_keywords = ("patch", "spec", "patchn", "directive") + patch_keywords = ("patch", "spec", "patchn", "directive", "target", "reference") build_keywords = ("build", "applied", "log") code_keywords = ("code", "function", "vulnerable", "fix", "found", "source", "l1", "agent") @@ -151,23 +151,23 @@ def evidence_chain_md(self) -> str: else: other_evidence.append(ev) - # Status Summary - at-a-glance verification (using bullets for UI compatibility) - lines.append("### Status Summary") + # Status Summary - at-a-glance verification of TARGET package (using bullets for UI compatibility) + lines.append("### Status Summary (Target Package)") lines.append("") patch_check = "PASS" if self.is_patch_file_available else "FAIL" spec_check = "PASS" if self.is_patch_in_spec_file else "FAIL" build_check = "PASS" if self.is_patch_applied_in_build else "FAIL" - lines.append(f"- **Patch file exists:** {patch_check}") - lines.append(f"- **Referenced in spec:** {spec_check}") - lines.append(f"- **Applied in build:** {build_check}") + lines.append(f"- **Target patch file exists:** {patch_check}") + lines.append(f"- **Referenced in target spec:** {spec_check}") + lines.append(f"- **Applied in target build:** {build_check}") lines.append("") - # Section 1: Patch Metadata + # Section 1: Target Patch Metadata if self.patch_file_name or patch_evidence: lines.append("### 1. 
Patch Metadata") lines.append("") if self.patch_file_name: - lines.append(f"- **Patch file:** `{self.patch_file_name}`") + lines.append(f"- **Target patch file:** `{self.patch_file_name}`") for ev in patch_evidence: lines.append(f"- {ev}") lines.append("") From 7234275dda1d6cbe48d16ee03d9f3167f00235b9 Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Tue, 12 May 2026 11:45:10 +0000 Subject: [PATCH 43/46] update rebase to fetch patch from github --- .../functions/code_agent_graph_defs.py | 133 ++++++++++-------- 1 file changed, 73 insertions(+), 60 deletions(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index 1b6d4ce2d..6c4a1cc94 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -43,6 +43,8 @@ from exploit_iq_commons.data_models.checker_status import L2BuildResult, VulnerabilityIntel from vuln_analysis.functions.react_internals import CheckerThought, Observation, L1VerdictExtraction +import aiohttp + # --------------------------------------------------------------------------- # Graph state # --------------------------------------------------------------------------- @@ -62,6 +64,48 @@ class CodeAgentState(MessagesState): vulnerability_intel: NotRequired["VulnerabilityIntel | None"] +# --------------------------------------------------------------------------- +# Patch schemas (must be defined before reports that use them) +# --------------------------------------------------------------------------- + + +class PatchHunk(BaseModel): + """A single hunk from a downstream patch file.""" + source_start: int + source_length: int + target_start: int + target_length: int + context_lines: list[str] = Field(default_factory=list, description="Unchanged lines") + removed_lines: list[str] = Field(default_factory=list, description="Deleted lines (- stripped)") + added_lines: list[str] = Field(default_factory=list, 
description="Added lines (+ stripped)") + + +class PatchFile(BaseModel): + """Changes to a single file in a downstream patch.""" + source_path: str + target_path: str + hunks: list[PatchHunk] + is_new_file: bool = False + is_deleted_file: bool = False + + +class ParsedPatch(BaseModel): + """Structured representation of a downstream patch file.""" + patch_filename: str + files: list[PatchFile] + + +class OSVPatchResult(BaseModel): + """Result of fetching a patch from OSV/GitHub.""" + cve_id: str + fixed_commit: str + repo_url: str + patch_url: str + patch_content: str | None = Field(default=None, description="Raw .patch text") + parsed_patch: "ParsedPatch | None" = Field(default=None, description="Structured patch data") + commit_message: str | None = None + commit_author: str | None = None + commit_date: str | None = None # --------------------------------------------------------------------------- # Reflection schemas # --------------------------------------------------------------------------- @@ -94,6 +138,7 @@ class DownstreamSearchReport(BaseModel): ) parsed_patch: ParsedPatch | None = Field(default=None, description="The parsed patch file") + class UpstreamSearchReport(BaseModel): """Result of an upstream search.""" @@ -120,11 +165,15 @@ class UpstreamSearchReport(BaseModel): default="", description="The change of the fixed SRPM in the spec file", ) - spec_fixed_srpm_rebase: bool = Field(default=False, description="True if the fixed SRPM is rebased in the spec file") reason_code_fixed_by_rebase: str = Field( default="", description="The reason why the code is fixed by rebase", ) + osv_result: OSVPatchResult | None = Field(default=None, description="The result of the OSV patch retrieval") + + + + class ReflectionBase(BaseModel): """Base schema for phase reports. 
@@ -137,37 +186,6 @@ class ReflectionBase(BaseModel): description="True if results are good enough to proceed.") -# --------------------------------------------------------------------------- -# Patch schemas (used by DownstreamSearchReport and UpstreamSearchReport) -# --------------------------------------------------------------------------- - - -class PatchHunk(BaseModel): - """A single hunk from a downstream patch file.""" - source_start: int - source_length: int - target_start: int - target_length: int - context_lines: list[str] = Field(default_factory=list, description="Unchanged lines") - removed_lines: list[str] = Field(default_factory=list, description="Deleted lines (- stripped)") - added_lines: list[str] = Field(default_factory=list, description="Added lines (+ stripped)") - - -class PatchFile(BaseModel): - """Changes to a single file in a downstream patch.""" - source_path: str - target_path: str - hunks: list[PatchHunk] - is_new_file: bool = False - is_deleted_file: bool = False - - -class ParsedPatch(BaseModel): - """Structured representation of a downstream patch file.""" - patch_filename: str - files: list[PatchFile] - - # --------------------------------------------------------------------------- # Code Agent Report schema # --------------------------------------------------------------------------- @@ -500,7 +518,7 @@ def format_patch_data_for_intel( CODE_AGENT_REPORT_PROMPT = """\ You are a security analyst generating the final Code Agent investigation report. -Synthesize the results from the target package analysis, reference intel gathering, L1 agent analysis, +Synthesize the results from the target package analysis, additional intel (target rebase check + reference package), L1 agent analysis, and optionally L2 build analysis into a comprehensive, auditable report with a clear justification and supporting evidence. 
@@ -517,8 +535,10 @@ def format_patch_data_for_intel( (Checked the target package being scanned for CVE-specific patch files) {downstream_section} -## Reference Intel Gathering -(Checked target spec for rebase indicators; obtained patch patterns from a known-fixed reference package for comparison) +## Additional Intel (Target Rebase + Reference Package) +This section contains TWO distinct checks: +- TARGET REBASE CHECK: Searched the target package's spec file for CVE mentions indicating a rebase fix +- REFERENCE PACKAGE: Downloaded a known-fixed package version from intel to extract patch patterns {upstream_section} ## L1 Agent Analysis @@ -555,7 +575,8 @@ def format_patch_data_for_intel( 2. EVIDENCE CHAIN: - Start with target package patch availability - - Include reference intel findings (rebase indicator, reference patch patterns) + - Include target rebase findings (if CVE mentioned in target's spec changelog, this is from the TARGET package) + - Include reference package findings (if a known-fixed package from intel was used for comparison) - Include code analysis findings (patch targets, vulnerable vs fix patterns) - Reference specific files, line numbers, and code snippets - Summarize findings; the rendered report places an "Extracted facts" section **after** the Evidence chain with verbatim spec Patch lines, changelog hits, and build log lines (when available)—do not invent `PatchN:` numbers or spec quotes; only state patch indices you could derive from the investigation text below, or point readers to *Extracted facts* for exact lines @@ -943,10 +964,6 @@ def _format_upstream_for_report(report: UpstreamSearchReport | None) -> str: removed = sum(len(h.removed_lines) for h in pf.hunks) lines.append(f"- `{pf.target_path}` (+{added}/-{removed} lines)") - if report.spec_fixed_srpm_rebase: - lines.append(f"**Reference Package Rebased:** Yes") - if report.spec_fixed_srpm_change: - lines.append(f"**Reference Rebase 
Changes:**\n```\n{report.spec_fixed_srpm_change[:500]}\n```") if report.reason_code_fixed_by_rebase: lines.append(f"\n**Rebase Reasoning:** {report.reason_code_fixed_by_rebase}") @@ -1551,7 +1568,7 @@ async def upstream_search_preprocess( inspector = SourceInspector(source_path) report = UpstreamSearchReport() cve_pattern = re.escape(vuln_id) - + need_to_find_code = True # Store reference package NVR from fix_info if available if fix_info and fix_info.get("nevra"): report.reference_package_nvr = fix_info["nevra"] @@ -1600,25 +1617,21 @@ async def upstream_search_preprocess( report.is_fixed_srpm_is_needed = False span.set_output({ "is_fixed_srpm_is_needed": report.is_fixed_srpm_is_needed}) - with tracer.push_active_function("is_fixed_srpm_rebase", input_data={"spec file"}) as span: - spec_files = patch_inspector.find_files("*.spec", recursive=False) - spec_path = spec_files[0] if spec_files else None - if not spec_path: - report.spec_fixed_srpm_rebase = False - else: - grep_spec_matches = inspector.grep_content(cve_pattern, spec_path) - if grep_spec_matches: - report.spec_fixed_srpm_rebase = True - report.spec_fixed_srpm_change = "\n".join(m.line_content for m in grep_spec_matches) - return report - else: - report.spec_fixed_srpm_rebase = False - span.set_output({ - "spec_fixed_srpm_rebase": report.spec_fixed_srpm_rebase, - "spec_fixed_srpm_change": report.spec_fixed_srpm_change, - }) - - + + if not patch_dir.exists() or need_to_find_code: + from vuln_analysis.utils.osv_patch_retriever import OSVPatchRetriever + with tracer.push_active_function("search_for_code_in_osv", input_data={"vuln_id": vuln_id}) as span: + async with aiohttp.ClientSession() as session: + retriever = OSVPatchRetriever(session=session) + result = await retriever.get_fix_patch(vuln_id, fix_info["version"], fix_info["name"]) + if result and result.parsed_patch: + report.fixed_parsed_patch = result.parsed_patch + report.fixed_srpm_file_name = result.patch_url + report.is_fixed_srpm_is_needed = 
True + report.osv_result = result + span.set_output({ + "osv_commit_message": report.osv_result.commit_message, + }) return report From 9c5f781497dcd2526798e466766a117feee3d9be Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Wed, 13 May 2026 10:59:42 +0300 Subject: [PATCH 44/46] missing file --- .../utils/osv_patch_retriever.py | 412 ++++++++++++++++++ 1 file changed, 412 insertions(+) create mode 100644 src/vuln_analysis/utils/osv_patch_retriever.py diff --git a/src/vuln_analysis/utils/osv_patch_retriever.py b/src/vuln_analysis/utils/osv_patch_retriever.py new file mode 100644 index 000000000..c6a5701d5 --- /dev/null +++ b/src/vuln_analysis/utils/osv_patch_retriever.py @@ -0,0 +1,412 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""OSV Patch Retriever - fetch upstream fix patches from OSV when RPM patches are unavailable.""" + +from __future__ import annotations + +import os +import re +from typing import TYPE_CHECKING + +import aiohttp +from pydantic import BaseModel +from unidiff import PatchSet + +from exploit_iq_commons.logging.loggers_factory import LoggingFactory +from vuln_analysis.utils.async_http_utils import request_with_retry +from vuln_analysis.functions.code_agent_graph_defs import OSVPatchResult + +if TYPE_CHECKING: + from vuln_analysis.functions.code_agent_graph_defs import ParsedPatch + +logger = LoggingFactory.get_agent_logger(__name__) + +_OSV_API_URL = os.environ.get("OSV_API_URL", "https://api.osv.dev/v1/vulns/") +_OSV_TIMEOUT_SECONDS = int(os.environ.get("OSV_TIMEOUT_SECONDS", "10")) +_GITHUB_PATCH_TIMEOUT_SECONDS = int(os.environ.get("GITHUB_PATCH_TIMEOUT_SECONDS", "30")) + +_BINARY_FILE_EXTENSIONS = frozenset({ + '.uu', '.uue', '.iso', '.bin', '.gz', '.bz2', '.xz', '.zip', '.tar', '.tgz', '.tbz2', + '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.ico', '.webp', + '.pdf', '.doc', '.docx', '.xls', '.xlsx', + '.exe', '.dll', '.so', '.dylib', '.a', '.o', '.obj', + '.pyc', '.pyo', '.class', '.jar', '.war', +}) + +_GITHUB_REPO_PATTERN = re.compile(r"https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$") + + +class OSVAffectedRange(BaseModel): + """Represents a Git range from an OSV affected block.""" + repo_url: str | None = None + fixed_commit: str | None = None + introduced_commit: str | None = None + + + +def _is_binary_file_path(path: str) -> bool: + """Check if file path has a binary file extension.""" + path_lower = path.lower() + return any(path_lower.endswith(ext) for ext in _BINARY_FILE_EXTENSIONS) + + +def _version_in_range(version: str, introduced: str | None, fixed: str | None) -> bool: + """Check if provided upstream version falls within [introduced, fixed) range. 
+ + Returns True if version >= introduced (or introduced is None) AND version < fixed (or fixed is None). + """ + try: + from packaging.version import parse as parse_version + v = parse_version(version) + if introduced: + try: + if v < parse_version(introduced): + return False + except Exception: + pass + if fixed: + try: + if v >= parse_version(fixed): + return False + except Exception: + pass + return True + except Exception: + return True + + +def _parse_patch_content(patch_content: str, patch_filename: str) -> "ParsedPatch | None": + """Parse patch content string into structured ParsedPatch model. + + Reuses the same logic as code_agent_graph_defs.parse_patch_file but works on string content. + """ + from vuln_analysis.functions.code_agent_graph_defs import ParsedPatch, PatchFile, PatchHunk + + try: + patch_set = PatchSet.from_string(patch_content) + except Exception: + logger.warning("_parse_patch_content: failed to parse patch content") + return None + + files: list[PatchFile] = [] + for patched_file in patch_set: + if patched_file.is_binary_file: + continue + if _is_binary_file_path(patched_file.target_file): + continue + + hunks: list[PatchHunk] = [] + for hunk in patched_file: + context, removed, added = [], [], [] + for line in hunk: + if line.is_context: + context.append(str(line.value).rstrip("\n")) + elif line.is_removed: + removed.append(str(line.value).rstrip("\n")) + elif line.is_added: + added.append(str(line.value).rstrip("\n")) + + hunks.append(PatchHunk( + source_start=hunk.source_start, + source_length=hunk.source_length, + target_start=hunk.target_start, + target_length=hunk.target_length, + context_lines=context, + removed_lines=removed, + added_lines=added, + )) + + files.append(PatchFile( + source_path=patched_file.source_file, + target_path=patched_file.target_file, + hunks=hunks, + is_new_file=patched_file.is_added_file, + is_deleted_file=patched_file.is_removed_file, + )) + + return ParsedPatch(patch_filename=patch_filename, files=files) 
+ + +def _extract_commit_metadata(patch_content: str) -> tuple[str | None, str | None, str | None]: + """Extract commit message, author, and date from GitHub .patch format. + + GitHub .patch format starts with: + From Mon Sep 17 00:00:00 2001 + From: Author Name + Date: Tue, 1 Jan 2024 12:00:00 +0000 + Subject: [PATCH] Commit message + + Extended commit message... + --- + + """ + lines = patch_content.split('\n') + author = None + date = None + subject_lines = [] + in_subject = False + + for line in lines: + if line.startswith('From:'): + author = line[5:].strip() + elif line.startswith('Date:'): + date = line[5:].strip() + elif line.startswith('Subject:'): + in_subject = True + subject_part = line[8:].strip() + if subject_part.startswith('[PATCH'): + idx = subject_part.find(']') + if idx != -1: + subject_part = subject_part[idx + 1:].strip() + subject_lines.append(subject_part) + elif in_subject: + if line.startswith('---') or line.startswith('diff --git'): + break + if line.strip() == '': + in_subject = False + else: + subject_lines.append(line.strip()) + + commit_message = ' '.join(subject_lines).strip() if subject_lines else None + return commit_message, author, date + + +class OSVPatchRetriever: + """Retrieve upstream fix patches from OSV when RPM patches are unavailable. 
+ + Usage: + async with aiohttp.ClientSession() as session: + retriever = OSVPatchRetriever(session=session) + result = await retriever.get_fix_patch("CVE-2024-1234", "3.0.7", "openssl") + if result and result.parsed_patch: + # Use result.parsed_patch for agent context + pass + """ + + def __init__( + self, + session: aiohttp.ClientSession, + osv_timeout: int = _OSV_TIMEOUT_SECONDS, + github_timeout: int = _GITHUB_PATCH_TIMEOUT_SECONDS, + ): + self._session = session + self._osv_timeout = aiohttp.ClientTimeout(total=osv_timeout) + self._github_timeout = aiohttp.ClientTimeout(total=github_timeout) + + async def get_fix_patch( + self, + cve_id: str, + upstream_version: str, + package_name: str | None = None, + ) -> OSVPatchResult | None: + """Main entry point - orchestrates the full workflow. + + Args: + cve_id: CVE identifier (e.g., "CVE-2024-1234") + upstream_version: Upstream version from TargetPackage.version (e.g., "3.0.7") + package_name: Optional package name to help match the correct affected block + + Returns: + OSVPatchResult with patch data, or None if no fix found + """ + try: + osv_data = await self._query_osv(cve_id) + if not osv_data: + return None + + affected = self._find_matching_affected(osv_data, package_name) + if not affected: + logger.info("OSV: No affected block with fix found for %s", cve_id) + return None + + range_info = self._extract_fix_commit(affected) + if not range_info.fixed_commit or not range_info.repo_url: + logger.info("OSV: No fixed commit found for %s", cve_id) + return None + + patch_url = self._build_patch_url(range_info.repo_url, range_info.fixed_commit) + if not patch_url: + logger.info("OSV: Could not build patch URL for %s (non-GitHub repo?)", cve_id) + return None + + patch_content = await self._fetch_github_patch(patch_url) + if not patch_content: + return None + + commit_message, commit_author, commit_date = _extract_commit_metadata(patch_content) + parsed_patch = _parse_patch_content(patch_content, 
f"{cve_id}_{range_info.fixed_commit[:8]}.patch") + + return OSVPatchResult( + cve_id=cve_id, + fixed_commit=range_info.fixed_commit, + repo_url=range_info.repo_url, + patch_url=patch_url, + patch_content=patch_content, + parsed_patch=parsed_patch, + commit_message=commit_message, + commit_author=commit_author, + commit_date=commit_date, + ) + + except Exception: + logger.warning("OSV patch retrieval failed for %s", cve_id, exc_info=True) + return None + + async def _query_osv(self, cve_id: str) -> dict | None: + """Query OSV API for CVE data. + + Args: + cve_id: CVE identifier + + Returns: + OSV vulnerability data dict, or None on failure + """ + url = f"{_OSV_API_URL}{cve_id}" + try: + async with request_with_retry( + session=self._session, + request_kwargs={ + 'method': 'GET', + 'url': url, + 'timeout': self._osv_timeout, + }, + max_retries=3, + sleep_time=0.5, + log_on_error=False, + ) as response: + return await response.json() + except aiohttp.ClientResponseError as e: + if e.status == 404: + logger.info("OSV: CVE %s not found", cve_id) + else: + logger.warning("OSV query failed for %s: %s", cve_id, e) + return None + except Exception as e: + logger.warning("OSV query failed for %s: %s", cve_id, e) + return None + + def _find_matching_affected( + self, + osv_data: dict, + package_name: str | None = None, + ) -> dict | None: + """Find an affected block that has a GIT range with a fixed commit. + + Args: + osv_data: OSV vulnerability data + package_name: Optional package name to filter affected blocks + + Returns: + Matching affected block dict, or None if no match + """ + for affected in osv_data.get("affected", []): + + + for range_block in affected.get("ranges", []): + if range_block.get("type") == "GIT": + for event in range_block.get("events", []): + if "fixed" in event: + return affected + + return None + + def _extract_fix_commit(self, affected: dict) -> OSVAffectedRange: + """Extract the fixed commit hash and repo URL from an affected block. 
+ + Args: + affected: OSV affected block + + Returns: + OSVAffectedRange with repo_url and fixed_commit + """ + result = OSVAffectedRange() + + ranges = affected.get("ranges", []) + for range_block in ranges: + if range_block.get("type") != "GIT": + continue + + repo = range_block.get("repo") + if repo: + result.repo_url = repo + + events = range_block.get("events", []) + for event in events: + if "introduced" in event and event["introduced"] != "0": + result.introduced_commit = event["introduced"] + if "fixed" in event: + result.fixed_commit = event["fixed"] + + if result.fixed_commit: + break + + return result + + def _build_patch_url(self, repo_url: str, commit_sha: str) -> str | None: + """Build GitHub patch URL from repo URL and commit SHA. + + Args: + repo_url: Git repository URL (e.g., "https://github.com/openssl/openssl") + commit_sha: Git commit hash + + Returns: + Patch URL (e.g., "https://github.com/openssl/openssl/commit/.patch"), + or None if not a GitHub repo + """ + match = _GITHUB_REPO_PATTERN.match(repo_url) + if not match: + if "github.com" in repo_url: + parts = repo_url.rstrip('/').split('/') + if len(parts) >= 2: + repo_path = '/'.join(parts[-2:]).replace('.git', '') + return f"https://github.com/{repo_path}/commit/{commit_sha}.patch" + logger.debug("Non-GitHub repo URL: %s", repo_url) + return None + + repo_path = match.group(1) + return f"https://github.com/{repo_path}/commit/{commit_sha}.patch" + + async def _fetch_github_patch(self, patch_url: str) -> str | None: + """Download patch content from GitHub. 
+ + Args: + patch_url: URL to the .patch file + + Returns: + Patch content string, or None on failure + """ + try: + async with request_with_retry( + session=self._session, + request_kwargs={ + 'method': 'GET', + 'url': patch_url, + 'timeout': self._github_timeout, + }, + max_retries=3, + sleep_time=0.5, + log_on_error=False, + ) as response: + return await response.text() + except aiohttp.ClientResponseError as e: + if e.status == 404: + logger.info("GitHub patch not found: %s", patch_url) + else: + logger.warning("GitHub patch fetch failed: %s - %s", patch_url, e) + return None + except Exception as e: + logger.warning("GitHub patch fetch failed: %s - %s", patch_url, e) + return None From 4d0892ba2bff9c2126a70b68b71ad498cca8a4de Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Wed, 13 May 2026 10:59:29 +0000 Subject: [PATCH 45/46] send failure to failed node and add url to report --- .../data_models/checker_status.py | 8 ++ .../functions/cve_http_output.py | 133 ++++++++++++++---- .../functions/cve_source_acquisition.py | 20 ++- src/vuln_analysis/tools/brew_downloader.py | 9 ++ 4 files changed, 142 insertions(+), 28 deletions(-) diff --git a/src/exploit_iq_commons/data_models/checker_status.py b/src/exploit_iq_commons/data_models/checker_status.py index 2be0e68d8..40ac620ae 100644 --- a/src/exploit_iq_commons/data_models/checker_status.py +++ b/src/exploit_iq_commons/data_models/checker_status.py @@ -45,6 +45,13 @@ class PackageCheckerStatus(IntEnum): "Intel quality score below threshold - insufficient information for reliable analysis", } +CHECKER_FAILURE_ERROR_TYPES: dict[PackageCheckerStatus, str] = { + PackageCheckerStatus.ERROR_PKG_IDENT_NO_INTEL: "no-intel", + PackageCheckerStatus.ERROR_FAILED_TO_DOWNLOAD_SRPM: "srpm-download-failed", + PackageCheckerStatus.PKG_IDENT_CVE_MISMATCH: "invalid-input", +} + + class EnumIdentifyResult(str, Enum): """Result of the PackageIdentify phase for a single CVE.""" YES = "yes" @@ -69,6 +76,7 @@ class 
AcquiredArtifacts(BaseModel): binary_rpm_path: Path | None = None patch_source_dir: Path | None = None patch_diff_path: Path | None = None + source_url: str | None = None class VulnerabilityIntel(BaseModel): diff --git a/src/vuln_analysis/functions/cve_http_output.py b/src/vuln_analysis/functions/cve_http_output.py index b6b475645..a328faafa 100644 --- a/src/vuln_analysis/functions/cve_http_output.py +++ b/src/vuln_analysis/functions/cve_http_output.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import base64 +from dataclasses import dataclass from http import HTTPStatus from datetime import datetime from aiq.builder.builder import Builder @@ -22,13 +23,21 @@ from pydantic import Field from exploit_iq_commons.logging.loggers_factory import LoggingFactory, trace_id +from exploit_iq_commons.data_models.checker_status import ( + CHECKER_FAILURE_ERROR_TYPES, + PACKAGE_CHECKER_STATUS_DESCRIPTIONS, +) +from exploit_iq_commons.data_models.common import PipelineMode, TypedBaseModel +from exploit_iq_commons.data_models.input import SourceDocumentsInfo from vuln_analysis.data_models.job import Job, LocalDateTime -from exploit_iq_commons.data_models.common import TypedBaseModel import typing -from typing import Any +from typing import Any, TYPE_CHECKING import os import re +if TYPE_CHECKING: + from vuln_analysis.data_models.output import AgentMorpheusOutput, FailureReport + logger = LoggingFactory.get_agent_logger(__name__) @@ -91,43 +100,113 @@ class CVEHttpOutputConfig(FunctionBaseConfig, name="cve_http_output"): mlops_config: MLOpsConfig = Field(..., description="MLOps configuration") +@dataclass +class OutputPayload: + """Encapsulates the HTTP output payload details.""" + json: str + url: str + skip_mlops: bool + + +def _build_output_payload( + message: "AgentMorpheusOutput", + config: CVEHttpOutputConfig, + default_json: str, +) -> OutputPayload: + """ + Determine the payload to send - either 
the full output or a failure report. + + Returns an OutputPayload with the appropriate JSON, URL, and skip_mlops flag. + """ + from vuln_analysis.data_models.output import FailureReport + + default_url = config.url + config.endpoint + failure_url = config.url + config.failure_endpoint + + if message.input.code_index_success is False: + repo_url = message.input.image.source_info[0].git_repo if message.input.image.source_info else "unknown" + report = FailureReport( + scan_id=message.input.scan.id, + error_type="processing-error", + error_message=f"Failed to clone repository {repo_url}--{message.input.failure_reason}", + ) + logger.info(f"Code index failed for scan {message.input.scan.id}, sending failure report to {failure_url}") + return OutputPayload(json=report.model_dump_json(by_alias=True), url=failure_url, skip_mlops=True) + + checker_ctx = message.info.checker_context + if checker_ctx and checker_ctx.status in CHECKER_FAILURE_ERROR_TYPES: + error_type = CHECKER_FAILURE_ERROR_TYPES[checker_ctx.status] + error_msg = PACKAGE_CHECKER_STATUS_DESCRIPTIONS.get( + checker_ctx.status, + f"Checker failed with status {checker_ctx.status}" + ) + cve_id = message.input.scan.vulns[0].vuln_id if message.input.scan.vulns else "unknown" + pkg_name = message.input.image.target_package.name if message.input.image.target_package else "unknown" + report = FailureReport( + scan_id=message.input.scan.id, + error_type=error_type, + error_message=f"{error_msg} (CVE: {cve_id}, package: {pkg_name})", + ) + logger.info( + f"Checker early exit for scan {message.input.scan.id} with status {checker_ctx.status}, " + f"sending failure report to {failure_url}" + ) + return OutputPayload(json=report.model_dump_json(by_alias=True), url=failure_url, skip_mlops=True) + + return OutputPayload(json=default_json, url=default_url, skip_mlops=False) + + @register_function(config_type=CVEHttpOutputConfig) async def output_to_http(config: CVEHttpOutputConfig, builder: Builder): # pylint: 
disable=unused-argument - from vuln_analysis.data_models.output import AgentMorpheusOutput, FailureReport + from vuln_analysis.data_models.output import AgentMorpheusOutput from vuln_analysis.utils import http_utils async def _arun(message: AgentMorpheusOutput) -> AgentMorpheusOutput: trace_id.set(message.input.scan.id) + if message.input.image.pipeline_mode == PipelineMode.PACKAGE_CHECKER: + checker_ctx = message.info.checker_context + artifacts = checker_ctx.artifacts if checker_ctx else None + source_url = artifacts.source_url if artifacts else None + target = message.input.image.target_package + if source_url and target: + nvr = f"{target.name}-{target.version}-{target.release}" + message.input.image.source_info = [ + SourceDocumentsInfo(type="code", git_repo=source_url, ref=nvr) + ] + model_json = message.model_dump_json(by_alias=True) - url = config.url + config.endpoint + + # Save JSON for debugging - compare with local markdown reports + from pathlib import Path + debug_output_dir = Path(".cache/am_cache/checker_json_output") + debug_output_dir.mkdir(parents=True, exist_ok=True) + vuln_id = message.input.scan.vulns[0].vuln_id if message.input.scan.vulns else "unknown" + json_file = debug_output_dir / f"{message.input.scan.id}_{vuln_id}.json" + json_file.write_text(model_json) + logger.info(f"Saved JSON output to {json_file}") + headers = {'Content-type': 'application/json', 'traceId': trace_id.get()} auth_header = get_auth_header(config) if auth_header is not None: headers['Authorization'] = auth_header - verify = True - if config.verify_path: - verify = config.verify_path + verify = config.verify_path if config.verify_path else True + + payload = _build_output_payload(message, config, model_json) try: - skipped_mlops = False - if message.input.code_index_success is False: - repo_url = message.input.image.source_info[0].git_repo if message.input.image.source_info else "unknown" - failure_report = FailureReport( - scan_id=message.input.scan.id, - 
error_type="processing-error", - error_message=f"Failed to clone repository {repo_url}--{message.input.failure_reason}", - ) - failure_url = config.url + config.failure_endpoint - logger.info(f"Code index failed for scan {message.input.scan.id}, sending failure report to {failure_url}") - model_json = failure_report.model_dump_json(by_alias=True) - url = failure_url - skipped_mlops = True - logger.info(f"Sending output to {url}") - http_utils.request_with_retry(request_kwargs={ - "url": url, "method": "POST", "data": model_json.encode('utf-8'), "headers": headers, "verify": verify - }, accept_status_codes=(HTTPStatus.OK, HTTPStatus.CREATED, HTTPStatus.ACCEPTED)) - if config.enable_mlops and not skipped_mlops: + logger.info(f"Sending output to {payload.url}") + http_utils.request_with_retry( + request_kwargs={ + "url": payload.url, + "method": "POST", + "data": payload.json.encode('utf-8'), + "headers": headers, + "verify": verify, + }, + accept_status_codes=(HTTPStatus.OK, HTTPStatus.CREATED, HTTPStatus.ACCEPTED), + ) + if config.enable_mlops and not payload.skip_mlops: mlops_url = None try: job = _extract_job_data(message) @@ -143,9 +222,9 @@ async def _arun(message: AgentMorpheusOutput) -> AgentMorpheusOutput: except Exception as mlops_e: logger.error('Unable to send job to MLOps API at %s. Error: %s', mlops_url, mlops_e) except Exception as e: - logger.error('Unable to send output response to %s. Error: %s', url, e) + logger.error('Unable to send output response to %s. Error: %s', payload.url, e) else: - logger.info('Successfully sent output to %s', url) + logger.info('Successfully sent output to %s', payload.url) return message diff --git a/src/vuln_analysis/functions/cve_source_acquisition.py b/src/vuln_analysis/functions/cve_source_acquisition.py index c4cc42cf6..6a2b2dc65 100644 --- a/src/vuln_analysis/functions/cve_source_acquisition.py +++ b/src/vuln_analysis/functions/cve_source_acquisition.py @@ -14,6 +14,8 @@ # limitations under the License. 
import hashlib +import json +from datetime import datetime, timezone from aiq.builder.builder import Builder from aiq.builder.framework_enum import LLMFrameworkEnum from aiq.builder.function_info import FunctionInfo @@ -116,11 +118,17 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: if target_dir.exists() and any(target_dir.iterdir()): logger.info("Source cache hit for %s: %s", identifier_key, target_dir) - #build artifacts from target_dir artifacts = AcquiredArtifacts() artifacts.srpm_path = target_dir / "source" artifacts.build_log_path = target_dir / "logs" artifacts.binary_rpm_path = target_dir / "binaries" + metadata_file = target_dir / "metadata.json" + if metadata_file.exists(): + try: + metadata = json.loads(metadata_file.read_text(encoding="utf-8")) + artifacts.source_url = metadata.get("source_url") + except (json.JSONDecodeError, OSError) as e: + logger.warning("Failed to read metadata.json: %s", e) message.info.checker_context.artifacts = artifacts return message @@ -130,6 +138,16 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: brew_downloader.connect() artifacts = brew_downloader.download_target_artifacts(target_package.name, target_package.version, target_package.release,target_package.arch) message.info.checker_context.artifacts = artifacts + + nvr = f"{target_package.name}-{target_package.version}-{target_package.release}" + metadata = { + "source_url": artifacts.source_url, + "nvr": nvr, + "downloaded_at": datetime.now(timezone.utc).isoformat(), + } + metadata_file = target_dir / "metadata.json" + metadata_file.write_text(json.dumps(metadata, indent=2), encoding="utf-8") + logger.info("Wrote metadata to %s", metadata_file) except BrewDownloaderError as e: logger.error("Failed to download patched SRPM: %s", e) message.info.checker_context.status = PackageCheckerStatus.ERROR_FAILED_TO_DOWNLOAD_SRPM diff --git a/src/vuln_analysis/tools/brew_downloader.py 
b/src/vuln_analysis/tools/brew_downloader.py index 15416f0e8..d0693b46e 100644 --- a/src/vuln_analysis/tools/brew_downloader.py +++ b/src/vuln_analysis/tools/brew_downloader.py @@ -182,6 +182,14 @@ def _download_file(self, url: str, dest: Path) -> Path: logger.info("Saved %s (%d bytes)", dest.name, dest.stat().st_size) return dest + def _get_srpm_url(self, build: dict) -> str: + """Compute the download URL for the source RPM of *build*.""" + rpms = self._session.listRPMs(buildID=build["id"], arches="src") + if not rpms: + raise BrewDownloadError(f"No source RPM found for build {build['nvr']}") + rpm_info = rpms[0] + return f"{self._pathinfo.build(build)}/{self._pathinfo.rpm(rpm_info)}" + def download_srpm(self, build: dict) -> Path: """Download the .src.rpm for *build* into the shared RPM cache. @@ -267,6 +275,7 @@ def download_target_artifacts(self, name: str, version: str, release: str, arch: if build is None: raise BrewBuildNotFoundError(f"Build not found for {name}-{version}-{release}") + artifacts.source_url = self._get_srpm_url(build) cache_srpm_path = self.download_srpm(build) srpm_target_path = self._checker_dir / "source" From 28a3fe40331a604913033051564fa9b78923ff3c Mon Sep 17 00:00:00 2001 From: Shimon Tanny Date: Wed, 13 May 2026 13:21:52 +0000 Subject: [PATCH 46/46] grep tool support multi pattern query and fix osv retrive --- .../functions/code_agent_graph_defs.py | 2 +- src/vuln_analysis/tools/source_grep.py | 25 +++++-- src/vuln_analysis/tools/source_inspector.py | 18 +++-- .../utils/osv_patch_retriever.py | 67 ++++++++++++++----- 4 files changed, 87 insertions(+), 25 deletions(-) diff --git a/src/vuln_analysis/functions/code_agent_graph_defs.py b/src/vuln_analysis/functions/code_agent_graph_defs.py index 6c4a1cc94..c0748ed17 100644 --- a/src/vuln_analysis/functions/code_agent_graph_defs.py +++ b/src/vuln_analysis/functions/code_agent_graph_defs.py @@ -463,7 +463,7 @@ def is_main_source(path: str) -> bool: - "32-bit": Only 32-bit systems affected 
(look for phrases like "32-bit systems", "i386", "i686", "on 32-bit", "64-bit systems are not affected") - "64-bit": Only 64-bit systems affected (rare, look for "64-bit only", "x86_64 only") - "both": Both architectures affected (DEFAULT - use when not explicitly stated otherwise) - NOTE: Integer overflow vulnerabilities (CWE-190, CWE-680, CWE-681) often only affect 32-bit due to smaller integer sizes. + NOTE: Do NOT assume an architecture based on the vulnerability type. Default to "both" unless explicitly stated. diff --git a/src/vuln_analysis/tools/source_grep.py b/src/vuln_analysis/tools/source_grep.py index 37c1fa0af..496270e97 100644 --- a/src/vuln_analysis/tools/source_grep.py +++ b/src/vuln_analysis/tools/source_grep.py @@ -63,8 +63,8 @@ class SourceGrepToolConfig(FunctionBaseConfig, name=SOURCE_GREP): } -def _parse_query(query: str) -> tuple[str, str | None, str, bool]: - """Parse query string into (pattern, file_glob, target, word_boundary). +def _parse_query(query: str) -> tuple[str | list[str], str | None, str, bool]: + """Parse query string into (pattern(s), file_glob, target, word_boundary). Supports formats: - "pattern" -> search source (default) @@ -73,8 +73,12 @@ def _parse_query(query: str) -> tuple[str, str | None, str, bool]: - "target:pattern,file_glob" -> search target with file filter - "pattern -w" -> search with word boundary (whole words only) - "target:pattern,file_glob -w" -> full format with word boundary + - "pattern1;pattern2,file.c" -> multiple patterns (only with file_glob) Valid targets: source, logs, patch + + Note: Multiple patterns (separated by ';') are only supported when + a file_glob is provided. This prevents overly broad multi-pattern searches. 
""" query = query.strip().strip('"').strip("'") @@ -92,9 +96,15 @@ def _parse_query(query: str) -> tuple[str, str | None, str, bool]: if "," in query: parts = query.split(",", 1) - pattern = parts[0].strip() + pattern_part = parts[0].strip() file_glob = parts[1].strip() if len(parts) > 1 else None - return pattern, file_glob, target, word_boundary + + # Multi-pattern support: only when file_glob is provided + if file_glob and ";" in pattern_part: + patterns = [p.strip() for p in pattern_part.split(";") if p.strip()] + return patterns, file_glob, target, word_boundary + + return pattern_part, file_glob, target, word_boundary return query, None, target, word_boundary @@ -134,11 +144,13 @@ async def _arun(query: str) -> str: Options: - -w: Match whole words only (word boundary) + - Multiple patterns: use ';' separator ONLY with a specific file Examples: - 'archive_read_open' - search source files - 'archive_read_open,*.c' - search only .c source files - 'archive_read_open -w' - search for whole word only + - 'unsigned int cursor;unsigned int nodes,archive_read.c' - multiple patterns in one file - 'logs:undefined reference' - search build logs for link errors - 'logs:error:' - search build logs for error messages - 'patch:CVE-2026-5121' - find patch for specific CVE @@ -168,7 +180,7 @@ async def _arun(query: str) -> str: pattern, target_dir, target, file_glob or "default extensions", word_boundary) matches = await inspector.grep_native( - pattern=pattern, + patterns=pattern, file_glob=file_glob, word_boundary=word_boundary, context_lines=config.context_lines, @@ -188,9 +200,12 @@ async def _arun(query: str) -> str: "'logs' for build compilation logs, " "'patch' for fixed patches from newer RPM. " "Add ' -w' suffix for whole-word matching. " + "Multiple patterns: use ';' separator ONLY with a specific file, e.g., " + "'pattern1;pattern2,filename.c' searches for both patterns in that file. 
" "Examples: 'archive_read_open' searches source, " "'archive_read_open,*.c' searches only C source files, " "'archive_read_open -w' searches for whole word only, " + "'unsigned int cursor;unsigned int nodes,archive_read.c' searches multiple patterns in one file, " "'logs:undefined reference' searches build logs, " "'patch:CVE-2026-5121' searches patch files." ), diff --git a/src/vuln_analysis/tools/source_inspector.py b/src/vuln_analysis/tools/source_inspector.py index 825e5b55b..d881c3a9b 100644 --- a/src/vuln_analysis/tools/source_inspector.py +++ b/src/vuln_analysis/tools/source_inspector.py @@ -116,7 +116,7 @@ def grep_content( async def grep_native( self, - pattern: str, + patterns: str | list[str], file_glob: str | None = None, *, case_insensitive: bool = False, @@ -129,8 +129,9 @@ async def grep_native( Parameters ---------- - pattern: - Search pattern (passed to grep as-is, supports basic regex). + patterns: + Search pattern(s). Can be a single string or list of patterns. + When multiple patterns are provided, matches ANY of them (OR logic). file_glob: Optional file pattern (e.g., ``"*.c"``, ``"*.h"``). If provided, overrides default_extensions. 
@@ -169,7 +170,16 @@ async def grep_native( for ext in default_extensions: cmd.extend(["--include", ext]) - cmd.extend(["-m", str(max_results), "--", pattern, str(self._root)]) + cmd.extend(["-m", str(max_results)]) + + # Handle single or multiple patterns + if isinstance(patterns, list): + for p in patterns: + cmd.extend(["-e", p]) + else: + cmd.extend(["--", patterns]) + + cmd.append(str(self._root)) def _run_grep() -> str: result = subprocess.run( diff --git a/src/vuln_analysis/utils/osv_patch_retriever.py b/src/vuln_analysis/utils/osv_patch_retriever.py index c6a5701d5..aaba10a22 100644 --- a/src/vuln_analysis/utils/osv_patch_retriever.py +++ b/src/vuln_analysis/utils/osv_patch_retriever.py @@ -226,18 +226,31 @@ async def get_fix_patch( osv_data = await self._query_osv(cve_id) if not osv_data: return None - - affected = self._find_matching_affected(osv_data, package_name) - if not affected: - logger.info("OSV: No affected block with fix found for %s", cve_id) - return None - - range_info = self._extract_fix_commit(affected) - if not range_info.fixed_commit or not range_info.repo_url: - logger.info("OSV: No fixed commit found for %s", cve_id) - return None - - patch_url = self._build_patch_url(range_info.repo_url, range_info.fixed_commit) + + # 1. 
Try to get the highly-specific patch URL from references first + patch_url = self._extract_commit_from_references(osv_data) + fixed_commit = None + repo_url = None + if patch_url: + # Extract repo_url and fixed_commit from the patch_url for the result object + repo_url = patch_url.split('/commit/')[0] if '/commit/' in patch_url else patch_url.split('/pull/')[0] + fixed_commit = patch_url.split('/')[-1].replace('.patch', '') + logger.info("OSV: Found precise fix commit in references for %s", cve_id) + else: + # second try to find the fix commit from the affected block + affected = self._find_matching_affected(osv_data, package_name) + if not affected: + logger.info("OSV: No affected block with fix found for %s", cve_id) + return None + + range_info = self._extract_fix_commit(affected) + if not range_info.fixed_commit or not range_info.repo_url: + logger.info("OSV: No fixed commit found for %s", cve_id) + return None + + patch_url = self._build_patch_url(range_info.repo_url, range_info.fixed_commit) + fixed_commit = range_info.fixed_commit[:8] + repo_url = range_info.repo_url if not patch_url: logger.info("OSV: Could not build patch URL for %s (non-GitHub repo?)", cve_id) return None @@ -247,12 +260,12 @@ async def get_fix_patch( return None commit_message, commit_author, commit_date = _extract_commit_metadata(patch_content) - parsed_patch = _parse_patch_content(patch_content, f"{cve_id}_{range_info.fixed_commit[:8]}.patch") + parsed_patch = _parse_patch_content(patch_content, f"{cve_id}_{fixed_commit}.patch") return OSVPatchResult( cve_id=cve_id, - fixed_commit=range_info.fixed_commit, - repo_url=range_info.repo_url, + fixed_commit=fixed_commit, + repo_url=repo_url, patch_url=patch_url, patch_content=patch_content, parsed_patch=parsed_patch, @@ -298,6 +311,28 @@ async def _query_osv(self, cve_id: str) -> dict | None: logger.warning("OSV query failed for %s: %s", cve_id, e) return None + def _extract_commit_from_references(self, osv_data: dict) -> str | None: + 
"""Attempt to find the exact fix commit URL from the OSV references array. + + Args: + osv_data: OSV vulnerability data dict + + Returns: + The patch URL if found, otherwise None + """ + references = osv_data.get("references", []) + + for ref in references: + if ref.get("type") == "FIX": + url = ref.get("url", "") + # We look for GitHub URLs containing either /commit/ or /pull/ + if "github.com" in url and ("/commit/" in url or "/pull/" in url): + if not url.endswith(".patch"): + return f"{url}.patch" + return url + + return None + def _find_matching_affected( self, osv_data: dict, @@ -312,6 +347,8 @@ def _find_matching_affected( Returns: Matching affected block dict, or None if no match """ + + for affected in osv_data.get("affected", []):