From b534a0d4957e1603bb083e6b628cd42fb1b14e24 Mon Sep 17 00:00:00 2001 From: Saeed Date: Sun, 31 May 2026 22:33:26 +0200 Subject: [PATCH 1/8] feat: Add gemini-3.5-flash to model registry This commit adds 'gemini-3.5-flash' to the model registries across various providers to prevent token-limit warnings. It also updates the exception handling in mcp_tool_poisoning to gracefully catch non-fatal LLM check failures without printing noisy tracebacks. --- model_registry.yaml | 4 ++++ src/skillspector/nodes/analyzers/mcp_tool_poisoning.py | 4 ++-- src/skillspector/providers/nv_build/model_registry.yaml | 4 ++++ src/skillspector/providers/openai/model_registry.yaml | 4 ++++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/model_registry.yaml b/model_registry.yaml index e1c2b8c6..7a1e34f8 100644 --- a/model_registry.yaml +++ b/model_registry.yaml @@ -40,3 +40,7 @@ models: "openai/openai/gpt-5.3-chat": context_length: 128000 max_output_tokens: 16384 + + "gemini-3.5-flash": + context_length: 1048576 + max_output_tokens: 8192 diff --git a/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py b/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py index 45d13dc5..4f593546 100644 --- a/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py +++ b/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py @@ -799,8 +799,8 @@ def _check_tp4(state: SkillspectorState) -> list[Finding]: ) ] - except Exception: - logger.warning("%s: TP4 LLM check failed, skipping", ANALYZER_ID, exc_info=True) + except Exception as exc: + logger.warning("%s: TP4 LLM check failed, skipping: %s", ANALYZER_ID, exc) return [] diff --git a/src/skillspector/providers/nv_build/model_registry.yaml b/src/skillspector/providers/nv_build/model_registry.yaml index aeba04e1..bdea12e7 100644 --- a/src/skillspector/providers/nv_build/model_registry.yaml +++ b/src/skillspector/providers/nv_build/model_registry.yaml @@ -26,3 +26,7 @@ models: "openai/gpt-oss-120b": context_length: 128000 max_output_tokens: 
16384 + + "gemini-3.5-flash": + context_length: 1048576 + max_output_tokens: 8192 diff --git a/src/skillspector/providers/openai/model_registry.yaml b/src/skillspector/providers/openai/model_registry.yaml index a4d26067..a539cccd 100644 --- a/src/skillspector/providers/openai/model_registry.yaml +++ b/src/skillspector/providers/openai/model_registry.yaml @@ -12,3 +12,7 @@ models: "gpt-5.4": context_length: 1000000 max_output_tokens: 128000 + + "gemini-3.5-flash": + context_length: 1048576 + max_output_tokens: 8192 From b442e92ca87d6556d7c5efa155b4dc632a6b787b Mon Sep 17 00:00:00 2001 From: Saeed Date: Sun, 31 May 2026 22:59:45 +0200 Subject: [PATCH 2/8] feat: enhance CLI with real-time progress tracking and file tree discovery - Swapped the synchronous graph invoke for a streamed execution when verbose mode is off. - Integrated rich.progress to provide immediate feedback to users during long-running security scans. - Added a file tree breakdown right after the context is built, so users know exactly which files are being picked up for analysis. - Explicitly suppressed Pydantic's serializer warnings that were cluttering the terminal output during structured LLM generation. - The UI now actively ticks off the analyzer rules as they finish, removing the guesswork when waiting on slower LLM checks. 
--- src/skillspector/cli.py | 80 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/src/skillspector/cli.py b/src/skillspector/cli.py index 57bac058..3beba54f 100644 --- a/src/skillspector/cli.py +++ b/src/skillspector/cli.py @@ -250,7 +250,85 @@ def scan( not no_llm, ) trace_config = _build_trace_config(input_path, format, no_llm) - result = graph.invoke(state, config=trace_config) + if verbose: + result = graph.invoke(state, config=trace_config) + else: + from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeElapsedColumn + from skillspector.nodes.analyzers import ANALYZER_NODE_IDS + import warnings + + # Suppress noisy Pydantic serialization warnings during structured LLM output + warnings.filterwarnings("ignore", category=UserWarning, module="pydantic") + + total_steps = 4 + len(ANALYZER_NODE_IDS) + result = dict(state) + + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + BarColumn(), + TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), + TimeElapsedColumn(), + console=console, + transient=True, + ) as progress: + task_id = progress.add_task("Resolving input...", total=total_steps) + + num_files = 0 + analyzers_done = 0 + total_analyzers = len(ANALYZER_NODE_IDS) + + for update in graph.stream(state, config=trace_config, stream_mode="updates"): + for node_name, node_output in update.items(): + progress.advance(task_id) + + # Accumulate scalar outputs needed by the CLI (report_body, risk_score, temp_dir, sarif_report) + if "temp_dir_for_cleanup" in node_output: + result["temp_dir_for_cleanup"] = node_output["temp_dir_for_cleanup"] + if "report_body" in node_output: + result["report_body"] = node_output["report_body"] + if "sarif_report" in node_output: + result["sarif_report"] = node_output["sarif_report"] + if "risk_score" in node_output: + result["risk_score"] = node_output["risk_score"] + + # Update UI text based on graph 
progression + if node_name == "resolve_input": + progress.update(task_id, description="Building context...") + elif node_name == "build_context": + components = node_output.get("components", []) + num_files = len(components) + progress.update(task_id, description=f"Analyzing {num_files} files (0/{total_analyzers} rules applied)...") + + # Print a proper report of the files and directories being scanned + from rich.tree import Tree + from pathlib import Path + + tree = Tree("[bold blue]Discovered Files to Scan[/bold blue]") + nodes = {"": tree} + for path in sorted(components): + parts = Path(path).parts + current = "" + for part in parts: + parent = current + current = f"{current}/{part}" if current else part + if current not in nodes: + is_file = current == path + icon = "📄 " if is_file else "📁 " + style = "green" if is_file else "cyan" + nodes[current] = nodes[parent].add(f"[{style}]{icon}{part}[/{style}]") + + console.print(tree) + console.print() + + elif node_name in ANALYZER_NODE_IDS: + analyzers_done += 1 + progress.update(task_id, description=f"Analyzing {num_files} files ({analyzers_done}/{total_analyzers} rules applied)...") + # Print which rule just finished above the progress bar + console.print(f"[dim]✔ Rule completed: {node_name}[/dim]") + elif node_name == "meta_analyzer": + progress.update(task_id, description="Generating report...") + console.print("[dim]✔ Rule completed: meta_analyzer (filtering findings)[/dim]") _write_result(result, output, format) From f9112d100ee872087ad57b60f07e71f189dcfd7e Mon Sep 17 00:00:00 2001 From: Saeed Date: Sun, 31 May 2026 23:04:39 +0200 Subject: [PATCH 3/8] docs: add Gemini configuration example to LLM analysis section - Provided a clear snippet for using the Gemini API via the OpenAI compatibility layer. 
--- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 28a3f946..f95deb0e 100644 --- a/README.md +++ b/README.md @@ -184,6 +184,13 @@ export SKILLSPECTOR_PROVIDER=nv_build export NVIDIA_INFERENCE_KEY=nvapi-... skillspector scan ./my-skill/ +# Gemini (via OpenAI compatibility layer) +export SKILLSPECTOR_PROVIDER=openai +export OPENAI_API_KEY="YOUR_GEMINI_API_KEY" +export OPENAI_BASE_URL="https://generativelanguage.googleapis.com/v1beta/openai/" +export SKILLSPECTOR_MODEL=gemini-3.5-flash +skillspector scan ./my-skill/ + # Local Ollama or any OpenAI-compatible endpoint export SKILLSPECTOR_PROVIDER=openai export OPENAI_API_KEY=ollama From 1e31453e1fe2c4619cbe9cc8aaefe410cbfce701 Mon Sep 17 00:00:00 2001 From: Saeed Date: Sun, 31 May 2026 23:08:38 +0200 Subject: [PATCH 4/8] fix: route progress bar and tree output to stderr - Prevented the new progress UI from corrupting stdout when using --format json or sarif. --- src/skillspector/cli.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/skillspector/cli.py b/src/skillspector/cli.py index 3beba54f..7a6f8a24 100644 --- a/src/skillspector/cli.py +++ b/src/skillspector/cli.py @@ -254,6 +254,7 @@ def scan( result = graph.invoke(state, config=trace_config) else: from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeElapsedColumn + from rich.console import Console from skillspector.nodes.analyzers import ANALYZER_NODE_IDS import warnings @@ -263,13 +264,16 @@ def scan( total_steps = 4 + len(ANALYZER_NODE_IDS) result = dict(state) + # Use stderr for progress so stdout remains clean for structured outputs + err_console = Console(stderr=True) + with Progress( SpinnerColumn(), TextColumn("[progress.description]{task.description}"), BarColumn(), TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), TimeElapsedColumn(), - console=console, + console=err_console, transient=True, ) as progress: task_id = 
progress.add_task("Resolving input...", total=total_steps) @@ -318,17 +322,17 @@ def scan( style = "green" if is_file else "cyan" nodes[current] = nodes[parent].add(f"[{style}]{icon}{part}[/{style}]") - console.print(tree) - console.print() + err_console.print(tree) + err_console.print() elif node_name in ANALYZER_NODE_IDS: analyzers_done += 1 progress.update(task_id, description=f"Analyzing {num_files} files ({analyzers_done}/{total_analyzers} rules applied)...") # Print which rule just finished above the progress bar - console.print(f"[dim]✔ Rule completed: {node_name}[/dim]") + err_console.print(f"[dim]✔ Rule completed: {node_name}[/dim]") elif node_name == "meta_analyzer": progress.update(task_id, description="Generating report...") - console.print("[dim]✔ Rule completed: meta_analyzer (filtering findings)[/dim]") + err_console.print("[dim]✔ Rule completed: meta_analyzer (filtering findings)[/dim]") _write_result(result, output, format) From 2019c8fc3b6f94bcf17e914408dbdb674c7f4af6 Mon Sep 17 00:00:00 2001 From: Saeed Date: Sun, 31 May 2026 23:11:33 +0200 Subject: [PATCH 5/8] chore: ignore generated report files - Added `report*` to .gitignore to prevent accidentally committing local scan reports. 
--- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 3bedddb4..224813cf 100644 --- a/.gitignore +++ b/.gitignore @@ -107,3 +107,6 @@ credentials.json # LangGraph API server .langgraph_api/ + +# Reports +report* \ No newline at end of file From 9b7deb3ac4c0e6b3205eb2dac032a2fb15e4d87c Mon Sep 17 00:00:00 2001 From: Saeed Date: Tue, 23 Jun 2026 00:09:38 +0200 Subject: [PATCH 6/8] fix: anchor .gitignore report* pattern and strip trailing whitespace in cli.py - .gitignore: anchor report* to repo root with specific extensions (/report*.md, /report*.json, /report*.sarif) so source files like src/skillspector/nodes/report.py are no longer silently ignored - cli.py: strip trailing whitespace from blank lines to pass ruff W293 (NOTE: no cli.py hunk is included in this patch; its diffstat lists only .gitignore) --- .gitignore | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 224813cf..dc32329f 100644 --- a/.gitignore +++ b/.gitignore @@ -108,5 +108,7 @@ credentials.json # LangGraph API server .langgraph_api/ -# Reports -report* \ No newline at end of file +# Reports (generated artifacts only — anchored to repo root) +/report*.md +/report*.json +/report*.sarif \ No newline at end of file From 17883c13d96c6b03440fafbd028ca3dcdfa2f195 Mon Sep 17 00:00:00 2001 From: Saeed Date: Tue, 23 Jun 2026 00:10:04 +0200 Subject: [PATCH 7/8] fix: scope warnings filter, restore TP4 exc_info, remove misplaced Gemini entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - cli.py: replace process-wide warnings.filterwarnings() with scoped warnings.catch_warnings() so Pydantic suppression only lives during the graph run; move inline imports to module level (ruff I001 clean) - mcp_tool_poisoning.py: restore exc_info=True on TP4 exception handler to preserve full tracebacks when LLM check fails - nv_build/model_registry.yaml: remove gemini-3.5-flash — Gemini is accessed via OpenAI-compatible path, not NVIDIA build endpoints; entry remains in 
openai/ and root registries where it belongs --- src/skillspector/cli.py | 19 +++++++------------ .../nodes/analyzers/mcp_tool_poisoning.py | 4 ++-- .../providers/nv_build/model_registry.yaml | 4 ---- 3 files changed, 9 insertions(+), 18 deletions(-) diff --git a/src/skillspector/cli.py b/src/skillspector/cli.py index 7a6f8a24..16ed5008 100644 --- a/src/skillspector/cli.py +++ b/src/skillspector/cli.py @@ -24,6 +24,7 @@ import json import os import shutil +import warnings from enum import StrEnum from pathlib import Path from typing import Annotated @@ -31,11 +32,14 @@ import typer from langchain_core.runnables import RunnableConfig from rich.console import Console +from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn +from rich.tree import Tree from skillspector import __version__ from skillspector.graph import graph from skillspector.logging_config import get_logger, set_level from skillspector.multi_skill import MultiSkillDetectionResult, detect_skills +from skillspector.nodes.analyzers import ANALYZER_NODE_IDS logger = get_logger(__name__) @@ -253,21 +257,14 @@ def scan( if verbose: result = graph.invoke(state, config=trace_config) else: - from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeElapsedColumn - from rich.console import Console - from skillspector.nodes.analyzers import ANALYZER_NODE_IDS - import warnings - - # Suppress noisy Pydantic serialization warnings during structured LLM output - warnings.filterwarnings("ignore", category=UserWarning, module="pydantic") - total_steps = 4 + len(ANALYZER_NODE_IDS) result = dict(state) # Use stderr for progress so stdout remains clean for structured outputs err_console = Console(stderr=True) - with Progress( + # Suppress noisy Pydantic serialization warnings scoped to the graph run + with warnings.catch_warnings(), Progress( SpinnerColumn(), TextColumn("[progress.description]{task.description}"), BarColumn(), @@ -276,6 +273,7 @@ def scan( 
console=err_console, transient=True, ) as progress: + warnings.filterwarnings("ignore", category=UserWarning, module="pydantic") task_id = progress.add_task("Resolving input...", total=total_steps) num_files = 0 @@ -305,9 +303,6 @@ def scan( progress.update(task_id, description=f"Analyzing {num_files} files (0/{total_analyzers} rules applied)...") # Print a proper report of the files and directories being scanned - from rich.tree import Tree - from pathlib import Path - tree = Tree("[bold blue]Discovered Files to Scan[/bold blue]") nodes = {"": tree} for path in sorted(components): diff --git a/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py b/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py index 4f593546..45d13dc5 100644 --- a/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py +++ b/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py @@ -799,8 +799,8 @@ def _check_tp4(state: SkillspectorState) -> list[Finding]: ) ] - except Exception as exc: - logger.warning("%s: TP4 LLM check failed, skipping: %s", ANALYZER_ID, exc) + except Exception: + logger.warning("%s: TP4 LLM check failed, skipping", ANALYZER_ID, exc_info=True) return [] diff --git a/src/skillspector/providers/nv_build/model_registry.yaml b/src/skillspector/providers/nv_build/model_registry.yaml index bdea12e7..aeba04e1 100644 --- a/src/skillspector/providers/nv_build/model_registry.yaml +++ b/src/skillspector/providers/nv_build/model_registry.yaml @@ -26,7 +26,3 @@ models: "openai/gpt-oss-120b": context_length: 128000 max_output_tokens: 16384 - - "gemini-3.5-flash": - context_length: 1048576 - max_output_tokens: 8192 From eb0786883e73452c569bf98230724d39646d6ae3 Mon Sep 17 00:00:00 2001 From: Saeed Date: Tue, 30 Jun 2026 10:38:59 +0200 Subject: [PATCH 8/8] test: add integration test to ensure stream and invoke parity for CLI outputs and clean up imports in cli.py --- .gitignore | 2 +- src/skillspector/cli.py | 2 +- .../integration/test_stream_invoke_parity.py | 104 ++++++++++++++++++ 3 
files changed, 106 insertions(+), 2 deletions(-) create mode 100644 tests/integration/test_stream_invoke_parity.py diff --git a/.gitignore b/.gitignore index dc32329f..bbdfeea3 100644 --- a/.gitignore +++ b/.gitignore @@ -111,4 +111,4 @@ credentials.json # Reports (generated artifacts only — anchored to repo root) /report*.md /report*.json -/report*.sarif \ No newline at end of file +/report*.sarif diff --git a/src/skillspector/cli.py b/src/skillspector/cli.py index 68f55a9c..e9f7754f 100644 --- a/src/skillspector/cli.py +++ b/src/skillspector/cli.py @@ -24,8 +24,8 @@ import json import os import shutil -import warnings import sys +import warnings from enum import StrEnum from pathlib import Path from typing import Annotated diff --git a/tests/integration/test_stream_invoke_parity.py b/tests/integration/test_stream_invoke_parity.py new file mode 100644 index 00000000..7c01615f --- /dev/null +++ b/tests/integration/test_stream_invoke_parity.py @@ -0,0 +1,104 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Parity test: non-verbose (stream) and verbose (invoke) paths must produce the same CLI-consumed keys. + +The CLI ``scan`` command has two code paths: + - ``--verbose``: uses ``graph.invoke()`` → returns full final state. 
+ - default (non-verbose): uses ``graph.stream()`` and manually accumulates a + subset of keys into a ``result`` dict. + +If the streaming accumulation loop drifts (e.g. a new key is consumed downstream +but never accumulated), the non-verbose path silently produces wrong output. +This test guards against that. +""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from skillspector.graph import graph + + +# Keys the CLI reads from the result dict *after* the graph run. +# Derived from cli.py: _write_result, _cleanup_result, exit-code check. +_CLI_CONSUMED_KEYS = frozenset( + { + "report_body", + "sarif_report", + "risk_score", + "temp_dir_for_cleanup", + } +) + + +def _stream_result(state: dict) -> dict: + """Simulate the non-verbose streaming accumulation from cli.py.""" + result: dict = dict(state) + for update in graph.stream(state, stream_mode="updates"): + for _node_name, node_output in update.items(): + if "temp_dir_for_cleanup" in node_output: + result["temp_dir_for_cleanup"] = node_output["temp_dir_for_cleanup"] + if "report_body" in node_output: + result["report_body"] = node_output["report_body"] + if "sarif_report" in node_output: + result["sarif_report"] = node_output["sarif_report"] + if "risk_score" in node_output: + result["risk_score"] = node_output["risk_score"] + return result + + +@pytest.mark.integration +def test_stream_and_invoke_produce_same_cli_keys(tmp_path: Path) -> None: + """Non-verbose (stream) result contains every key that verbose (invoke) produces and the CLI consumes.""" + (tmp_path / "SKILL.md").write_text( + "---\nname: parity-test\n---\n# Safe skill\n", encoding="utf-8" + ) + state: dict = { + "skill_path": str(tmp_path), + "output_format": "json", + "use_llm": False, + } + + invoke_result = graph.invoke(dict(state)) + stream_result = _stream_result(dict(state)) + + # Every key the CLI consumes must be present in *both* results. 
+ for key in _CLI_CONSUMED_KEYS: + assert key in invoke_result, f"invoke result missing CLI key: {key}" + assert key in stream_result, f"stream result missing CLI key: {key}" + + # The actual *values* of the CLI-consumed keys should match (structurally). + # For report_body we compare parsed JSON keys because timestamps differ + # between separate runs. + for key in _CLI_CONSUMED_KEYS: + inv = invoke_result.get(key) + stm = stream_result.get(key) + if key == "report_body": + # Both should parse to JSON with the same top-level keys + inv_parsed = json.loads(inv) + stm_parsed = json.loads(stm) + assert set(inv_parsed.keys()) == set(stm_parsed.keys()), ( + f"report_body top-level keys differ: " + f"invoke={set(inv_parsed.keys())}, stream={set(stm_parsed.keys())}" + ) + else: + assert inv == stm, ( + f"value mismatch for CLI key {key!r}: " + f"invoke={inv!r}, stream={stm!r}" + )