Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,8 @@ credentials.json

# LangGraph API server
.langgraph_api/

# Reports (generated artifacts only — anchored to repo root)
/report*.md
/report*.json
/report*.sarif
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,12 @@ export SKILLSPECTOR_PROVIDER=nv_build
export NVIDIA_INFERENCE_KEY=nvapi-...
skillspector scan ./my-skill/

# Gemini (via OpenAI compatibility layer)
export SKILLSPECTOR_PROVIDER=openai
export OPENAI_API_KEY="YOUR_GEMINI_API_KEY"
export OPENAI_BASE_URL="https://generativelanguage.googleapis.com/v1beta/openai/"
export SKILLSPECTOR_MODEL=gemini-3.5-flash

# Local Claude CLI — no API key; uses your existing `claude auth login` session
# Requires: claude CLI installed and authenticated (claude auth login)
export SKILLSPECTOR_PROVIDER=claude_cli
Expand Down
4 changes: 4 additions & 0 deletions model_registry.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,7 @@ models:
"openai/openai/gpt-5.3-chat":
context_length: 128000
max_output_tokens: 16384

"gemini-3.5-flash":
context_length: 1048576
max_output_tokens: 8192
79 changes: 78 additions & 1 deletion src/skillspector/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,22 @@
import os
import shutil
import sys
import warnings
from enum import StrEnum
from pathlib import Path
from typing import Annotated

import typer
from langchain_core.runnables import RunnableConfig
from rich.console import Console
from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
from rich.tree import Tree

from skillspector import __version__
from skillspector.graph import graph
from skillspector.logging_config import get_logger, set_level
from skillspector.multi_skill import MultiSkillDetectionResult, detect_skills
from skillspector.nodes.analyzers import ANALYZER_NODE_IDS
from skillspector.suppression import build_baseline_dict, dump_baseline, load_baseline

logger = get_logger(__name__)
Expand Down Expand Up @@ -318,7 +322,80 @@ def scan(
not no_llm,
)
trace_config = _build_trace_config(input_path, format, no_llm)
result = graph.invoke(state, config=trace_config)
if verbose:
result = graph.invoke(state, config=trace_config)
else:
total_steps = 4 + len(ANALYZER_NODE_IDS)
result = dict(state)

# Use stderr for progress so stdout remains clean for structured outputs
err_console = Console(stderr=True)

# Suppress noisy Pydantic serialization warnings scoped to the graph run
with warnings.catch_warnings(), Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
TimeElapsedColumn(),
console=err_console,
transient=True,
) as progress:
warnings.filterwarnings("ignore", category=UserWarning, module="pydantic")
task_id = progress.add_task("Resolving input...", total=total_steps)

num_files = 0
analyzers_done = 0
total_analyzers = len(ANALYZER_NODE_IDS)

for update in graph.stream(state, config=trace_config, stream_mode="updates"):
for node_name, node_output in update.items():
progress.advance(task_id)

# Accumulate scalar outputs needed by the CLI (report_body, risk_score, temp_dir, sarif_report)
if "temp_dir_for_cleanup" in node_output:
result["temp_dir_for_cleanup"] = node_output["temp_dir_for_cleanup"]
if "report_body" in node_output:
result["report_body"] = node_output["report_body"]
if "sarif_report" in node_output:
result["sarif_report"] = node_output["sarif_report"]
if "risk_score" in node_output:
result["risk_score"] = node_output["risk_score"]

# Update UI text based on graph progression
if node_name == "resolve_input":
progress.update(task_id, description="Building context...")
elif node_name == "build_context":
components = node_output.get("components", [])
num_files = len(components)
progress.update(task_id, description=f"Analyzing {num_files} files (0/{total_analyzers} rules applied)...")

# Print a proper report of the files and directories being scanned
tree = Tree("[bold blue]Discovered Files to Scan[/bold blue]")
nodes = {"": tree}
for path in sorted(components):
parts = Path(path).parts
current = ""
for part in parts:
parent = current
current = f"{current}/{part}" if current else part
if current not in nodes:
is_file = current == path
icon = "📄 " if is_file else "📁 "
style = "green" if is_file else "cyan"
nodes[current] = nodes[parent].add(f"[{style}]{icon}{part}[/{style}]")

err_console.print(tree)
err_console.print()

elif node_name in ANALYZER_NODE_IDS:
analyzers_done += 1
progress.update(task_id, description=f"Analyzing {num_files} files ({analyzers_done}/{total_analyzers} rules applied)...")
# Print which rule just finished above the progress bar
err_console.print(f"[dim]✔ Rule completed: {node_name}[/dim]")
elif node_name == "meta_analyzer":
progress.update(task_id, description="Generating report...")
err_console.print("[dim]✔ Rule completed: meta_analyzer (filtering findings)[/dim]")

_write_result(result, output, format)

Expand Down
4 changes: 4 additions & 0 deletions src/skillspector/providers/openai/model_registry.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,7 @@ models:
"gpt-5.4":
context_length: 1000000
max_output_tokens: 128000

"gemini-3.5-flash":
context_length: 1048576
max_output_tokens: 8192
104 changes: 104 additions & 0 deletions tests/integration/test_stream_invoke_parity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Parity test: non-verbose (stream) and verbose (invoke) paths must produce the same CLI-consumed keys.

The CLI ``scan`` command has two code paths:
- ``--verbose``: uses ``graph.invoke()`` → returns full final state.
- default (non-verbose): uses ``graph.stream()`` and manually accumulates a
subset of keys into a ``result`` dict.

If the streaming accumulation loop drifts (e.g. a new key is consumed downstream
but never accumulated), the non-verbose path silently produces wrong output.
This test guards against that.
"""

from __future__ import annotations

import json
from pathlib import Path

import pytest

from skillspector.graph import graph


# Keys the CLI reads from the result dict *after* the graph run.
# Derived from cli.py: _write_result, _cleanup_result, exit-code check.
_CLI_CONSUMED_KEYS = frozenset(
{
"report_body",
"sarif_report",
"risk_score",
"temp_dir_for_cleanup",
}
)


def _stream_result(state: dict) -> dict:
    """Replay the non-verbose (streaming) accumulation used by cli.py.

    Starts from a shallow copy of ``state`` and walks every node update yielded
    by ``graph.stream(..., stream_mode="updates")``. Whenever a node's output
    carries one of the tracked keys, that value is copied into the accumulated
    dict, so a later node's value replaces an earlier one.

    Returns:
        The accumulated dict: the initial state plus the tracked keys seen.
    """
    # Ordered like the original checks so dict insertion order is preserved.
    tracked_keys = (
        "temp_dir_for_cleanup",
        "report_body",
        "sarif_report",
        "risk_score",
    )
    accumulated: dict = dict(state)
    for chunk in graph.stream(state, stream_mode="updates"):
        for node_output in chunk.values():
            for key in tracked_keys:
                if key in node_output:
                    accumulated[key] = node_output[key]
    return accumulated


@pytest.mark.integration
def test_stream_and_invoke_produce_same_cli_keys(tmp_path: Path) -> None:
    """Check that the stream path yields the same CLI-consumed data as invoke.

    Writes a minimal ``SKILL.md`` into ``tmp_path``, runs the graph once via
    ``graph.invoke`` and once via the streaming accumulation helper (LLM
    disabled, JSON output), then asserts that every key in
    ``_CLI_CONSUMED_KEYS`` exists in both results and that the values agree.
    """
    skill_file = tmp_path / "SKILL.md"
    skill_file.write_text(
        "---\nname: parity-test\n---\n# Safe skill\n", encoding="utf-8"
    )
    state: dict = {
        "skill_path": str(tmp_path),
        "output_format": "json",
        "use_llm": False,
    }

    # Each path gets its own copy so neither run can see the other's mutations.
    invoke_result = graph.invoke(dict(state))
    stream_result = _stream_result(dict(state))

    # Presence first: every CLI-consumed key has to exist in both results.
    for key in _CLI_CONSUMED_KEYS:
        assert key in invoke_result, f"invoke result missing CLI key: {key}"
        assert key in stream_result, f"stream result missing CLI key: {key}"

    # Then values. report_body is compared only by its parsed top-level JSON
    # keys, because timestamps differ between two separate runs.
    for key in _CLI_CONSUMED_KEYS:
        inv = invoke_result.get(key)
        stm = stream_result.get(key)
        if key != "report_body":
            assert inv == stm, (
                f"value mismatch for CLI key {key!r}: "
                f"invoke={inv!r}, stream={stm!r}"
            )
            continue

        inv_keys = set(json.loads(inv).keys())
        stm_keys = set(json.loads(stm).keys())
        assert inv_keys == stm_keys, (
            f"report_body top-level keys differ: "
            f"invoke={inv_keys}, stream={stm_keys}"
        )