From 4ba69510a35291415fa985b1127f4c9170d0117d Mon Sep 17 00:00:00 2001 From: Ahmed ElMallah Date: Thu, 18 Jun 2026 11:17:33 -0700 Subject: [PATCH 1/5] fix(python): make pip/poetry scans deterministic under install-first The pip detector resolves graphs from `pip inspect --local`, which reads the active interpreter's site-packages. Running --install-first as a plain `pip install` into the ambient interpreter captured unrelated runner tooling (poetry, build, keyring, virtualenv, date-versioned helpers), so smoke output drifted daily. Poetry's lock fast-path was also bypassed by --install-first, dropping transitive edges (depends_on became empty). - Isolate pip --install-first into a clean, project-scoped virtualenv (internal/detectors/python/venv.go) so pip install and pip inspect run against only the declared deps, not the ambient environment. - Add a pip requirements.lock fast-path (internal/detectors/python/piplock.go) mirroring poetry.lock: build the graph directly from a committed, fully-pinned, `# via`-annotated lock with no install/inspect. - Drop --install-first from the scan-python-poetry target so the committed poetry.lock fast-path runs (stable versions + transitive edges). - Document the decision in docs/ARCHITECTURE.md. 
Co-Authored-By: Claude Opus 4.8 --- docs/ARCHITECTURE.md | 9 + internal/benchmark/testdata/scan_targets.json | 1 - internal/detectors/python/pip.go | 30 ++- internal/detectors/python/piplock.go | 247 ++++++++++++++++++ internal/detectors/python/piplock_test.go | 176 +++++++++++++ internal/detectors/python/venv.go | 95 +++++++ internal/detectors/python/venv_test.go | 79 ++++++ 7 files changed, 627 insertions(+), 10 deletions(-) create mode 100644 internal/detectors/python/piplock.go create mode 100644 internal/detectors/python/piplock_test.go create mode 100644 internal/detectors/python/venv.go create mode 100644 internal/detectors/python/venv_test.go diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 77d0c35f..1d000fa8 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -173,6 +173,15 @@ Some native detector chains intentionally prefer a build-tool command over a com `bomly benchmark` is a hidden maintainer command backed by `internal/benchmark`. It scans public GitHub repositories with native detectors, compares the filtered dependency graph against GitHub Dependency Graph and external Syft SBOMs, and writes deterministic artifacts under `.benchmark-runs/latest`. Bomly scan and SBOM diff execution run in-process through the engine and output model; only the external `git` and `syft` tools remain subprocesses. The in-process adapter builds a native-only registry directly so local configuration and managed-plugin discovery cannot distort benchmark results. Package and relationship scores are comparative engineering signals, not pass/fail gates and not claims that a baseline is ground truth. The benchmark is intentionally local-only so exploratory scoring does not become a release or merge gate before it is calibrated. +### Decision: Python install-first isolates into a venv; pip prefers a committed lock + +The pip detector resolves graphs from `pip inspect --local`, which reads whatever lives in the active interpreter's site-packages. 
Running `--install-first` as a plain `pip install` into the ambient interpreter therefore captured unrelated tooling (the runner's `poetry`, `build`, `keyring`, `virtualenv`, and date-versioned helpers), making output non-deterministic. Two changes address this: + +1. **Isolated install (`internal/detectors/python/venv.go`).** `--install-first` for pip now (re)creates a clean, project-scoped virtualenv under the temp dir — keyed by a hash of the absolute working dir so the install and inspect phases agree — and both `pip install` and `pip inspect` run against that venv. The ambient site-packages no longer leak into resolution. The venv is recreated on every `--install-first` run, so state from an earlier install cannot persist. +2. **Lock fast-path (`internal/detectors/python/piplock.go`).** When a committed, fully-pinned `requirements.lock` (pip-compile style, with `# via` edge annotations) is present, the detector builds the graph directly from it — no install, no inspect — mirroring the existing `poetry.lock` fast-path. Direct dependencies are those whose `# via` references an input file (`-r foo.in`); an input file whose path contains `dev` (case-insensitive) marks development scope, and runtime wins over development during BFS propagation. + +The smoke/benchmark Python targets rely on the fast-paths for determinism: `scan-python-poetry` drops `--install-first` so the committed `poetry.lock` fast-path runs, and `scan-python-pip` pins a fixture commit that ships a `requirements.lock`. The venv isolation remains the correctness backstop for real-world pip projects scanned with `--install-first` and no committed lock. 
+ ## Build Modes Syft and Grype each support two build modes: diff --git a/internal/benchmark/testdata/scan_targets.json b/internal/benchmark/testdata/scan_targets.json index 70f13f55..aa1c62e1 100644 --- a/internal/benchmark/testdata/scan_targets.json +++ b/internal/benchmark/testdata/scan_targets.json @@ -139,7 +139,6 @@ "url": "https://github.com/bomly-dev/example-python-poetry", "ref": "v1.0.0", "ecosystem": "python", - "args": ["--install-first"], "tools": ["poetry"], "benchmark_enabled": true }, diff --git a/internal/detectors/python/pip.go b/internal/detectors/python/pip.go index 2b993f3f..dc04b172 100644 --- a/internal/detectors/python/pip.go +++ b/internal/detectors/python/pip.go @@ -49,7 +49,20 @@ func (d PipDetector) Descriptor() sdk.DetectorDescriptor { // ResolveGraph resolves a Python dependency graph with pip inspect. func (d PipDetector) ResolveGraph(_ context.Context, req sdk.DetectionRequest) (sdk.DetectionResult, error) { - command, err := pipInspectCommand() + workingDir := d.base().workingDir(req.ProjectPath) + + // Fast-path: a committed requirements.lock carries the full transitive tree + // and pinned versions, so we can build the graph without installing into + // (and inspecting) the ambient Python environment. + if lockPath := pipLockFilePath(workingDir); lockPath != "" { + if depsGraph, err := depGraphFromRequirementsLock(lockPath, workingDir); err == nil { + return sdk.DetectionResult{ + Graphs: sdk.SingleGraphContainer(depsGraph, detectors.InferManifestMetadata(req, pipEvidencePatterns)), + }, nil + } + } + + command, err := pipInspectCommandForProject(workingDir) if err != nil { return sdk.DetectionResult{}, err } @@ -81,29 +94,28 @@ func (d PipDetector) base() baseDetector { } } -// Install prepares pip dependencies before graph resolution. +// Install prepares pip dependencies before graph resolution. 
It installs into a +// clean, project-scoped virtualenv so the subsequent `pip inspect` sees only +// the declared dependencies, not whatever tooling lives in the ambient +// site-packages. func (d PipDetector) Install(ctx context.Context, req sdk.DetectionRequest) error { workingDir := d.base().workingDir(req.ProjectPath) requirementsFile, err := installRequirementsPath(workingDir) if err != nil { return err } - command, err := pythonCommand() + venvPython, err := createPythonVenv(ctx, d.base(), req, "pip detector", pythonVenvDir(workingDir)) if err != nil { return err } - command = append(command, "-m", "pip", "install", "-r", requirementsFile) + command := []string{venvPython, "-m", "pip", "install", "-r", requirementsFile} if err := d.base().install(ctx, req, "pip detector", command); err != nil { return err } // Also install requirements-dev.txt when present alongside the primary file. devReqPath := filepath.Join(workingDir, "requirements-dev.txt") if exists, _ := system.FileExists(devReqPath); pipShouldInstallDevRequirements(req.ScopeFilter, requirementsFile, exists) { - devCommand, err := pythonCommand() - if err != nil { - return err - } - devCommand = append(devCommand, "-m", "pip", "install", "-r", "requirements-dev.txt") + devCommand := []string{venvPython, "-m", "pip", "install", "-r", "requirements-dev.txt"} if err := d.base().install(ctx, req, "pip detector (dev)", devCommand); err != nil { return err } diff --git a/internal/detectors/python/piplock.go b/internal/detectors/python/piplock.go new file mode 100644 index 00000000..83034a8c --- /dev/null +++ b/internal/detectors/python/piplock.go @@ -0,0 +1,247 @@ +package python + +import ( + "bufio" + "bytes" + "fmt" + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/bomly-dev/bomly-cli/sdk" +) + +// pipLockFileName is the committed, fully-pinned lock the pip fast-path reads. 
+// It is a pip-compile-style requirements file: every package pinned with "==" +// and annotated with "# via" comments that record the edges. Reading it avoids +// installing into (and inspecting) the ambient Python environment, which would +// otherwise capture whatever tooling happens to live in site-packages. +const pipLockFileName = "requirements.lock" + +// pipLockPinnedLine matches a top-level pinned requirement ("name==version"). +// The leading character class excludes whitespace and '#' so indented comment +// lines never match. +var pipLockPinnedLine = regexp.MustCompile(`^([A-Za-z0-9][A-Za-z0-9._-]*)\s*==\s*([^\s;\\]+)`) + +// pipLockDevHint marks a "-r"/"-c" input file that denotes development scope +// (e.g. requirements-dev.in). Matched case-insensitively. +var pipLockDevHint = regexp.MustCompile(`(?i)dev`) + +// pipLockEntry is one pinned package plus the sources that pulled it in. +type pipLockEntry struct { + name string + version string + viaPkgs []string // parent package names (transitive edges) + viaFiles []string // "-r"/"-c" input files (direct dependencies) +} + +// pipLockFilePath returns the path to requirements.lock if it exists inside +// projectPath, or an empty string if it does not. +func pipLockFilePath(projectPath string) string { + p := filepath.Join(projectPath, pipLockFileName) + if _, err := os.Stat(p); err == nil { + return p + } + return "" +} + +// depGraphFromRequirementsLock parses a pip-compile-style requirements.lock and +// builds a dependency graph with transitive edges and runtime/development +// scope. Direct dependencies are those whose "# via" annotation references an +// input file ("-r foo.in"); a file matching pipLockDevHint marks development +// scope. Runtime always wins over development during BFS propagation. 
+func depGraphFromRequirementsLock(lockPath, projectPath string) (*sdk.Graph, error) { + data, err := os.ReadFile(lockPath) + if err != nil { + return nil, fmt.Errorf("read %s: %w", pipLockFileName, err) + } + + entries, err := parsePipLock(data) + if err != nil { + return nil, err + } + if len(entries) == 0 { + return nil, fmt.Errorf("%s contains no pinned packages", pipLockFileName) + } + + nodesByName := make(map[string]*sdk.Dependency, len(entries)) + for _, e := range entries { + node := sdk.NewDependency(sdk.Dependency{Coordinates: sdk.Coordinates{Ecosystem: sdk.EcosystemPython, + Name: e.name, + Version: e.version, + PackageManager: sdk.PackageManagerPip, + Language: "python", + Type: sdk.PackageTypePackage, + PURL: sdk.BuildPackageURL("pypi", "", e.name, e.version)}, + }) + nodesByName[e.name] = node + } + + g := sdk.New() + root := sdk.NewDependency(sdk.Dependency{Coordinates: sdk.Coordinates{Ecosystem: sdk.EcosystemPython, + Name: "root", + PackageManager: sdk.PackageManagerPip, + Language: "python", + Type: sdk.PackageTypeApplication}, + }) + if err := g.AddNode(root); err != nil { + return nil, fmt.Errorf("add root node: %w", err) + } + for _, node := range nodesByName { + if err := addNodeIfMissing(g, node); err != nil { + return nil, err + } + } + + // Wire edges and seed direct-dependency scopes. 
+ directScope := make(map[string]sdk.Scope, len(entries)) + for _, e := range entries { + child := nodesByName[e.name] + if child == nil { + continue + } + for _, file := range e.viaFiles { + scope := sdk.ScopeRuntime + if pipLockDevHint.MatchString(file) { + scope = sdk.ScopeDevelopment + } + directScope[child.ID] = sdk.MergeScope(directScope[child.ID], scope) + if err := g.AddEdge(root.ID, child.ID); err != nil { + return nil, fmt.Errorf("wire root→%s: %w", e.name, err) + } + } + for _, parentName := range e.viaPkgs { + parent := nodesByName[normalizePythonName(parentName)] + if parent == nil || parent.ID == child.ID { + continue + } + _ = g.AddEdge(parent.ID, child.ID) + } + } + + // Orphans (no incoming edge) attach to root to keep a single-root graph. + for _, node := range nodesByName { + if node == nil { + continue + } + if dependents, _ := g.Dependents(node.ID); len(dependents) == 0 { + _ = g.AddEdge(root.ID, node.ID) + } + } + + propagatePipScopes(g, root, directScope) + return g, nil +} + +// parsePipLock tokenizes a pip-compile-style lock into pinned entries. A pinned +// line ("name==version") opens an entry; the indented "# via ..." comment lines +// that follow attribute the entry to parent packages or input files. +func parsePipLock(data []byte) ([]*pipLockEntry, error) { + var entries []*pipLockEntry + var current *pipLockEntry + + scanner := bufio.NewScanner(bytes.NewReader(data)) + scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) + for scanner.Scan() { + line := scanner.Text() + trimmed := strings.TrimSpace(line) + switch { + case trimmed == "": + continue + case !strings.HasPrefix(line, " ") && !strings.HasPrefix(trimmed, "#"): + m := pipLockPinnedLine.FindStringSubmatch(line) + if m == nil { + // Unpinned/unsupported line (e.g. a bare "-e ." editable install). 
+ current = nil + continue + } + current = &pipLockEntry{name: normalizePythonName(m[1]), version: m[2]} + entries = append(entries, current) + case current != nil && strings.HasPrefix(trimmed, "#"): + parsePipLockViaLine(trimmed, current) + } + } + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("scan %s: %w", pipLockFileName, err) + } + return entries, nil +} + +// parsePipLockViaLine consumes one annotation comment line and records its +// sources on the entry. Handles both the "# via requests" single-line form and +// the multi-line form ("# via" followed by "# requests" / "# -r foo.in"). +func parsePipLockViaLine(comment string, entry *pipLockEntry) { + body := strings.TrimSpace(strings.TrimPrefix(comment, "#")) + switch { + case body == "via": + body = "" + case strings.HasPrefix(body, "via "): + body = strings.TrimSpace(body[len("via "):]) + } + if body == "" { + return + } + fields := strings.FieldsFunc(body, func(r rune) bool { return r == ' ' || r == '\t' || r == ',' }) + for i := 0; i < len(fields); i++ { + switch fields[i] { + case "-r", "-c", "--requirement", "--constraint": + if i+1 < len(fields) { + entry.viaFiles = append(entry.viaFiles, fields[i+1]) + i++ + } + default: + entry.viaPkgs = append(entry.viaPkgs, fields[i]) + } + } +} + +// propagatePipScopes seeds direct-dependency scopes and BFS-propagates them so +// that any package reachable on a runtime path is marked runtime even if it is +// also a development dependency. Remaining unscoped packages default to runtime. 
+func propagatePipScopes(g *sdk.Graph, root *sdk.Dependency, directScope map[string]sdk.Scope) { + directDeps, _ := g.DirectDependencies(root.ID) + propagated := make(map[string]sdk.Scope, g.Size()) + queue := make([]*sdk.Dependency, 0, len(directDeps)) + for _, dep := range directDeps { + if dep == nil { + continue + } + scope := directScope[dep.ID] + if scope == sdk.ScopeUnknown { + scope = sdk.ScopeRuntime + } + propagated[dep.ID] = sdk.MergeScope(propagated[dep.ID], scope) + dep.AddScope(propagated[dep.ID]) + queue = append(queue, dep) + } + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + scope := propagated[current.ID] + if scope == sdk.ScopeUnknown { + continue + } + children, err := g.DirectDependencies(current.ID) + if err != nil { + continue + } + for _, child := range children { + if child == nil || child.ID == root.ID { + continue + } + next := sdk.MergeScope(propagated[child.ID], scope) + if next == propagated[child.ID] && child.PrimaryScope() == next { + continue + } + propagated[child.ID] = next + child.AddScope(next) + queue = append(queue, child) + } + } + for _, pkg := range g.Nodes() { + if pkg != nil && pkg.ID != root.ID && pkg.PrimaryScope() == sdk.ScopeUnknown { + pkg.AddScope(sdk.ScopeRuntime) + } + } +} diff --git a/internal/detectors/python/piplock_test.go b/internal/detectors/python/piplock_test.go new file mode 100644 index 00000000..fe83b87b --- /dev/null +++ b/internal/detectors/python/piplock_test.go @@ -0,0 +1,176 @@ +package python + +import ( + "os" + "path/filepath" + "sort" + "testing" + + "github.com/bomly-dev/bomly-cli/sdk" +) + +const sampleRequirementsLock = `# +# This file is autogenerated by pip-compile. 
+# +certifi==2024.8.30 + # via requests +charset-normalizer==2.0.12 + # via requests +django==1.11.29 + # via -r requirements.in +idna==2.8 + # via requests +requests==2.21.0 + # via -r requirements.in +urllib3==1.24.3 + # via + # -r requirements.in + # requests +pytest==7.4.3 + # via -r requirements-dev.in +` + +func writeLock(t *testing.T, body string) (string, string) { + t.Helper() + dir := t.TempDir() + lockPath := filepath.Join(dir, pipLockFileName) + if err := os.WriteFile(lockPath, []byte(body), 0o644); err != nil { + t.Fatalf("write lock: %v", err) + } + return lockPath, dir +} + +func directDepIDs(t *testing.T, g *sdk.Graph, id string) []string { + t.Helper() + deps, err := g.DirectDependencies(id) + if err != nil { + t.Fatalf("direct deps of %s: %v", id, err) + } + ids := make([]string, 0, len(deps)) + for _, d := range deps { + ids = append(ids, d.ID) + } + sort.Strings(ids) + return ids +} + +func TestDepGraphFromRequirementsLock(t *testing.T) { + lockPath, dir := writeLock(t, sampleRequirementsLock) + g, err := depGraphFromRequirementsLock(lockPath, dir) + if err != nil { + t.Fatalf("depGraphFromRequirementsLock: %v", err) + } + + // Pinned versions become nodes. + for _, want := range []string{ + "certifi@2024.8.30", + "idna@2.8", + "requests@2.21.0", + "urllib3@1.24.3", + "pytest@7.4.3", + } { + if _, ok := g.Node(want); !ok { + t.Errorf("missing node %s", want) + } + } + + // requests pulls in its transitive deps via "# via requests". + got := directDepIDs(t, g, "requests@2.21.0") + want := []string{ + "certifi@2024.8.30", + "charset-normalizer@2.0.12", + "idna@2.8", + "urllib3@1.24.3", + } + if len(got) != len(want) { + t.Fatalf("requests edges = %v, want %v", got, want) + } + for i := range want { + if got[i] != want[i] { + t.Fatalf("requests edges = %v, want %v", got, want) + } + } + + // Direct deps (via -r requirements.in) hang off the root. 
+ rootDeps := directDepIDs(t, g, findRootID(t, g)) + if !contains(rootDeps, "requests@2.21.0") || !contains(rootDeps, "django@1.11.29") { + t.Errorf("root direct deps = %v, want requests + django", rootDeps) + } +} + +func TestRequirementsLockScopes(t *testing.T) { + lockPath, dir := writeLock(t, sampleRequirementsLock) + g, err := depGraphFromRequirementsLock(lockPath, dir) + if err != nil { + t.Fatalf("depGraphFromRequirementsLock: %v", err) + } + // pytest is dev-only (via requirements-dev.in). + pytest, ok := g.Node("pytest@7.4.3") + if !ok { + t.Fatal("missing pytest node") + } + if pytest.PrimaryScope() != sdk.ScopeDevelopment { + t.Errorf("pytest scope = %v, want development", pytest.PrimaryScope()) + } + // urllib3 is reachable on a runtime path (requests) even though it is also + // listed as a direct runtime dep — runtime must win. + urllib3, ok := g.Node("urllib3@1.24.3") + if !ok { + t.Fatal("missing urllib3 node") + } + if urllib3.PrimaryScope() != sdk.ScopeRuntime { + t.Errorf("urllib3 scope = %v, want runtime", urllib3.PrimaryScope()) + } +} + +func TestParsePipLockMultilineVia(t *testing.T) { + entries, err := parsePipLock([]byte(sampleRequirementsLock)) + if err != nil { + t.Fatalf("parsePipLock: %v", err) + } + var urllib3 *pipLockEntry + for _, e := range entries { + if e.name == "urllib3" { + urllib3 = e + } + } + if urllib3 == nil { + t.Fatal("urllib3 entry not parsed") + } + if !contains(urllib3.viaPkgs, "requests") { + t.Errorf("urllib3 viaPkgs = %v, want requests", urllib3.viaPkgs) + } + if !contains(urllib3.viaFiles, "requirements.in") { + t.Errorf("urllib3 viaFiles = %v, want requirements.in", urllib3.viaFiles) + } +} + +func TestPipLockFilePath(t *testing.T) { + _, dir := writeLock(t, sampleRequirementsLock) + if got := pipLockFilePath(dir); got == "" { + t.Error("expected lock path, got empty") + } + if got := pipLockFilePath(t.TempDir()); got != "" { + t.Errorf("expected empty path for dir without lock, got %q", got) + } +} + +func 
findRootID(t *testing.T, g *sdk.Graph) string { + t.Helper() + for _, n := range g.Nodes() { + if n.Type == sdk.PackageTypeApplication { + return n.ID + } + } + t.Fatal("no application root node") + return "" +} + +func contains(s []string, v string) bool { + for _, x := range s { + if x == v { + return true + } + } + return false +} diff --git a/internal/detectors/python/venv.go b/internal/detectors/python/venv.go new file mode 100644 index 00000000..5a5cec73 --- /dev/null +++ b/internal/detectors/python/venv.go @@ -0,0 +1,95 @@ +package python + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "os" + "path/filepath" + "runtime" + "time" + + "github.com/bomly-dev/bomly-cli/internal/logging" + "github.com/bomly-dev/bomly-cli/internal/system" + "github.com/bomly-dev/bomly-cli/sdk" + "go.uber.org/zap" +) + +// pythonVenvDir returns a deterministic, project-scoped virtualenv directory. +// The install-first step populates it and graph resolution inspects it, so both +// phases must derive the same path from the same working dir. +func pythonVenvDir(workingDir string) string { + abs, err := filepath.Abs(workingDir) + if err != nil { + abs = workingDir + } + sum := sha256.Sum256([]byte(abs)) + return filepath.Join(os.TempDir(), "bomly-pyvenv-"+hex.EncodeToString(sum[:8])) +} + +// venvPythonPath returns the python executable inside venvDir, or "" when the +// venv has not been created yet. +func venvPythonPath(venvDir string) string { + candidate := filepath.Join(venvDir, "bin", "python") + if runtime.GOOS == "windows" { + candidate = filepath.Join(venvDir, "Scripts", "python.exe") + } + if ok, _ := system.FileExists(candidate); ok { + return candidate + } + return "" +} + +// pipInspectCommandForProject prefers the project's isolated venv for +// `pip inspect`, falling back to the ambient interpreter when no venv exists +// (i.e. install-first was not run). 
Inspecting the venv keeps the resolved +// graph free of whatever unrelated tooling lives in the ambient site-packages. +func pipInspectCommandForProject(workingDir string) ([]string, error) { + if py := venvPythonPath(pythonVenvDir(workingDir)); py != "" { + return []string{py, "-m", "pip", "inspect", "--local"}, nil + } + return pipInspectCommand() +} + +// createPythonVenv (re)creates a clean virtualenv at venvDir using the ambient +// interpreter and returns the path to the venv's python executable. The venv is +// recreated from scratch so a stale environment never leaks into resolution. +func createPythonVenv(ctx context.Context, base baseDetector, req sdk.DetectionRequest, detectorName, venvDir string) (string, error) { + logger := base.Logger + if logger == nil { + logger = zap.NewNop() + } + if err := os.RemoveAll(venvDir); err != nil { + return "", fmt.Errorf("reset venv %s: %w", venvDir, err) + } + pythonCmd, err := pythonCommand() + if err != nil { + return "", err + } + command := append(append([]string{}, pythonCmd...), "-m", "venv", venvDir) + + cmd := system.Command(command[0], command[1:]...) + cmd.Dir = base.workingDir(req.ProjectPath) + cmd.Env = pythonCommandEnv() + commandStderr := logging.NewCommandStderr(req.Stderr, req.Verbose) + cmd.Stderr = commandStderr + started := time.Now() + logger.Info(fmt.Sprintf("%s creating isolated virtualenv", detectorName)) + logger.Debug("creating python virtualenv", zap.String("detector", detectorName), zap.String("working_dir", cmd.Dir), zap.String("venv", venvDir), zap.String("executable", command[0]), zap.Strings("args", command[1:])) + if err := cmd.Run(); err != nil { + fields := []zap.Field{zap.Error(err)} + if commandStderr.String() != "" { + fields = append(fields, zap.String("stderr", commandStderr.String())) + } + logger.Debug("python virtualenv creation failed", fields...) 
+ return "", fmt.Errorf("create venv: %w", err) + } + logger.Info(fmt.Sprintf("%s virtualenv ready in %s", detectorName, logging.FormatDuration(time.Since(started)))) + + venvPython := venvPythonPath(venvDir) + if venvPython == "" { + return "", fmt.Errorf("venv python not found under %s", venvDir) + } + return venvPython, nil +} diff --git a/internal/detectors/python/venv_test.go b/internal/detectors/python/venv_test.go new file mode 100644 index 00000000..094bca00 --- /dev/null +++ b/internal/detectors/python/venv_test.go @@ -0,0 +1,79 @@ +package python + +import ( + "os" + "path/filepath" + "runtime" + "strings" + "testing" +) + +func TestPythonVenvDirIsDeterministicAndScoped(t *testing.T) { + a := pythonVenvDir("/tmp/project-a") + b := pythonVenvDir("/tmp/project-b") + if a == b { + t.Errorf("different projects mapped to the same venv dir: %s", a) + } + if a != pythonVenvDir("/tmp/project-a") { + t.Error("venv dir is not stable for the same working dir") + } + if filepath.Dir(a) != filepath.Clean(os.TempDir()) { + t.Errorf("venv dir %s not under temp dir %s", a, os.TempDir()) + } +} + +func TestVenvPythonPath(t *testing.T) { + venvDir := t.TempDir() + if got := venvPythonPath(venvDir); got != "" { + t.Errorf("expected empty path for empty venv, got %q", got) + } + + rel := filepath.Join("bin", "python") + if runtime.GOOS == "windows" { + rel = filepath.Join("Scripts", "python.exe") + } + pyPath := filepath.Join(venvDir, rel) + if err := os.MkdirAll(filepath.Dir(pyPath), 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(pyPath, []byte("#!/bin/sh\n"), 0o755); err != nil { + t.Fatal(err) + } + if got := venvPythonPath(venvDir); got != pyPath { + t.Errorf("venvPythonPath = %q, want %q", got, pyPath) + } +} + +func TestPipInspectCommandPrefersVenv(t *testing.T) { + // With no venv present the command falls back to the ambient interpreter. 
+ workingDir := t.TempDir() + cmd, err := pipInspectCommandForProject(workingDir) + if err != nil { + t.Skipf("no ambient python available: %v", err) + } + if strings.Contains(strings.Join(cmd, " "), "bomly-pyvenv-") { + t.Errorf("expected ambient interpreter without a venv, got %v", cmd) + } + + // Create the project's venv python; the command must now target it. + venvDir := pythonVenvDir(workingDir) + rel := filepath.Join("bin", "python") + if runtime.GOOS == "windows" { + rel = filepath.Join("Scripts", "python.exe") + } + pyPath := filepath.Join(venvDir, rel) + t.Cleanup(func() { _ = os.RemoveAll(venvDir) }) + if err := os.MkdirAll(filepath.Dir(pyPath), 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(pyPath, []byte("#!/bin/sh\n"), 0o755); err != nil { + t.Fatal(err) + } + cmd, err = pipInspectCommandForProject(workingDir) + if err != nil { + t.Fatalf("pipInspectCommandForProject: %v", err) + } + if len(cmd) == 0 || cmd[0] != pyPath { + t.Errorf("pipInspectCommandForProject = %v, want it to target %q", cmd, pyPath) + } +} From 1b38f08503c69ad7fea07b9c8551ffa80d0d439b Mon Sep 17 00:00:00 2001 From: Ahmed ElMallah Date: Thu, 18 Jun 2026 12:02:38 -0700 Subject: [PATCH 2/5] test(smoke): point pip targets at the committed requirements.lock example-python-pip now ships a fully-pinned requirements.lock, so the pip detector resolves a deterministic graph via its lock fast-path: - scan-python-pip: pin to the lock commit and drop --install-first (the fast-path needs no install, and installing into the ambient interpreter was the source of the drift). - scan-python-pip-reachability: repoint to bomly-dev/example-python-pip at the same commit; its main.py keeps the jwt/django/rsa/requests call paths reachability needs, and the committed lock makes the graph stable. 
Co-Authored-By: Claude Opus 4.8 --- internal/benchmark/testdata/scan_targets.json | 3 +-- test/smoke/smoke_test.go | 24 ++++++++++--------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/internal/benchmark/testdata/scan_targets.json b/internal/benchmark/testdata/scan_targets.json index aa1c62e1..95f0279f 100644 --- a/internal/benchmark/testdata/scan_targets.json +++ b/internal/benchmark/testdata/scan_targets.json @@ -26,9 +26,8 @@ { "name": "scan-python-pip", "url": "https://github.com/bomly-dev/example-python-pip", - "ref": "v1.0.0", + "ref": "fe04c758134b95dab102e1fce10275f7d18c0cf2", "ecosystem": "python", - "args": ["--install-first"], "tools": ["pip"], "benchmark_enabled": true }, diff --git a/test/smoke/smoke_test.go b/test/smoke/smoke_test.go index 80d0897f..b3bc9785 100644 --- a/test/smoke/smoke_test.go +++ b/test/smoke/smoke_test.go @@ -158,18 +158,20 @@ func TestScan(t *testing.T) { tools: []string{"npm"}, }, { - // pyreach smoke pinned to veracode/example-python3-pip, - // a deliberately-vulnerable demo. main.py imports - // jwt / django / rsa / requests directly; requirements.txt - // pins ten more deps that are either unimported (feedparser, - // sgmllib3k) or reachable only transitively (urllib3, idna, - // chardet, certifi, pyasn1, pytz). Exercises the - // directly-imported, transitively-reachable, and - // unreachable branches plus the module-to-distribution - // override (jwt → pyjwt). Goldens scrub timestamps via - // normalizeReachability. + // pyreach smoke pinned to bomly-dev/example-python-pip, a + // deliberately-vulnerable demo. main.py imports + // jwt / django / rsa / requests directly; the committed + // requirements.lock pins the full transitive closure so the + // detector resolves a stable graph via the requirements.lock + // fast-path instead of inspecting the ambient environment. 
+ // Exercises the directly-imported, transitively-reachable + // (urllib3, idna, chardet, certifi via requests; pyasn1 via + // rsa; pytz via django), and unimported (feedparser, sgmllib3k, + // jinja2, pyyaml, sqlalchemy) branches plus the + // module-to-distribution override (jwt → pyjwt). Goldens scrub + // timestamps via normalizeReachability. name: "scan-python-pip-reachability", - args: []string{"scan", "--url", "https://github.com/veracode/example-python3-pip", "--ref", "e19d10938caf3e06730c23047ae118cd59638e41", "--enrich", "--analyze", "--format", "json"}, + args: []string{"scan", "--url", "https://github.com/bomly-dev/example-python-pip", "--ref", "fe04c758134b95dab102e1fce10275f7d18c0cf2", "--enrich", "--analyze", "--format", "json"}, tools: []string{"pip"}, }, { From 7e4f5e796052d9ee6bd1361d2c5fc21a41cb9e12 Mon Sep 17 00:00:00 2001 From: Ahmed ElMallah Date: Thu, 18 Jun 2026 12:11:36 -0700 Subject: [PATCH 3/5] test(smoke): move all fixtures to bomly-dev example repos Repoint every smoke/audit target that used an external demo repo to the equivalent bomly-dev fixture (pinned by commit; all carry committed lockfiles / go.sum so resolution is deterministic): - scan-go-reachability -> example-go-gomod (x/text language.Parse reached via main -> sub3.Baz) - scan-java-maven-reachability -> example-java-maven (Main.java imports commons-fileupload / xmlsec / jbcrypt / spring-web) - scan-npm-reachability -> example-javascript-npm (js-yaml.load, direct + transitive via `to`) - scan-npm-scope-runtime -> example-javascript-npm (dev dep mocha excluded under --scope runtime) - scan-npm-audit -> example-javascript-npm (vulnerable deps) Also scrub residual third-party branding from a maven detector test fixture (com.srcclr -> com.bomly) and a prose template note. Goldens for the repointed targets need regeneration. 
Co-Authored-By: Claude Opus 4.8 --- internal/detectors/maven/detector_test.go | 4 +- internal/support/prose/README.md | 2 +- test/smoke/audit_test.go | 2 +- test/smoke/smoke_test.go | 54 ++++++++++++----------- 4 files changed, 33 insertions(+), 29 deletions(-) diff --git a/internal/detectors/maven/detector_test.go b/internal/detectors/maven/detector_test.go index 92685c0e..7ddd06e6 100644 --- a/internal/detectors/maven/detector_test.go +++ b/internal/detectors/maven/detector_test.go @@ -56,7 +56,7 @@ func TestDepGraphFromMavenTGF_WithMavenLogPrefixes(t *testing.T) { [INFO] Scanning for projects... [INFO] [INFO] --- maven-dependency-plugin:2.8:tree (default-cli) @ example-java-maven --- -[INFO] 319144230 com.srcclr:example-java-maven:jar:1.0-SNAPSHOT +[INFO] 319144230 com.bomly:example-java-maven:jar:1.0-SNAPSHOT [INFO] 1268237485 org.apache.struts:struts2-core:jar:2.5.12:compile [INFO] 1983948209 org.freemarker:freemarker:jar:2.3.23:compile [INFO] 1778257620 org.mindrot:jbcrypt:jar:0.3m:compile @@ -77,7 +77,7 @@ func TestDepGraphFromMavenTGF_WithMavenLogPrefixes(t *testing.T) { t.Fatalf("expected 4 packages, got %d", g.Size()) } - rootDeps, err := g.DirectDependencies("com.srcclr:example-java-maven@1.0-SNAPSHOT") + rootDeps, err := g.DirectDependencies("com.bomly:example-java-maven@1.0-SNAPSHOT") if err != nil { t.Fatalf("dependencies(root) error = %v", err) } diff --git a/internal/support/prose/README.md b/internal/support/prose/README.md index 7bf3d2be..4cb6a088 100644 --- a/internal/support/prose/README.md +++ b/internal/support/prose/README.md @@ -22,7 +22,7 @@ prose/ ## Template -Each page is structured like a Veracode "Find vulnerabilities in X" page: +Each page is structured like a "Find vulnerabilities in X" landing page: brief intro, prerequisites, examples, limitations. 
Default H2 spine: ```markdown diff --git a/test/smoke/audit_test.go b/test/smoke/audit_test.go index 575cacbc..82505194 100644 --- a/test/smoke/audit_test.go +++ b/test/smoke/audit_test.go @@ -101,7 +101,7 @@ func TestAuditScan(t *testing.T) { }, { name: "scan-npm-audit", - args: []string{"scan", "--url", "https://github.com/ljharb/qs", "--ref", "v6.13.0", "--format", "json", "--enrich", "--audit", "--matchers", "osv", "--auditors", "vulnerability"}, + args: []string{"scan", "--url", "https://github.com/bomly-dev/example-javascript-npm", "--ref", "559a762aeef68b0e5c818f62dfba67abc369912f", "--format", "json", "--enrich", "--audit", "--matchers", "osv", "--auditors", "vulnerability"}, tools: []string{"npm"}, }, } diff --git a/test/smoke/smoke_test.go b/test/smoke/smoke_test.go index b3bc9785..0c7bbfb7 100644 --- a/test/smoke/smoke_test.go +++ b/test/smoke/smoke_test.go @@ -132,29 +132,29 @@ func TestScan(t *testing.T) { tools []string // required tools - skip if any missing }{ { - // Reachability smoke pinned to veracode/example-go-modules - // at the "Adding a known vulnerable method" commit. The + // Reachability smoke pinned to bomly-dev/example-go-gomod. The // repo deliberately calls into golang.org/x/text v0.3.5's - // language.Parse (GHSA-69ch-w2m2-3vjp / CVE-2022-32149), - // which the analyzer reports as reachable at the symbol - // tier with a non-empty call_paths slice. Goldens scrub - // volatile fields (call frame line numbers, file paths, - // analyzed_at) via normalizeReachability. + // language.Parse (GHSA-69ch-w2m2-3vjp / CVE-2022-32149) via + // main → sub3.Baz, which the analyzer reports as reachable at + // the symbol tier with a non-empty call_paths slice. go.sum + // pins the graph. Goldens scrub volatile fields (call frame + // line numbers, file paths, analyzed_at) via + // normalizeReachability. 
name: "scan-go-reachability", - args: []string{"scan", "--url", "https://github.com/veracode/example-go-modules", "--ref", "555ebe70813318ce80f46e3c4fc6623012e0317d", "--enrich", "--analyze", "--format", "json"}, + args: []string{"scan", "--url", "https://github.com/bomly-dev/example-go-gomod", "--ref", "97f0e49268e11fda722e41e28176ce10f66bc491", "--enrich", "--analyze", "--format", "json"}, tools: []string{"go"}, }, { - // jsreach smoke pinned to snyk-labs/nodejs-goof, a real - // vulnerable demo Node.js todo app. The project's app.js - // imports a meaningful subset of its npm dependencies - // directly (mongoose, lodash, express-fileupload, etc.) - // while many transitive ones are unreachable from app - // code, so the smoke exercises both "reachable (package)" - // and "unreachable (package)" branches of the analyzer. - // Goldens scrub timestamps via normalizeReachability. + // jsreach smoke pinned to bomly-dev/example-javascript-npm, a + // deliberately-vulnerable demo Node.js app. server.js calls + // js-yaml.load directly (RCE) and transitively through the + // `to` lib, and imports lodash/marked, while other deps are + // unreachable from app code — so the smoke exercises both + // "reachable (package)" and "unreachable (package)" branches + // of the analyzer. package-lock.json pins the graph. Goldens + // scrub timestamps via normalizeReachability. 
name: "scan-npm-reachability", - args: []string{"scan", "--url", "https://github.com/snyk-labs/nodejs-goof", "--ref", "add14ba59e98240d9e00a235dd7d42cd61ae9912", "--enrich", "--analyze", "--format", "json"}, + args: []string{"scan", "--url", "https://github.com/bomly-dev/example-javascript-npm", "--ref", "559a762aeef68b0e5c818f62dfba67abc369912f", "--enrich", "--analyze", "--format", "json"}, tools: []string{"npm"}, }, { @@ -175,23 +175,27 @@ func TestScan(t *testing.T) { tools: []string{"pip"}, }, { - // jvmreach smoke pinned to veracode/example-java-maven, a + // jvmreach smoke pinned to bomly-dev/example-java-maven, a // deliberately-vulnerable Maven demo. Main.java imports // Apache Commons FileUpload, Apache XMLSec, jBCrypt, and - // Spring Web. requirements include Struts2, Keycloak, + // Spring Web. Dependencies include Struts2, Keycloak, // H2, Kafka, OrientDB, JavaMelody, Sling — most of which // are unimported from app source but reachable through - // dep edges. Exercises directly-imported, transitively- - // reachable, and unreachable branches plus the - // package-prefix map. Goldens scrub timestamps via - // normalizeReachability. + // dep edges. Maven resolves the pinned pom deterministically. + // Exercises directly-imported, transitively-reachable, and + // unreachable branches plus the package-prefix map. Goldens + // scrub timestamps via normalizeReachability. name: "scan-java-maven-reachability", - args: []string{"scan", "--url", "https://github.com/veracode/example-java-maven", "--ref", "509948ba5a02ffab48e7260031d4a1e78d010891", "--enrich", "--analyze", "--format", "json"}, + args: []string{"scan", "--url", "https://github.com/bomly-dev/example-java-maven", "--ref", "93bb3aae614e2f2c6cb65f5ea2315846f5234150", "--enrich", "--analyze", "--format", "json"}, tools: []string{"mvn"}, }, { + // Scope smoke pinned to bomly-dev/example-javascript-npm. 
With + // --scope runtime the dev dependency (mocha) is excluded, so the + // golden proves runtime-only filtering. package-lock.json pins + // the graph. name: "scan-npm-scope-runtime", - args: []string{"scan", "--url", "https://github.com/ljharb/qs", "--ref", "v6.13.0", "--format", "json", "--scope", "runtime"}, + args: []string{"scan", "--url", "https://github.com/bomly-dev/example-javascript-npm", "--ref", "559a762aeef68b0e5c818f62dfba67abc369912f", "--format", "json", "--scope", "runtime"}, tools: []string{"npm"}, }, { From 229ade81cf9170a4696e986b28854d5cb60ade6b Mon Sep 17 00:00:00 2001 From: Ahmed ElMallah Date: Thu, 18 Jun 2026 15:20:49 -0700 Subject: [PATCH 4/5] test(smoke): move basic go scan/diff/explain off google/uuid Repoint the audit and lite go targets (scan-go-enrich/audit/audit-high, diff-go-audit, explain-go-enrich, lite-scan-go, lite-diff-go, lite-explain-go) from google/uuid to bomly-dev/example-go-gomod, matching the convention already used by the basic scan-go/diff-go/explain-go cases (tags v1.0.0 / v0.9.0..v1.0.0; explain targets golang.org/x/text). Every smoke/audit scan --url target now resolves to a bomly-dev example repo. 
Co-Authored-By: Claude Opus 4.8 --- test/smoke/audit_test.go | 12 ++++++------ test/smoke/lite_test.go | 6 +++--- test/smoke/smoke_test.go | 8 ++++---- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/test/smoke/audit_test.go b/test/smoke/audit_test.go index 82505194..acc3c767 100644 --- a/test/smoke/audit_test.go +++ b/test/smoke/audit_test.go @@ -86,22 +86,22 @@ func TestAuditScan(t *testing.T) { }{ { name: "scan-go-enrich", - args: []string{"scan", "--url", "https://github.com/google/uuid", "--ref", "v1.6.0", "--format", "json", "--enrich", "--matchers", "osv"}, + args: []string{"scan", "--url", "https://github.com/bomly-dev/example-go-gomod", "--ref", "v1.0.0", "--format", "json", "--enrich", "--matchers", "osv"}, tools: []string{"go"}, }, { name: "scan-go-audit", - args: []string{"scan", "--url", "https://github.com/google/uuid", "--ref", "v1.6.0", "--format", "json", "--enrich", "--audit", "--matchers", "osv", "--auditors", "vulnerability"}, + args: []string{"scan", "--url", "https://github.com/bomly-dev/example-go-gomod", "--ref", "v1.0.0", "--format", "json", "--enrich", "--audit", "--matchers", "osv", "--auditors", "vulnerability"}, tools: []string{"go"}, }, { name: "scan-go-audit-high", - args: []string{"scan", "--url", "https://github.com/google/uuid", "--ref", "v1.6.0", "--format", "json", "--enrich", "--audit", "--fail-on", "high", "--matchers", "osv", "--auditors", "vulnerability"}, + args: []string{"scan", "--url", "https://github.com/bomly-dev/example-go-gomod", "--ref", "v1.0.0", "--format", "json", "--enrich", "--audit", "--fail-on", "high", "--matchers", "osv", "--auditors", "vulnerability"}, tools: []string{"go"}, }, { name: "scan-npm-audit", - args: []string{"scan", "--url", "https://github.com/bomly-dev/example-javascript-npm", "--ref", "559a762aeef68b0e5c818f62dfba67abc369912f", "--format", "json", "--enrich", "--audit", "--matchers", "osv", "--auditors", "vulnerability"}, + args: []string{"scan", "--url", 
"https://github.com/bomly-dev/example-javascript-npm", "--ref", "v1.0.0", "--format", "json", "--enrich", "--audit", "--matchers", "osv", "--auditors", "vulnerability"}, tools: []string{"npm"}, }, } @@ -175,12 +175,12 @@ func TestAuditDiffAndExplain(t *testing.T) { }{ { name: "diff-go-audit", - args: []string{"diff", "--url", "https://github.com/google/uuid", "--base", "v1.5.0", "--head", "v1.6.0", "--format", "json", "--enrich", "--audit", "--matchers", "osv", "--auditors", "vulnerability"}, + args: []string{"diff", "--url", "https://github.com/bomly-dev/example-go-gomod", "--base", "v0.9.0", "--head", "v1.0.0", "--format", "json", "--enrich", "--audit", "--matchers", "osv", "--auditors", "vulnerability"}, tools: []string{"go"}, }, { name: "explain-go-enrich", - args: []string{"explain", "github.com/google/uuid", "--url", "https://github.com/google/uuid", "--ref", "v1.6.0", "--format", "json", "--enrich", "--matchers", "osv"}, + args: []string{"explain", "golang.org/x/text", "--url", "https://github.com/bomly-dev/example-go-gomod", "--ref", "v1.0.0", "--format", "json", "--enrich", "--matchers", "osv"}, tools: []string{"go"}, }, } diff --git a/test/smoke/lite_test.go b/test/smoke/lite_test.go index 7f5167f4..175065b2 100644 --- a/test/smoke/lite_test.go +++ b/test/smoke/lite_test.go @@ -76,7 +76,7 @@ func TestLiteScan(t *testing.T) { }{ { name: "lite-scan-go", - args: []string{"scan", "--url", "https://github.com/google/uuid", "--ref", "v1.6.0", "--format", "json"}, + args: []string{"scan", "--url", "https://github.com/bomly-dev/example-go-gomod", "--ref", "v1.0.0", "--format", "json"}, tools: []string{"go"}, }, { @@ -122,7 +122,7 @@ func TestLiteDiff(t *testing.T) { }{ { name: "lite-diff-go", - args: []string{"diff", "--url", "https://github.com/google/uuid", "--base", "v1.5.0", "--head", "v1.6.0", "--format", "json"}, + args: []string{"diff", "--url", "https://github.com/bomly-dev/example-go-gomod", "--base", "v0.9.0", "--head", "v1.0.0", "--format", "json"}, 
tools: []string{"go"}, }, } @@ -160,7 +160,7 @@ func TestLiteExplain(t *testing.T) { }{ { name: "lite-explain-go", - args: []string{"explain", "github.com/google/uuid", "--url", "https://github.com/google/uuid", "--ref", "v1.6.0", "--format", "json"}, + args: []string{"explain", "golang.org/x/text", "--url", "https://github.com/bomly-dev/example-go-gomod", "--ref", "v1.0.0", "--format", "json"}, tools: []string{"go"}, }, } diff --git a/test/smoke/smoke_test.go b/test/smoke/smoke_test.go index 0c7bbfb7..8da8355e 100644 --- a/test/smoke/smoke_test.go +++ b/test/smoke/smoke_test.go @@ -141,7 +141,7 @@ func TestScan(t *testing.T) { // line numbers, file paths, analyzed_at) via // normalizeReachability. name: "scan-go-reachability", - args: []string{"scan", "--url", "https://github.com/bomly-dev/example-go-gomod", "--ref", "97f0e49268e11fda722e41e28176ce10f66bc491", "--enrich", "--analyze", "--format", "json"}, + args: []string{"scan", "--url", "https://github.com/bomly-dev/example-go-gomod", "--ref", "v1.0.0", "--enrich", "--analyze", "--format", "json"}, tools: []string{"go"}, }, { @@ -154,7 +154,7 @@ func TestScan(t *testing.T) { // of the analyzer. package-lock.json pins the graph. Goldens // scrub timestamps via normalizeReachability. name: "scan-npm-reachability", - args: []string{"scan", "--url", "https://github.com/bomly-dev/example-javascript-npm", "--ref", "559a762aeef68b0e5c818f62dfba67abc369912f", "--enrich", "--analyze", "--format", "json"}, + args: []string{"scan", "--url", "https://github.com/bomly-dev/example-javascript-npm", "--ref", "v1.0.0", "--enrich", "--analyze", "--format", "json"}, tools: []string{"npm"}, }, { @@ -186,7 +186,7 @@ func TestScan(t *testing.T) { // unreachable branches plus the package-prefix map. Goldens // scrub timestamps via normalizeReachability. 
name: "scan-java-maven-reachability", - args: []string{"scan", "--url", "https://github.com/bomly-dev/example-java-maven", "--ref", "93bb3aae614e2f2c6cb65f5ea2315846f5234150", "--enrich", "--analyze", "--format", "json"}, + args: []string{"scan", "--url", "https://github.com/bomly-dev/example-java-maven", "--ref", "v1.0.0", "--enrich", "--analyze", "--format", "json"}, tools: []string{"mvn"}, }, { @@ -195,7 +195,7 @@ func TestScan(t *testing.T) { // golden proves runtime-only filtering. package-lock.json pins // the graph. name: "scan-npm-scope-runtime", - args: []string{"scan", "--url", "https://github.com/bomly-dev/example-javascript-npm", "--ref", "559a762aeef68b0e5c818f62dfba67abc369912f", "--format", "json", "--scope", "runtime"}, + args: []string{"scan", "--url", "https://github.com/bomly-dev/example-javascript-npm", "--ref", "v1.0.0", "--format", "json", "--scope", "runtime"}, tools: []string{"npm"}, }, { From e2b6858ac3904c1afb5bb1e3c25b02bcba54d4d3 Mon Sep 17 00:00:00 2001 From: Ahmed ElMallah Date: Thu, 18 Jun 2026 15:40:11 -0700 Subject: [PATCH 5/5] test(python): add testdata-fixture lockfile integration tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror the node detector's lockfile_integration_test.go: drive each Python lock fast-path end-to-end through ResolveGraph against real manifest fixtures under testdata/lockfiles/{pip,poetry,uv,pipenv}, with node-style helpers (requirePyPackage / requirePyEdge / requirePyScope / requirePySingleRoot). Covers the binary-free parsers — requirements.lock (the new pip fast-path), poetry.lock, uv.lock, Pipfile.lock — asserting package set, transitive edges, single application root, and runtime/development scope. 
Co-Authored-By: Claude Opus 4.8 --- .../python/lockfile_integration_test.go | 196 ++++++++++++++++++ .../testdata/lockfiles/pip/requirements.lock | 15 ++ .../python/testdata/lockfiles/pipenv/Pipfile | 13 ++ .../testdata/lockfiles/pipenv/Pipfile.lock | 43 ++++ .../testdata/lockfiles/poetry/poetry.lock | 71 +++++++ .../testdata/lockfiles/poetry/pyproject.toml | 16 ++ .../testdata/lockfiles/uv/pyproject.toml | 12 ++ .../python/testdata/lockfiles/uv/uv.lock | 59 ++++++ 8 files changed, 425 insertions(+) create mode 100644 internal/detectors/python/lockfile_integration_test.go create mode 100644 internal/detectors/python/testdata/lockfiles/pip/requirements.lock create mode 100644 internal/detectors/python/testdata/lockfiles/pipenv/Pipfile create mode 100644 internal/detectors/python/testdata/lockfiles/pipenv/Pipfile.lock create mode 100644 internal/detectors/python/testdata/lockfiles/poetry/poetry.lock create mode 100644 internal/detectors/python/testdata/lockfiles/poetry/pyproject.toml create mode 100644 internal/detectors/python/testdata/lockfiles/uv/pyproject.toml create mode 100644 internal/detectors/python/testdata/lockfiles/uv/uv.lock diff --git a/internal/detectors/python/lockfile_integration_test.go b/internal/detectors/python/lockfile_integration_test.go new file mode 100644 index 00000000..3b3fcd45 --- /dev/null +++ b/internal/detectors/python/lockfile_integration_test.go @@ -0,0 +1,196 @@ +package python + +import ( + "context" + "path/filepath" + "testing" + + "github.com/bomly-dev/bomly-cli/sdk" +) + +// These tests drive each Python detector's lock fast-path end-to-end through +// ResolveGraph against real manifest fixtures under testdata/lockfiles, mirroring +// the node detector's lockfile_integration_test.go. They cover the binary-free +// parsers only (requirements.lock, poetry.lock, uv.lock, Pipfile.lock); the +// install + pip-inspect paths are exercised elsewhere. 
+ +// pyStableID returns the graph node ID a parser assigns: "name@version", or just +// "name" when the version is empty. +func pyStableID(name, version string) string { + if version == "" { + return name + } + return name + "@" + version +} + +func pyFixture(name string) string { + return filepath.Join("testdata", "lockfiles", name) +} + +func resolvePyLockGraph(t *testing.T, detector sdk.Detector, projectDir string) *sdk.Graph { + t.Helper() + result, err := detector.ResolveGraph(context.Background(), sdk.DetectionRequest{ProjectPath: projectDir}) + if err != nil { + t.Fatalf("%T.ResolveGraph(%s): %v", detector, projectDir, err) + } + g, err := result.Graphs.ConsolidatedGraph() + if err != nil { + t.Fatalf("consolidated graph: %v", err) + } + return g +} + +func pyGraphIDs(g *sdk.Graph) []string { + nodes := g.Nodes() + ids := make([]string, len(nodes)) + for i, n := range nodes { + ids[i] = n.ID + } + return ids +} + +func requirePyPackage(t *testing.T, g *sdk.Graph, name, version string) *sdk.Dependency { + t.Helper() + id := pyStableID(name, version) + pkg, ok := g.Node(id) + if !ok { + t.Fatalf("expected package %s in graph; present: %v", id, pyGraphIDs(g)) + } + return pkg +} + +func requirePyEdge(t *testing.T, g *sdk.Graph, fromName, fromVersion, toName, toVersion string) { + t.Helper() + fromID := pyStableID(fromName, fromVersion) + toID := pyStableID(toName, toVersion) + deps, err := g.DirectDependencies(fromID) + if err != nil { + t.Fatalf("dependencies(%s): %v", fromID, err) + } + for _, dep := range deps { + if dep.ID == toID { + return + } + } + t.Errorf("expected edge %s → %s", fromID, toID) +} + +func requirePyScope(t *testing.T, g *sdk.Graph, name, version string, scope sdk.Scope) { + t.Helper() + pkg := requirePyPackage(t, g, name, version) + if got := pkg.PrimaryScope(); got != scope { + t.Errorf("expected %s scope %q, got %q", pyStableID(name, version), scope, got) + } +} + +// requirePySingleRoot asserts the graph has exactly one root with the 
expected ID. +func requirePySingleRoot(t *testing.T, g *sdk.Graph, rootID string) { + t.Helper() + roots := g.Roots() + if len(roots) != 1 { + t.Fatalf("expected exactly one root, got %d: %v", len(roots), pyGraphIDs(g)) + } + if roots[0].ID != rootID { + t.Errorf("expected root %q, got %q", rootID, roots[0].ID) + } +} + +// ---- pip (requirements.lock fast-path) ------------------------------------- + +func TestPipRequirementsLockFixture(t *testing.T) { + g := resolvePyLockGraph(t, PipDetector{}, pyFixture("pip")) + + for _, want := range [][2]string{ + {"requests", "2.32.3"}, {"certifi", "2024.8.30"}, + {"charset-normalizer", "3.4.0"}, {"idna", "3.10"}, + {"urllib3", "2.2.3"}, {"pytest", "8.3.3"}, + } { + requirePyPackage(t, g, want[0], want[1]) + } + + // requests pulls its four transitive deps via "# via requests". + requirePyEdge(t, g, "requests", "2.32.3", "certifi", "2024.8.30") + requirePyEdge(t, g, "requests", "2.32.3", "idna", "3.10") + requirePyEdge(t, g, "requests", "2.32.3", "urllib3", "2.2.3") + + // Scope: runtime deps stay runtime; the requirements-dev.in input marks pytest dev. + requirePyScope(t, g, "requests", "2.32.3", sdk.ScopeRuntime) + requirePyScope(t, g, "urllib3", "2.2.3", sdk.ScopeRuntime) + requirePyScope(t, g, "pytest", "8.3.3", sdk.ScopeDevelopment) +} + +// ---- poetry (poetry.lock + pyproject.toml fast-path) ----------------------- + +func TestPoetryLockFixture(t *testing.T) { + g := resolvePyLockGraph(t, PoetryDetector{}, pyFixture("poetry")) + + requirePySingleRoot(t, g, pyStableID("demo-app", "1.0.0")) + for _, want := range [][2]string{ + {"requests", "2.32.3"}, {"certifi", "2024.8.30"}, + {"charset-normalizer", "3.4.0"}, {"idna", "3.10"}, + {"urllib3", "2.2.3"}, {"pytest", "8.3.3"}, {"pluggy", "1.5.0"}, + } { + requirePyPackage(t, g, want[0], want[1]) + } + + // Direct deps off the project root, plus transitive edges from the lock. 
+ requirePyEdge(t, g, "demo-app", "1.0.0", "requests", "2.32.3") + requirePyEdge(t, g, "requests", "2.32.3", "idna", "3.10") + requirePyEdge(t, g, "pytest", "8.3.3", "pluggy", "1.5.0") + + // "main" group → runtime; "dev" group → development, propagated transitively. + requirePyScope(t, g, "requests", "2.32.3", sdk.ScopeRuntime) + requirePyScope(t, g, "idna", "3.10", sdk.ScopeRuntime) + requirePyScope(t, g, "pytest", "8.3.3", sdk.ScopeDevelopment) + requirePyScope(t, g, "pluggy", "1.5.0", sdk.ScopeDevelopment) +} + +// ---- uv (uv.lock fast-path) ------------------------------------------------ + +func TestUVLockFixture(t *testing.T) { + g := resolvePyLockGraph(t, UVDetector{}, pyFixture("uv")) + + requirePySingleRoot(t, g, pyStableID("demo-app", "1.0.0")) + for _, want := range [][2]string{ + {"requests", "2.32.3"}, {"certifi", "2024.8.30"}, + {"idna", "3.10"}, {"urllib3", "2.2.3"}, + {"pytest", "8.3.3"}, {"pluggy", "1.5.0"}, + } { + requirePyPackage(t, g, want[0], want[1]) + } + + requirePyEdge(t, g, "demo-app", "1.0.0", "requests", "2.32.3") + requirePyEdge(t, g, "requests", "2.32.3", "urllib3", "2.2.3") + requirePyEdge(t, g, "pytest", "8.3.3", "pluggy", "1.5.0") + + // Runtime deps (and their transitives) vs. the dev-dependency group. 
+ requirePyScope(t, g, "requests", "2.32.3", sdk.ScopeRuntime) + requirePyScope(t, g, "urllib3", "2.2.3", sdk.ScopeRuntime) + requirePyScope(t, g, "pytest", "8.3.3", sdk.ScopeDevelopment) + requirePyScope(t, g, "pluggy", "1.5.0", sdk.ScopeDevelopment) +} + +// ---- pipenv (Pipfile.lock fast-path) --------------------------------------- + +func TestPipenvLockFixture(t *testing.T) { + g := resolvePyLockGraph(t, PipenvDetector{}, pyFixture("pipenv")) + + for _, want := range [][2]string{ + {"requests", "2.32.3"}, {"certifi", "2024.8.30"}, + {"charset-normalizer", "3.4.0"}, {"idna", "3.10"}, + {"urllib3", "2.2.3"}, {"pytest", "8.3.3"}, {"pluggy", "1.5.0"}, + } { + requirePyPackage(t, g, want[0], want[1]) + } + + // Pipfile.lock has no transitive edges; default/develop hang off the root. + requirePyEdge(t, g, "root", "", "requests", "2.32.3") + requirePyEdge(t, g, "root", "", "pytest", "8.3.3") + + // Scope is re-derived from the Pipfile's [packages] / [dev-packages]: + // requests is runtime, pytest is development. pluggy is only a transitive + // dependency of pytest, but Pipfile.lock is flat (no edge records it), so it + // stays runtime — a known limitation of the lock-only fast-path. + requirePyScope(t, g, "requests", "2.32.3", sdk.ScopeRuntime) + requirePyScope(t, g, "pytest", "8.3.3", sdk.ScopeDevelopment) +} diff --git a/internal/detectors/python/testdata/lockfiles/pip/requirements.lock b/internal/detectors/python/testdata/lockfiles/pip/requirements.lock new file mode 100644 index 00000000..fb436f27 --- /dev/null +++ b/internal/detectors/python/testdata/lockfiles/pip/requirements.lock @@ -0,0 +1,15 @@ +# +# This file is autogenerated by pip-compile. 
+# +certifi==2024.8.30 + # via requests +charset-normalizer==3.4.0 + # via requests +idna==3.10 + # via requests +requests==2.32.3 + # via -r requirements.in +urllib3==2.2.3 + # via requests +pytest==8.3.3 + # via -r requirements-dev.in diff --git a/internal/detectors/python/testdata/lockfiles/pipenv/Pipfile b/internal/detectors/python/testdata/lockfiles/pipenv/Pipfile new file mode 100644 index 00000000..084854ea --- /dev/null +++ b/internal/detectors/python/testdata/lockfiles/pipenv/Pipfile @@ -0,0 +1,13 @@ +[[source]] +name = "pypi" +url = "https://pypi.org/simple" +verify_ssl = true + +[packages] +requests = "*" + +[dev-packages] +pytest = "*" + +[requires] +python_version = "3.11" diff --git a/internal/detectors/python/testdata/lockfiles/pipenv/Pipfile.lock b/internal/detectors/python/testdata/lockfiles/pipenv/Pipfile.lock new file mode 100644 index 00000000..dc9e87b8 --- /dev/null +++ b/internal/detectors/python/testdata/lockfiles/pipenv/Pipfile.lock @@ -0,0 +1,43 @@ +{ + "_meta": { + "hash": { + "sha256": "fixturedoesnotvalidatehash" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.11" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "certifi": { + "version": "==2024.8.30" + }, + "charset-normalizer": { + "version": "==3.4.0" + }, + "idna": { + "version": "==3.10" + }, + "requests": { + "version": "==2.32.3" + }, + "urllib3": { + "version": "==2.2.3" + } + }, + "develop": { + "pluggy": { + "version": "==1.5.0" + }, + "pytest": { + "version": "==8.3.3" + } + } +} diff --git a/internal/detectors/python/testdata/lockfiles/poetry/poetry.lock b/internal/detectors/python/testdata/lockfiles/poetry/poetry.lock new file mode 100644 index 00000000..85d44d7f --- /dev/null +++ b/internal/detectors/python/testdata/lockfiles/poetry/poetry.lock @@ -0,0 +1,71 @@ +# This file is automatically @generated by Poetry and should not be changed by hand. 
+ +[[package]] +name = "certifi" +version = "2024.8.30" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +groups = ["main"] + +[[package]] +name = "charset-normalizer" +version = "3.4.0" +description = "The Real First Universal Charset Detector." +optional = false +python-versions = ">=3.7.0" +groups = ["main"] + +[[package]] +name = "idna" +version = "3.10" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.6" +groups = ["main"] + +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +groups = ["dev"] + +[[package]] +name = "pytest" +version = "8.3.3" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" +groups = ["dev"] + +[package.dependencies] +pluggy = ">=1.5,<2" + +[[package]] +name = "requests" +version = "2.32.3" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=3.8" +groups = ["main"] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[[package]] +name = "urllib3" +version = "2.2.3" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+optional = false +python-versions = ">=3.8" +groups = ["main"] + +[metadata] +lock-version = "2.1" +python-versions = "^3.11" +content-hash = "fixturedoesnotvalidatehash" diff --git a/internal/detectors/python/testdata/lockfiles/poetry/pyproject.toml b/internal/detectors/python/testdata/lockfiles/poetry/pyproject.toml new file mode 100644 index 00000000..4b80b3db --- /dev/null +++ b/internal/detectors/python/testdata/lockfiles/poetry/pyproject.toml @@ -0,0 +1,16 @@ +[tool.poetry] +name = "demo-app" +version = "1.0.0" +description = "Lock-fixture project for the poetry detector" +authors = ["Bomly "] + +[tool.poetry.dependencies] +python = "^3.11" +requests = "^2.32" + +[tool.poetry.group.dev.dependencies] +pytest = "^8.3" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/internal/detectors/python/testdata/lockfiles/uv/pyproject.toml b/internal/detectors/python/testdata/lockfiles/uv/pyproject.toml new file mode 100644 index 00000000..16088932 --- /dev/null +++ b/internal/detectors/python/testdata/lockfiles/uv/pyproject.toml @@ -0,0 +1,12 @@ +[project] +name = "demo-app" +version = "1.0.0" +requires-python = ">=3.11" +dependencies = [ + "requests>=2.32", +] + +[dependency-groups] +dev = [ + "pytest>=8.3", +] diff --git a/internal/detectors/python/testdata/lockfiles/uv/uv.lock b/internal/detectors/python/testdata/lockfiles/uv/uv.lock new file mode 100644 index 00000000..4bed202c --- /dev/null +++ b/internal/detectors/python/testdata/lockfiles/uv/uv.lock @@ -0,0 +1,59 @@ +version = 1 +requires-python = ">=3.11" + +[[package]] +name = "demo-app" +version = "1.0.0" +source = { editable = "." 
} +dependencies = [ + { name = "requests" }, +] + +[package.dev-dependencies] +dev = [ + { name = "pytest" }, +] + +[[package]] +name = "certifi" +version = "2024.8.30" +source = { registry = "https://pypi.org/simple" } + +[[package]] +name = "charset-normalizer" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } + +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } + +[[package]] +name = "pluggy" +version = "1.5.0" +source = { registry = "https://pypi.org/simple" } + +[[package]] +name = "pytest" +version = "8.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pluggy" }, +] + +[[package]] +name = "requests" +version = "2.32.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] + +[[package]] +name = "urllib3" +version = "2.2.3" +source = { registry = "https://pypi.org/simple" }