Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions docs/ARCHITECTURE.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,15 @@ Some native detector chains intentionally prefer a build-tool command over a com

`bomly benchmark` is a hidden maintainer command backed by `internal/benchmark`. It scans public GitHub repositories with native detectors, compares the filtered dependency graph against GitHub Dependency Graph and external Syft SBOMs, and writes deterministic artifacts under `.benchmark-runs/latest`. Bomly scan and SBOM diff execution run in-process through the engine and output model; only the external `git` and `syft` tools remain subprocesses. The in-process adapter builds a native-only registry directly so local configuration and managed-plugin discovery cannot distort benchmark results. Package and relationship scores are comparative engineering signals, not pass/fail gates and not claims that a baseline is ground truth. The benchmark is intentionally local-only so exploratory scoring does not become a release or merge gate before it is calibrated.

### Decision: Python install-first isolates into a venv; pip prefers a committed lock

The pip detector resolves graphs from `pip inspect --local`, which reads whatever lives in the active interpreter's site-packages. Running `--install-first` as a plain `pip install` into the ambient interpreter therefore captured unrelated tooling (the runner's `poetry`, `build`, `keyring`, `virtualenv`, and date-versioned helpers), making output non-deterministic. Two changes address this:

1. **Isolated install (`internal/detectors/python/venv.go`).** `--install-first` for pip now (re)creates a clean, project-scoped virtualenv under the temp dir — keyed by a hash of the absolute working dir so the install and inspect phases agree — and both `pip install` and `pip inspect` run against that venv. The ambient site-packages no longer leak into resolution. The venv is recreated per run so stale state cannot persist.
2. **Lock fast-path (`internal/detectors/python/piplock.go`).** When a committed, fully-pinned `requirements.lock` (pip-compile style, with `# via` edge annotations) is present, the detector builds the graph directly from it — no install, no inspect — mirroring the existing `poetry.lock` fast-path. Direct dependencies are those whose `# via` annotation references an input file (`-r foo.in`); an input file whose name matches `dev` (for example `requirements-dev.in`) marks its dependencies as development scope, and runtime scope wins over development scope during BFS propagation.

The smoke/benchmark Python targets rely on the fast-paths for determinism: `scan-python-poetry` drops `--install-first` so the committed `poetry.lock` fast-path runs, and `scan-python-pip` commits a `requirements.lock`. The venv isolation remains the correctness backstop for real-world pip projects scanned with `--install-first` and no committed lock.

## Build Modes

Syft and Grype each support two build modes:
Expand Down
4 changes: 1 addition & 3 deletions internal/benchmark/testdata/scan_targets.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,8 @@
{
"name": "scan-python-pip",
"url": "https://github.com/bomly-dev/example-python-pip",
"ref": "v1.0.0",
"ref": "fe04c758134b95dab102e1fce10275f7d18c0cf2",
"ecosystem": "python",
"args": ["--install-first"],
"tools": ["pip"],
"benchmark_enabled": true
},
Expand Down Expand Up @@ -139,7 +138,6 @@
"url": "https://github.com/bomly-dev/example-python-poetry",
"ref": "v1.0.0",
"ecosystem": "python",
"args": ["--install-first"],
"tools": ["poetry"],
"benchmark_enabled": true
},
Expand Down
4 changes: 2 additions & 2 deletions internal/detectors/maven/detector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ func TestDepGraphFromMavenTGF_WithMavenLogPrefixes(t *testing.T) {
[INFO] Scanning for projects...
[INFO]
[INFO] --- maven-dependency-plugin:2.8:tree (default-cli) @ example-java-maven ---
[INFO] 319144230 com.srcclr:example-java-maven:jar:1.0-SNAPSHOT
[INFO] 319144230 com.bomly:example-java-maven:jar:1.0-SNAPSHOT
[INFO] 1268237485 org.apache.struts:struts2-core:jar:2.5.12:compile
[INFO] 1983948209 org.freemarker:freemarker:jar:2.3.23:compile
[INFO] 1778257620 org.mindrot:jbcrypt:jar:0.3m:compile
Expand All @@ -77,7 +77,7 @@ func TestDepGraphFromMavenTGF_WithMavenLogPrefixes(t *testing.T) {
t.Fatalf("expected 4 packages, got %d", g.Size())
}

rootDeps, err := g.DirectDependencies("com.srcclr:example-java-maven@1.0-SNAPSHOT")
rootDeps, err := g.DirectDependencies("com.bomly:example-java-maven@1.0-SNAPSHOT")
if err != nil {
t.Fatalf("dependencies(root) error = %v", err)
}
Expand Down
196 changes: 196 additions & 0 deletions internal/detectors/python/lockfile_integration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
package python

import (
"context"
"path/filepath"
"testing"

"github.com/bomly-dev/bomly-cli/sdk"
)

// These tests drive each Python detector's lock fast-path end-to-end through
// ResolveGraph against real manifest fixtures under testdata/lockfiles, mirroring
// the node detector's lockfile_integration_test.go. They cover the binary-free
// parsers only (requirements.lock, poetry.lock, uv.lock, Pipfile.lock); the
// install + pip-inspect paths are exercised elsewhere.

// pyStableID builds the graph node ID a parser assigns to a package: the bare
// name when version is empty, otherwise "name@version".
func pyStableID(name, version string) string {
	switch version {
	case "":
		return name
	default:
		return name + "@" + version
	}
}

// pyFixture returns the relative path of the named fixture directory under
// testdata/lockfiles.
func pyFixture(name string) string {
	segments := []string{"testdata", "lockfiles", name}
	return filepath.Join(segments...)
}

// resolvePyLockGraph runs detector.ResolveGraph against projectDir and returns
// the consolidated graph. The test is aborted if resolution or consolidation
// returns an error.
func resolvePyLockGraph(t *testing.T, detector sdk.Detector, projectDir string) *sdk.Graph {
	t.Helper()

	request := sdk.DetectionRequest{ProjectPath: projectDir}
	result, err := detector.ResolveGraph(context.Background(), request)
	if err != nil {
		t.Fatalf("%T.ResolveGraph(%s): %v", detector, projectDir, err)
	}

	graph, err := result.Graphs.ConsolidatedGraph()
	if err != nil {
		t.Fatalf("consolidated graph: %v", err)
	}
	return graph
}

// pyGraphIDs collects the ID of every node in g, in the order g.Nodes()
// returns them. It is used to make failure messages self-describing.
func pyGraphIDs(g *sdk.Graph) []string {
	all := g.Nodes()
	collected := make([]string, 0, len(all))
	for _, node := range all {
		collected = append(collected, node.ID)
	}
	return collected
}

// requirePyPackage looks up the node for name/version in g and returns it.
// When the node is absent the test is aborted, and the failure lists every ID
// that is present in the graph.
func requirePyPackage(t *testing.T, g *sdk.Graph, name, version string) *sdk.Dependency {
	t.Helper()
	wantID := pyStableID(name, version)
	node, found := g.Node(wantID)
	if found {
		return node
	}
	t.Fatalf("expected package %s in graph; present: %v", wantID, pyGraphIDs(g))
	return nil // not reached: Fatalf stops the test goroutine
}

// requirePyEdge asserts that the "to" package is a direct dependency of the
// "from" package. A lookup error on the source node aborts the test; a missing
// edge is reported as a non-fatal error.
func requirePyEdge(t *testing.T, g *sdk.Graph, fromName, fromVersion, toName, toVersion string) {
	t.Helper()
	src := pyStableID(fromName, fromVersion)
	dst := pyStableID(toName, toVersion)

	children, err := g.DirectDependencies(src)
	if err != nil {
		t.Fatalf("dependencies(%s): %v", src, err)
	}

	found := false
	for _, child := range children {
		if child.ID == dst {
			found = true
			break
		}
	}
	if !found {
		t.Errorf("expected edge %s → %s", src, dst)
	}
}

// requirePyScope asserts that the package name/version exists in g and that
// its primary scope equals scope. A missing package aborts the test (via
// requirePyPackage); a scope mismatch is reported as a non-fatal error.
func requirePyScope(t *testing.T, g *sdk.Graph, name, version string, scope sdk.Scope) {
	t.Helper()
	got := requirePyPackage(t, g, name, version).PrimaryScope()
	if got == scope {
		return
	}
	t.Errorf("expected %s scope %q, got %q", pyStableID(name, version), scope, got)
}

// requirePySingleRoot asserts the graph has exactly one root with the expected ID.
//
// A wrong root count aborts the test. The failure message lists the IDs of the
// roots that were actually found (the thing being counted) and, for context,
// every node ID in the graph. A single root with the wrong ID is reported as a
// non-fatal error.
func requirePySingleRoot(t *testing.T, g *sdk.Graph, rootID string) {
	t.Helper()
	roots := g.Roots()
	if len(roots) != 1 {
		// Report the roots themselves: printing only the full node list next
		// to the root count hides which nodes were treated as roots.
		rootIDs := make([]string, 0, len(roots))
		for _, root := range roots {
			rootIDs = append(rootIDs, root.ID)
		}
		t.Fatalf("expected exactly one root, got %d: %v (graph nodes: %v)", len(roots), rootIDs, pyGraphIDs(g))
	}
	if got := roots[0].ID; got != rootID {
		t.Errorf("expected root %q, got %q", rootID, got)
	}
}

// ---- pip (requirements.lock fast-path) -------------------------------------

// TestPipRequirementsLockFixture resolves the pip fixture through the
// requirements.lock fast-path and checks the expected packages, the edges from
// requests to its transitive dependencies, and runtime/development scopes.
func TestPipRequirementsLockFixture(t *testing.T) {
	g := resolvePyLockGraph(t, PipDetector{}, pyFixture("pip"))

	for _, want := range [][2]string{
		{"requests", "2.32.3"}, {"certifi", "2024.8.30"},
		{"charset-normalizer", "3.4.0"}, {"idna", "3.10"},
		{"urllib3", "2.2.3"}, {"pytest", "8.3.3"},
	} {
		requirePyPackage(t, g, want[0], want[1])
	}

	// requests pulls its four transitive deps via "# via requests"; assert all
	// four edges so a dropped annotation on any one of them is caught.
	// NOTE(review): assumes the fixture annotates charset-normalizer with
	// "# via requests" like the other three — confirm against testdata/lockfiles/pip.
	for _, dep := range [][2]string{
		{"certifi", "2024.8.30"},
		{"charset-normalizer", "3.4.0"},
		{"idna", "3.10"},
		{"urllib3", "2.2.3"},
	} {
		requirePyEdge(t, g, "requests", "2.32.3", dep[0], dep[1])
	}

	// Scope: runtime deps stay runtime; the requirements-dev.in input marks pytest dev.
	requirePyScope(t, g, "requests", "2.32.3", sdk.ScopeRuntime)
	requirePyScope(t, g, "urllib3", "2.2.3", sdk.ScopeRuntime)
	requirePyScope(t, g, "pytest", "8.3.3", sdk.ScopeDevelopment)
}

// ---- poetry (poetry.lock + pyproject.toml fast-path) -----------------------

// TestPoetryLockFixture resolves the poetry fixture through the poetry.lock +
// pyproject.toml fast-path and verifies its single root, packages, edges, and
// scopes.
func TestPoetryLockFixture(t *testing.T) {
	graph := resolvePyLockGraph(t, PoetryDetector{}, pyFixture("poetry"))

	requirePySingleRoot(t, graph, pyStableID("demo-app", "1.0.0"))

	packages := []struct{ name, version string }{
		{"requests", "2.32.3"},
		{"certifi", "2024.8.30"},
		{"charset-normalizer", "3.4.0"},
		{"idna", "3.10"},
		{"urllib3", "2.2.3"},
		{"pytest", "8.3.3"},
		{"pluggy", "1.5.0"},
	}
	for _, p := range packages {
		requirePyPackage(t, graph, p.name, p.version)
	}

	// Direct deps hang off the project root; transitive edges come from the lock.
	edges := []struct{ fromName, fromVersion, toName, toVersion string }{
		{"demo-app", "1.0.0", "requests", "2.32.3"},
		{"requests", "2.32.3", "idna", "3.10"},
		{"pytest", "8.3.3", "pluggy", "1.5.0"},
	}
	for _, e := range edges {
		requirePyEdge(t, graph, e.fromName, e.fromVersion, e.toName, e.toVersion)
	}

	// The "main" group maps to runtime and the "dev" group to development,
	// with the scope propagated to transitive dependencies.
	scopes := []struct {
		name, version string
		scope         sdk.Scope
	}{
		{"requests", "2.32.3", sdk.ScopeRuntime},
		{"idna", "3.10", sdk.ScopeRuntime},
		{"pytest", "8.3.3", sdk.ScopeDevelopment},
		{"pluggy", "1.5.0", sdk.ScopeDevelopment},
	}
	for _, s := range scopes {
		requirePyScope(t, graph, s.name, s.version, s.scope)
	}
}

// ---- uv (uv.lock fast-path) ------------------------------------------------

// TestUVLockFixture resolves the uv fixture through the uv.lock fast-path and
// verifies its single root, packages, edges, and scopes.
func TestUVLockFixture(t *testing.T) {
	graph := resolvePyLockGraph(t, UVDetector{}, pyFixture("uv"))

	requirePySingleRoot(t, graph, pyStableID("demo-app", "1.0.0"))

	packages := []struct{ name, version string }{
		{"requests", "2.32.3"},
		{"certifi", "2024.8.30"},
		{"idna", "3.10"},
		{"urllib3", "2.2.3"},
		{"pytest", "8.3.3"},
		{"pluggy", "1.5.0"},
	}
	for _, p := range packages {
		requirePyPackage(t, graph, p.name, p.version)
	}

	edges := []struct{ fromName, fromVersion, toName, toVersion string }{
		{"demo-app", "1.0.0", "requests", "2.32.3"},
		{"requests", "2.32.3", "urllib3", "2.2.3"},
		{"pytest", "8.3.3", "pluggy", "1.5.0"},
	}
	for _, e := range edges {
		requirePyEdge(t, graph, e.fromName, e.fromVersion, e.toName, e.toVersion)
	}

	// Runtime deps (and their transitives) versus the dev-dependency group.
	scopes := []struct {
		name, version string
		scope         sdk.Scope
	}{
		{"requests", "2.32.3", sdk.ScopeRuntime},
		{"urllib3", "2.2.3", sdk.ScopeRuntime},
		{"pytest", "8.3.3", sdk.ScopeDevelopment},
		{"pluggy", "1.5.0", sdk.ScopeDevelopment},
	}
	for _, s := range scopes {
		requirePyScope(t, graph, s.name, s.version, s.scope)
	}
}

// ---- pipenv (Pipfile.lock fast-path) ---------------------------------------

// TestPipenvLockFixture resolves the pipenv fixture through the Pipfile.lock
// fast-path and verifies its packages, the root-level edges, and scopes.
func TestPipenvLockFixture(t *testing.T) {
	graph := resolvePyLockGraph(t, PipenvDetector{}, pyFixture("pipenv"))

	packages := []struct{ name, version string }{
		{"requests", "2.32.3"},
		{"certifi", "2024.8.30"},
		{"charset-normalizer", "3.4.0"},
		{"idna", "3.10"},
		{"urllib3", "2.2.3"},
		{"pytest", "8.3.3"},
		{"pluggy", "1.5.0"},
	}
	for _, p := range packages {
		requirePyPackage(t, graph, p.name, p.version)
	}

	// Pipfile.lock has no transitive edges; default/develop entries hang off
	// the unversioned "root" node.
	rootChildren := []struct{ name, version string }{
		{"requests", "2.32.3"},
		{"pytest", "8.3.3"},
	}
	for _, child := range rootChildren {
		requirePyEdge(t, graph, "root", "", child.name, child.version)
	}

	// Scope is re-derived from the Pipfile's [packages] / [dev-packages]:
	// requests is runtime, pytest is development. pluggy is only a transitive
	// dependency of pytest, but Pipfile.lock is flat (no edge records it), so it
	// stays runtime — a known limitation of the lock-only fast-path.
	scopes := []struct {
		name, version string
		scope         sdk.Scope
	}{
		{"requests", "2.32.3", sdk.ScopeRuntime},
		{"pytest", "8.3.3", sdk.ScopeDevelopment},
	}
	for _, s := range scopes {
		requirePyScope(t, graph, s.name, s.version, s.scope)
	}
}
30 changes: 21 additions & 9 deletions internal/detectors/python/pip.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,20 @@ func (d PipDetector) Descriptor() sdk.DetectorDescriptor {

// ResolveGraph resolves a Python dependency graph with pip inspect.
func (d PipDetector) ResolveGraph(_ context.Context, req sdk.DetectionRequest) (sdk.DetectionResult, error) {
command, err := pipInspectCommand()
workingDir := d.base().workingDir(req.ProjectPath)

// Fast-path: a committed requirements.lock carries the full transitive tree
// and pinned versions, so we can build the graph without installing into
// (and inspecting) the ambient Python environment.
if lockPath := pipLockFilePath(workingDir); lockPath != "" {
if depsGraph, err := depGraphFromRequirementsLock(lockPath, workingDir); err == nil {
return sdk.DetectionResult{
Graphs: sdk.SingleGraphContainer(depsGraph, detectors.InferManifestMetadata(req, pipEvidencePatterns)),
}, nil
}
}

command, err := pipInspectCommandForProject(workingDir)
if err != nil {
return sdk.DetectionResult{}, err
}
Expand Down Expand Up @@ -81,29 +94,28 @@ func (d PipDetector) base() baseDetector {
}
}

// Install prepares pip dependencies before graph resolution.
// Install prepares pip dependencies before graph resolution. It installs into a
// clean, project-scoped virtualenv so the subsequent `pip inspect` sees only
// the declared dependencies, not whatever tooling lives in the ambient
// site-packages.
func (d PipDetector) Install(ctx context.Context, req sdk.DetectionRequest) error {
workingDir := d.base().workingDir(req.ProjectPath)
requirementsFile, err := installRequirementsPath(workingDir)
if err != nil {
return err
}
command, err := pythonCommand()
venvPython, err := createPythonVenv(ctx, d.base(), req, "pip detector", pythonVenvDir(workingDir))
if err != nil {
return err
}
command = append(command, "-m", "pip", "install", "-r", requirementsFile)
command := []string{venvPython, "-m", "pip", "install", "-r", requirementsFile}
if err := d.base().install(ctx, req, "pip detector", command); err != nil {
return err
}
// Also install requirements-dev.txt when present alongside the primary file.
devReqPath := filepath.Join(workingDir, "requirements-dev.txt")
if exists, _ := system.FileExists(devReqPath); pipShouldInstallDevRequirements(req.ScopeFilter, requirementsFile, exists) {
devCommand, err := pythonCommand()
if err != nil {
return err
}
devCommand = append(devCommand, "-m", "pip", "install", "-r", "requirements-dev.txt")
devCommand := []string{venvPython, "-m", "pip", "install", "-r", "requirements-dev.txt"}
if err := d.base().install(ctx, req, "pip detector (dev)", devCommand); err != nil {
return err
}
Expand Down
Loading