Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 159 additions & 0 deletions docs/content/en/structured-finding-metadata.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
---
title: "Structured Finding Metadata"
weight: 10
---

# Structured Finding Metadata

Poutine findings now include structured metadata fields that provide programmatic access to security-relevant information. These fields enable library users to build automated triage workflows, correlate findings with secrets exposure, and integrate with downstream security tooling without parsing human-readable text.

## New Finding Fields

### `injection_sources`

**Type:** `[]string`

A sorted array of the specific expression sources that are being injected into a sink (shell script, JavaScript, etc.).

**Example:**
```json
{
"rule_id": "injection",
"meta": {
"details": "Sources: github.event.issue.title github.head_ref",
"injection_sources": ["github.event.issue.title", "github.head_ref"]
}
}
```

**Use case:** Programmatically identify which untrusted inputs are exploitable without parsing the `details` string.

---

### `lotp_tool`

**Type:** `string`

The "Living Off The Pipeline" build tool detected after an untrusted checkout. Common values include `npm`, `pip`, `make`, `bash`, `cargo`, `gradle`, etc.

**Example:**
```json
{
"rule_id": "untrusted_checkout_exec",
"meta": {
"details": "Detected usage of `npm`",
"lotp_tool": "npm"
}
}
```

**Use case:** Filter findings by tool type, prioritize based on tool risk, or build tool-specific remediation guidance.

---

### `lotp_action`

**Type:** `string`

The GitHub Action identified as a "Living Off The Pipeline" vector (e.g., actions that execute code from the checked-out repository).

**Example:**
```json
{
"rule_id": "untrusted_checkout_exec",
"meta": {
"details": "Detected usage the GitHub Action `bridgecrewio/checkov-action`",
"lotp_action": "bridgecrewio/checkov-action"
}
}
```

**Use case:** Track which third-party actions introduce code execution risks, build action allowlists.

---

### `referenced_secrets`

**Type:** `[]string`

A sorted array of secret names referenced in the job where the vulnerability was found. The `GITHUB_TOKEN` is excluded since it's always available.

Supports both dot notation (`secrets.MY_SECRET`) and bracket notation (`secrets['MY_SECRET']` or `secrets["MY_SECRET"]`). Only references that appear inside `${{ }}` expressions are detected.

**Example:**
```json
{
"rule_id": "untrusted_checkout_exec",
"meta": {
"lotp_tool": "npm",
"referenced_secrets": ["API_KEY", "DATABASE_PASSWORD", "DEPLOY_TOKEN"]
}
}
```

**Use case:** Assess blast radius of a vulnerability - if a job with an injection vulnerability also references `PROD_DEPLOY_KEY`, the finding is more critical than one with no secrets.

---

## Usage Examples

### Prioritize by Secrets Exposure

```python
def calculate_priority(finding):
    """Rank a finding by the secrets its job references.

    Returns "critical" when any referenced secret name contains a
    high-value marker, "high" when other secrets are referenced, and
    "medium" when the finding references no secrets at all.
    """
    secrets = finding.get("meta", {}).get("referenced_secrets", [])
    high_value = ("DEPLOY", "PROD", "AWS", "GCP", "AZURE", "NPM_TOKEN")

    # Substring match, so "PROD_DEPLOY_KEY" is caught by "PROD" or "DEPLOY".
    for name in secrets:
        for marker in high_value:
            if marker in name:
                return "critical"
    return "high" if secrets else "medium"
```

### Filter Injection Sources

```python
def is_pr_body_injection(finding):
    """Return True when any injection source is a PR, issue, or comment body."""
    sources = finding.get("meta", {}).get("injection_sources", [])
    pr_body_patterns = ["pull_request.body", "issue.body", "comment.body"]
    # Substring match: sources are full expression paths such as
    # "github.event.pull_request.body".
    return any(
        pattern in source
        for source in sources
        for pattern in pr_body_patterns
    )
```

### Group by LOTP Tool

```python
from collections import defaultdict

def group_by_tool(findings):
    """Group `untrusted_checkout_exec` findings by LOTP tool or action.

    The key is `lotp_tool` when set, otherwise `lotp_action`, otherwise
    "unknown". Findings from other rules are ignored.
    """
    by_tool = defaultdict(list)
    for finding in findings:
        if finding["rule_id"] != "untrusted_checkout_exec":
            continue
        meta = finding["meta"]
        tool = meta.get("lotp_tool")
        if not tool:
            tool = meta.get("lotp_action", "unknown")
        by_tool[tool].append(finding)
    return dict(by_tool)
```

---

## Backward Compatibility

These fields are additive - the existing `details` field continues to provide human-readable descriptions. Tools parsing `details` will continue to work, but new integrations should prefer the structured fields for reliability.

**JSON behavior:**
- `injection_sources`, `lotp_tool`, `lotp_action`: Omitted when not applicable
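- `referenced_secrets`: Set to `[]` (empty array) by the GitHub Actions rules even when no secrets are found; omitted for other CI systems. The Go `FindingMeta.ReferencedSecrets` field is tagged `omitempty`, so an empty array is dropped when a finding is serialized from that struct; consumers should treat a missing field and `[]` the same way.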

---

## Supported Rules

| Rule | `injection_sources` | `lotp_tool` | `lotp_action` | `referenced_secrets` |
|------|---------------------|-------------|---------------|----------------------|
| `injection` (GitHub Actions) | Yes | - | - | Yes |
| `injection` (GitLab CI) | Yes | - | - | - |
| `injection` (Azure Pipelines) | Yes | - | - | - |
| `injection` (Tekton) | Yes | - | - | - |
| `untrusted_checkout_exec` (GitHub Actions) | - | Yes | Yes | Yes |
| `untrusted_checkout_exec` (Azure DevOps) | - | Yes | - | - |
| `untrusted_checkout_exec` (Tekton) | - | Yes | - | - |
16 changes: 16 additions & 0 deletions opa/rego/poutine.rego
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package poutine

import data.poutine.utils
import rego.v1

rule(chain) = {
Expand All @@ -24,10 +25,25 @@ rule(chain) = {
)
}

# finding (job-aware clause): applies when the caller passes the job object
# under the internal `_job` key. That key is stripped from the emitted meta and
# replaced with `referenced_secrets`, computed from the job by
# utils.job_referenced_secrets.
finding(rule, pkg_purl, meta) = result if {
	meta._job
	public_meta := object.remove(meta, ["_job"])
	secrets := utils.job_referenced_secrets(meta._job)
	result := {
		"rule_id": rule.id,
		"purl": pkg_purl,
		"meta": object.union(public_meta, {"referenced_secrets": secrets}),
	}
}

# finding (plain clause): applies when meta carries no `_job`; meta is emitted
# unchanged and no `referenced_secrets` field is added.
finding(rule, pkg_purl, meta) = result if {
	not meta._job
	result := {
		"rule_id": rule.id,
		"purl": pkg_purl,
		"meta": meta,
	}
}

_rule_config(rule_id, meta) = object.union(rule_config, config_values) if {
Expand Down
40 changes: 39 additions & 1 deletion opa/rego/poutine/utils.rego
Original file line number Diff line number Diff line change
Expand Up @@ -103,4 +103,42 @@ find_first_uses_in_job(job, uses) := xs if {
s := job.steps[i]
startswith(s.uses, sprintf("%v@", [uses[_]]))
}
}
}

########################################################################
# extract_referenced_secrets
# Extracts all secrets.* references from GitHub Actions expressions (${{ }})
# and returns them as a sorted array of names.
# Excludes GITHUB_TOKEN (in any letter case). Handles dot and bracket
# notation, and every reference inside an expression, not just the first.
########################################################################

# Text of every `${{ ...` expression in str, up to (not including) its closing
# `}}` or the end of the string. A lone `}` (e.g. the `{0}` placeholder of
# format()) does not end the expression.
_expressions(str) := {expr |
	expr := regex.find_n(`\$\{\{(?:[^}]|\}[^}])*`, str, -1)[_]
}

# Names captured by group 1 of `pattern` across all expressions in str.
# The GITHUB_TOKEN comparison is case-insensitive so `secrets.github_token`
# is excluded too.
_secret_names(str, pattern) := {name |
	expr := _expressions(str)[_]
	m := regex.find_all_string_submatch_n(pattern, expr, -1)[_]
	name := m[1]
	upper(name) != "GITHUB_TOKEN"
}

# Dot notation: ${{ secrets.FOO }} or ${{ format('{0}', secrets.FOO) }}
_secrets_dot_notation(str) := _secret_names(str, `secrets\.([a-zA-Z_][a-zA-Z0-9_]*)`)

# Bracket notation with single quotes: ${{ secrets['FOO'] }}
_secrets_bracket_single(str) := _secret_names(str, `secrets\['([a-zA-Z_][a-zA-Z0-9_]*)'\]`)

# Bracket notation with double quotes: ${{ secrets["FOO"] }}
# Also handles JSON-escaped quotes: secrets[\"FOO\"] (after json.marshal)
_secrets_bracket_double(str) := _secret_names(str, `secrets\[\\?"([a-zA-Z_][a-zA-Z0-9_]*)\\?"\]`)

extract_referenced_secrets(str) := sort(secrets) if {
	secrets := _secrets_dot_notation(str) | _secrets_bracket_single(str) | _secrets_bracket_double(str)
}

# job_referenced_secrets serializes the job object to a JSON string and runs
# extract_referenced_secrets over that text, so a reference in any string
# field of the job is considered.
job_referenced_secrets(job) := extract_referenced_secrets(json.marshal(job))
6 changes: 6 additions & 0 deletions opa/rego/rules/injection.rego
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ results contains poutine.finding(rule, pkg.purl, {
"job": job.id,
"step": i,
"details": sprintf("Sources: %s", [concat(" ", exprs)]),
"injection_sources": sort(exprs),
"_job": job,
"event_triggers": [event | event := workflow.events[j].name],
}) if {
pkg = input.packages[_]
Expand All @@ -48,6 +50,7 @@ results contains poutine.finding(rule, pkg.purl, {
"line": line,
"step": i,
"details": sprintf("Sources: %s", [concat(" ", exprs)]),
"injection_sources": sort(exprs),
"event_triggers": [event | event := action.events[j].name],
}) if {
pkg = input.packages[_]
Expand All @@ -69,6 +72,7 @@ results contains poutine.finding(rule, pkg.purl, {
"path": config.path,
"job": sprintf("%s.%s[%d]", [job.name, attr, i]),
"details": sprintf("Sources: %s", [concat(" ", exprs)]),
"injection_sources": sort(exprs),
"line": job[attr][i].line,
}) if {
pkg = input.packages[_]
Expand All @@ -94,6 +98,7 @@ results contains poutine.finding(rule, pkg.purl, {
"step": step_id,
"line": step.lines[attr],
"details": sprintf("Sources: %s", [concat(" ", exprs)]),
"injection_sources": sort(exprs),
}) if {
some attr in {"script", "powershell", "pwsh", "bash"}
pkg := input.packages[_]
Expand All @@ -117,6 +122,7 @@ results contains poutine.finding(rule, pkg.purl, {
"step": step_idx,
"line": step.lines.start,
"details": sprintf("Sources: %s", [concat(" ", exprs)]),
"injection_sources": sort(exprs),
}) if {
pkg := input.packages[_]
pipeline := pkg.pipeline_as_code_tekton[_]
Expand Down
21 changes: 16 additions & 5 deletions opa/rego/rules/untrusted_checkout_exec.rego
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,13 @@ build_commands[cmd] = {
results contains poutine.finding(rule, pkg_purl, {
"path": workflow_path,
"line": step.lines.run,
"job": job_id,
"lotp_tool": cmd,
"_job": job_obj,
"details": sprintf("Detected usage of `%s`", [cmd]),
"event_triggers": workflow_events,
}) if {
[pkg_purl, workflow_path, workflow_events, step] := _steps_after_untrusted_checkout[_]
[pkg_purl, workflow_path, workflow_events, step, job_id, job_obj] := _steps_after_untrusted_checkout[_]
regex.match(
sprintf("([^a-z]|^)(%v)", [concat("|", build_commands[cmd])]),
step.run,
Expand All @@ -112,10 +115,13 @@ results contains poutine.finding(rule, pkg_purl, {
results contains poutine.finding(rule, pkg_purl, {
"path": workflow_path,
"line": step.lines.uses,
"job": job_id,
"lotp_action": step.action,
"_job": job_obj,
"details": sprintf("Detected usage the GitHub Action `%s`", [step.action]),
"event_triggers": workflow_events,
}) if {
[pkg_purl, workflow_path, workflow_events, step] := _steps_after_untrusted_checkout[_]
[pkg_purl, workflow_path, workflow_events, step, job_id, job_obj] := _steps_after_untrusted_checkout[_]
regex.match(
sprintf("([^a-z]|^)(%v)@", [concat("|", build_github_actions[_])]),
step.uses,
Expand All @@ -126,17 +132,20 @@ results contains poutine.finding(rule, pkg_purl, {
results contains poutine.finding(rule, pkg_purl, {
"path": workflow_path,
"line": step.lines.uses,
"job": job_id,
"lotp_action": step.action,
"_job": job_obj,
"details": sprintf("Detected usage of a Local GitHub Action at path: `%s`", [step.action]),
"event_triggers": workflow_events,
}) if {
[pkg_purl, workflow_path, workflow_events, step] := _steps_after_untrusted_checkout[_]
[pkg_purl, workflow_path, workflow_events, step, job_id, job_obj] := _steps_after_untrusted_checkout[_]
regex.match(
`^\./`,
step.action,
)
}

_steps_after_untrusted_checkout contains [pkg.purl, workflow.path, events, s.step] if {
_steps_after_untrusted_checkout contains [pkg.purl, workflow.path, events, s.step, workflow.jobs[s.job_idx].id, workflow.jobs[s.job_idx]] if {
pkg := input.packages[_]
workflow := pkg.github_actions_workflows[_]

Expand All @@ -147,7 +156,7 @@ _steps_after_untrusted_checkout contains [pkg.purl, workflow.path, events, s.ste
s := utils.workflow_steps_after(pr_checkout)[_]
}

_steps_after_untrusted_checkout contains [pkg_purl, workflow.path, events, s.step] if {
_steps_after_untrusted_checkout contains [pkg_purl, workflow.path, events, s.step, workflow.jobs[s.job_idx].id, workflow.jobs[s.job_idx]] if {
[pkg_purl, workflow] := _workflows_runs_from_pr[_]

events := [event | event := workflow.events[i].name]
Expand All @@ -170,6 +179,7 @@ results contains poutine.finding(rule, pkg_purl, {
"job": job,
"step": s.step_idx,
"line": s.step.lines[attr],
"lotp_tool": cmd,
"details": sprintf("Detected usage of `%s`", [cmd]),
}) if {
[pkg_purl, pipeline_path, s, job] := _steps_after_untrusted_checkout_ado[_]
Expand Down Expand Up @@ -213,6 +223,7 @@ results contains poutine.finding(rule, pkg.purl, {
"job": task.name,
"step": step_idx,
"line": step.lines.script,
"lotp_tool": cmd,
"details": sprintf("Detected usage of `%s`", [cmd]),
}) if {
pkg := input.packages[_]
Expand Down
10 changes: 8 additions & 2 deletions results/results.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ package results

import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"strconv"

"github.com/rs/zerolog/log"
Expand All @@ -23,6 +23,12 @@ type FindingMeta struct {
Details string `json:"details,omitempty"`
EventTriggers []string `json:"event_triggers,omitempty"`
BlobSHA string `json:"blobsha,omitempty"`

// Structured fields for programmatic access
InjectionSources []string `json:"injection_sources,omitempty"` // Sources confirmed as injected into a sink
LOTPTool string `json:"lotp_tool,omitempty"` // Living Off The Pipeline tool (e.g., npm, pip)
LOTPAction string `json:"lotp_action,omitempty"` // Living Off The Pipeline GitHub Action
ReferencedSecrets []string `json:"referenced_secrets,omitempty"` // Secrets referenced in workflow (excludes GITHUB_TOKEN)
}

type Finding struct {
Expand All @@ -36,7 +42,7 @@ func (f *Finding) GenerateFindingFingerprint() string {
h := sha256.New()
h.Write([]byte(fingerprintString))
fingerprint := h.Sum(nil)
return fmt.Sprintf("%x", fingerprint)
return hex.EncodeToString(fingerprint)
}

func (m *FindingMeta) UnmarshalJSON(data []byte) error {
Expand Down
1 change: 1 addition & 0 deletions scanner/inventory_scanner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ func TestGithubWorkflows(t *testing.T) {
".github/workflows/anchors_job.yml",
".github/workflows/anchors_multiple.yml",
".github/workflows/anchors_with_vulnerability.yml",
".github/workflows/test_new_fields.yml",
})
}

Expand Down
Loading
Loading