Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 177 additions & 5 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -433,11 +433,13 @@ jobs:
shell: bash

# =========================================================================
# Stage 2.5: Mutation testing (main push only, informational)
# Sharded: 1 file per job for ~4x speedup (was ~45min sequential).
# Stage 2.5: Mutation testing (main push only, informational, full matrix)
# Sharded: ~1 file per job to keep wall-clock per shard under 60 min.
# Scope expanded 2026-04-26: + src/server/dispatch/ (4 shards: 5a-5d),
# + src/routing/classify/ (2 shards: 6a-6b). See docs/explanation/.
# =========================================================================
mutants:
name: Mutation Testing (shard ${{ matrix.shard }}/${{ strategy.job-total }})
name: Mutation Testing (shard ${{ matrix.shard }})
if: github.ref == 'refs/heads/main'
needs: [test-ubuntu, test-other]
runs-on: ubuntu-latest
Expand All @@ -453,8 +455,8 @@ jobs:
- shard: "2"
file: src/features/dlp/mod.rs
mutants_shard: ""
# pii.rs depasse le budget 60min sur un seul job — split natif
# cargo-mutants `--shard K/N` sans toucher au code prod.
# pii.rs exceeds 60min budget on a single job — native split via
# cargo-mutants `--shard K/N`, no production code change.
- shard: "3a"
file: src/features/dlp/pii.rs
mutants_shard: "0/2"
Expand All @@ -464,6 +466,30 @@ jobs:
- shard: "4"
file: src/features/dlp/dfa.rs
mutants_shard: ""
# Dispatch pipeline (T-CI-0e, 2026-04-26): mod.rs (575 LoC) is the
# largest file and is split in two via cargo-mutants `--shard K/N`
# (5a/5b); retry.rs (478 LoC) and provider_loop.rs each get their own
# shard (5c, 5d). NOTE: the resolver.rs / telemetry.rs siblings have no
# matrix entry below — confirm whether they still need to be bundled in.
- shard: "5a"
file: src/server/dispatch/mod.rs
mutants_shard: "0/2"
- shard: "5b"
file: src/server/dispatch/mod.rs
mutants_shard: "1/2"
- shard: "5c"
file: src/server/dispatch/retry.rs
mutants_shard: ""
- shard: "5d"
file: src/server/dispatch/provider_loop.rs
mutants_shard: ""
# Classify engine (T-CI-0e, 2026-04-26): one shard per file —
# mod.rs (476 LoC) in 6a and classify.rs (512 LoC) in 6b.
- shard: "6a"
file: src/routing/classify/mod.rs
mutants_shard: ""
- shard: "6b"
file: src/routing/classify/classify.rs
mutants_shard: ""
steps:
- uses: step-security/harden-runner@v2
with:
Expand Down Expand Up @@ -494,6 +520,152 @@ jobs:
path: mutants.out/
if-no-files-found: ignore

# =========================================================================
# Stage 2.6: Mutation testing on PRs (diff-based sampling, 25 min cap)
# Runs only on the files the PR touches, restricted to the curated
# mutation-tested scope (router, dispatch, classify, dlp). Skips when
# no Rust files in scope changed. Always informational — never blocks
# the merge. The full matrix on `main` remains the source of truth.
# =========================================================================
mutants-pr:
  name: Mutation Testing (PR diff)
  if: github.event_name == 'pull_request'
  needs: [test-ubuntu]
  runs-on: ubuntu-latest
  timeout-minutes: 30
  # Informational job: a failure here must never turn the PR red.
  continue-on-error: true
  permissions:
    contents: read
    pull-requests: write  # post sticky comment with sampling summary
  steps:
    - uses: step-security/harden-runner@v2
      with:
        egress-policy: audit
    - uses: actions/checkout@v6
      with:
        # Need the merge-base to compute `BASE...HEAD` diff.
        fetch-depth: 0
    - uses: dtolnay/rust-toolchain@stable
    - uses: Swatinem/rust-cache@v2
      with:
        shared-key: mutants-pr
        save-if: false  # never write the cache from PR runs
    - name: Install cargo-mutants
      run: cargo install cargo-mutants@24.11.2 --locked  # pinned — update manually
    - name: Run mutation testing on changed files
      id: mutate
      env:
        MUTATION_TIMEOUT_SECONDS: "1500"
        MUTATION_PER_MUTANT_TIMEOUT: "120"
        # Pass the base branch name through the environment instead of
        # interpolating the expression into the script text, so a branch
        # name containing shell metacharacters cannot inject commands.
        PR_BASE_REF: ${{ github.event.pull_request.base.ref }}
      run: |
        set +e
        ./scripts/mutation-pr.sh "origin/${PR_BASE_REF}"
        rc=$?
        set -e
        # Exit codes: 0 clean, 1 missed mutants, 2 timed out, 3 nothing in scope.
        # All are informational — never fail the job from this step.
        echo "Mutation script exit: ${rc}"
        exit 0
    - name: Upload mutation testing results
      if: always()
      uses: actions/upload-artifact@v7
      with:
        name: mutants-pr-results-${{ github.sha }}
        path: mutants.out/
        if-no-files-found: ignore
    - name: Write PR sampling summary to job summary
      if: always()
      env:
        # Fall back to placeholders when the script never reached `emit`.
        STATUS: ${{ steps.mutate.outputs.status || 'unknown' }}
        DURATION: ${{ steps.mutate.outputs.duration_s || 'n/a' }}
        TOTAL: ${{ steps.mutate.outputs.total || 'n/a' }}
        CAUGHT: ${{ steps.mutate.outputs.caught || 'n/a' }}
        MISSED: ${{ steps.mutate.outputs.missed || 'n/a' }}
        TIMEOUT_N: ${{ steps.mutate.outputs.timeout || 'n/a' }}
        UNVIABLE: ${{ steps.mutate.outputs.unviable || 'n/a' }}
        ARTIFACT: mutants-pr-results-${{ github.sha }}
      run: |
        # Plain ASCII output — backticks elided to dodge shellcheck SC2016
        # noise inside the actionlint pipeline.
        {
          echo "## Mutation testing (PR diff sample)"
          echo
          echo "Informational only — never blocks merge. Full matrix runs on main."
          echo
          echo "| Metric | Value |"
          echo "|--------|-------|"
          echo "| Status | ${STATUS} |"
          echo "| Duration | ${DURATION} s |"
          echo "| Total mutants | ${TOTAL} |"
          echo "| Caught | ${CAUGHT} |"
          echo "| Missed | ${MISSED} |"
          echo "| Timeout | ${TIMEOUT_N} |"
          echo "| Unviable | ${UNVIABLE} |"
          echo
          echo "Status legend: clean (no survivors), missed (inspect artifact),"
          echo "timed-out (25 min cap reached), skipped-* (no in-scope diff)."
          echo
          echo "Artifact: ${ARTIFACT}."
        } >>"${GITHUB_STEP_SUMMARY}"
    - name: Comment PR with sampling summary
      # Skip on forks (no token write access) and when nothing was sampled.
      if: >-
        always()
        && github.event.pull_request.head.repo.full_name == github.repository
        && steps.mutate.outputs.status != 'skipped-no-rust'
        && steps.mutate.outputs.status != 'skipped-out-of-scope'
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        PR_NUMBER: ${{ github.event.pull_request.number }}
        STATUS: ${{ steps.mutate.outputs.status || 'unknown' }}
        DURATION: ${{ steps.mutate.outputs.duration_s || 'n/a' }}
        TOTAL: ${{ steps.mutate.outputs.total || 'n/a' }}
        CAUGHT: ${{ steps.mutate.outputs.caught || 'n/a' }}
        MISSED: ${{ steps.mutate.outputs.missed || 'n/a' }}
        TIMEOUT_N: ${{ steps.mutate.outputs.timeout || 'n/a' }}
        UNVIABLE: ${{ steps.mutate.outputs.unviable || 'n/a' }}
        ARTIFACT: mutants-pr-results-${{ github.sha }}
      run: |
        # Build the comment body via heredoc-style echos (no single-quoted
        # printf format; shellcheck SC2016-friendly). The marker line is
        # the discriminator we grep for to upsert prior comments.
        MARKER="<!-- mutants-pr-summary -->"
        BODY_FILE="$(mktemp)"
        {
          echo "${MARKER}"
          echo "## Mutation testing (PR diff sample)"
          echo
          echo "Informational — never blocks merge. Full matrix runs on main."
          echo
          echo "| Metric | Value |"
          echo "|--------|-------|"
          echo "| Status | ${STATUS} |"
          echo "| Duration | ${DURATION} s |"
          echo "| Total | ${TOTAL} |"
          echo "| Caught | ${CAUGHT} |"
          echo "| Missed | ${MISSED} |"
          echo "| Timeout | ${TIMEOUT_N} |"
          echo "| Unviable | ${UNVIABLE} |"
          echo
          echo "Legend: clean (no survivors), missed (inspect artifact), timed-out (25 min cap reached)."
          echo
          echo "Artifact: ${ARTIFACT}."
        } >"${BODY_FILE}"

        # Look for an existing summary comment to update; otherwise post a
        # new one. `gh issue comment` does not support upsert natively, so
        # we list comments, filter by marker, and PATCH if found.
        #
        # `--paginate` walks every page of comments: without it only the
        # first page is inspected, and on a busy PR the marker comment is
        # missed and a duplicate is posted on every run. The jq filter is
        # applied per page, so it emits one id per match and `head` keeps
        # the first. `|| true` keeps the lookup best-effort.
        COMMENTS_URL="repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments"
        EXISTING_ID="$(
          gh api --paginate "${COMMENTS_URL}" \
            --jq ".[] | select(.body | startswith(\"${MARKER}\")) | .id" \
            | head -n 1 || true
        )"

        if [ -n "${EXISTING_ID}" ] && [ "${EXISTING_ID}" != "null" ]; then
          # PATCH a comment by ID — body comes from the file via jq -Rs,
          # which slurps the whole file as a single JSON string for the
          # request body envelope.
          jq -Rs '{body: .}' "${BODY_FILE}" \
            | gh api --method PATCH "repos/${GITHUB_REPOSITORY}/issues/comments/${EXISTING_ID}" --input - >/dev/null
          echo "Updated existing comment ${EXISTING_ID}."
        else
          gh pr comment "${PR_NUMBER}" --body-file "${BODY_FILE}"
        fi
        rm -f "${BODY_FILE}"

# =========================================================================
# Stage 3: Cross build (main push + tag push only, not PRs)
# =========================================================================
Expand Down
185 changes: 185 additions & 0 deletions scripts/mutation-pr.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
#!/usr/bin/env bash
#
# Diff-based mutation testing for pull requests.
#
# Runs cargo-mutants on Rust files touched by the PR (vs. the base ref),
# capping wall-clock time at MUTATION_TIMEOUT_SECONDS so PR CI stays under
# the 30 min budget. Files outside the curated coverage list (router,
# dispatch, classify, dlp) are skipped — they were not part of the audited
# mutation scope.
#
# Usage: scripts/mutation-pr.sh [BASE_REF]
# BASE_REF defaults to origin/main.
#
# Environment:
# MUTATION_TIMEOUT_SECONDS Hard wall-clock cap (default: 1500 = 25 min).
# MUTATION_PER_MUTANT_TIMEOUT cargo-mutants per-mutant timeout (default: 120).
# GITHUB_OUTPUT If set, writes status keys for the workflow.
#
# Exit codes:
# 0 Mutation testing converged within the time budget (clean run).
# 1 cargo-mutants reported surviving mutations or unexpected failure.
# 2 Time budget exhausted before convergence (PR is informational only).
# 3 No Rust files in scope — caller should skip the job.

# Fail fast: abort on command errors, unset variables, and pipeline failures.
set -euo pipefail

# Name used as the log prefix. Assigned first and frozen afterwards so a
# failing command substitution is not masked by `readonly` (ShellCheck
# SC2155); `--` guards against a $0 that begins with a dash.
SCRIPT_NAME="$(basename -- "$0")"
readonly SCRIPT_NAME

# Ref the PR is diffed against (first CLI argument).
readonly BASE_REF="${1:-origin/main}"
# Wall-clock budget for the whole cargo-mutants run, in seconds.
readonly TIMEOUT_SECONDS="${MUTATION_TIMEOUT_SECONDS:-1500}"
# Value handed to cargo-mutants `--timeout` for each individual mutant.
readonly PER_MUTANT_TIMEOUT="${MUTATION_PER_MUTANT_TIMEOUT:-120}"

# Curated scope: files where we have invested in mutation coverage. Limiting
# the PR run to these paths keeps signal high (no false alarms from modules
# that have never been mutation-tested) and runtime bounded.
readonly -a SCOPE_PREFIXES=(
  "src/router/"
  "src/server/dispatch/"
  "src/routing/classify/"
  "src/features/dlp/"
)

# Write one diagnostic line to stderr, prefixed with "[<script name>] ".
# All arguments are joined with spaces into a single message.
log() {
  printf '[%s] %s\n' "${SCRIPT_NAME}" "$*" 1>&2
}

# Record a workflow output as a "key=value" line.
#   $1  output key
#   $2  output value
# Does nothing when GITHUB_OUTPUT is unset or empty (i.e. outside Actions).
emit() {
  local name="$1" val="$2"
  if [[ -z "${GITHUB_OUTPUT:-}" ]]; then
    return 0
  fi
  printf '%s=%s\n' "${name}" "${val}" >>"${GITHUB_OUTPUT}"
}

# Succeed (status 0) when path $1 starts with one of SCOPE_PREFIXES;
# return 1 otherwise.
in_scope() {
  local candidate="$1" scope_prefix
  for scope_prefix in "${SCOPE_PREFIXES[@]}"; do
    # Quoted prefix is matched literally; the trailing * accepts any suffix.
    case "${candidate}" in
      "${scope_prefix}"*) return 0 ;;
    esac
  done
  return 1
}

# Run cargo-mutants on the in-scope Rust files changed between BASE_REF and
# HEAD, then publish a coarse summary.
#
# Reads:  BASE_REF, TIMEOUT_SECONDS, PER_MUTANT_TIMEOUT, SCOPE_PREFIXES.
# Emits:  status, duration_s, exit_code, total, caught, missed, timeout,
#         unviable (through `emit`).
# Exits:  0 clean run, 1 cargo-mutants exited non-zero, 2 wall-clock budget
#         exhausted, 3 nothing to mutate (no changed or no in-scope files).
main() {
  log "Base ref: ${BASE_REF}"
  log "Wall-clock cap: ${TIMEOUT_SECONDS}s, per-mutant cap: ${PER_MUTANT_TIMEOUT}s"

  # Resolve the diff base. In CI we may need to fetch the base ref first;
  # the fetch is best-effort and a failure is tolerated.
  if ! git rev-parse --verify "${BASE_REF}" >/dev/null 2>&1; then
    log "Cannot resolve ${BASE_REF}; attempting fetch."
    git fetch --no-tags --depth=50 origin "${BASE_REF#origin/}" || true
  fi

  # Fall back to BASE_REF itself when no merge base can be computed.
  local merge_base
  merge_base="$(git merge-base "${BASE_REF}" HEAD 2>/dev/null || echo "${BASE_REF}")"
  log "Merge base: ${merge_base}"

  # `--name-only` + `--diff-filter=ACMR` keeps Added/Copied/Modified/Renamed
  # and excludes Deleted (cargo-mutants can't mutate a file that was removed).
  # The extension is filtered in shell rather than via pathspec — avoids
  # relying on `:(glob)` semantics that vary across git versions on different
  # runners. `-- src` scopes the diff to the source tree; the `src/` prefix
  # and `.rs` suffix tests accept any nesting depth.
  local -a all_changed=()
  local line
  while IFS= read -r line; do
    [[ "${line}" == src/* && "${line}" == *.rs ]] || continue
    all_changed+=("${line}")
  done < <(git diff --name-only --diff-filter=ACMR "${merge_base}...HEAD" -- src || true)

  if [[ "${#all_changed[@]}" -eq 0 ]]; then
    log "No Rust source files changed vs. ${BASE_REF}."
    emit "status" "skipped-no-rust"
    exit 3
  fi

  log "Changed Rust files (${#all_changed[@]}):"
  printf '  - %s\n' "${all_changed[@]}" >&2

  # Keep only the files that live under one of the curated scope prefixes.
  local -a in_scope_files=()
  local f
  for f in "${all_changed[@]}"; do
    if in_scope "${f}"; then
      in_scope_files+=("${f}")
    fi
  done

  if [[ "${#in_scope_files[@]}" -eq 0 ]]; then
    log "No changed files fall within mutation-tested scope."
    emit "status" "skipped-out-of-scope"
    exit 3
  fi

  log "Files in mutation scope (${#in_scope_files[@]}):"
  printf '  - %s\n' "${in_scope_files[@]}" >&2

  # Build one `--file ARG` pair for each in-scope file.
  local -a mutants_args=()
  for f in "${in_scope_files[@]}"; do
    mutants_args+=(--file "${f}")
  done

  local start_ts end_ts duration_s exit_code=0
  start_ts="$(date +%s)"

  # `--foreground` keeps cargo-mutants in the caller's process group. Per the
  # GNU timeout manual, in this mode only the direct child is signalled —
  # its cargo subprocesses are NOT — so cleanup of grandchildren relies on
  # cargo-mutants handling SIGTERM itself (TODO confirm for the pinned
  # version). `--kill-after=30` escalates to SIGKILL if it has not exited
  # 30 s after the SIGTERM, so the cap stays a hard one. `--preserve-status`
  # reports cargo-mutants' own exit status instead of 124.
  set +e
  timeout --foreground --preserve-status --kill-after=30 "${TIMEOUT_SECONDS}" \
    cargo mutants \
    --package grob \
    --timeout "${PER_MUTANT_TIMEOUT}" \
    -j 2 \
    --no-shuffle \
    --colors=never \
    "${mutants_args[@]}" \
    -- --lib
  exit_code=$?
  set -e

  end_ts="$(date +%s)"
  duration_s=$((end_ts - start_ts))
  log "cargo-mutants duration: ${duration_s}s, exit: ${exit_code}"

  # Parse a coarse summary from mutants.out/outcomes.json if produced.
  # The file is expected to be a JSON document whose outcome objects carry a
  # `summary` field (CAUGHT, MISSED, TIMEOUT, UNVIABLE, FAILURE, SUCCESS).
  # `..` walks the entire tree, so the counts do not depend on whether the
  # outcomes sit at the root or nested under `outcomes:`. Every count falls
  # back to 0 when jq is missing, the file is unreadable, or parsing fails.
  local total=0 caught=0 missed=0 timeout_n=0 unviable=0
  local outcomes_file="mutants.out/outcomes.json"
  if [[ -r "${outcomes_file}" ]] && command -v jq >/dev/null 2>&1; then
    total=$(jq '[.. | objects | select(has("summary"))] | length' "${outcomes_file}" 2>/dev/null || echo 0)
    caught=$(jq '[.. | objects | select(.summary == "CAUGHT")] | length' "${outcomes_file}" 2>/dev/null || echo 0)
    missed=$(jq '[.. | objects | select(.summary == "MISSED")] | length' "${outcomes_file}" 2>/dev/null || echo 0)
    timeout_n=$(jq '[.. | objects | select(.summary == "TIMEOUT")] | length' "${outcomes_file}" 2>/dev/null || echo 0)
    unviable=$(jq '[.. | objects | select(.summary == "UNVIABLE")] | length' "${outcomes_file}" 2>/dev/null || echo 0)
  fi

  emit "duration_s" "${duration_s}"
  emit "exit_code" "${exit_code}"
  emit "total" "${total}"
  emit "caught" "${caught}"
  emit "missed" "${missed}"
  emit "timeout" "${timeout_n}"
  emit "unviable" "${unviable}"

  # With --preserve-status the exit code is whatever cargo-mutants returned
  # after being signalled (or 137 after the SIGKILL escalation), so it cannot
  # reliably identify a wall-clock kill. Classify by elapsed time instead,
  # with a 5 s tolerance below the cap.
  if (( duration_s >= TIMEOUT_SECONDS - 5 )); then
    log "Wall-clock budget exhausted (${duration_s}s >= ${TIMEOUT_SECONDS}s)."
    emit "status" "timed-out"
    exit 2
  fi

  if (( exit_code == 0 )); then
    log "Mutation testing clean (caught=${caught}, total=${total})."
    emit "status" "clean"
    exit 0
  fi

  # Any other non-zero exit is reported as "missed" (exit 1). Only claim
  # surviving mutants in the log when some were actually counted; otherwise
  # point at the other plausible causes (presumably a failing baseline,
  # per-mutant timeouts, or a usage error — check the cargo-mutants log).
  if (( missed > 0 )); then
    log "Mutation testing surfaced ${missed} missed mutant(s)."
  else
    log "cargo-mutants exited ${exit_code} with no missed mutants recorded; inspect mutants.out/ for the cause."
  fi
  emit "status" "missed"
  exit 1
}

# Script entry point: forward all command-line arguments to main.
main "$@"
Loading