diff --git a/.github/workflows/review-pr.yml b/.github/workflows/review-pr.yml
index d70fa31..3cbdbbe 100644
--- a/.github/workflows/review-pr.yml
+++ b/.github/workflows/review-pr.yml
@@ -33,6 +33,11 @@ on:
         required: false
         type: string
         default: ''
+      max-diff-lines:
+        description: 'Auto-filter cap: if the diff exceeds this many lines after removing score-0 files, lowest-risk files are progressively excluded until it fits. Set to 0 to disable. Default: 3000.'
+        required: false
+        type: number
+        default: 3000
       trigger-run-id:
         description: "Workflow run ID from pr-review-trigger.yml — used to download the event context artifact. When set, resolve-context job downloads the artifact and routes to the appropriate job."
         required: false
@@ -339,6 +344,7 @@ jobs:
           additional-prompt: ${{ inputs.additional-prompt }}
           add-prompt-files: ${{ inputs.add-prompt-files }}
           exclude-paths: ${{ inputs.exclude-paths }}
+          max-diff-lines: ${{ inputs.max-diff-lines }}
           model: ${{ inputs.model }}
           github-token: ${{ env.GITHUB_APP_TOKEN || github.token }}
           anthropic-api-key: ${{ env.ANTHROPIC_API_KEY_FROM_SSM || secrets.ANTHROPIC_API_KEY }}
diff --git a/.github/workflows/test-e2e.yml b/.github/workflows/test-e2e.yml
index 1b25776..39aeaa8 100644
--- a/.github/workflows/test-e2e.yml
+++ b/.github/workflows/test-e2e.yml
@@ -6,6 +6,12 @@ on:
     branches: [main]
   push:
     branches: [main]
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: 'PR number to run mention-reply E2E tests against'
+        required: true
+        type: string
 
 permissions:
   contents: read
@@ -188,13 +194,13 @@ jobs:
   test-mention-reply-toplevel:
     name: Mention Reply (Top-Level) E2E Test
     runs-on: ubuntu-latest
-    if: github.event_name == 'pull_request'
+    if: github.event_name == 'workflow_dispatch'
     permissions:
       contents: read
       id-token: write
       issues: write
     env:
-      TEST_PR_NUMBER: ${{ github.event.pull_request.number }}
+      TEST_PR_NUMBER: ${{ github.event.pull_request.number || inputs.pr_number }}
     steps:
       - name: Check if fork PR
         id: fork-check
@@ -233,11 +239,26 @@ jobs:
         if: steps.fork-check.outputs.is_fork != 'true'
         uses: docker/cagent-action/setup-credentials@2a43a3882401f45e3114df7f6d66eca184993a90 # v1.5.2
 
+      - name: Create anchor issue comment on current PR
+        if: steps.fork-check.outputs.is_fork != 'true'
+        id: create-anchor
+        env:
+          GH_TOKEN: ${{ env.GITHUB_APP_TOKEN || github.token }}
+        run: |
+          COMMENT_ID=$(gh api repos/docker/cagent-action/issues/$TEST_PR_NUMBER/comments \
+            --method POST \
+            --raw-field body="@docker-agent this is an automated e2e test — please reply with a brief acknowledgement." \
+            --jq .id)
+          echo "Created test anchor comment ID: $COMMENT_ID"
+          echo "test_comment_id=$COMMENT_ID" >> $GITHUB_OUTPUT
+
       - name: Write synthetic issue_comment event
         if: steps.fork-check.outputs.is_fork != 'true'
         run: |
+          COMMENT_ID="${{ steps.create-anchor.outputs.test_comment_id }}"
           jq -n \
             --arg actor "${{ github.actor }}" \
+            --argjson comment_id "$COMMENT_ID" \
             --argjson pr_number "$TEST_PR_NUMBER" \
             '{
               "action": "created",
@@ -246,7 +267,7 @@ jobs:
                 "pull_request": { "url": ("https://api.github.com/repos/docker/cagent-action/pulls/" + ($pr_number | tostring)) }
               },
               "comment": {
-                "id": 9999999901,
+                "id": $comment_id,
                 "body": "@docker-agent this is an automated e2e test — please reply with a brief acknowledgement.",
                 "user": { "login": $actor, "type": "User" }
               },
@@ -260,13 +281,13 @@ jobs:
       - name: Run mention-reply handler
         if: steps.fork-check.outputs.is_fork != 'true'
         id: mention-handler
-        uses: ./.github/actions/mention-reply
         env:
-          GITHUB_EVENT_PATH: /tmp/test-event-toplevel.json
-          GITHUB_EVENT_NAME: issue_comment
-        with:
-          github-token: ${{ env.GITHUB_APP_TOKEN || github.token }}
-          org-membership-token: ${{ env.ORG_MEMBERSHIP_TOKEN || github.token }}
+          INPUT_GITHUB-TOKEN: ${{ env.GITHUB_APP_TOKEN || github.token }}
+          INPUT_ORG-MEMBERSHIP-TOKEN: ${{ env.ORG_MEMBERSHIP_TOKEN || github.token }}
+        run: |
+          export GITHUB_EVENT_PATH=/tmp/test-event-toplevel.json
+          export GITHUB_EVENT_NAME=issue_comment
+          node "$GITHUB_WORKSPACE/dist/mention-reply.js"
 
       - name: Assert should-reply output
         if: steps.fork-check.outputs.is_fork != 'true'
@@ -312,10 +333,11 @@ jobs:
           echo "✅ Reply posted successfully ($FOUND comment(s) found)"
 
       - name: Cleanup test comments
-        if: always() && steps.fork-check.outputs.is_fork != 'true' && steps.mention-handler.outputs.should-reply == 'true'
+        if: always() && steps.fork-check.outputs.is_fork != 'true'
         continue-on-error: true
         env:
           GH_TOKEN: ${{ env.GITHUB_APP_TOKEN || github.token }}
+          ANCHOR_ID: ${{ steps.create-anchor.outputs.test_comment_id }}
         run: |
           # Delete any test reply comments posted in the last 5 minutes
           gh api repos/docker/cagent-action/issues/$TEST_PR_NUMBER/comments \
@@ -324,17 +346,22 @@ jobs:
             gh api "repos/docker/cagent-action/issues/comments/$comment_id" -X DELETE || true
             echo "Deleted comment $comment_id"
           done
+          # Delete the anchor comment itself
+          if [ -n "$ANCHOR_ID" ]; then
+            gh api "repos/docker/cagent-action/issues/comments/$ANCHOR_ID" -X DELETE || true
+            echo "Deleted anchor comment $ANCHOR_ID"
+          fi
 
   test-mention-reply-inline:
     name: Mention Reply (Inline) E2E Test
     runs-on: ubuntu-latest
-    if: github.event_name == 'pull_request'
+    if: github.event_name == 'workflow_dispatch'
     permissions:
       contents: read
       id-token: write
       pull-requests: write
     env:
-      TEST_PR_NUMBER: ${{ github.event.pull_request.number }}
+      TEST_PR_NUMBER: ${{ github.event.pull_request.number || inputs.pr_number }}
     steps:
       - name: Check if fork PR
         id: fork-check
@@ -427,13 +454,13 @@ jobs:
       - name: Run mention-reply handler
         if: steps.fork-check.outputs.is_fork != 'true'
         id: mention-handler
-        uses: ./.github/actions/mention-reply
         env:
-          GITHUB_EVENT_PATH: /tmp/test-event-inline.json
-          GITHUB_EVENT_NAME: pull_request_review_comment
-        with:
-          github-token: ${{ env.GITHUB_APP_TOKEN || github.token }}
-          org-membership-token: ${{ env.ORG_MEMBERSHIP_TOKEN || github.token }}
+          INPUT_GITHUB-TOKEN: ${{ env.GITHUB_APP_TOKEN || github.token }}
+          INPUT_ORG-MEMBERSHIP-TOKEN: ${{ env.ORG_MEMBERSHIP_TOKEN || github.token }}
+        run: |
+          export GITHUB_EVENT_PATH=/tmp/test-event-inline.json
+          export GITHUB_EVENT_NAME=pull_request_review_comment
+          node "$GITHUB_WORKSPACE/dist/mention-reply.js"
 
       - name: Assert should-reply output
         if: steps.fork-check.outputs.is_fork != 'true'
diff --git a/.npmrc b/.npmrc
new file mode 100644
index 0000000..7426f50
--- /dev/null
+++ b/.npmrc
@@ -0,0 +1,21 @@
+# pnpm safe defaults — registry & TLS settings
+#
+# In pnpm v10+, most settings live in pnpm-workspace.yaml.
+# Only auth, registry, SSL, and proxy settings belong in .npmrc.
+#
+# Copy this file alongside pnpm-workspace.yaml to your project root.
+# https://github.com/docker-security/safe-defaults
+
+# --- TLS & Registry Security ---
+
+# Require valid TLS certificates when connecting to registries.
+# Never disable this. If you need a custom CA, use `cafile=` instead.
+strict-ssl=true
+
+# Make the default registry explicit and auditable.
+registry=https://registry.npmjs.org/
+
+# --- Optional: scoped / private registries ---
+
+# @myorg:registry=https://npm.pkg.github.com
+# //npm.pkg.github.com/:_authToken=${NPM_TOKEN}
\ No newline at end of file
diff --git a/AGENTS.md b/AGENTS.md
index 64df508..ddb6232 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -184,7 +184,7 @@ The action runs untrusted input (PR titles, bodies, comments, diffs) through an
 
 ```bash
 # Install (uses pnpm via Corepack, see packageManager in package.json)
-pnpm install
+pnpm install --frozen-lockfile
 
 # Build TypeScript bundles → dist/
 pnpm build
diff --git a/package.json b/package.json
index 5087ff1..606cc4e 100644
--- a/package.json
+++ b/package.json
@@ -32,7 +32,7 @@
     "typescript": "5.9.3",
     "vitest": "4.0.18"
   },
-  "packageManager": "pnpm@10.19.0",
+  "packageManager": "pnpm@10.26.0",
   "pnpm": {
     "onlyBuiltDependencies": [
       "esbuild"
diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml
new file mode 100644
index 0000000..3b41dff
--- /dev/null
+++ b/pnpm-workspace.yaml
@@ -0,0 +1,82 @@
+# Not using workspaces — this file exists solely for pnpm-specific settings.
+# minimumReleaseAge and minimumReleaseAgeExclude are pnpm-workspace.yaml settings
+# in pnpm 10.x, not .npmrc settings.
+packages: []
+
+# pnpm safe defaults for developer laptops and projects
+#
+# Requires pnpm >= 10.26.0 (for blockExoticSubdeps, trustPolicy)
+#
+# https://github.com/docker-security/safe-defaults
+
+# --- Supply Chain Protection ---
+
+# Refuse to install packages published less than 3 days ago.
+# Most supply chain attacks are discovered within 24-72 hours.
+# Value is in minutes: 10080 = 7 days.
+minimumReleaseAge: 10080
+
+# Packages exempt from the age gate (e.g. urgent security patches).
+# Supports exact names, scopes (@myorg/*), and pinned versions (pkg@1.2.3).
+# minimumReleaseAgeExclude:
+#   - "@myorg/*"
+
+# Block transitive dependencies from using exotic sources (git repos,
+# tarball URLs). Only direct dependencies may use these.
+# Default since v10.26.0.
+blockExoticSubdeps: true
+
+# Fail if a package's trust level has decreased compared to previous
+# releases (e.g. lost provenance or trusted-publisher status).
+trustPolicy: no-downgrade
+
+# Packages exempt from trust-policy checks.
+# trustPolicyExclude:
+#   - "some-package@1.2.3"
+
+# --- Build Script Control ---
+
+# Fail if any dependency has unreviewed build scripts.
+# Default since v10.3.0. Forces explicit decisions about which
+# packages may run preinstall/install/postinstall scripts.
+strictDepBuilds: true
+
+# Allowlist of packages permitted to run build scripts.
+# Add packages here as needed (e.g. native addons).
+allowBuilds: {}
+  # esbuild: true
+  # @swc/core: true
+  # sharp: true
+
+# --- Dependency Resolution ---
+
+# Resolve subdependencies from versions published close to the direct
+# dependency's publish date. Reduces subdependency hijacking risk.
+resolutionMode: time-based
+
+# --- Integrity & Reproducibility ---
+
+# Save exact versions (1.2.3) instead of semver ranges (^1.2.3).
+# Prevents silent minor/patch drift between installs.
+# Pair with Dependabot or Renovate for controlled updates.
+savePrefix: ""
+
+# Verify store integrity on every install.
+# Default true — made explicit.
+verifyStoreIntegrity: true
+
+# --- Peer Dependencies ---
+
+# Treat unmet peer dependencies as errors instead of warnings.
+strictPeerDependencies: true
+
+# Exempt first-party Docker packages from the age gate so internal releases
+# aren't blocked.
+minimumReleaseAgeExclude:
+  - '@docker/*'
+
+# --- Optional: stricter settings (uncomment to enable) ---
+
+# Treat engine version mismatches as errors.
+# Useful when your pnpm-workspace.yaml or package.json declares engines.
+# engineStrict: true
\ No newline at end of file
diff --git a/review-pr/action.yml b/review-pr/action.yml
index 391762b..51ee263 100644
--- a/review-pr/action.yml
+++ b/review-pr/action.yml
@@ -63,6 +63,10 @@ inputs:
     description: 'Newline-separated list of path prefixes to strip from the diff before chunking. Files matching these prefixes are excluded from review entirely.'
     required: false
     default: ''
+  max-diff-lines:
+    description: "Auto-filter cap: if the diff exceeds this many lines after removing score-0 files, lowest-risk files are progressively excluded until it fits. Set to 0 to disable. Default: 3000."
+    required: false
+    default: "3000"
 
 outputs:
   exit-code:
@@ -264,6 +268,26 @@ runs:
       run: |
         node "$ACTION_PATH/../dist/filter-diff.js" pr.diff "$EXCLUDE_PATHS"
 
+    - name: Score file risk
+      if: hashFiles('pr.diff') != ''
+      shell: bash
+      env:
+        ACTION_PATH: ${{ github.action_path }}
+        EXCLUDE_PATHS: ${{ inputs.exclude-paths }}
+      run: |
+        set -euo pipefail
+        node "$ACTION_PATH/../dist/score-risk.js" pr.diff "$EXCLUDE_PATHS"
+        echo "✅ File risk scores: $(jq -c . /tmp/file_risk_scores.json)"
+
+    - name: Auto-filter low-risk files
+      if: hashFiles('pr.diff') != '' && steps.lock-check.outputs.skip != 'true'
+      shell: bash
+      env:
+        ACTION_PATH: ${{ github.action_path }}
+        MAX_DIFF_LINES: ${{ inputs.max-diff-lines }}
+      run: |
+        node "$ACTION_PATH/../dist/auto-filter-diff.js" pr.diff "$MAX_DIFF_LINES"
+
     - name: Split diff into chunks
       if: hashFiles('pr.diff') != ''
       id: chunk-diff
@@ -335,17 +359,6 @@ runs:
           done
         fi
 
-    - name: Score file risk
-      if: hashFiles('pr.diff') != ''
-      shell: bash
-      env:
-        ACTION_PATH: ${{ github.action_path }}
-        EXCLUDE_PATHS: ${{ inputs.exclude-paths }}
-      run: |
-        set -euo pipefail
-        node "$ACTION_PATH/../dist/score-risk.js" pr.diff "$EXCLUDE_PATHS"
-        echo "✅ File risk scores: $(jq -c . /tmp/file_risk_scores.json)"
-
     - name: Generate file history
       if: hashFiles('changed_files.txt') != ''
       shell: bash
diff --git a/review-pr/agents/evals/auto-filter-integration-1.json b/review-pr/agents/evals/auto-filter-integration-1.json
new file mode 100644
index 0000000..ce51fe4
--- /dev/null
+++ b/review-pr/agents/evals/auto-filter-integration-1.json
@@ -0,0 +1,26 @@
+{
+  "id": "a1f2e3d4-auto-filter-integration-001",
+  "title": "Auto-filter integration - score-0 test files excluded, SQL injection caught in remaining file (run 1)",
+  "evals": {
+    "setup": "apk add --no-cache github-cli jq nodejs && gen_rs() { local fp=$1; printf 'diff --git a/%s b/%s\\nindex abc1234..def5678 100644\\n--- a/%s\\n+++ b/%s\\n@@ -1,2 +1,52 @@\\n // Rust integration test\\n \\n' \"$fp\" \"$fp\" \"$fp\" \"$fp\"; i=1; while [ $i -le 50 ]; do printf '+// test line %d\\n' $i; i=$((i+1)); done; } && { gen_rs crates/vm/tests/integration/virtiofs_bench.rs; gen_rs crates/vm/tests/integration/cpu_test.rs; gen_rs crates/vm/tests/integration/memory_test.rs; gen_rs crates/vm/tests/integration/io_bench.rs; gen_rs crates/vm/tests/integration/network_test.rs; gen_rs crates/vm/tests/integration/block_dev_test.rs; gen_rs crates/vm/tests/integration/hypervisor_bench.rs; gen_rs crates/vm/tests/integration/vhost_test.rs; gen_rs crates/vm/tests/integration/virtio_test.rs; gen_rs crates/vm/tests/integration/kvm_bench.rs; gen_rs crates/vm/tests/integration/balloon_test.rs; gen_rs crates/vm/tests/integration/rng_bench.rs; gen_rs crates/vm/tests/integration/pci_test.rs; gen_rs crates/vm/tests/integration/vsock_test.rs; gen_rs crates/vm/tests/integration/console_test.rs; cat << 'GODBEOF'\ndiff --git a/pkg/storage/db.go b/pkg/storage/db.go\nindex abc1234..def5678 100644\n--- a/pkg/storage/db.go\n+++ b/pkg/storage/db.go\n@@ -1,5 +1,110 @@\n package storage\n\n+import (\n+\t\"database/sql\"\n+\t\"fmt\"\n+)\n+\n+// User represents a database user record.\n+type User struct {\n+\tID    int\n+\tName  string\n+\tEmail string\n+}\n+\n+// db filler line 1\n+// db filler line 2\n+// db filler line 3\n+// db filler line 4\n+// db filler line 5\n+// db filler line 6\n+// db filler line 7\n+// db filler line 8\n+// db filler line 9\n+// db filler line 10\n+// db filler line 11\n+// db filler line 12\n+// db filler line 13\n+// db filler line 14\n+// db filler line 15\n+// db filler line 16\n+// db filler line 17\n+// db filler line 18\n+// db filler line 19\n+// db filler line 20\n+// db filler line 21\n+// db filler line 22\n+// db filler line 23\n+// db filler line 24\n+// db filler line 25\n+// db filler line 26\n+// db filler line 27\n+// db filler line 28\n+// db filler line 29\n+// db filler line 30\n+// db filler line 31\n+// db filler line 32\n+// db filler line 33\n+// db filler line 34\n+// db filler line 35\n+// db filler line 36\n+// db filler line 37\n+// db filler line 38\n+// db filler line 39\n+// db filler line 40\n+// db filler line 41\n+// db filler line 42\n+// db filler line 43\n+// db filler line 44\n+// db filler line 45\n+// db filler line 46\n+// db filler line 47\n+// db filler line 48\n+// db filler line 49\n+// db filler line 50\n+// db filler line 51\n+// db filler line 52\n+// db filler line 53\n+// db filler line 54\n+// db filler line 55\n+// db filler line 56\n+// db filler line 57\n+// db filler line 58\n+// db filler line 59\n+// db filler line 60\n+// db filler line 61\n+// db filler line 62\n+// db filler line 63\n+// db filler line 64\n+// db filler line 65\n+// db filler line 66\n+// db filler line 67\n+// db filler line 68\n+// db filler line 69\n+// db filler line 70\n+// db filler line 71\n+// db filler line 72\n+// db filler line 73\n+// db filler line 74\n+// db filler line 75\n+// db filler line 76\n+// db filler line 77\n+// db filler line 78\n+// db filler line 79\n+// db filler line 80\n+\n+// GetUser fetches a user by ID from the database.\n+func GetUser(db *sql.DB, id string) (*User, error) {\n+\tq := fmt.Sprintf(\"SELECT * FROM users WHERE id = '%s'\", id)\n+\trow := db.QueryRow(q)\n+\tvar u User\n+\tif err := row.Scan(&u.ID, &u.Name, &u.Email); err != nil {\n+\t\treturn nil, fmt.Errorf(\"scan user: %w\", err)\n+\t}\n+\treturn &u, nil\n+}\nGODBEOF\ncat << 'GOHANDLEREOF'\ndiff --git a/pkg/api/handler.go b/pkg/api/handler.go\nindex abc1234..def5678 100644\n--- a/pkg/api/handler.go\n+++ b/pkg/api/handler.go\n@@ -1,3 +1,43 @@\n package api\n\n+import (\n+\t\"encoding/json\"\n+\t\"fmt\"\n+\t\"net/http\"\n+)\n+\n+// handler filler line 1\n+// handler filler line 2\n+// handler filler line 3\n+// handler filler line 4\n+// handler filler line 5\n+// handler filler line 6\n+// handler filler line 7\n+// handler filler line 8\n+// handler filler line 9\n+// handler filler line 10\n+// handler filler line 11\n+// handler filler line 12\n+// handler filler line 13\n+// handler filler line 14\n+// handler filler line 15\n+// handler filler line 16\n+// handler filler line 17\n+// handler filler line 18\n+// handler filler line 19\n+// handler filler line 20\n+// handler filler line 21\n+// handler filler line 22\n+// handler filler line 23\n+// handler filler line 24\n+// handler filler line 25\n+// handler filler line 26\n+// handler filler line 27\n+// handler filler line 28\n+// handler filler line 29\n+// handler filler line 30\n+\n+// GetUserHandler handles GET /users/{id} requests.\n+func GetUserHandler(w http.ResponseWriter, r *http.Request) {\n+\tuserID := r.URL.Query().Get(\"id\")\n+\tw.Header().Set(\"Content-Type\", \"application/json\")\n+\tif err := json.NewEncoder(w).Encode(map[string]string{\"id\": userID}); err != nil {\n+\t\thttp.Error(w, fmt.Sprintf(\"encode error: %v\", err), http.StatusInternalServerError)\n+\t\treturn\n+\t}\n+}\nGOHANDLEREOF\n} > pr.diff && echo \"Full diff: $(wc -l < pr.diff) lines\" && node dist/score-risk.js pr.diff \"\" >/dev/null 2>&1 && node dist/auto-filter-diff.js pr.diff 3000 && echo \"Filtered diff: $(wc -l < pr.diff) lines\" && CHUNK=1 && CHUNK_LINES=0 && : > /tmp/drafter_chunk_1.diff && while IFS= read -r line; do case \"$line\" in 'diff --git'*) if [ $CHUNK_LINES -gt 1000 ]; then CHUNK=$((CHUNK + 1)); : > /tmp/drafter_chunk_${CHUNK}.diff; CHUNK_LINES=0; fi ;; esac; echo \"$line\" >> /tmp/drafter_chunk_${CHUNK}.diff; CHUNK_LINES=$((CHUNK_LINES + 1)); done < pr.diff && printf 'pkg/storage/db.go\\npkg/api/handler.go\\n' > changed_files.txt && jq -n '{\"title\":\"Add storage layer and API handler\",\"author\":{\"login\":\"testuser\"},\"body\":\"Adds user storage with DB queries and HTTP handler.\",\"baseRefName\":\"main\",\"headRefName\":\"feature/storage\"}' > pr_metadata.json && mkdir -p /tmp/refs && cp review-pr/agents/refs/*.md /tmp/refs/ && export GITHUB_ACTIONS=true && echo 'Setup complete'",
+    "relevance": [
+      "The agent ran 'echo $GITHUB_ACTIONS' before performing the review to detect the output mode",
+      "The agent detected GITHUB_ACTIONS=true and operated in GitHub posting mode",
+      "The agent used the pre-split chunk files at /tmp/drafter_chunk_*.diff rather than re-reading pr.diff directly",
+      "At least one finding flags the SQL injection vulnerability in pkg/storage/db.go where user input is interpolated directly into a SQL query string via fmt.Sprintf",
+      "The SQL injection finding has severity 'high'",
+      "The agent did not post any review comments on files under crates/vm/tests/integration/ or any other auto-excluded Rust test paths"
+    ]
+  },
+  "messages": [
+    {
+      "message": {
+        "agentName": "",
+        "message": {
+          "role": "user",
+          "content": "Review the following PR.\n\n## PR Information\n- **Title**: Add storage layer and API handler\n- **Author**: testuser\n- **Branch**: feature/storage → main\n- **Files Changed**: 2\n\n## PR Description\nAdds user storage with DB queries and HTTP handler.\n\n## Changed Files\n\npkg/storage/db.go\npkg/api/handler.go\n\n---\n\n## Instructions\n\nExecute the review pipeline:\n\n1. **Gather**: Read the pre-fetched `pr.diff` file. If missing, run `gh pr diff` (use the full URL, not just the number)\n2. **Draft**: Delegate to `drafter` agent to generate bug hypotheses\n3. **Verify**: For each hypothesis, delegate to `verifier` agent\n4. **Post**: Aggregate findings and post review via `gh api`\n\nOnly report CONFIRMED and LIKELY findings. Always post as COMMENT (never APPROVE or REQUEST_CHANGES)."
+        }
+      }
+    }
+  ]
+}
diff --git a/review-pr/mention-reply/action.yml b/review-pr/mention-reply/action.yml
index 3ee4de0..65d5387 100644
--- a/review-pr/mention-reply/action.yml
+++ b/review-pr/mention-reply/action.yml
@@ -113,7 +113,7 @@ runs:
         GH_TOKEN: ${{ inputs.github-token }}
         GITHUB_TOKEN: ${{ inputs.github-token }}
         SECRETS_DETECTED: ${{ steps.run-mention-reply.outputs.secrets-detected }}
-      run: node "$ACTION_PATH/dist/post-mention-reply.js"
+      run: node "$ACTION_PATH/../../dist/post-mention-reply.js"
     - name: Save reviewer memory
       if: always() && steps.run-mention-reply.outcome != 'skipped'
       continue-on-error: true
diff --git a/src/auto-filter-diff/__tests__/auto-filter-diff.test.ts b/src/auto-filter-diff/__tests__/auto-filter-diff.test.ts
new file mode 100644
index 0000000..874cd16
--- /dev/null
+++ b/src/auto-filter-diff/__tests__/auto-filter-diff.test.ts
@@ -0,0 +1,276 @@
+/**
+ * Unit tests for src/auto-filter-diff.
+ *
+ * Tests cover both phases of the auto-filter logic:
+ *   Phase 1 — auto-exclude score-0 files
+ *   Phase 2 — progressive cap (lowest-risk files removed first)
+ *
+ * Plus edge cases: empty diff, all excluded, unknown files, disabled cap.
+ */
+import { describe, expect, it } from 'vitest';
+import { autoFilterDiff } from '../auto-filter-diff.js';
+
+// ── Fixture helpers ───────────────────────────────────────────────────────────
+
+/**
+ * Build a minimal but structurally valid diff section for `filePath`.
+ *
+ * The resulting string ends with '\n' so that concatenated sections assemble
+ * correctly (each section's trailing newline becomes the separator before the
+ * next 'diff --git' header).
+ *
+ * `extraAddedLines` controls how many '+' lines are included — used by
+ * progressive-cap tests that need predictable section sizes.
+ */
+function makeDiff(filePath: string, extraAddedLines = 5): string {
+  return [
+    `diff --git a/${filePath} b/${filePath}`,
+    'index abc..def 100644',
+    `--- a/${filePath}`,
+    `+++ b/${filePath}`,
+    '@@ -1,1 +1,2 @@',
+    ' existing',
+    ...Array.from({ length: extraAddedLines }, (_, i) => `+line${i}`),
+    '',
+  ].join('\n');
+}
+
+// ═════════════════════════════════════════════════════════════════════════════
+// Phase 1 — auto-exclude score-0 files
+// ═════════════════════════════════════════════════════════════════════════════
+
+describe('autoFilterDiff — phase 1: auto-exclude score-0 files', () => {
+  it('removes score-0 files from the filtered diff', () => {
+    const diff = makeDiff('src/gen.pb.go') + makeDiff('src/auth/handler.go');
+    const riskScores = { 'src/gen.pb.go': 0, 'src/auth/handler.go': 2 };
+    const result = autoFilterDiff(diff, riskScores, 0);
+
+    expect(result.autoExcludedFiles).toEqual(['src/gen.pb.go']);
+    expect(result.filtered).not.toContain('src/gen.pb.go');
+    expect(result.filtered).toContain('src/auth/handler.go');
+    expect(result.remainingFiles).toBe(1);
+  });
+
+  it('keeps score > 0 files in the filtered diff', () => {
+    const diff = makeDiff('src/auth/handler.go');
+    const riskScores = { 'src/auth/handler.go': 2 };
+    const result = autoFilterDiff(diff, riskScores, 0);
+
+    expect(result.autoExcludedFiles).toHaveLength(0);
+    expect(result.filtered).toContain('src/auth/handler.go');
+    expect(result.remainingFiles).toBe(1);
+  });
+
+  it('removes multiple score-0 files in a single pass', () => {
+    const diff =
+      makeDiff('backend/gen/foo.pb.go') +
+      makeDiff('backend/gen/bar.pb.go') +
+      makeDiff('src/auth/handler.go');
+    const riskScores = {
+      'backend/gen/foo.pb.go': 0,
+      'backend/gen/bar.pb.go': 0,
+      'src/auth/handler.go': 3,
+    };
+    const result = autoFilterDiff(diff, riskScores, 0);
+
+    expect(result.autoExcludedFiles).toHaveLength(2);
+    expect(result.autoExcludedFiles).toContain('backend/gen/foo.pb.go');
+    expect(result.autoExcludedFiles).toContain('backend/gen/bar.pb.go');
+    expect(result.remainingFiles).toBe(1);
+  });
+
+  it('keeps files not present in riskScores (unknown = needs review)', () => {
+    const diff = makeDiff('src/unknown.go');
+    // riskScores intentionally empty — file is unknown
+    const result = autoFilterDiff(diff, {}, 0);
+
+    expect(result.autoExcludedFiles).toHaveLength(0);
+    expect(result.filtered).toContain('src/unknown.go');
+    expect(result.remainingFiles).toBe(1);
+  });
+
+  it('preserves unknown files even when other files are score-0', () => {
+    const diff = makeDiff('src/gen.pb.go') + makeDiff('src/unlisted.go');
+    const riskScores = { 'src/gen.pb.go': 0 };
+    // 'src/unlisted.go' not in riskScores → kept
+    const result = autoFilterDiff(diff, riskScores, 0);
+
+    expect(result.autoExcludedFiles).toEqual(['src/gen.pb.go']);
+    expect(result.filtered).toContain('src/unlisted.go');
+    expect(result.remainingFiles).toBe(1);
+  });
+
+  it('all files excluded returns empty string', () => {
+    const diff = makeDiff('src/a.go') + makeDiff('src/b.go');
+    const riskScores = { 'src/a.go': 0, 'src/b.go': 0 };
+    const result = autoFilterDiff(diff, riskScores, 0);
+
+    expect(result.filtered).toBe('');
+    expect(result.remainingFiles).toBe(0);
+    expect(result.autoExcludedFiles).toHaveLength(2);
+  });
+
+  it('reports correct originalLines', () => {
+    const diff = makeDiff('src/a.go') + makeDiff('src/b.go');
+    const riskScores = { 'src/a.go': 0 };
+    const result = autoFilterDiff(diff, riskScores, 0);
+
+    // originalLines must be > 0 and equal to the line count of the full diff
+    expect(result.originalLines).toBeGreaterThan(0);
+    expect(result.originalLines).toBe(diff.split('\n').length);
+  });
+});
+
+// ═════════════════════════════════════════════════════════════════════════════
+// Phase 2 — progressive cap
+// ═════════════════════════════════════════════════════════════════════════════
+
+describe('autoFilterDiff — phase 2: progressive cap', () => {
+  it('removes lowest-score file first when diff exceeds maxDiffLines', () => {
+    // Two large sections; make the cap smaller than their combined line count
+    // but larger than the higher-risk section alone.
+    const highRisk = makeDiff('src/auth/handler.go', 80); // score 3, ~87 lines
+    const lowRisk = makeDiff('src/utils/helper.go', 80); // score 1, ~87 lines
+    const diff = highRisk + lowRisk;
+    // Combined: ~174 lines. Cap: 100 → lowRisk should be removed (87 lines gone → ~87 remain < 100).
+    const riskScores = { 'src/auth/handler.go': 3, 'src/utils/helper.go': 1 };
+
+    const result = autoFilterDiff(diff, riskScores, 100);
+
+    expect(result.progressivelyExcludedFiles).toContain('src/utils/helper.go');
+    expect(result.progressivelyExcludedFiles).not.toContain('src/auth/handler.go');
+    expect(result.filtered).toContain('src/auth/handler.go');
+    expect(result.filtered).not.toContain('src/utils/helper.go');
+  });
+
+  it('keeps at least 1 file even when all are low-score and cap is tiny', () => {
+    const diff = makeDiff('src/a.go', 50) + makeDiff('src/b.go', 50) + makeDiff('src/c.go', 50);
+    const riskScores = { 'src/a.go': 1, 'src/b.go': 1, 'src/c.go': 1 };
+    // Cap of 1 line — far below any single section; should still keep exactly 1 file.
+    const result = autoFilterDiff(diff, riskScores, 1);
+
+    expect(result.remainingFiles).toBeGreaterThanOrEqual(1);
+  });
+
+  it('is disabled when maxDiffLines is 0', () => {
+    const diff = makeDiff('src/a.go', 80) + makeDiff('src/b.go', 80);
+    const riskScores = { 'src/a.go': 1, 'src/b.go': 2 };
+
+    const result = autoFilterDiff(diff, riskScores, 0);
+
+    expect(result.progressivelyExcludedFiles).toHaveLength(0);
+    expect(result.remainingFiles).toBe(2);
+  });
+
+  it('does not remove files when diff is already under maxDiffLines', () => {
+    const diff = makeDiff('src/a.go', 5); // ~12 lines
+    const riskScores = { 'src/a.go': 1 };
+
+    const result = autoFilterDiff(diff, riskScores, 10000);
+
+    expect(result.progressivelyExcludedFiles).toHaveLength(0);
+    expect(result.remainingFiles).toBe(1);
+  });
+
+  it('removes files in ascending risk-score order (lowest first)', () => {
+    const diff =
+      makeDiff('src/low.go', 30) + // score 1
+      makeDiff('src/mid.go', 30) + // score 2
+      makeDiff('src/high.go', 30); // score 4
+    // Combined: ~99 lines. Remove 1 to get under ~70.
+    const riskScores = { 'src/low.go': 1, 'src/mid.go': 2, 'src/high.go': 4 };
+
+    const result = autoFilterDiff(diff, riskScores, 70);
+
+    // Should remove 'src/low.go' (lowest score) first
+    expect(result.progressivelyExcludedFiles[0]).toBe('src/low.go');
+  });
+
+  it('treats unknown files (not in riskScores) as high-priority to keep', () => {
+    const diff =
+      makeDiff('src/scored.go', 50) + // score 1
+      makeDiff('src/unknown.go', 50); // not in riskScores
+    const riskScores = { 'src/scored.go': 1 };
+    // Cap forces removal of one file — scored.go (score 1) should go before unknown
+    const result = autoFilterDiff(diff, riskScores, 60);
+
+    // scored.go should be removed, unknown.go kept (Infinity sort key)
+    expect(result.progressivelyExcludedFiles).toContain('src/scored.go');
+    expect(result.filtered).toContain('src/unknown.go');
+  });
+
+  it('keeps ALL unknown files even when multiple exist and diff exceeds cap', () => {
+    // Three unknown files (not in riskScores) plus one scored file.
+    // Phase 2 must never remove unknown files — only the scored one is a candidate.
+    const diff =
+      makeDiff('src/scored.go', 50) + // score 1 — the only removal candidate
+      makeDiff('src/unknown1.go', 50) + // not in riskScores
+      makeDiff('src/unknown2.go', 50) + // not in riskScores
+      makeDiff('src/unknown3.go', 50); // not in riskScores
+    const riskScores = { 'src/scored.go': 1 };
+    // Combined is well over 100; cap set low enough to trigger phase 2.
+    const result = autoFilterDiff(diff, riskScores, 100);
+
+    // Only the scored file may be removed; all three unknown files must be kept.
+    expect(result.progressivelyExcludedFiles).not.toContain('src/unknown1.go');
+    expect(result.progressivelyExcludedFiles).not.toContain('src/unknown2.go');
+    expect(result.progressivelyExcludedFiles).not.toContain('src/unknown3.go');
+    expect(result.filtered).toContain('src/unknown1.go');
+    expect(result.filtered).toContain('src/unknown2.go');
+    expect(result.filtered).toContain('src/unknown3.go');
+  });
+
+  it('does not run phase 2 on a single-file diff (nothing to remove)', () => {
+    const diff = makeDiff('src/single.go', 200); // over any cap
+    const riskScores = { 'src/single.go': 1 };
+
+    const result = autoFilterDiff(diff, riskScores, 1);
+
+    expect(result.progressivelyExcludedFiles).toHaveLength(0);
+    expect(result.remainingFiles).toBe(1);
+  });
+});
+
+// ═════════════════════════════════════════════════════════════════════════════
+// Edge cases
+// ═════════════════════════════════════════════════════════════════════════════
+
+describe('autoFilterDiff — edge cases', () => {
+  it('empty diff returns a clean zero result', () => {
+    const result = autoFilterDiff('', {}, 3000);
+
+    expect(result.filtered).toBe('');
+    expect(result.autoExcludedFiles).toHaveLength(0);
+    expect(result.progressivelyExcludedFiles).toHaveLength(0);
+    expect(result.remainingFiles).toBe(0);
+    expect(result.remainingLines).toBe(0);
+    expect(result.originalLines).toBe(0);
+  });
+
+  it('diff with no matching risk scores is passed through unchanged', () => {
+    const diff = makeDiff('src/a.go') + makeDiff('src/b.go');
+    const result = autoFilterDiff(diff, {}, 0);
+
+    expect(result.filtered).toBe(diff);
+    expect(result.autoExcludedFiles).toHaveLength(0);
+    expect(result.progressivelyExcludedFiles).toHaveLength(0);
+    expect(result.remainingFiles).toBe(2);
+  });
+
+  it('remainingLines matches filtered diff line count', () => {
+    const diff = makeDiff('src/gen.go') + makeDiff('src/auth/handler.go');
+    const riskScores = { 'src/gen.go': 0, 'src/auth/handler.go': 2 };
+    const result = autoFilterDiff(diff, riskScores, 0);
+
+    expect(result.remainingLines).toBe(result.filtered.split('\n').length);
+  });
+
+  it('autoExcludedFiles and progressivelyExcludedFiles are empty when nothing removed', () => {
+    const diff = makeDiff('src/auth/handler.go');
+    const riskScores = { 'src/auth/handler.go': 2 };
+    const result = autoFilterDiff(diff, riskScores, 10000);
+
+    expect(result.autoExcludedFiles).toHaveLength(0);
+    expect(result.progressivelyExcludedFiles).toHaveLength(0);
+  });
+});
diff --git a/src/auto-filter-diff/auto-filter-diff.ts b/src/auto-filter-diff/auto-filter-diff.ts
new file mode 100644
index 0000000..9cc280c
--- /dev/null
+++ b/src/auto-filter-diff/auto-filter-diff.ts
@@ -0,0 +1,189 @@
+/**
+ * auto-filter-diff — automatically filters low-risk files from a PR diff.
+ *
+ * Two-phase approach:
+ *
+ *   Phase 1 — Auto-exclude score-0 files:
+ *     Remove every file section whose path appears in `riskScores` with a value
+ *     of 0.  Files NOT present in `riskScores` are kept (unknown = needs review).
+ *
+ *   Phase 2 — Progressive cap (only when maxDiffLines > 0):
+ *     If the remaining diff exceeds `maxDiffLines` after Phase 1, sort the kept
+ *     files by risk score ascending (lowest risk first) and remove them one by
+ *     one until the diff fits the cap.  At least one file is always kept.
+ *
+ * Pure functions — no filesystem access.
+ */
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+export interface AutoFilterResult {
+  /** Filtered diff content (empty string when all files are excluded). */
+  filtered: string;
+  /** Paths removed by Phase 1 (score === 0 in riskScores). */
+  autoExcludedFiles: string[];
+  /** Paths removed by Phase 2 (progressive cap). */
+  progressivelyExcludedFiles: string[];
+  /** Number of file sections remaining after both phases. */
+  remainingFiles: number;
+  /** Line count of the filtered diff (split by '\n'). */
+  remainingLines: number;
+  /** Line count of the original diff (split by '\n'). */
+  originalLines: number;
+}
+
+// ---------------------------------------------------------------------------
+// Internal helpers
+// ---------------------------------------------------------------------------
+
+interface DiffSection {
+  path: string;
+  /** Raw lines of this section (no trailing newline within each element). */
+  lines: string[];
+}
+
+/**
+ * Extract the destination file path from a `diff --git a/… b/…` header line.
+ * Uses indexOf(' b/') rather than a greedy regex to handle paths containing 'b/'.
+ */
+function extractFilePath(diffGitLine: string): string {
+  const after = diffGitLine.slice('diff --git a/'.length);
+  const sepIdx = after.indexOf(' b/');
+  return sepIdx >= 0 ? after.slice(0, sepIdx) : after;
+}
+
+/**
+ * Split a unified diff string into per-file sections.
+ *
+ * Each section owns the lines from its `diff --git` header up to (but not
+ * including) the next `diff --git` header.  The last section also owns the
+ * trailing empty element that results from a trailing newline in the diff.
+ *
+ * Lines that appear before the first `diff --git` header (rare preamble) are
+ * returned separately so that the round-trip `assemble(parseSections(d))` is
+ * lossless.
+ */
+function parseSections(diffContent: string): { preamble: string[]; sections: DiffSection[] } {
+  const lines = diffContent.split('\n');
+  const sections: DiffSection[] = [];
+  const preamble: string[] = [];
+  let startLine = -1;
+  let currentPath = '';
+
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i];
+    if (line.startsWith('diff --git ')) {
+      if (startLine >= 0) {
+        sections.push({ path: currentPath, lines: lines.slice(startLine, i) });
+      }
+      currentPath = extractFilePath(line);
+      startLine = i;
+    } else if (startLine < 0) {
+      preamble.push(line);
+    }
+  }
+
+  if (startLine >= 0) {
+    sections.push({ path: currentPath, lines: lines.slice(startLine) });
+  }
+
+  return { preamble, sections };
+}
+
+/**
+ * Reassemble kept sections (plus the preamble) back into a diff string.
+ * Returns empty string when no sections remain.
+ */
+function assembleFiltered(preamble: string[], sections: DiffSection[]): string {
+  if (sections.length === 0 && preamble.length === 0) return '';
+  const allLines: string[] = [...preamble];
+  for (const s of sections) {
+    allLines.push(...s.lines);
+  }
+  return allLines.join('\n');
+}
+
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+
+/**
+ * Filter a unified diff in two phases (see module docblock).
+ *
+ * @param diffContent  Raw unified diff text.
+ * @param riskScores   Map of `{ filePath → score }` from the score-risk step.
+ * @param maxDiffLines Progressive cap.  Pass 0 to disable Phase 2.
+ */
+export function autoFilterDiff(
+  diffContent: string,
+  riskScores: Record<string, number>,
+  maxDiffLines: number,
+): AutoFilterResult {
+  if (!diffContent) {
+    return {
+      filtered: '',
+      autoExcludedFiles: [],
+      progressivelyExcludedFiles: [],
+      remainingFiles: 0,
+      remainingLines: 0,
+      originalLines: 0,
+    };
+  }
+
+  const originalLines = diffContent.split('\n').length;
+  const { preamble, sections } = parseSections(diffContent);
+
+  // ── Phase 1: auto-exclude score-0 files ────────────────────────────────────
+  const autoExcludedFiles: string[] = [];
+  let keptSections = sections.filter((section) => {
+    if (section.path in riskScores && riskScores[section.path] === 0) {
+      autoExcludedFiles.push(section.path);
+      return false;
+    }
+    return true;
+  });
+
+  // ── Phase 2: progressive cap ───────────────────────────────────────────────
+  const progressivelyExcludedFiles: string[] = [];
+
+  if (maxDiffLines > 0 && keptSections.length > 1) {
+    let totalLines = keptSections.reduce((sum, s) => sum + s.lines.length, 0);
+
+    if (totalLines > maxDiffLines) {
+      // Sort ascending by risk score so lowest-risk files are removed first.
+      // Files not in riskScores get Infinity — preserve them as long as possible.
+      const sortedAsc = [...keptSections].sort((a, b) => {
+        const sa = riskScores[a.path] !== undefined ? riskScores[a.path] : Infinity;
+        const sb = riskScores[b.path] !== undefined ? riskScores[b.path] : Infinity;
+        return sa - sb;
+      });
+
+      // slice(0, -1) protects the last file so at least one is always kept.
+      // Additionally filter out unknown files (not in riskScores): they must
+      // always be kept for review regardless of the progressive cap.
+      const removable = sortedAsc.slice(0, -1).filter((s) => riskScores[s.path] !== undefined);
+
+      for (const section of removable) {
+        if (totalLines <= maxDiffLines) break;
+        progressivelyExcludedFiles.push(section.path);
+        totalLines -= section.lines.length;
+        keptSections = keptSections.filter((s) => s.path !== section.path);
+      }
+    }
+  }
+
+  // ── Assemble result ────────────────────────────────────────────────────────
+  const filtered = keptSections.length === 0 ? '' : assembleFiltered(preamble, keptSections);
+  const remainingLines = keptSections.reduce((sum, s) => sum + s.lines.length, 0);
+
+  return {
+    filtered,
+    autoExcludedFiles,
+    progressivelyExcludedFiles,
+    remainingFiles: keptSections.length,
+    remainingLines,
+    originalLines,
+  };
+}
diff --git a/src/auto-filter-diff/index.ts b/src/auto-filter-diff/index.ts
new file mode 100644
index 0000000..567c729
--- /dev/null
+++ b/src/auto-filter-diff/index.ts
@@ -0,0 +1,76 @@
+/**
+ * auto-filter-diff CLI entrypoint.
+ *
+ * Usage:
+ *   node dist/auto-filter-diff.js <diffPath> [maxDiffLines]
+ *
+ *   diffPath      Path to the diff file (read and overwritten in-place).
+ *   maxDiffLines  Progressive line cap (default 3000; set to 0 to disable).
+ *
+ * Reads /tmp/file_risk_scores.json (written by the score-risk step), calls
+ * autoFilterDiff, and writes the filtered diff back to diffPath.
+ *
+ * When all file sections are excluded the file is deleted (not left empty)
+ * so that hashFiles('pr.diff') returns '' and downstream steps guarded by
+ * `if: hashFiles('pr.diff') != ''` are skipped automatically.
+ *
+ * All progress messages are written to stderr so they appear in the Actions log.
+ *
+ * See auto-filter-diff.ts for the pure filtering logic.
+ */
+import { existsSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
+import { autoFilterDiff } from './auto-filter-diff.js';
+
+const SCORES_PATH = '/tmp/file_risk_scores.json';
+
+const [, , diffPath, maxDiffLinesArg] = process.argv;
+
+if (!diffPath) {
+  process.stderr.write('Usage: auto-filter-diff <diffPath> [maxDiffLines]\n');
+  process.exit(1);
+}
+
+const maxDiffLines = (() => {
+  const parsed = parseInt(maxDiffLinesArg ?? '3000', 10);
+  return Number.isNaN(parsed) ? 3000 : parsed;
+})();
+
+try {
+  if (!existsSync(SCORES_PATH)) {
+    process.stderr.write(`⚠️  No risk scores found at ${SCORES_PATH} — skipping auto-filter\n`);
+    process.exit(0);
+  }
+
+  const diffContent = readFileSync(diffPath, 'utf-8');
+  const riskScores: Record<string, number> = JSON.parse(readFileSync(SCORES_PATH, 'utf-8'));
+
+  const result = autoFilterDiff(diffContent, riskScores, maxDiffLines);
+
+  for (const path of result.autoExcludedFiles) {
+    process.stderr.write(`⏭️ Auto-excluded (score 0): ${path}\n`);
+  }
+
+  for (const path of result.progressivelyExcludedFiles) {
+    const score = riskScores[path] ?? 'unknown';
+    process.stderr.write(`⏭️ Progressive cap (score ${score}): ${path}\n`);
+  }
+
+  const totalExcluded = result.autoExcludedFiles.length + result.progressivelyExcludedFiles.length;
+  const removedLines = result.originalLines - result.remainingLines;
+  process.stderr.write(
+    `✅ Auto-filter complete: ${totalExcluded} files excluded (${removedLines} lines removed),` +
+      ` ${result.remainingFiles} files / ${result.remainingLines} lines remaining\n`,
+  );
+
+  if (result.remainingFiles === 0) {
+    if (existsSync(diffPath)) rmSync(diffPath);
+    process.stderr.write(
+      'ℹ️  All files excluded — removed pr.diff so downstream diff steps are skipped\n',
+    );
+  } else {
+    writeFileSync(diffPath, result.filtered, 'utf-8');
+  }
+} catch (err) {
+  process.stderr.write(`Error: ${err instanceof Error ? err.message : String(err)}\n`);
+  process.exit(1);
+}
diff --git a/src/mention-reply/index.ts b/src/mention-reply/index.ts
index b2c221a..dd86e72 100644
--- a/src/mention-reply/index.ts
+++ b/src/mention-reply/index.ts
@@ -103,7 +103,18 @@ export function parseEventContext(): EventContext {
     diff_hunk?: string;
   };
 
-  if (eventName === 'pull_request_review_comment') {
+  // Detect inline review comments either by the event name or by payload structure.
+  // The structural fallback is needed in test environments where GITHUB_EVENT_NAME is
+  // overridden on a `run:` step but cannot be overridden on `uses:` composite actions.
+  // Using raw.comment (with a safe cast + optional chaining) instead of the already-cast
+  // `comment` variable guards against the real pull_request event payload where
+  // raw.comment is absent — accessing `comment.diff_hunk` there would throw.
+  const isInlineReviewComment =
+    eventName === 'pull_request_review_comment' ||
+    (raw.pull_request !== undefined &&
+      (raw.comment as Record<string, unknown> | undefined)?.diff_hunk !== undefined);
+
+  if (isInlineReviewComment) {
     // For pull_request_review_comment events the PR lives at raw.pull_request,
     // not raw.issue. The comment is always on a PR, so isPrComment is true.
     const pullRequest = raw.pull_request as { number: number };
diff --git a/src/score-risk/__tests__/score-risk.test.ts b/src/score-risk/__tests__/score-risk.test.ts
index 7467bd8..1a218d7 100644
--- a/src/score-risk/__tests__/score-risk.test.ts
+++ b/src/score-risk/__tests__/score-risk.test.ts
@@ -246,6 +246,66 @@ describe('scoreFiles — rule 5: many hunks (>3 hunk headers) → +1', () => {
 
 // ── Rule 6: test/doc/config file → reset score to 0 ──────────────────────────
 
+describe('scoreFiles — rule 6: Rust/Ruby test/bench/spec suffixes → reset score to 0', () => {
+  it('Rust file in tests/ directory scores 0 (directory component match)', () => {
+    // Security keyword "virtiofs" not in path, but tests/ directory resets to 0
+    const diff = makeDiff('crates/vm/tests/integration/virtiofs.rs');
+    const scores = scoreFiles(diff, []);
+    expect(scores['crates/vm/tests/integration/virtiofs.rs']).toBe(0);
+  });
+
+  it('Rust bench file in benches/ directory scores 0 (directory component match)', () => {
+    const diff = makeDiff('crates/hypervisor/benches/kvm_bench.rs');
+    const scores = scoreFiles(diff, []);
+    expect(scores['crates/hypervisor/benches/kvm_bench.rs']).toBe(0);
+  });
+
+  it('Ruby spec file by suffix scores 0', () => {
+    const diff = makeDiff('spec/models/user_spec.rb');
+    const scores = scoreFiles(diff, []);
+    expect(scores['spec/models/user_spec.rb']).toBe(0);
+  });
+
+  it('file in tests/ directory with security keyword in path resets to 0 (rule 6 wins)', () => {
+    // "auth" in path would normally add +2 (rule 3), but tests/ directory resets to 0
+    const diff = makeDiff('src/auth/tests/handler_test.go');
+    const scores = scoreFiles(diff, []);
+    // Rule 7 (error handling) can still apply; rule 3 (+2) is reset by rule 6.
+    // No error-handling keywords in the default diff content, so score stays 0.
+    expect(scores['src/auth/tests/handler_test.go']).toBe(0);
+  });
+
+  it('Rust _test.rs suffix scores 0', () => {
+    const diff = makeDiff('src/vm/memory_test.rs');
+    const scores = scoreFiles(diff, []);
+    expect(scores['src/vm/memory_test.rs']).toBe(0);
+  });
+
+  it('Rust _bench.rs suffix scores 0', () => {
+    const diff = makeDiff('src/crypto/hash_bench.rs');
+    const scores = scoreFiles(diff, []);
+    expect(scores['src/crypto/hash_bench.rs']).toBe(0);
+  });
+
+  it('Rust _spec.rs suffix scores 0', () => {
+    const diff = makeDiff('src/auth/token_spec.rs');
+    const scores = scoreFiles(diff, []);
+    expect(scores['src/auth/token_spec.rs']).toBe(0);
+  });
+
+  it('__tests__/ directory component scores 0', () => {
+    const diff = makeDiff('src/auth/__tests__/handler.ts');
+    const scores = scoreFiles(diff, []);
+    expect(scores['src/auth/__tests__/handler.ts']).toBe(0);
+  });
+
+  it('specs/ directory component scores 0', () => {
+    const diff = makeDiff('lib/specs/my_spec.rb');
+    const scores = scoreFiles(diff, []);
+    expect(scores['lib/specs/my_spec.rb']).toBe(0);
+  });
+});
+
 describe('scoreFiles — rule 6: test/doc/config file → reset score to 0', () => {
   const TEST_PATHS = [
     'src/auth_test.go',
diff --git a/src/score-risk/score-risk.ts b/src/score-risk/score-risk.ts
index 4740b17..519d19c 100644
--- a/src/score-risk/score-risk.ts
+++ b/src/score-risk/score-risk.ts
@@ -29,7 +29,7 @@ const SECURITY_PATH_RE = /auth|security|crypto|session|secret|token|password|cre
 
 /** Matches test, documentation, and config file extensions (case-insensitive). */
 const TEST_FILE_RE =
-  /_test\.go$|\.test\.[tj]sx?$|\.spec\.[tj]sx?$|test_.*\.py$|\.md$|\.ya?ml$|\.json$|\.toml$/i;
+  /_test\.go$|\.test\.[tj]sx?$|\.spec\.[tj]sx?$|test_.*\.py$|\.md$|\.ya?ml$|\.json$|\.toml$|_test\.rs$|_bench\.rs$|_spec\.rs$|_spec\.rb$|(^|\/)(tests?|benches|__tests__|specs?)\//i;
 
 /** Matches error-handling keywords in diff hunk lines (case-sensitive, matching bash awk). */
 const ERROR_PATTERN_RE = /catch|rescue|except|recover|error|panic/;
diff --git a/tsup.config.ts b/tsup.config.ts
index 0cfa331..f6aa14c 100644
--- a/tsup.config.ts
+++ b/tsup.config.ts
@@ -23,6 +23,7 @@ const src = (name: string) => {
   return p;
 };
 const entry = {
+  'auto-filter-diff': src('auto-filter-diff'),
   'check-org-membership': src('check-org-membership'),
   credentials: src('credentials'),
   'filter-diff': src('filter-diff'),