diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
new file mode 100644
index 000000000..d1b11eed2
--- /dev/null
+++ b/.github/workflows/bench.yml
@@ -0,0 +1,97 @@
+name: Bench Gate
+
+# Compares hot-path benchmarks between the PR head and its merge
+# base, so performance regressions in the sync engine and DB write
+# paths fail the PR instead of shipping. The gated regression classes
+# have all shipped before:
+#
+#   - discovery/skip work scaling with archive size instead of new
+#     data (#912, the providerSourceUnchangedInDB gap)
+#   - O(session history) work per incremental append (#954)
+#   - bulk ingest throughput (#411)
+#   - per-row query-shape regressions in usage aggregation (#309)
+#
+# Both sides run `make bench-gate` — the Makefile is the single
+# source of truth for the gated package list, sample count, and
+# iteration count — and cmd/benchgate compares the outputs. It gates
+# on allocs/op and B/op (deterministic on a given machine, tight
+# thresholds) and on ns/op with a loose 2x threshold that only
+# catches algorithmic blowups; both sides run on the same runner
+# within one job, so the comparison is apples to apples.
+#
+# Benchmarks that only exist on one side are reported but never fail
+# the gate: a benchmark added by a PR has no baseline and is reported
+# without gating, then gates automatically once merged. Because each
+# side benchmarks its own Makefile's package list, a PR that adds a
+# package to the gate cannot break the base run. A partially failing
+# base run degrades to a partial baseline (whatever benchmarks it
+# produced still gate) rather than silently disabling the whole gate.
+
+on:
+  pull_request:
+    # Docs/frontend-only PRs cannot change the gated Go paths. If
+    # this check is ever made required on branch protection, pair it
+    # with a no-op sibling workflow on the inverse paths.
+    paths:
+      - "**.go"
+      - "go.mod"
+      - "go.sum"
+      - "Makefile"
+      - ".github/workflows/bench.yml"
+
+concurrency:
+  group: bench-${{ github.head_ref || github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  bench-gate:
+    name: Benchmark Gate
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
+        with:
+          persist-credentials: false
+          fetch-depth: 0
+
+      - uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c  # v6.4.0
+        with:
+          go-version-file: go.mod
+
+      - name: Run benchmarks (PR head)
+        run: |
+          set -euo pipefail
+          make -s bench-gate | tee /tmp/bench-new.txt
+
+      - name: Run benchmarks (merge base)
+        env:
+          BASE_REF: ${{ github.base_ref }}
+        # A failing merge-base run keeps whatever benchmark output it
+        # produced: go test emits results per package, so one broken
+        # package (or a base predating the bench-gate target) leaves
+        # a partial or empty baseline and benchgate gates only what
+        # exists on both sides. The warning makes the degraded run
+        # visible instead of a silently green vacuous pass.
+        #
+        # The sample count and fixed iteration count are evaluated
+        # from the PR head's Makefile and passed into the base run:
+        # two benchmarks grow their fixture per iteration, so a PR
+        # that changes BENCH_GATE_COUNT/TIME must not compare against
+        # a baseline measured with the old values. The package list
+        # intentionally stays per-side, so growing the gate cannot
+        # break the base run.
+        run: |
+          set -euo pipefail
+          eval "$(make -s bench-gate-config)"
+          base=$(git merge-base HEAD "origin/$BASE_REF")
+          git worktree add /tmp/bench-base "$base"
+          if ! make -s -C /tmp/bench-base bench-gate \
+              BENCH_GATE_COUNT="$BENCH_GATE_COUNT" \
+              BENCH_GATE_TIME="$BENCH_GATE_TIME" > /tmp/bench-old.txt; then
+            echo "::warning title=Bench Gate::merge-base benchmark run exited non-zero; gating against its partial output"
+          fi
+
+      - name: Compare against merge base
+        run: go run ./cmd/benchgate -old /tmp/bench-old.txt -new /tmp/bench-new.txt
diff --git a/Makefile b/Makefile
index 2d827c6de..e53699989 100644
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ AIR_BIN := $(shell if command -v air >/dev/null 2>&1; then command -v air; \
 	elif [ -x "$(GOPATH_FIRST)/bin/air" ]; then printf "%s" "$(GOPATH_FIRST)/bin/air"; \
 	fi)
 
-.PHONY: build build-release install frontend frontend-dev dev check-air air-install desktop-dev desktop-build desktop-macos-app desktop-macos-dmg desktop-windows-installer desktop-linux-appimage desktop-app docs-install docs-build docs-serve docs-check docs-screenshots docs-assets-branch docs-generated-assets-branch docs-deploy-staging docs-deploy test test-short bench-backends test-postgres test-postgres-ci test-s3 postgres-up postgres-down test-ssh test-ssh-ci ssh-up ssh-down e2e e2e-duckdb vet lint lint-ci lint-golangci lint-golangci-ci nilaway nilaway-golangci-build lint-tools tidy clean release release-darwin-arm64 release-darwin-amd64 release-linux-amd64 install-hooks ensure-embed-dir pricing-snapshot dev-snapshot help
+.PHONY: build build-release install frontend frontend-dev dev check-air air-install desktop-dev desktop-build desktop-macos-app desktop-macos-dmg desktop-windows-installer desktop-linux-appimage desktop-app docs-install docs-build docs-serve docs-check docs-screenshots docs-assets-branch docs-generated-assets-branch docs-deploy-staging docs-deploy test test-short bench-backends bench-gate bench-gate-config test-postgres test-postgres-ci test-s3 postgres-up postgres-down test-ssh test-ssh-ci ssh-up ssh-down e2e e2e-duckdb vet lint lint-ci lint-golangci lint-golangci-ci nilaway nilaway-golangci-build lint-tools tidy clean release release-darwin-arm64 release-darwin-amd64 release-linux-amd64 install-hooks ensure-embed-dir pricing-snapshot dev-snapshot help
 
 # Ensure go:embed has at least one file (no-op if frontend is built)
 ensure-embed-dir:
@@ -266,6 +266,35 @@ bench-backends: pricing-snapshot ensure-embed-dir
 		AGENTSVIEW_BENCH_MESSAGES_PER_SESSION=$(BENCH_BACKENDS_MESSAGES_PER_SESSION) \
 		CGO_ENABLED=1 go test -tags "fts5,benchdb" ./internal/backendbench $(BENCH_BACKENDS_FLAGS)
 
+# Hot-path benchmark gate. Runs every benchmark in the gated packages
+# (sync engine warm/cold/append, message write paths, usage
+# aggregation, secret scanning). This target is the single source of
+# truth for the gate configuration: CI's bench.yml runs it on both
+# the PR head and the merge base, then compares the outputs with
+# `go run ./cmd/benchgate -old old.txt -new new.txt`. Run it before
+# and after touching a sync or DB hot path.
+BENCH_GATE_PACKAGES ?= ./internal/sync ./internal/db ./internal/secrets
+# Count must stay >= 5: benchgate's time gate needs at least 5
+# candidate samples for its significance test.
+BENCH_GATE_COUNT ?= 6
+# Fixed iterations, not a duration: some gated benchmarks grow their
+# fixture as they iterate, so baseline and candidate must run the
+# same iteration count to measure identical workloads.
+BENCH_GATE_TIME ?= 20x
+bench-gate: pricing-snapshot ensure-embed-dir
+	CGO_ENABLED=1 go test -tags "fts5" -run '^$$' \
+		-bench . -benchmem \
+		-count $(BENCH_GATE_COUNT) -benchtime $(BENCH_GATE_TIME) \
+		-timeout 25m $(BENCH_GATE_PACKAGES)
+
+# Prints the gate's sample/iteration configuration in shell-evalable
+# form. CI evaluates this on the PR head and passes the values into
+# the merge-base `make bench-gate` invocation, so both sides measure
+# identical workloads even when a PR changes the defaults above (the
+# package list intentionally stays per-side).
+bench-gate-config:
+	@echo "BENCH_GATE_COUNT=$(BENCH_GATE_COUNT) BENCH_GATE_TIME=$(BENCH_GATE_TIME)"
+
 # Start test PostgreSQL container
 postgres-up:
 	docker compose -f docker-compose.test.yml up -d --wait
@@ -484,6 +513,7 @@ help:
 	@echo "  test           - Run all tests"
 	@echo "  test-short     - Run fast tests only"
 	@echo "  bench-backends - Benchmark SQLite, DuckDB, and PostgreSQL stores"
+	@echo "  bench-gate     - Run the hot-path benchmarks CI gates PRs on"
 	@echo "  test-postgres  - Run PostgreSQL integration tests"
 	@echo "  test-s3        - Run S3 discovery integration tests (Docker)"
 	@echo "  postgres-up    - Start test PostgreSQL container"
diff --git a/cmd/benchgate/main.go b/cmd/benchgate/main.go
new file mode 100644
index 000000000..6b3003ff0
--- /dev/null
+++ b/cmd/benchgate/main.go
@@ -0,0 +1,524 @@
+// Command benchgate compares two `go test -bench` outputs (baseline
+// vs candidate) and exits non-zero when a benchmark regresses beyond
+// configured thresholds.
+//
+// Parsing and statistics come from golang.org/x/perf — benchfmt for
+// the benchmark format and benchmath (the engine behind benchstat)
+// for summaries and significance tests. benchgate only adds the
+// policy benchstat deliberately does not provide: thresholds, floors,
+// and a failing exit code for CI.
+//
+// It is the comparison step of the bench-gate CI workflow: allocs/op
+// and B/op are deterministic for the same code on the same machine,
+// so they get tight ratio thresholds that catch O(archive)-instead-
+// of-O(delta) work regressions regardless of sample count; time
+// (sec/op) is noisy on shared runners, so it gets a loose threshold
+// and additionally must be a statistically significant difference
+// (Mann-Whitney U, as in benchstat) before it fails the gate.
+// Baselines below a per-metric floor are skipped entirely, since a
+// few extra allocations on a tiny benchmark is noise, not a
+// regression.
+//
+// Multiple runs of the same benchmark (-count=N) are kept as a
+// sample. The baseline is summarized by its median; the candidate is
+// gated on its median for time but on its WORST run for allocs/op
+// and B/op — those are deterministic, so a single outlier run there
+// is a real intermittent allocation path, not noise, and must fail.
+// Gating is per benchmark: any one benchmark over its threshold
+// fails the gate; there is no cross-benchmark averaging. Benchmarks
+// present on only one side are reported but never fail the gate, so
+// adding or removing benchmarks in a PR does not wedge it. A gated
+// unit missing from the baseline (a legitimately older or partial
+// base run) is reported as not gated; a gated unit missing from the
+// candidate (e.g. -benchmem dropped) is a configuration error, since
+// it would otherwise silently disable that gate for good.
+//
+// Lines that look like benchmark results but fail to parse (for
+// example test log output interleaved into a result line) are a
+// corrupted capture: they are reported and the gate exits 2, because
+// the corrupted benchmark would otherwise silently vanish from both
+// sides and never gate again.
+package main
+
+import (
+	"flag"
+	"fmt"
+	"os"
+	"sort"
+	"strings"
+
+	"golang.org/x/perf/benchfmt"
+	"golang.org/x/perf/benchmath"
+	"golang.org/x/perf/benchunit"
+)
+
+// minTimeSamples is the per-side sample count the sec/op
+// significance test needs before its verdict means anything.
+const minTimeSamples = 5
+
+// benchSamples collects every measured value per benchmark and unit:
+// benchmark key -> tidied unit (sec/op, B/op, allocs/op, ...) ->
+// samples across -count runs. The key includes the package path when
+// the output carries one, so same-named benchmarks in different
+// packages never merge.
+type benchSamples map[string]map[string][]float64
+
+// gate is one metric's regression rule: fail when the candidate
+// exceeds the baseline median by more than maxRatio, unless the
+// baseline is below floor (too small to compare meaningfully). With
+// worstCase set, the candidate is judged by its worst (highest) run
+// rather than its median — for deterministic metrics, where any
+// outlier run is a real intermittent code path. With
+// needSignificance set, the samples must also differ significantly
+// under the benchmath comparison test — the benchstat noise guard,
+// used for wall-clock time.
+type gate struct {
+	unit             string
+	maxRatio         float64
+	floor            float64
+	worstCase        bool
+	needSignificance bool
+}
+
+// violation describes one gate failure.
+type violation struct {
+	name     string
+	unit     string
+	old, new float64
+	ratio    float64
+	maxRatio float64
+}
+
+// configIssue describes a capture that cannot support the gate it
+// was given (e.g. too few candidate samples for significance
+// testing) — a CI configuration error, not a regression.
+type configIssue struct {
+	name string
+	msg  string
+}
+
+func (v violation) String() string {
+	cls := benchunit.ClassOf(v.unit)
+	return fmt.Sprintf(
+		"%s: %s regressed %.2fx (%s -> %s, limit %.2fx)",
+		v.name, v.unit, v.ratio,
+		benchunit.Scale(v.old, cls), benchunit.Scale(v.new, cls),
+		v.maxRatio,
+	)
+}
+
+// parseBench extracts benchmark samples from `go test -bench` output
+// using the official format parser. Values arrive tidied by
+// benchfmt: ns/op becomes sec/op, MB/s becomes B/s. Lines that look
+// like results but fail to parse are returned as syntax errors so a
+// corrupted capture is loud instead of silently missing benchmarks.
+func parseBench(
+	reader *benchfmt.Reader,
+) (benchSamples, []string, error) {
+	out := make(benchSamples)
+	var syntaxErrs []string
+	for reader.Scan() {
+		res, ok := reader.Result().(*benchfmt.Result)
+		if !ok {
+			if serr, isSyntax := reader.Result().(*benchfmt.SyntaxError); isSyntax {
+				syntaxErrs = append(syntaxErrs, serr.Error())
+			}
+			continue
+		}
+		name := string(res.Name.Full())
+		if pkg := res.GetConfig("pkg"); pkg != "" {
+			name = pkg + "." + name
+		}
+		units := out[name]
+		if units == nil {
+			units = make(map[string][]float64)
+			out[name] = units
+		}
+		for _, v := range res.Values {
+			units[v.Unit] = append(units[v.Unit], v.Value)
+		}
+	}
+	if err := reader.Err(); err != nil {
+		return nil, nil, err
+	}
+	return out, syntaxErrs, nil
+}
+
+// evalGate applies one gate to one benchmark's samples and returns
+// the report fragment plus an optional violation or config issue
+// (their name fields are filled in by the caller).
+func evalGate(
+	g gate, oldVals, newVals []float64,
+) (string, *violation, *configIssue) {
+	thresholds := benchmath.DefaultThresholds
+	oldSample := benchmath.NewSample(oldVals, &thresholds)
+	newSample := benchmath.NewSample(newVals, &thresholds)
+	oldCenter := benchmath.AssumeNothing.
+		Summary(oldSample, 0.95).Center
+	var newCenter float64
+	if g.worstCase {
+		// Samples are sorted ascending; the worst candidate run
+		// is the last one.
+		newCenter = newSample.Values[len(newSample.Values)-1]
+	} else {
+		newCenter = benchmath.AssumeNothing.
+			Summary(newSample, 0.95).Center
+	}
+	cls := benchunit.ClassOf(g.unit)
+	span := fmt.Sprintf(
+		"%s %s -> %s", g.unit,
+		benchunit.Scale(oldCenter, cls),
+		benchunit.Scale(newCenter, cls),
+	)
+
+	if oldCenter <= 0 || oldCenter < g.floor {
+		return fmt.Sprintf(
+			"%s (below %s floor, not gated)",
+			span, benchunit.Scale(g.floor, cls),
+		), nil, nil
+	}
+	if g.needSignificance && len(newVals) < minTimeSamples {
+		issue := &configIssue{msg: fmt.Sprintf(
+			"%s needs at least %d candidate samples for significance gating, got %d",
+			g.unit, minTimeSamples, len(newVals),
+		)}
+		return span + " (too few candidate samples, not gated)",
+			nil, issue
+	}
+	if g.needSignificance && len(oldVals) < minTimeSamples {
+		// A short baseline is not a configuration error: the base
+		// run may legitimately be partial (e.g. it failed part-way
+		// and the workflow gates against what it produced).
+		return fmt.Sprintf(
+			"%s (baseline has only %d sample(s), significance needs %d, not gated)",
+			span, len(oldVals), minTimeSamples,
+		), nil, nil
+	}
+
+	ratio := newCenter / oldCenter
+	detail, significant := gateDetail(
+		g, oldSample, newSample, span, ratio,
+	)
+	var v *violation
+	if ratio > g.maxRatio && (!g.needSignificance || significant) {
+		v = &violation{
+			unit: g.unit,
+			old:  oldCenter, new: newCenter,
+			ratio: ratio, maxRatio: g.maxRatio,
+		}
+	}
+	return detail, v, nil
+}
+
+// gateDetail renders the gated report fragment and, for
+// significance-gated units, runs the benchmath comparison.
+func gateDetail(
+	g gate, oldSample, newSample *benchmath.Sample,
+	span string, ratio float64,
+) (string, bool) {
+	if g.worstCase {
+		// Also surface the baseline's worst run: the gate is
+		// deliberately candidate-worst vs baseline-median, so
+		// pre-existing baseline instability should at least be
+		// visible when reading a failure.
+		cls := benchunit.ClassOf(g.unit)
+		oldWorst := oldSample.Values[len(oldSample.Values)-1]
+		return fmt.Sprintf(
+			"%s (%.2fx, limit %.2fx, worst of %d run(s), baseline worst %s)",
+			span, ratio, g.maxRatio, len(newSample.Values),
+			benchunit.Scale(oldWorst, cls),
+		), false
+	}
+	cmp := benchmath.AssumeNothing.Compare(oldSample, newSample)
+	significant := cmp.P < cmp.Alpha
+	detail := fmt.Sprintf(
+		"%s (%.2fx, limit %.2fx, %s)", span, ratio, g.maxRatio, cmp,
+	)
+	if g.needSignificance && !significant {
+		detail += " [not significant, not gated]"
+	}
+	return detail, significant
+}
+
+// compare applies the gates to every benchmark present in both maps
+// and returns a human-readable report plus the violations and
+// config issues.
+func compare(
+	oldRes, newRes benchSamples, gates []gate,
+) (report []string, violations []violation, issues []configIssue) {
+	names := make([]string, 0, len(newRes))
+	for name := range newRes {
+		names = append(names, name)
+	}
+	sort.Strings(names)
+
+	for _, name := range names {
+		oldUnits, ok := oldRes[name]
+		if !ok {
+			report = append(report, fmt.Sprintf(
+				"%s: new benchmark, no baseline to compare", name,
+			))
+			continue
+		}
+		parts, vs, is := compareUnits(gates, oldUnits, newRes[name])
+		for i := range vs {
+			vs[i].name = name
+		}
+		for i := range is {
+			is[i].name = name
+		}
+		violations = append(violations, vs...)
+		issues = append(issues, is...)
+		report = append(report, fmt.Sprintf(
+			"%s: %s", name, strings.Join(parts, ", "),
+		))
+	}
+
+	var removed []string
+	for name := range oldRes {
+		if _, ok := newRes[name]; !ok {
+			removed = append(removed, name)
+		}
+	}
+	sort.Strings(removed)
+	for _, name := range removed {
+		report = append(report, fmt.Sprintf(
+			"%s: present in baseline but missing from candidate",
+			name,
+		))
+	}
+	return report, violations, issues
+}
+
+// compareUnits evaluates every gate for one benchmark, plus a note
+// for candidate units no gate covers (custom b.ReportMetric units
+// are collected but deliberately never gated).
+func compareUnits(
+	gates []gate, oldUnits, newUnits map[string][]float64,
+) (parts []string, vs []violation, is []configIssue) {
+	gated := make(map[string]bool, len(gates))
+	for _, g := range gates {
+		gated[g.unit] = true
+		oldVals, okOld := oldUnits[g.unit]
+		newVals, okNew := newUnits[g.unit]
+		switch {
+		case !okOld && !okNew:
+			// The benchmark doesn't emit this unit at all.
+			continue
+		case !okOld:
+			// A baseline may legitimately lack a unit (older or
+			// partial base run): report, don't gate.
+			parts = append(parts, fmt.Sprintf(
+				"%s missing from baseline, not gated", g.unit,
+			))
+			continue
+		case !okNew:
+			// The candidate capture is under the workflow's
+			// control; losing a gated unit the baseline has (e.g.
+			// -benchmem dropped) would silently disable this gate,
+			// so it is a configuration error.
+			parts = append(parts, fmt.Sprintf(
+				"%s missing from candidate", g.unit,
+			))
+			is = append(is, configIssue{msg: fmt.Sprintf(
+				"%s present in baseline but missing from candidate capture (was -benchmem dropped?)",
+				g.unit,
+			)})
+			continue
+		}
+		part, v, issue := evalGate(g, oldVals, newVals)
+		parts = append(parts, part)
+		if v != nil {
+			vs = append(vs, *v)
+		}
+		if issue != nil {
+			is = append(is, *issue)
+		}
+	}
+	var custom []string
+	for unit := range newUnits {
+		if !gated[unit] {
+			custom = append(custom, unit)
+		}
+	}
+	sort.Strings(custom)
+	for _, unit := range custom {
+		parts = append(parts, fmt.Sprintf(
+			"%s has no gate, not gated", unit,
+		))
+	}
+	return parts, vs, is
+}
+
+func parseFile(path string) (benchSamples, []string, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return nil, nil, err
+	}
+	defer f.Close()
+	return parseBench(benchfmt.NewReader(f, path))
+}
+
+// results bundles everything render needs to produce the final
+// output and exit code.
+type results struct {
+	report               []string
+	violations           []violation
+	issues               []configIssue
+	newCount             int
+	oldSyntax, newSyntax []string
+}
+
+// render formats the human-readable outcome and picks the exit
+// code: 2 for unusable input or configuration errors, 1 for
+// regressions, 0 otherwise. Violations always print, even when a
+// config issue or corrupted capture also occurred, so a detected
+// regression is never hidden behind an exit-2.
+func render(r results) (string, int) {
+	var b strings.Builder
+	for _, line := range r.report {
+		fmt.Fprintln(&b, line)
+	}
+	if len(r.violations) > 0 {
+		fmt.Fprintf(&b, "\nbenchgate: %d regression(s):\n",
+			len(r.violations))
+		for _, v := range r.violations {
+			fmt.Fprintf(&b, "  %s\n", v)
+		}
+	}
+	renderSyntax(&b, "baseline", r.oldSyntax)
+	renderSyntax(&b, "candidate", r.newSyntax)
+	if len(r.issues) > 0 {
+		fmt.Fprintln(&b, "\nbenchgate: invalid benchmark configuration:")
+		for _, issue := range r.issues {
+			fmt.Fprintf(&b, "  %s: %s\n", issue.name, issue.msg)
+		}
+	}
+	switch {
+	case len(r.oldSyntax)+len(r.newSyntax) > 0 || len(r.issues) > 0:
+		return b.String(), 2
+	case r.newCount == 0:
+		fmt.Fprintln(&b, "benchgate: candidate output contains no benchmarks")
+		return b.String(), 2
+	case len(r.violations) > 0:
+		return b.String(), 1
+	}
+	fmt.Fprintln(&b, "benchgate: no regressions beyond thresholds")
+	return b.String(), 0
+}
+
+// renderSyntax reports unparseable result lines in one capture. A
+// benchmark whose result line is corrupted (e.g. by interleaved log
+// output) parses on neither side and would otherwise vanish from
+// the gate without a trace.
+func renderSyntax(b *strings.Builder, side string, errs []string) {
+	if len(errs) == 0 {
+		return
+	}
+	fmt.Fprintf(
+		b,
+		"\nbenchgate: %s capture is corrupted (%d unparseable result line(s); benchmarks on those lines are not gated):\n",
+		side, len(errs),
+	)
+	for _, e := range errs {
+		fmt.Fprintf(b, "  %s\n", e)
+	}
+}
+
+// flags holds the parsed command line.
+type flags struct {
+	oldPath, newPath string
+	gates            []gate
+}
+
+func parseFlags() flags {
+	oldPath := flag.String(
+		"old", "", "baseline `go test -bench` output file",
+	)
+	newPath := flag.String(
+		"new", "", "candidate `go test -bench` output file",
+	)
+	maxTimeRatio := flag.Float64(
+		"max-time-ratio", 2.0,
+		"fail when candidate median sec/op exceeds baseline by this factor "+
+			"(only when the difference is statistically significant; needs at "+
+			"least 5 candidate samples)",
+	)
+	maxAllocRatio := flag.Float64(
+		"max-alloc-ratio", 1.25,
+		"fail when candidate worst-run allocs/op exceeds baseline median by this factor",
+	)
+	maxBytesRatio := flag.Float64(
+		"max-bytes-ratio", 1.35,
+		"fail when candidate worst-run B/op exceeds baseline median by this factor",
+	)
+	timeFloorNs := flag.Float64(
+		"time-floor-ns", 100_000,
+		"skip the time gate when the baseline is below this many ns",
+	)
+	allocFloor := flag.Float64(
+		"alloc-floor", 64,
+		"skip the allocs/op gate when the baseline is below this",
+	)
+	bytesFloor := flag.Float64(
+		"bytes-floor", 16_384,
+		"skip the B/op gate when the baseline is below this",
+	)
+	flag.Parse()
+
+	if *oldPath == "" || *newPath == "" {
+		fmt.Fprintln(os.Stderr, "benchgate: -old and -new are required")
+		flag.Usage()
+		os.Exit(2)
+	}
+	return flags{
+		oldPath: *oldPath,
+		newPath: *newPath,
+		gates: []gate{
+			{
+				unit:      "allocs/op",
+				maxRatio:  *maxAllocRatio,
+				floor:     *allocFloor,
+				worstCase: true,
+			},
+			{
+				unit:      "B/op",
+				maxRatio:  *maxBytesRatio,
+				floor:     *bytesFloor,
+				worstCase: true,
+			},
+			{
+				unit:             "sec/op",
+				maxRatio:         *maxTimeRatio,
+				floor:            *timeFloorNs / 1e9,
+				needSignificance: true,
+			},
+		},
+	}
+}
+
+func main() {
+	cfg := parseFlags()
+	oldRes, oldSyntax, err := parseFile(cfg.oldPath)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "benchgate: reading baseline: %v\n", err)
+		os.Exit(2)
+	}
+	newRes, newSyntax, err := parseFile(cfg.newPath)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "benchgate: reading candidate: %v\n", err)
+		os.Exit(2)
+	}
+
+	report, violations, issues := compare(oldRes, newRes, cfg.gates)
+	out, code := render(results{
+		report:     report,
+		violations: violations,
+		issues:     issues,
+		newCount:   len(newRes),
+		oldSyntax:  oldSyntax,
+		newSyntax:  newSyntax,
+	})
+	fmt.Print(out)
+	os.Exit(code)
+}
diff --git a/cmd/benchgate/main_test.go b/cmd/benchgate/main_test.go
new file mode 100644
index 000000000..b56043ddd
--- /dev/null
+++ b/cmd/benchgate/main_test.go
@@ -0,0 +1,459 @@
+package main
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"golang.org/x/perf/benchfmt"
+)
+
+func parseString(t *testing.T, input string) (benchSamples, []string) {
+	t.Helper()
+	got, syntaxErrs, err := parseBench(
+		benchfmt.NewReader(strings.NewReader(input), "test"),
+	)
+	require.NoError(t, err)
+	return got, syntaxErrs
+}
+
+func TestParseBench(t *testing.T) {
+	tests := []struct {
+		name       string
+		input      string
+		want       benchSamples
+		wantSyntax int
+	}{
+		{
+			name: "full benchmem line with tidied units",
+			input: "goos: linux\n" +
+				"pkg: example.com/x\n" +
+				"BenchmarkFoo-8   \t 100\t 1234567 ns/op\t 2345 B/op\t 67 allocs/op\n" +
+				"PASS\nok  \texample.com/x\t1.2s\n",
+			want: benchSamples{
+				"example.com/x.Foo-8": {
+					"sec/op":    {1234567e-9},
+					"B/op":      {2345},
+					"allocs/op": {67},
+				},
+			},
+		},
+		{
+			name: "multiple counts collect all samples",
+			input: "BenchmarkFoo-8 100 200 ns/op 9 allocs/op\n" +
+				"BenchmarkFoo-8 100 150 ns/op 8 allocs/op\n" +
+				"BenchmarkFoo-8 100 180 ns/op 10 allocs/op\n",
+			want: benchSamples{
+				"Foo-8": {
+					"sec/op":    {200e-9, 150e-9, 180e-9},
+					"allocs/op": {9, 8, 10},
+				},
+			},
+		},
+		{
+			name: "same benchmark name in different packages stays separate",
+			input: "pkg: example.com/a\n" +
+				"BenchmarkScan-4 10 100 ns/op\n" +
+				"pkg: example.com/b\n" +
+				"BenchmarkScan-4 10 900 ns/op\n",
+			want: benchSamples{
+				"example.com/a.Scan-4": {"sec/op": {100e-9}},
+				"example.com/b.Scan-4": {"sec/op": {900e-9}},
+			},
+		},
+		{
+			name: "log lines and headers are ignored",
+			input: "2026/07/03 10:20:36 discovered 40 files in 0s\n" +
+				"cpu: Apple M5 Max\n",
+			want: benchSamples{},
+		},
+		{
+			name: "result line corrupted by interleaved log output is a syntax error",
+			input: "BenchmarkSyncAllWarmNoop-18   \t2026/07/03 15:39:07 discovered 40 files in 0s\n" +
+				"       5\t   3581975 ns/op\t  212433 B/op\t    2760 allocs/op\n",
+			want:       benchSamples{},
+			wantSyntax: 1,
+		},
+		{
+			name:  "custom ReportMetric units are kept",
+			input: "BenchmarkBaz-2 10 900 ns/op 3 things/op\n",
+			want: benchSamples{
+				"Baz-2": {
+					"sec/op":    {900e-9},
+					"things/op": {3},
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, syntaxErrs := parseString(t, tt.input)
+			assert.Len(t, syntaxErrs, tt.wantSyntax)
+			require.Len(t, got, len(tt.want))
+			for name, wantUnits := range tt.want {
+				gotUnits, ok := got[name]
+				require.True(t, ok, "missing benchmark %s", name)
+				for unit, wantVals := range wantUnits {
+					assert.InDeltaSlice(
+						t, wantVals, gotUnits[unit], 1e-15,
+						"%s %s", name, unit,
+					)
+				}
+			}
+		})
+	}
+}
+
+func testGates() []gate {
+	return []gate{
+		{unit: "allocs/op", maxRatio: 1.25, floor: 64, worstCase: true},
+		{unit: "B/op", maxRatio: 1.35, floor: 16_384, worstCase: true},
+		{
+			unit: "sec/op", maxRatio: 2.0, floor: 100_000e-9,
+			needSignificance: true,
+		},
+	}
+}
+
+// noisy fabricates a benchmark sample of n values spread ±2% around
+// center, so significance tests have a realistic distribution.
+func noisy(center float64, n int) []float64 {
+	out := make([]float64, n)
+	for i := range out {
+		out[i] = center * (0.98 + 0.01*float64(i%5))
+	}
+	return out
+}
+
+func TestCompare(t *testing.T) {
+	tests := []struct {
+		name       string
+		old, new   benchSamples
+		wantUnits  []string // units of expected violations, in order
+		wantReport []string // substrings that must appear in the report
+	}{
+		{
+			name: "within thresholds passes",
+			old: benchSamples{
+				"BenchmarkFoo-8": {
+					"sec/op":    noisy(1e-3, 6),
+					"B/op":      {100_000},
+					"allocs/op": {1000},
+				},
+			},
+			new: benchSamples{
+				"BenchmarkFoo-8": {
+					"sec/op":    noisy(1.5e-3, 6),
+					"B/op":      {120_000},
+					"allocs/op": {1100},
+				},
+			},
+			wantUnits: nil,
+		},
+		{
+			name: "alloc regression fails even with a single run",
+			old: benchSamples{
+				"BenchmarkFoo-8": {"allocs/op": {1000}},
+			},
+			new: benchSamples{
+				"BenchmarkFoo-8": {"allocs/op": {2000}},
+			},
+			wantUnits: []string{"allocs/op"},
+		},
+		{
+			name: "significant time blowup fails",
+			old: benchSamples{
+				"BenchmarkFoo-8": {"sec/op": noisy(1e-3, 6)},
+			},
+			new: benchSamples{
+				"BenchmarkFoo-8": {"sec/op": noisy(5e-3, 6)},
+			},
+			wantUnits: []string{"sec/op"},
+		},
+		{
+			name: "time blowup without significance is reported, not gated",
+			old: benchSamples{
+				"BenchmarkFoo-8": {"sec/op": noisy(1e-3, 5)},
+			},
+			new: benchSamples{
+				"BenchmarkFoo-8": {
+					"sec/op": append(noisy(1e-3, 4), 5e-3),
+				},
+			},
+			wantUnits:  nil,
+			wantReport: []string{"not significant, not gated"},
+		},
+		{
+			name: "tiny baseline below floor is not gated",
+			old: benchSamples{
+				"BenchmarkFoo-8": {
+					"sec/op":    {500e-9},
+					"B/op":      {128},
+					"allocs/op": {3},
+				},
+			},
+			new: benchSamples{
+				"BenchmarkFoo-8": {
+					"sec/op":    {5000e-9},
+					"B/op":      {1280},
+					"allocs/op": {30},
+				},
+			},
+			wantUnits:  nil,
+			wantReport: []string{"not gated"},
+		},
+		{
+			name: "new benchmark without baseline is reported, not gated",
+			old:  benchSamples{},
+			new: benchSamples{
+				"BenchmarkNew-8": {"allocs/op": {99999}},
+			},
+			wantUnits:  nil,
+			wantReport: []string{"no baseline to compare"},
+		},
+		{
+			name: "removed benchmark is reported, not gated",
+			old: benchSamples{
+				"BenchmarkGone-8": {"sec/op": {1e-3}},
+			},
+			new:        benchSamples{},
+			wantUnits:  nil,
+			wantReport: []string{"missing from candidate"},
+		},
+		{
+			name: "gated unit missing from the baseline is reported, not gated",
+			old: benchSamples{
+				"BenchmarkFoo-8": {"sec/op": noisy(1e-3, 6)},
+			},
+			new: benchSamples{
+				"BenchmarkFoo-8": {
+					"sec/op":    noisy(1e-3, 6),
+					"allocs/op": {90000},
+				},
+			},
+			wantUnits:  nil,
+			wantReport: []string{"allocs/op missing from baseline, not gated"},
+		},
+		{
+			name: "custom ReportMetric unit is reported as ungated",
+			old: benchSamples{
+				"BenchmarkFoo-8": {
+					"sec/op":    noisy(1e-3, 6),
+					"things/op": {3},
+				},
+			},
+			new: benchSamples{
+				"BenchmarkFoo-8": {
+					"sec/op":    noisy(1e-3, 6),
+					"things/op": {900},
+				},
+			},
+			wantUnits:  nil,
+			wantReport: []string{"things/op has no gate, not gated"},
+		},
+		{
+			name: "multiple regressions are all reported",
+			old: benchSamples{
+				"BenchmarkFoo-8": {
+					"sec/op":    noisy(1e-3, 6),
+					"B/op":      {100_000},
+					"allocs/op": {1000},
+				},
+			},
+			new: benchSamples{
+				"BenchmarkFoo-8": {
+					"sec/op":    noisy(9e-3, 6),
+					"B/op":      {900_000},
+					"allocs/op": {9000},
+				},
+			},
+			wantUnits: []string{"allocs/op", "B/op", "sec/op"},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			report, violations, issues := compare(tt.old, tt.new, testGates())
+			units := make([]string, 0, len(violations))
+			for _, v := range violations {
+				units = append(units, v.unit)
+			}
+			assert.Empty(t, issues)
+			if len(tt.wantUnits) == 0 {
+				assert.Empty(t, violations)
+			} else {
+				assert.Equal(t, tt.wantUnits, units)
+			}
+			joined := strings.Join(report, "\n")
+			for _, want := range tt.wantReport {
+				assert.Contains(t, joined, want)
+			}
+		})
+	}
+}
+
+// TestCompareOutlierRunPolicy pins the split policy for a single
+// outlier among repeated -count runs of one benchmark. allocs/op is
+// deterministic for identical code and iteration counts, so one
+// outlier run means a real intermittent allocation path and must
+// fail even though the median is unchanged. Wall-clock time is
+// summarized by its median, so one slow run on a noisy runner cannot
+// fail the gate on its own.
+func TestCompareOutlierRunPolicy(t *testing.T) {
+	t.Run("alloc outlier run fails", func(t *testing.T) {
+		old := benchSamples{
+			"BenchmarkFoo-8": {"allocs/op": {1000, 1000, 1000, 1000, 1000}},
+		}
+		next := benchSamples{
+			"BenchmarkFoo-8": {"allocs/op": {1000, 1000, 9000, 1000, 1000}},
+		}
+		_, violations, issues := compare(old, next, testGates())
+		require.Len(t, violations, 1)
+		assert.Empty(t, issues)
+		assert.Equal(t, "allocs/op", violations[0].unit)
+		assert.InDelta(t, 9000, violations[0].new, 1e-9,
+			"the worst run is what gets gated")
+	})
+
+	t.Run("time outlier run does not fail", func(t *testing.T) {
+		old := benchSamples{
+			"BenchmarkFoo-8": {"sec/op": noisy(1e-3, 6)},
+		}
+		next := benchSamples{
+			"BenchmarkFoo-8": {"sec/op": append(noisy(1e-3, 5), 9e-3)},
+		}
+		_, violations, issues := compare(old, next, testGates())
+		assert.Empty(t, issues)
+		assert.Empty(t, violations)
+	})
+}
+
+// TestCompareMissingCandidateUnit pins the asymmetric missing-unit
+// policy: a gated unit the baseline has but the candidate lost
+// (e.g. -benchmem dropped from the candidate run) is a config error
+// so the gate exits 2 instead of silently disabling that metric.
+func TestCompareMissingCandidateUnit(t *testing.T) {
+	old := benchSamples{
+		"BenchmarkFoo-8": {
+			"sec/op":    noisy(1e-3, 6),
+			"allocs/op": {1000},
+		},
+	}
+	next := benchSamples{
+		"BenchmarkFoo-8": {"sec/op": noisy(1e-3, 6)},
+	}
+	report, violations, issues := compare(old, next, testGates())
+	assert.Empty(t, violations)
+	require.Len(t, issues, 1)
+	assert.Contains(t, issues[0].msg,
+		"allocs/op present in baseline but missing from candidate")
+	assert.Contains(t, strings.Join(report, "\n"),
+		"allocs/op missing from candidate")
+}
+
+// TestCompareTimeGateSampleCounts pins the asymmetric sample-count
+// policy: too few candidate samples is a configuration error (the
+// candidate run is under the workflow's control), while a short
+// baseline is merely reported and not gated (the base run may
+// legitimately be partial).
+func TestCompareTimeGateSampleCounts(t *testing.T) {
+	t.Run("too few candidate samples is a config issue", func(t *testing.T) {
+		old := benchSamples{"BenchmarkFoo-8": {"sec/op": noisy(1e-3, 6)}}
+		next := benchSamples{"BenchmarkFoo-8": {"sec/op": {2e-3}}}
+		_, violations, issues := compare(old, next, testGates())
+		assert.Empty(t, violations)
+		require.Len(t, issues, 1)
+		assert.Contains(t, issues[0].msg, "at least 5 candidate samples")
+	})
+
+	t.Run("short baseline is reported, not a config issue", func(t *testing.T) {
+		old := benchSamples{"BenchmarkFoo-8": {"sec/op": {1e-3, 1e-3}}}
+		next := benchSamples{"BenchmarkFoo-8": {"sec/op": noisy(9e-3, 6)}}
+		report, violations, issues := compare(old, next, testGates())
+		assert.Empty(t, violations)
+		assert.Empty(t, issues)
+		assert.Contains(t, strings.Join(report, "\n"),
+			"baseline has only 2 sample(s)")
+	})
+}
+
+func TestRender(t *testing.T) {
+	sampleViolation := violation{
+		name: "BenchmarkFoo-8", unit: "allocs/op",
+		old: 1000, new: 2000, ratio: 2.0, maxRatio: 1.25,
+	}
+	tests := []struct {
+		name     string
+		r        results
+		wantCode int
+		wantOut  []string
+	}{
+		{
+			name:     "clean run passes",
+			r:        results{report: []string{"BenchmarkFoo-8: ok"}, newCount: 1},
+			wantCode: 0,
+			wantOut:  []string{"no regressions beyond thresholds"},
+		},
+		{
+			name: "violations exit 1",
+			r: results{
+				violations: []violation{sampleViolation},
+				newCount:   1,
+			},
+			wantCode: 1,
+			wantOut:  []string{"1 regression(s)", "allocs/op regressed 2.00x"},
+		},
+		{
+			name: "violations still print when a config issue forces exit 2",
+			r: results{
+				violations: []violation{sampleViolation},
+				issues:     []configIssue{{name: "BenchmarkFoo-8", msg: "too few"}},
+				newCount:   1,
+			},
+			wantCode: 2,
+			wantOut: []string{
+				"allocs/op regressed 2.00x",
+				"invalid benchmark configuration",
+			},
+		},
+		{
+			name: "corrupted capture exits 2 and is described",
+			r: results{
+				newCount:  1,
+				newSyntax: []string{"test:3: no iteration count"},
+			},
+			wantCode: 2,
+			wantOut: []string{
+				"candidate capture is corrupted",
+				"no iteration count",
+			},
+		},
+		{
+			name:     "empty candidate exits 2",
+			r:        results{newCount: 0},
+			wantCode: 2,
+			wantOut:  []string{"candidate output contains no benchmarks"},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			out, code := render(tt.r)
+			assert.Equal(t, tt.wantCode, code)
+			for _, want := range tt.wantOut {
+				assert.Contains(t, out, want)
+			}
+		})
+	}
+}
+
+func TestViolationString(t *testing.T) {
+	v := violation{
+		name: "BenchmarkFoo-8", unit: "allocs/op",
+		old: 1000, new: 2000, ratio: 2.0, maxRatio: 1.25,
+	}
+	got := v.String()
+	assert.Contains(t, got, "BenchmarkFoo-8")
+	assert.Contains(t, got, "allocs/op regressed 2.00x")
+	assert.Contains(t, got, "limit 1.25x")
+}
diff --git a/docs/internal/performance-gates.md b/docs/internal/performance-gates.md
new file mode 100644
index 000000000..094530484
--- /dev/null
+++ b/docs/internal/performance-gates.md
@@ -0,0 +1,154 @@
+# Performance Gates
+
+agentsview has repeatedly shipped performance regressions where sync work
+stopped scaling with *new data* and started scaling with *archive size*. This
+document records the regression classes we have actually hit, and the gates that
+now guard each one. When you touch a sync or DB hot path, know which gate covers
+you; when you fix a new class of regression, add a gate here.
+
+## Regression history
+
+| Class                          | What happened                                                                                                                                                                                           | Fixed in                                       |
+| ------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------- |
+| Discovery O(sources) root work | Gemini rebuilt its project map per session; positron/vscode-copilot re-read `workspace.json` per session. A large store spent 2m47s in discovery.                                                       | #912                                           |
+| Unchanged sources reparsed     | The provider migration dropped pre-parse DB-freshness skips; every full sync reparsed and rewrote untouched sessions.                                                                                   | `providerSourceUnchangedInDB` (#883 follow-up) |
+| O(history) incremental appends | Every streamed line ran a full signal recompute (reload all messages, secret regex scan) and chunk merges delete+reinserted every message row. ~4,700 session updates/day each paid O(session history). | #954                                           |
+| Bulk ingest throughput         | Full resync ran per-row inserts and rebuilt FTS incrementally; 26.7k sessions took 1m17s.                                                                                                               | #411                                           |
+| Event storms                   | One SSE emit per watcher flush drove ~1/s dashboard refetch; SQLite WAL sidecar events fanned out to every session in a shared DB.                                                                      | #367, #956                                     |
+| Per-row query shape            | `GetDailyUsage` ran 1.2M `json_extract` calls per scan and had no date pushdown.                                                                                                                        | #309                                           |
+
+## Two layers of gates
+
+### 1. Deterministic work-count invariants (run in `make test`)
+
+These count work units instead of measuring time, so they are immune to CI
+runner noise and fail loudly:
+
+- `TestWarmFullSyncDoesNoBulkWriteWork` (`internal/sync/perf_invariant_test.go`)
+  — a second full sync over an unchanged Claude archive must skip everything
+  and run zero bulk-write batches (`Engine.PhaseStats`).
+- `TestProviderAuthoritativeUnchangedSessionSkipsOnResync`
+  (`internal/sync/provider_freshness_integration_test.go`) — the generic
+  freshness skip for provider-authoritative agents, Vibe as representative.
+- `TestWriteIncrementalDebouncesSignalRecompute` and the rest of
+  `internal/sync/signal_schedule_test.go` — streaming appends must debounce
+  the O(history) signal recompute.
+- The count-based seam tests in `internal/parser`
+  (`discovery_workspace_manifest_test.go`, gemini/antigravity provider tests)
+  — root-derived project info is built once per root, not once per source.
+- `internal/server/broadcaster_test.go` — SSE emits coalesce to at most one
+  broadcast per window.
+
+When you fix a performance bug, prefer adding a gate at this layer: expose or
+reuse a counter (`SyncStats`, `PhaseStats`, `AnomalyStats`, a swappable
+package-var seam) and assert the invariant, e.g. "second sync parses zero
+sessions" or "the manifest is read once per root regardless of session count".
+
+### 2. Benchmark gate (runs on every PR via `bench.yml`)
+
+`.github/workflows/bench.yml` runs `make bench-gate` — the single source of
+truth for the gated package list, sample count, and iteration count — on the PR
+head and its merge base on the same runner, then compares the outputs with
+`cmd/benchgate`:
+
+- `BenchmarkSyncAllWarmNoop` — full sync over an already-synced archive (stat +
+  skip work only; also self-asserts nothing is re-synced or bulk-rewritten).
+- `BenchmarkSyncPathsIncrementalAppend` — absorb one appended line into a
+  1,000-message session.
+- `BenchmarkSyncAllColdArchive` — first-sync ingest throughput through the
+  default per-session write path.
+- `BenchmarkResyncBulkIngest` — the same archive through the resync bulk-write
+  pipeline (`writeBatchBulk` / `DB.WriteSessionBatch`, the #411 regression
+  class); self-asserts every session took the batch path.
+- `BenchmarkReplaceSessionMessagesStreamingMerge` — the streaming chunk-merge
+  diff path (one UPDATE, not a full delete+reinsert).
+- `BenchmarkInsertMessagesBatch` — multi-row batched ingest.
+- `BenchmarkGetDailyUsage` — usage aggregation over 100k message rows.
+- `BenchmarkScan` / `BenchmarkScanDefinite` — secret-scan regex throughput.
+
+`benchgate` builds on `golang.org/x/perf`: `benchfmt` parses the output and
+`benchmath` — the statistics engine behind `benchstat` — summarizes samples and
+tests significance (Mann-Whitney U). benchgate adds only the policy benchstat
+does not provide: thresholds, floors, and a failing exit code. Gating is per
+benchmark — any single benchmark over its threshold fails the PR; nothing is
+averaged across benchmarks. It gates hard on `allocs/op` (limit 1.25x) and
+`B/op` (1.35x), which are deterministic for the same code and iteration count —
+an O(archive)-instead-of-O(delta) regression always blows them up. Those two
+compare the candidate's *worst* `-count` run against the baseline median, so
+even an intermittent extra-allocation path fails. That is intentionally
+asymmetric: the baseline is treated as the historical reference, and candidate
+instability is what blocks the PR (failure lines include the baseline's worst
+run so pre-existing instability is visible). Time (`sec/op`) compares medians
+with a loose 2.0x limit and must additionally be a statistically significant
+difference before it fails, so a single slow run on a noisy runner cannot flake
+a PR but algorithmic blowups still do. Time gating requires at least 5 candidate
+samples; fewer is reported as a configuration error (the candidate run is under
+the workflow's control), while a baseline with fewer than 5 samples — a
+legitimately partial base run — is reported and not gated. Baselines below a
+per-metric floor are not gated. Benchmarks that exist on only one side are
+reported but never fail, so adding or removing benchmarks cannot wedge a PR.
+Only `allocs/op`, `B/op`, and `sec/op` are gated: custom `b.ReportMetric` units
+are collected and reported as ungated, never enforced.
+
+Two failure modes are treated as loud configuration errors (exit 2) rather than
+silent gaps: a capture whose result lines fail to parse (for example test log
+output interleaved into a `Benchmark...` line — the sync benchmarks silence the
+engine's logger for exactly this reason), and a gated unit present in the
+baseline but missing from the candidate (for example a candidate captured
+without `-benchmem`), which would otherwise silently disable that gate for good.
+The reverse — a gated unit missing from the baseline, which may legitimately be
+older or partial — is reported as not gated.
+
+The gate always runs with a fixed `-benchtime=Nx` iteration count (not a
+duration): two of the benchmarks grow their fixture as they iterate, so the
+baseline and candidate must run the same number of iterations to measure
+identical workloads. CI evaluates `make bench-gate-config` on the PR head and
+passes the count and benchtime into the merge-base run, so a PR that changes
+those defaults still compares identical workloads; do the same locally if you
+override them.
+
+Report identifiers are package-qualified benchmark names
+(`go.kenn.io/agentsview/internal/db.InsertMessagesBatch-18`) when the captured
+output carries `pkg:` metadata, falling back to the bare name when it does not
+(e.g. hand-trimmed captures). Do not mix captures with and without `pkg:` lines:
+the same benchmark would key differently and be treated as removed/new.
+
+Run locally, comparing your working tree against a baseline commit. Like CI, use
+a worktree for the baseline — checking out or stashing in place can leave
+candidate files (or your commits) in the baseline run:
+
+```bash
+make bench-gate > new.txt                # candidate: current tree
+git worktree add /tmp/bench-base "$(git merge-base HEAD origin/main)"
+make -C /tmp/bench-base bench-gate > old.txt
+git worktree remove /tmp/bench-base
+go run ./cmd/benchgate -old old.txt -new new.txt
+```
+
+Cross-backend query benchmarks live separately in `internal/backendbench`
+(`make bench-backends`, requires Docker) and are not part of the PR gate.
+
+## Adding a benchmark to the gate
+
+Every benchmark in a gated package is gated — there is no per-name allowlist to
+maintain. A benchmark added by a PR has no baseline, so its first run is
+reported without gating; it gates automatically once merged.
+
+1. Write the benchmark next to the code it guards (`*_bench_test.go`,
+   `b.ReportAllocs()`, self-assert the invariant it protects where possible).
+   If the code under test logs, silence the logger in the benchmark (see
+   `silenceBenchLogs` in `internal/sync/engine_bench_test.go`): interleaved
+   log output corrupts result lines and benchgate fails on the corruption.
+1. If its package is not already gated, add it to `BENCH_GATE_PACKAGES` in the
+   Makefile — a benchmark outside the gated packages silently never runs, so
+   it looks gated while measuring nothing. CI picks the list up from the
+   Makefile; each side of the comparison benchmarks its own commit's list, so
+   growing the gate cannot break the base run.
+1. Keep per-op cost roughly in the 100µs–100ms band: below the benchgate floors
+   nothing is gated, and far above it the job gets slow.
+1. Keep per-iteration setup out of the timed region (`b.ResetTimer`, pre-built
+   fixtures): helper allocations inside the loop are gated as if they were
+   product cost and dilute or distort the ratio.
+1. If the benchmark's fixture grows across iterations, say so in its comment;
+   the fixed `-benchtime=Nx` keeps both sides comparable, but readers need to
+   know per-op cost depends on the iteration count.
diff --git a/go.mod b/go.mod
index 236d14d8f..d32a5084e 100644
--- a/go.mod
+++ b/go.mod
@@ -26,6 +26,7 @@ require (
 	github.com/tidwall/gjson v1.19.0
 	go.kenn.io/kit v0.1.7
 	golang.org/x/mod v0.37.0
+	golang.org/x/perf v0.0.0-20260615155930-9e4b9ddef5b6
 	golang.org/x/sync v0.21.0
 	golang.org/x/sys v0.46.0
 	golang.org/x/term v0.44.0
@@ -35,6 +36,7 @@ require (
 	dario.cat/mergo v1.0.2 // indirect
 	github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect
 	github.com/Microsoft/go-winio v0.6.2 // indirect
+	github.com/aclements/go-moremath v0.0.0-20210112150236-f10218a38794 // indirect
 	github.com/apache/arrow-go/v18 v18.5.1 // indirect
 	github.com/cenkalti/backoff/v4 v4.3.0 // indirect
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
@@ -109,12 +111,12 @@ require (
 	go.opentelemetry.io/otel/metric v1.43.0 // indirect
 	go.opentelemetry.io/otel/trace v1.43.0 // indirect
 	go.yaml.in/yaml/v3 v3.0.4 // indirect
-	golang.org/x/crypto v0.51.0 // indirect
-	golang.org/x/exp v0.0.0-20260112195511-716be5621a96 // indirect
-	golang.org/x/net v0.54.0 // indirect
-	golang.org/x/oauth2 v0.35.0 // indirect
+	golang.org/x/crypto v0.53.0 // indirect
+	golang.org/x/exp v0.0.0-20260603202125-055de637280b // indirect
+	golang.org/x/net v0.56.0 // indirect
+	golang.org/x/oauth2 v0.36.0 // indirect
 	golang.org/x/telemetry v0.0.0-20260508192327-42602be52be6 // indirect
-	golang.org/x/text v0.37.0 // indirect
+	golang.org/x/text v0.38.0 // indirect
 	golang.org/x/tools v0.45.0 // indirect
 	golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect
 	gopkg.in/ini.v1 v1.67.2 // indirect
diff --git a/go.sum b/go.sum
index 07b890a25..d347c22a3 100644
--- a/go.sum
+++ b/go.sum
@@ -8,6 +8,8 @@ github.com/BurntSushi/toml v1.6.0 h1:dRaEfpa2VI55EwlIW72hMRHdWouJeRF7TPYhI+AUQjk
 github.com/BurntSushi/toml v1.6.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
 github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
 github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
+github.com/aclements/go-moremath v0.0.0-20210112150236-f10218a38794 h1:xlwdaKcTNVW4PtpQb8aKA4Pjy0CdJHEqvFbAnvR5m2g=
+github.com/aclements/go-moremath v0.0.0-20210112150236-f10218a38794/go.mod h1:7e+I0LQFUI9AXWxOfsQROs9xPhoJtbsyWcjJqDd4KPY=
 github.com/andybalholm/brotli v1.2.1 h1:R+f5xP285VArJDRgowrfb9DqL18yVK0gKAW/F+eTWro=
 github.com/andybalholm/brotli v1.2.1/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
 github.com/apache/arrow-go/v18 v18.5.1 h1:yaQ6zxMGgf9YCYw4/oaeOU3AULySDlAYDOcnr4LdHdI=
@@ -254,16 +256,18 @@ go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09
 go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0=
 go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
 go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
-golang.org/x/crypto v0.51.0 h1:IBPXwPfKxY7cWQZ38ZCIRPI50YLeevDLlLnyC5wRGTI=
-golang.org/x/crypto v0.51.0/go.mod h1:8AdwkbraGNABw2kOX6YFPs3WM22XqI4EXEd8g+x7Oc8=
-golang.org/x/exp v0.0.0-20260112195511-716be5621a96 h1:Z/6YuSHTLOHfNFdb8zVZomZr7cqNgTJvA8+Qz75D8gU=
-golang.org/x/exp v0.0.0-20260112195511-716be5621a96/go.mod h1:nzimsREAkjBCIEFtHiYkrJyT+2uy9YZJB7H1k68CXZU=
+golang.org/x/crypto v0.53.0 h1:QZ4Muo8THX6CizN2vPPd5fBGHyogrdK9fG4wLPFUsto=
+golang.org/x/crypto v0.53.0/go.mod h1:DNLU434OwVakk9PzuwV8w62mAJpRJL3vsgcfp4Qnsio=
+golang.org/x/exp v0.0.0-20260603202125-055de637280b h1:v1uXiEBHo8QA0LiGCo7UgHMzHT4Kdfpl2zmtH5vaP1Q=
+golang.org/x/exp v0.0.0-20260603202125-055de637280b/go.mod h1:d2fgXJLVs4dYDHUk5lwMIfzRzSrWCfGZb0ZqeLa/Vcw=
 golang.org/x/mod v0.37.0 h1:vF1DjpVEshcIqoEaauuHebaLk1O1forxjxBaVn884JQ=
 golang.org/x/mod v0.37.0/go.mod h1:m8S8VeM9r4dzDwjrKO0a1sZP3YjeMamRRlD+fmR2Q/0=
-golang.org/x/net v0.54.0 h1:2zJIZAxAHV/OHCDTCOHAYehQzLfSXuf/5SoL/Dv6w/w=
-golang.org/x/net v0.54.0/go.mod h1:Sj4oj8jK6XmHpBZU/zWHw3BV3abl4Kvi+Ut7cQcY+cQ=
-golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ=
-golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
+golang.org/x/net v0.56.0 h1:Rw8j/hFzGvJUZwNBXnAtf5sVDVt+65SK2C7IxCxZt5o=
+golang.org/x/net v0.56.0/go.mod h1:D3Ku6r+V6JROoZK144D2XfMHFcMq/0zSfLelVTCFKec=
+golang.org/x/oauth2 v0.36.0 h1:peZ/1z27fi9hUOFCAZaHyrpWG5lwe0RJEEEeH0ThlIs=
+golang.org/x/oauth2 v0.36.0/go.mod h1:YDBUJMTkDnJS+A4BP4eZBjCqtokkg1hODuPjwiGPO7Q=
+golang.org/x/perf v0.0.0-20260615155930-9e4b9ddef5b6 h1:YTGOochus7nQXtT5KnsVfJ3/1yslyOyP3m9vMG1qMuQ=
+golang.org/x/perf v0.0.0-20260615155930-9e4b9ddef5b6/go.mod h1:FisaKCtzcRx02gCop3DILSnoD7CMnqwmde2yVxo4meM=
 golang.org/x/sync v0.21.0 h1:HLII4xRRTtCRkxYp4HNFF0Js/Og6q2i++KXbg0gHCwM=
 golang.org/x/sync v0.21.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
 golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -275,8 +279,8 @@ golang.org/x/telemetry v0.0.0-20260508192327-42602be52be6 h1:HjU6IWBiAgRIdAJ9/y1
 golang.org/x/telemetry v0.0.0-20260508192327-42602be52be6/go.mod h1:Eqhaxk/wZsWEH8CRxLwj6xzEJbz7k1EFGqx7nyCoabE=
 golang.org/x/term v0.44.0 h1:0rLvDRCtNj0gZkyIXhCyOb2OAzEhLVqc4B+hrsBhrmc=
 golang.org/x/term v0.44.0/go.mod h1:7ze4MdzUzLXpSAoFP1H0bOI9aXDqveSvatT5vKcFh2Y=
-golang.org/x/text v0.37.0 h1:Cqjiwd9eSg8e0QAkyCaQTNHFIIzWtidPahFWR83rTrc=
-golang.org/x/text v0.37.0/go.mod h1:a5sjxXGs9hsn/AJVwuElvCAo9v8QYLzvavO5z2PiM38=
+golang.org/x/text v0.38.0 h1:sXmwo9DwP3OK9EZ7PqAdaooSGozfl/3a6/xJcbzPRhE=
+golang.org/x/text v0.38.0/go.mod h1:YXZt3QhHUKYT53r2lLKFIVi6Ao1jdzrTR/KQ09qyxF4=
 golang.org/x/tools v0.45.0 h1:18qN3FAooORvApf5XjCXgsuayZOEtXf6JK18I3+ONa8=
 golang.org/x/tools v0.45.0/go.mod h1:LuUGqqaXcXMEFEruIVJVm5mgDD8vww/z/SR1gQ4uE/0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
diff --git a/internal/db/db_test.go b/internal/db/db_test.go
index acbb3b311..4ced0f394 100644
--- a/internal/db/db_test.go
+++ b/internal/db/db_test.go
@@ -300,13 +300,13 @@ const (
 	tsMidYear = "2024-06-01T10:00:00Z"
 )
 
-func testDB(t *testing.T) *DB {
-	t.Helper()
-	dir := t.TempDir()
+func testDB(tb testing.TB) *DB {
+	tb.Helper()
+	dir := tb.TempDir()
 	path := filepath.Join(dir, "test.db")
 	d, err := openCopiedTestDB(path)
-	require.NoError(t, err, "opening test db")
-	t.Cleanup(func() { require.NoError(t, d.Close()) })
+	require.NoError(tb, err, "opening test db")
+	tb.Cleanup(func() { require.NoError(tb, d.Close()) })
 	return d
 }
 
diff --git a/internal/db/messages_bench_test.go b/internal/db/messages_bench_test.go
new file mode 100644
index 000000000..73a9eebfb
--- /dev/null
+++ b/internal/db/messages_bench_test.go
@@ -0,0 +1,146 @@
+package db
+
+import (
+	"encoding/json"
+	"fmt"
+	"testing"
+)
+
+// Hot-path benchmarks for the message write and usage aggregation
+// paths that have regressed in the past. CI's bench-gate workflow
+// runs them on every PR and compares allocs/op, B/op, and ns/op
+// against the merge base, so a reintroduced O(session-history)
+// rewrite or per-row JSON parse fails the PR instead of shipping:
+//
+//   - BenchmarkReplaceSessionMessagesStreamingMerge: a one-row tail
+//     change must take the in-place diff path (one UPDATE) rather
+//     than delete+reinserting every row and rewriting the FTS index
+//     (regressed pre-#954: streaming chunk merges rewrote whole
+//     sessions on every appended chunk).
+//   - BenchmarkInsertMessagesBatch: bulk ingest must keep multi-row
+//     batched inserts (#411).
+//
+// BenchmarkGetDailyUsage in usage_test.go covers the usage
+// aggregation scan (#309) and is part of the same CI gate.
+
+// benchSessionMessages builds n alternating user/assistant messages.
+// Assistant messages carry a model and token_usage payload so writes
+// exercise the same columns real ingest does.
+func benchSessionMessages(sessionID string, n int) []Message {
+	msgs := make([]Message, 0, n)
+	for i := range n {
+		role := "user"
+		content := fmt.Sprintf(
+			"user message %d with enough text to look real", i,
+		)
+		if i%2 == 1 {
+			role = "assistant"
+			content = fmt.Sprintf(
+				"assistant reply %d with enough text to look real", i,
+			)
+		}
+		m := Message{
+			SessionID:     sessionID,
+			Ordinal:       i,
+			Role:          role,
+			Content:       content,
+			Timestamp:     fmt.Sprintf("2026-06-%02dT10:00:00Z", 1+i%28),
+			ContentLength: len(content),
+		}
+		if role == "assistant" {
+			m.Model = "claude-bench-model"
+			m.TokenUsage = json.RawMessage(
+				`{"input_tokens":120,"output_tokens":45,` +
+					`"cache_creation_input_tokens":10,` +
+					`"cache_read_input_tokens":200}`,
+			)
+		}
+		msgs = append(msgs, m)
+	}
+	return msgs
+}
+
+func seedBenchSession(
+	b *testing.B, d *DB, sessionID string, n int,
+) []Message {
+	b.Helper()
+	if err := d.UpsertSession(Session{
+		ID:      sessionID,
+		Project: "bench",
+		Machine: "local",
+		Agent:   "claude",
+	}); err != nil {
+		b.Fatalf("seed session %s: %v", sessionID, err)
+	}
+	msgs := benchSessionMessages(sessionID, n)
+	if err := d.InsertMessages(msgs); err != nil {
+		b.Fatalf("seed messages for %s: %v", sessionID, err)
+	}
+	return msgs
+}
+
+// BenchmarkReplaceSessionMessagesStreamingMerge measures the
+// streaming chunk-merge shape: replacing a stored session where only
+// the tail message's content changed. The diff planner must apply a
+// single in-place UPDATE; cost must not scale with the number of
+// unchanged stored rows being rewritten.
+func BenchmarkReplaceSessionMessagesStreamingMerge(b *testing.B) {
+	const stored = 1000
+	d := testDB(b)
+	msgs := seedBenchSession(b, d, "bench-replace", stored)
+	last := len(msgs) - 1
+
+	b.ReportAllocs()
+	b.ResetTimer()
+	for i := range b.N {
+		content := fmt.Sprintf(
+			"assistant reply %d merged streaming tail variant %d",
+			last, i,
+		)
+		msgs[last].Content = content
+		msgs[last].ContentLength = len(content)
+		if err := d.ReplaceSessionMessages("bench-replace", msgs); err != nil {
+			b.Fatalf("replace: %v", err)
+		}
+	}
+}
+
+// BenchmarkInsertMessagesBatch measures bulk session ingest: one
+// session row plus a batch insert of its messages, the unit of work
+// the full-sync write pipeline performs per session.
+//
+// Each iteration adds a new session, so the database grows with the
+// iteration count and per-op cost is only comparable between runs
+// with the same count: the bench gate always runs with a fixed
+// -benchtime=Nx (see bench.yml and the Makefile) so baseline and
+// candidate insert into identically sized databases.
+func BenchmarkInsertMessagesBatch(b *testing.B) {
+	const batch = 200
+	d := testDB(b)
+
+	// Build the message fixture once: constructing 200 Message
+	// structs (~400 fmt.Sprintf calls) inside the timed loop would
+	// be gated as if it were ingest cost. Only the SessionID is
+	// rewritten per iteration, which allocates nothing.
+	msgs := benchSessionMessages("", batch)
+
+	b.ReportAllocs()
+	b.ResetTimer()
+	for i := range b.N {
+		sid := fmt.Sprintf("bench-insert-%06d", i)
+		if err := d.UpsertSession(Session{
+			ID:      sid,
+			Project: "bench",
+			Machine: "local",
+			Agent:   "claude",
+		}); err != nil {
+			b.Fatalf("upsert session: %v", err)
+		}
+		for j := range msgs {
+			msgs[j].SessionID = sid
+		}
+		if err := d.InsertMessages(msgs); err != nil {
+			b.Fatalf("insert messages: %v", err)
+		}
+	}
+}
diff --git a/internal/db/usage_test.go b/internal/db/usage_test.go
index c0ae860e8..194d117d9 100644
--- a/internal/db/usage_test.go
+++ b/internal/db/usage_test.go
@@ -2910,7 +2910,7 @@ func TestExcludeModelFilter(t *testing.T) {
 }
 
 func BenchmarkGetDailyUsage(b *testing.B) {
-	d := testDB(&testing.T{})
+	d := testDB(b)
 	ctx := context.Background()
 
 	if err := d.UpsertModelPricing([]ModelPricing{
diff --git a/internal/sync/engine_bench_test.go b/internal/sync/engine_bench_test.go
new file mode 100644
index 000000000..9259e3ff4
--- /dev/null
+++ b/internal/sync/engine_bench_test.go
@@ -0,0 +1,381 @@
+package sync
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"path/filepath"
+	"strconv"
+	"testing"
+	"time"
+
+	"go.kenn.io/agentsview/internal/db"
+	"go.kenn.io/agentsview/internal/parser"
+	"go.kenn.io/agentsview/internal/testjsonl"
+)
+
+// Hot-path benchmarks for the sync engine, covering the regression
+// classes that have shipped before. CI's bench-gate workflow runs
+// them on every PR and compares allocs/op, B/op, and ns/op against
+// the merge base:
+//
+//   - BenchmarkSyncAllWarmNoop: a full sync over an already-synced,
+//     unchanged archive must do stat+skip work only. Regressed when
+//     the provider migration dropped pre-parse DB-freshness skips
+//     and every full sync reparsed and rewrote unchanged sessions
+//     (fixed by providerSourceUnchangedInDB), and when discovery
+//     recomputed root-derived project info per source (#912).
+//   - BenchmarkSyncPathsIncrementalAppend: absorbing one appended
+//     line into a large session must scale with the appended data,
+//     not the stored history (#954).
+//   - BenchmarkSyncAllColdArchive: first-sync ingest throughput for
+//     a fresh archive through the default per-session write path.
+//   - BenchmarkResyncBulkIngest: the same archive through the
+//     resync bulk-write pipeline (writeBatchBulk /
+//     DB.WriteSessionBatch, the #411 regression class).
+//
+// Fixture sizes scale via AGENTSVIEW_BENCH_SYNC_SESSIONS and
+// AGENTSVIEW_BENCH_SYNC_MESSAGES for larger local runs.
+
+const (
+	defaultBenchSyncSessions = 40
+	defaultBenchSyncMessages = 30
+	benchLargeSessionLines   = 1000
+)
+
+// silenceBenchLogs discards the engine's global log output for the
+// duration of the benchmark. Log lines interleave with `go test
+// -bench` result lines on stdout, which corrupts them so benchfmt
+// cannot parse the benchmark — benchgate fails on such corruption,
+// and before it did, the corrupted benchmarks silently vanished
+// from the gate on both sides.
+func silenceBenchLogs(b *testing.B) {
+	b.Helper()
+	prev := log.Writer()
+	log.SetOutput(io.Discard)
+	b.Cleanup(func() { log.SetOutput(prev) })
+}
+
+func benchIntFromEnv(name string, def int) int {
+	raw := os.Getenv(name)
+	if raw == "" {
+		return def
+	}
+	n, err := strconv.Atoi(raw)
+	if err != nil || n <= 0 {
+		return def
+	}
+	return n
+}
+
+// writeBenchClaudeArchive lays out `sessions` Claude JSONL session
+// files of `perSession` alternating user/assistant messages under
+// dir/bench-project, mirroring the on-disk shape SyncAll discovers.
+func writeBenchClaudeArchive(
+	b *testing.B, dir string, sessions, perSession int,
+) {
+	b.Helper()
+	proj := filepath.Join(dir, "bench-project")
+	if err := os.MkdirAll(proj, 0o755); err != nil {
+		b.Fatalf("MkdirAll: %v", err)
+	}
+	for s := range sessions {
+		builder := testjsonl.NewSessionBuilder()
+		for m := 0; m < perSession; m += 2 {
+			ts := fmt.Sprintf(
+				"2026-06-20T10:%02d:%02dZ", (m/2/60)%60, (m/2)%60,
+			)
+			builder.AddClaudeUser(ts, fmt.Sprintf(
+				"user message %d in session %d", m, s,
+			))
+			builder.AddClaudeAssistant(ts, fmt.Sprintf(
+				"assistant reply %d in session %d", m, s,
+			))
+		}
+		path := filepath.Join(
+			proj, fmt.Sprintf("bench-%04d.jsonl", s),
+		)
+		if err := os.WriteFile(
+			path, []byte(builder.String()), 0o644,
+		); err != nil {
+			b.Fatalf("WriteFile %s: %v", path, err)
+		}
+	}
+}
+
+// openBenchEngine opens a fresh SQLite DB and an engine watching dir
+// as a Claude root. Cleanup closes the engine before the DB so any
+// pending debounced signal recompute drains first.
+func openBenchEngine(b *testing.B, dir string) (*Engine, *db.DB) {
+	b.Helper()
+	database, err := db.Open(filepath.Join(b.TempDir(), "bench.db"))
+	if err != nil {
+		b.Fatalf("open bench db: %v", err)
+	}
+	engine := NewEngine(database, EngineConfig{
+		AgentDirs: map[parser.AgentType][]string{
+			parser.AgentClaude: {dir},
+		},
+		Machine: "local",
+	})
+	b.Cleanup(func() {
+		engine.Close()
+		if err := database.Close(); err != nil {
+			b.Errorf("close bench db: %v", err)
+		}
+	})
+	return engine, database
+}
+
+// BenchmarkSyncAllWarmNoop measures a full sync pass over an archive
+// that is already fully synced and unchanged on disk: discovery plus
+// per-source freshness skips. It also asserts the invariant the
+// benchmark exists to protect — a warm no-op pass must not reparse
+// or rewrite anything.
+func BenchmarkSyncAllWarmNoop(b *testing.B) {
+	silenceBenchLogs(b)
+	sessions := benchIntFromEnv(
+		"AGENTSVIEW_BENCH_SYNC_SESSIONS", defaultBenchSyncSessions,
+	)
+	perSession := benchIntFromEnv(
+		"AGENTSVIEW_BENCH_SYNC_MESSAGES", defaultBenchSyncMessages,
+	)
+	dir := b.TempDir()
+	writeBenchClaudeArchive(b, dir, sessions, perSession)
+	engine, _ := openBenchEngine(b, dir)
+	ctx := context.Background()
+
+	first := engine.SyncAll(ctx, nil)
+	if first.Synced != sessions {
+		b.Fatalf(
+			"initial sync stored %d of %d sessions (failed=%d)",
+			first.Synced, sessions, first.Failed,
+		)
+	}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+	for range b.N {
+		stats := engine.SyncAll(ctx, nil)
+		if stats.Synced != 0 {
+			b.Fatalf(
+				"warm no-op sync re-synced %d sessions", stats.Synced,
+			)
+		}
+		if writes := engine.PhaseStats().BatchedWrites.Load(); writes != 0 {
+			b.Fatalf(
+				"warm no-op sync bulk-wrote %d sessions", writes,
+			)
+		}
+	}
+}
+
+// BenchmarkSyncPathsIncrementalAppend measures absorbing a single
+// appended JSONL line into a session that already stores
+// benchLargeSessionLines messages, the streaming write the serve
+// daemon performs thousands of times per day.
+//
+// The session grows by one message per iteration, so per-op cost is
+// only comparable between runs with the same iteration count: the
+// bench gate always runs with a fixed -benchtime=Nx (see bench.yml
+// and the Makefile) so baseline and candidate absorb appends into
+// identically sized sessions. Growth is deliberate — per-append cost
+// staying flat as the session grows is exactly the invariant this
+// benchmark protects.
+func BenchmarkSyncPathsIncrementalAppend(b *testing.B) {
+	silenceBenchLogs(b)
+	dir := b.TempDir()
+	writeBenchClaudeArchive(b, dir, 1, benchLargeSessionLines)
+	engine, database := openBenchEngine(b, dir)
+	ctx := context.Background()
+
+	first := engine.SyncAll(ctx, nil)
+	if first.Synced != 1 {
+		b.Fatalf(
+			"initial sync stored %d sessions (failed=%d)",
+			first.Synced, first.Failed,
+		)
+	}
+
+	path := filepath.Join(dir, "bench-project", "bench-0000.jsonl")
+	f, err := os.OpenFile(path, os.O_APPEND|os.O_WRONLY, 0o644)
+	if err != nil {
+		b.Fatalf("OpenFile: %v", err)
+	}
+	defer f.Close()
+
+	// The first append after a full sync triggers the leading-edge
+	// inline signal recompute — an O(stored history) message reload
+	// plus secret scan whose one-time cost would otherwise dominate
+	// the fixed 20-iteration measurement and mask steady-state
+	// per-append regressions. Absorb it before the timer starts so
+	// the timed loop measures the debounced steady state.
+	warmup := testjsonl.NewSessionBuilder().AddClaudeUser(
+		"2026-06-20T11:00:00Z", "warm-up append",
+	).String()
+	if _, err := f.WriteString(warmup); err != nil {
+		b.Fatalf("warm-up append: %v", err)
+	}
+	engine.SyncPaths([]string{path})
+
+	// Stretch the debounce window so the flush timer cannot fire
+	// inside the timed loop on a slow run: allocs/op is gated on the
+	// candidate's worst -count run, and one timer-driven O(history)
+	// recompute landing mid-loop would flake the gate. Engine.Close
+	// drains the deferred recompute during cleanup.
+	engine.signalSched.mu.Lock()
+	engine.signalSched.interval = time.Hour
+	engine.signalSched.quiet = time.Hour
+	engine.signalSched.mu.Unlock()
+
+	// Pre-build the appended lines: constructing Claude JSONL via
+	// testjsonl allocates (map + json.Marshal), and inside the timed
+	// loop that helper cost would be gated as if it were sync work.
+	lines := make([]string, b.N)
+	for i := range lines {
+		lines[i] = testjsonl.NewSessionBuilder().AddClaudeUser(
+			"2026-06-20T11:00:00Z",
+			fmt.Sprintf("streamed line %d", i),
+		).String()
+	}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+	for i := range b.N {
+		if _, err := f.WriteString(lines[i]); err != nil {
+			b.Fatalf("append: %v", err)
+		}
+		engine.SyncPaths([]string{path})
+	}
+	b.StopTimer()
+
+	msgs, err := database.GetAllMessages(ctx, "bench-0000")
+	if err != nil {
+		b.Fatalf("GetAllMessages: %v", err)
+	}
+	want := benchLargeSessionLines + 1 + b.N // +1 for the warm-up append
+	if len(msgs) < want {
+		b.Fatalf(
+			"appends were not absorbed: stored %d messages, want >= %d",
+			len(msgs), want,
+		)
+	}
+}
+
+// benchColdArchive is the shared cold-ingest loop: each iteration
+// syncs the same archive into a fresh database via syncOnce, then
+// verify checks the iteration's outcome with the timer stopped.
+func benchColdArchive(
+	b *testing.B,
+	syncOnce func(*Engine) SyncStats,
+	verify func(*Engine, SyncStats, int),
+) {
+	b.Helper()
+	silenceBenchLogs(b)
+	sessions := benchIntFromEnv(
+		"AGENTSVIEW_BENCH_SYNC_SESSIONS", defaultBenchSyncSessions,
+	)
+	perSession := benchIntFromEnv(
+		"AGENTSVIEW_BENCH_SYNC_MESSAGES", defaultBenchSyncMessages,
+	)
+	dir := b.TempDir()
+	writeBenchClaudeArchive(b, dir, sessions, perSession)
+	dbDir := b.TempDir()
+
+	b.ReportAllocs()
+	b.ResetTimer()
+	for i := range b.N {
+		b.StopTimer()
+		dbPath := filepath.Join(dbDir, fmt.Sprintf("cold-%d.db", i))
+		database, err := db.Open(dbPath)
+		if err != nil {
+			b.Fatalf("open bench db: %v", err)
+		}
+		engine := NewEngine(database, EngineConfig{
+			AgentDirs: map[parser.AgentType][]string{
+				parser.AgentClaude: {dir},
+			},
+			Machine: "local",
+		})
+		b.StartTimer()
+
+		stats := syncOnce(engine)
+
+		b.StopTimer()
+		verify(engine, stats, sessions)
+		engine.Close()
+		if err := database.Close(); err != nil {
+			b.Fatalf("close bench db: %v", err)
+		}
+		// Drop this iteration's DB (and WAL/SHM sidecars) so disk
+		// usage stays O(1) instead of retaining b.N populated
+		// databases until the function returns.
+		stale, err := filepath.Glob(dbPath + "*")
+		if err != nil {
+			b.Fatalf("glob %s: %v", dbPath, err)
+		}
+		for _, p := range stale {
+			if err := os.Remove(p); err != nil {
+				b.Fatalf("remove %s: %v", p, err)
+			}
+		}
+		b.StartTimer()
+	}
+}
+
+// BenchmarkSyncAllColdArchive measures first-sync ingest throughput
+// through the public SyncAll path: parse plus the default
+// per-session writes a user's first sync performs.
+func BenchmarkSyncAllColdArchive(b *testing.B) {
+	ctx := context.Background()
+	benchColdArchive(b,
+		func(engine *Engine) SyncStats {
+			return engine.SyncAll(ctx, nil)
+		},
+		func(_ *Engine, stats SyncStats, sessions int) {
+			if stats.Synced != sessions {
+				b.Fatalf(
+					"cold sync stored %d of %d sessions (failed=%d)",
+					stats.Synced, sessions, stats.Failed,
+				)
+			}
+		},
+	)
+}
+
+// BenchmarkResyncBulkIngest measures the bulk-write ingest path a
+// full resync uses: syncWriteBulk routes every parsed session
+// through writeBatchBulk and DB.WriteSessionBatch — the #411
+// regression class. This is a different write path from the default
+// per-session writes BenchmarkSyncAllColdArchive covers, and it
+// self-asserts that every session really went through the batch
+// pipeline so the benchmark cannot silently measure the wrong path.
+func BenchmarkResyncBulkIngest(b *testing.B) {
+	ctx := context.Background()
+	benchColdArchive(b,
+		func(engine *Engine) SyncStats {
+			engine.syncMu.Lock()
+			defer engine.syncMu.Unlock()
+			return engine.syncAllLocked(
+				ctx, nil, time.Time{}, nil, syncWriteBulk, true,
+			)
+		},
+		func(engine *Engine, stats SyncStats, sessions int) {
+			if stats.Synced != sessions {
+				b.Fatalf(
+					"bulk ingest stored %d of %d sessions (failed=%d)",
+					stats.Synced, sessions, stats.Failed,
+				)
+			}
+			writes := engine.PhaseStats().BatchedWrites.Load()
+			if writes != int64(sessions) {
+				b.Fatalf(
+					"bulk ingest wrote %d of %d sessions via the batch pipeline",
+					writes, sessions,
+				)
+			}
+		},
+	)
+}
diff --git a/internal/sync/perf_invariant_test.go b/internal/sync/perf_invariant_test.go
new file mode 100644
index 000000000..7f4ff8d43
--- /dev/null
+++ b/internal/sync/perf_invariant_test.go
@@ -0,0 +1,66 @@
+package sync
+
+import (
+	"context"
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// Deterministic work-count gates for the "sync work scales with new
+// data, not archive size" invariant. Unlike the wall-clock
+// benchmarks in engine_bench_test.go, these run in the regular test
+// suite and count work units, so they fail loudly in CI regardless
+// of runner noise. Companion gates elsewhere:
+//
+//   - TestProviderAuthoritativeUnchangedSessionSkipsOnResync covers
+//     the generic providerSourceUnchangedInDB skip for the
+//     provider.Parse fallthrough group (Vibe as representative).
+//   - TestWriteIncrementalDebouncesSignalRecompute pins the #954
+//     debounce of the O(history) signal recompute.
+//   - The count-based seam tests in internal/parser
+//     (discovery_workspace_manifest_test.go, antigravity/gemini
+//     provider tests) pin O(roots) discovery work (#912).
+
+// TestWarmFullSyncDoesNoBulkWriteWork verifies that a second full
+// sync over an unchanged Claude archive skips every session before
+// the parse and never enters the bulk-write pipeline. Claude has its
+// own pre-parse freshness path (shouldSkipProviderSourceByDB),
+// distinct from the generic check the Vibe test covers; both have
+// regressed independently in the past.
+func TestWarmFullSyncDoesNoBulkWriteWork(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping integration test")
+	}
+
+	fx := newEngineFixture(t)
+	const n = 5
+	for i := range n {
+		fx.writeClaudeSession(
+			t, "proj", fmt.Sprintf("warm-%d.jsonl", i),
+			fmt.Sprintf("hello %d", i),
+		)
+	}
+
+	ctx := context.Background()
+	first := fx.engine.SyncAll(ctx, nil)
+	require.Equal(t, n, first.Synced,
+		"first sync parses and stores every session")
+
+	second := fx.engine.SyncAll(ctx, nil)
+	assert.Equal(t, 0, second.Synced,
+		"unchanged sessions must not be re-synced on a warm pass")
+	assert.GreaterOrEqual(t, second.Skipped, n,
+		"every unchanged session must be counted as skipped")
+
+	// PhaseStats resets at the start of each pass, so after the
+	// second pass it reflects only that pass: a warm no-op sync
+	// must not have run a single bulk-write batch.
+	stats := fx.engine.PhaseStats()
+	assert.Zero(t, stats.Batches.Load(),
+		"warm no-op sync must not run any bulk-write batch")
+	assert.Zero(t, stats.BatchedWrites.Load(),
+		"warm no-op sync must not rewrite any session")
+}