diff --git a/.github/scripts/build_csp_pr_comment.py b/.github/scripts/build_csp_pr_comment.py
index 31e9670f9..edd633e67 100644
--- a/.github/scripts/build_csp_pr_comment.py
+++ b/.github/scripts/build_csp_pr_comment.py
@@ -2,9 +2,10 @@
"""Build a sticky PR comment for the CSP benchmarks workflow.
Reads the CSV emitted by ``scripts/run_csp_benchmarks.sh`` (one row per
-circuit) and renders it as a markdown table with human-readable units. If
-``--baseline-csv`` is given, each metric cell appends a percentage delta
-versus the baseline value (last successful CSP-benchmarks run on main).
+(circuit, backend)) and renders one markdown table per backend with
+human-readable units. If ``--baseline-csv`` is given, each metric cell
+appends a percentage delta versus the baseline value (last successful
+CSP-benchmarks run on main) keyed by (circuit, backend).
"""
from __future__ import annotations
@@ -111,30 +112,46 @@ def read_rows(csv_path: Path) -> list[dict[str, str]]:
return list(csv.DictReader(f))
-def index_baseline(rows: list[dict[str, str]]) -> dict[str, dict[str, float]]:
- """Index baseline rows by circuit name with float metric values."""
- out: dict[str, dict[str, float]] = {}
+def index_baseline(rows: list[dict[str, str]]) -> dict[tuple[str, str], dict[str, float]]:
+ """Index baseline rows by (circuit, backend) with float metric values.
+
+ Older baseline CSVs without a `backend` column are treated as `whir`
+ (the only backend that existed before backend-aware benchmarks landed),
+ so deltas remain valid across the schema bump.
+ """
+ out: dict[tuple[str, str], dict[str, float]] = {}
for row in rows:
circuit = (row.get("circuit") or "").strip()
if not circuit:
continue
+ backend = (row.get("backend") or "whir").strip() or "whir"
metrics: dict[str, float] = {}
for metric, _unit in METRIC_COLUMNS:
try:
metrics[metric] = float(row.get(metric) or 0)
except ValueError:
metrics[metric] = 0.0
- out[circuit] = metrics
+ out[(circuit, backend)] = metrics
+ return out
+
+
+def group_by_backend(rows: list[dict[str, str]]) -> dict[str, list[dict[str, str]]]:
+ """Bucket result rows by backend, preserving insertion order of backends."""
+ out: dict[str, list[dict[str, str]]] = {}
+ for row in rows:
+ backend = (row.get("backend") or "whir").strip() or "whir"
+ out.setdefault(backend, []).append(row)
return out
def render_table(
rows: list[dict[str, str]],
- baseline: dict[str, dict[str, float]],
+ backend: str,
+ baseline: dict[tuple[str, str], dict[str, float]],
has_baseline_file: bool,
) -> str:
if not rows:
- return "_No benchmark results were produced._"
+ return "_No benchmark results were produced for this backend._"
header = (
"| Circuit | Constraints | Witnesses | Prover time | Peak RSS | "
@@ -145,7 +162,7 @@ def render_table(
for row in sorted(rows, key=lambda r: r.get("circuit", "")):
circuit = row.get("circuit", "")
- baseline_metrics = baseline.get(circuit)
+ baseline_metrics = baseline.get((circuit, backend))
cells = [f"`{circuit}`"]
for metric, unit in METRIC_COLUMNS:
@@ -169,9 +186,18 @@ def render_table(
return "\n".join(lines)
+# Display order for backends. Anything not listed here is appended in the
+# order it appeared in the CSV.
+BACKEND_DISPLAY_ORDER: tuple[str, ...] = ("whir", "groth16")
+BACKEND_TITLES: dict[str, str] = {
+ "whir": "WHIR backend",
+ "groth16": "Groth16 backend",
+}
+
+
def compose_comment(
rows: list[dict[str, str]],
- baseline: dict[str, dict[str, float]],
+ baseline: dict[tuple[str, str], dict[str, float]],
baseline_run_id: str,
has_baseline_file: bool,
run_id: str,
@@ -181,7 +207,15 @@ def compose_comment(
runs_per_circuit: str,
) -> str:
short_sha = sha[:12] if sha else "unknown"
- table = render_table(rows, baseline, has_baseline_file)
+ by_backend = group_by_backend(rows)
+
+ # Stable backend display order: known backends first, unknown ones after.
+ backends_present = list(by_backend.keys())
+ ordered_backends = [b for b in BACKEND_DISPLAY_ORDER if b in by_backend]
+ ordered_backends += [b for b in backends_present if b not in BACKEND_DISPLAY_ORDER]
+
+ distinct_circuits = sorted({(row.get("circuit") or "") for row in rows})
+ distinct_circuits = [c for c in distinct_circuits if c]
if has_baseline_file:
if baseline_run_id:
@@ -189,13 +223,13 @@ def compose_comment(
f"Each metric cell shows the current value followed by the "
f"percentage delta against the latest successful "
f"[`main` run #{baseline_run_id}](https://github.com/worldfnd/provekit/actions/runs/{baseline_run_id}). "
- f"`(new)` marks circuits absent from the baseline."
+ f"`(new)` marks (circuit, backend) pairs absent from the baseline."
)
else:
baseline_note = (
"Each metric cell shows the current value followed by the "
"percentage delta against the latest successful `main` run. "
- "`(new)` marks circuits absent from the baseline."
+ "`(new)` marks (circuit, backend) pairs absent from the baseline."
)
else:
baseline_note = (
@@ -203,6 +237,10 @@ def compose_comment(
"workflow has produced at least one successful `main` run._"
)
+ backend_summary = ", ".join(
+ f"{BACKEND_TITLES.get(b, b)} ({len(by_backend[b])})" for b in ordered_backends
+ ) or "—"
+
lines = [
MARKER,
"## CSP benchmarks",
@@ -212,8 +250,9 @@ def compose_comment(
f"| Workflow status | {status_with_icon(status)} |",
f"| Commit | `{short_sha}` |",
f"| Run | [#{run_id}]({run_url}) |",
- f"| Circuits benchmarked | {len(rows)} |",
- f"| Iterations averaged per circuit | {runs_per_circuit} |",
+ f"| Distinct circuits | {len(distinct_circuits)} |",
+ f"| Backends benchmarked | {backend_summary} |",
+ f"| Iterations averaged per (circuit, backend) | {runs_per_circuit} |",
"",
"Prover time, peak RSS, peak heap, and verifier time are arithmetic means "
"across the iterations. Peak heap comes from the largest "
@@ -222,14 +261,27 @@ def compose_comment(
"",
baseline_note,
"",
- "",
- "Results
",
- "",
- table,
- "",
- " ",
- "",
]
+
+ if not ordered_backends:
+ lines.append("_No benchmark results were produced._")
+ lines.append("")
+ else:
+ for backend in ordered_backends:
+ title = BACKEND_TITLES.get(backend, backend)
+ table = render_table(by_backend[backend], backend, baseline, has_baseline_file)
+            lines.extend([
+                f"### {title}",
+                "",
+                "<details>",
+                "<summary>Results</summary>",
+                "",
+                table,
+                "",
+                "</details>",
+                "",
+            ])
+
return "\n".join(lines)
diff --git a/.github/workflows/csp-benchmarks.yml b/.github/workflows/csp-benchmarks.yml
index 3e17f6368..a7f4bc351 100644
--- a/.github/workflows/csp-benchmarks.yml
+++ b/.github/workflows/csp-benchmarks.yml
@@ -7,9 +7,17 @@ on:
workflow_dispatch:
inputs:
bench_runs:
- description: "Iterations per circuit (default: 3)"
+ description: "Iterations per (circuit, backend) (default: 3)"
required: false
default: "3"
+ bench_backends:
+ description: "Backends to benchmark, space-separated (default: \"whir groth16\")"
+ required: false
+ default: "whir groth16"
+ bench_skip_groth16:
+ description: "Regex of circuits to skip on the groth16 backend (default: empty)"
+ required: false
+ default: ""
permissions:
contents: read
@@ -22,6 +30,8 @@ permissions:
env:
CARGO_TERM_COLOR: always
BENCH_RUNS: ${{ github.event_name == 'workflow_dispatch' && (github.event.inputs.bench_runs != '' && github.event.inputs.bench_runs || '3') || '3' }}
+ BENCH_BACKENDS: ${{ github.event_name == 'workflow_dispatch' && (github.event.inputs.bench_backends != '' && github.event.inputs.bench_backends || 'whir groth16') || 'whir groth16' }}
+ BENCH_SKIP_GROTH16: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.bench_skip_groth16 || '' }}
REQUIRED_NARGO_VERSION: "1.0.0-beta.19"
concurrency:
@@ -58,6 +68,8 @@ jobs:
PROVEKIT_BIN: ${{ github.workspace }}/target/release/provekit-cli
BENCH_DIR: ${{ github.workspace }}/csp-bench-logs
BENCH_RUNS: ${{ env.BENCH_RUNS }}
+ BENCH_BACKENDS: ${{ env.BENCH_BACKENDS }}
+ BENCH_SKIP_GROTH16: ${{ env.BENCH_SKIP_GROTH16 }}
run: |
bash scripts/run_csp_benchmarks.sh
diff --git a/Cargo.lock b/Cargo.lock
index f93dc8543..6a07443bc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -264,6 +264,7 @@ dependencies = [
"num-bigint",
"num-integer",
"num-traits",
+ "rayon",
"zeroize",
]
@@ -452,6 +453,7 @@ dependencies = [
"arrayvec",
"digest 0.10.7",
"num-bigint",
+ "rayon",
]
[[package]]
@@ -493,6 +495,7 @@ checksum = "246a225cc6131e9ee4f24619af0f19d67761fff15d7ccc22e42b80846e69449a"
dependencies = [
"num-traits",
"rand 0.8.5",
+ "rayon",
]
[[package]]
@@ -3211,6 +3214,15 @@ version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
+[[package]]
+name = "memmap2"
+version = "0.9.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3"
+dependencies = [
+ "libc",
+]
+
[[package]]
name = "memoffset"
version = "0.6.5"
@@ -4596,16 +4608,16 @@ dependencies = [
"anyhow",
"argh",
"ark-ff 0.5.0",
+ "ark-serialize 0.5.0",
"base64",
"hex",
- "nargo",
"nargo_toml",
- "noir_artifact_cli",
"noirc_abi",
"noirc_driver",
"postcard",
"provekit-common",
"provekit-gnark",
+ "provekit-groth16",
"provekit-prover",
"provekit-r1cs-compiler",
"provekit-verifier",
@@ -4687,25 +4699,57 @@ dependencies = [
"whir",
]
+[[package]]
+name = "provekit-groth16"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "ark-bn254",
+ "ark-ec",
+ "ark-ff 0.5.0",
+ "ark-poly",
+ "ark-serialize 0.5.0",
+ "ark-std 0.5.0",
+ "memmap2",
+ "provekit-common",
+ "rayon",
+ "serde",
+ "sha2 0.10.9",
+ "tempfile",
+ "tracing",
+ "zeroize",
+]
+
[[package]]
name = "provekit-prover"
version = "0.1.0"
dependencies = [
"acir",
"anyhow",
+ "ark-bn254",
+ "ark-ec",
"ark-ff 0.5.0",
+ "ark-poly",
+ "ark-serialize 0.5.0",
"ark-std 0.5.0",
"bn254_blackbox_solver",
+ "bytes",
"mavros-artifacts",
"mavros-vm",
+ "memmap2",
"nargo",
"noir_artifact_cli",
"noirc_abi",
"num-bigint",
"postcard",
"provekit-common",
+ "provekit-groth16",
+ "rayon",
+ "serde",
"tracing",
"whir",
+ "xz2",
+ "zstd",
]
[[package]]
@@ -4734,8 +4778,11 @@ name = "provekit-verifier"
version = "0.1.0"
dependencies = [
"anyhow",
+ "ark-bn254",
+ "ark-serialize 0.5.0",
"ark-std 0.5.0",
"provekit-common",
+ "provekit-groth16",
"rayon",
"tracing",
"whir",
diff --git a/Cargo.toml b/Cargo.toml
index 73d5ac541..5ee203dc0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,6 +10,7 @@ members = [
"provekit/r1cs-compiler",
"provekit/prover",
"provekit/verifier",
+ "provekit/groth16",
"tooling/cli",
"tooling/provekit-bench",
"tooling/provekit-ffi",
@@ -100,6 +101,7 @@ provekit-cli = { path = "tooling/cli" }
provekit-common = { path = "provekit/common" , features = ["provekit_ntt"]}
provekit-ffi = { path = "tooling/provekit-ffi" }
provekit-gnark = { path = "tooling/provekit-gnark" }
+provekit-groth16 = { path = "provekit/groth16" }
provekit-prover = { path = "provekit/prover", default-features = false }
provekit-r1cs-compiler = { path = "provekit/r1cs-compiler" }
provekit-verifier = { path = "provekit/verifier" }
@@ -155,6 +157,7 @@ parking_lot = "0.12"
# and calls keccak::f1600(), which was removed in keccak 0.2.0 stable. Pinning to
# the RC prevents `cargo update` from bumping acvm_blackbox_solver's keccak to stable.
keccak = "=0.2.0-rc.2"
+memmap2 = "0.9.5"
xz2 = "0.1.7"
zerocopy = "0.8.25"
zeroize = "1.8.1"
diff --git a/README.md b/README.md
index a7e8f0b18..5afe07485 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ cargo run --release --bin provekit-cli verify
| Command | Purpose | Key options |
| :--- | :--- | :--- |
-| `prepare` | Compile a Noir package and write prover/verifier keys | `--pkp`/`-p`, `--pkv`/`-v`, `--hash`; default hash: `skyscraper` |
+| `prepare` | Compile a Noir package and write prover/verifier keys | `--pkp`/`-p`, `--pkv`/`-v`, `--hash`, `--backend`, `--mmap` (Groth16 only); default hash: `skyscraper`, default backend: `whir` |
| `prove` | Produce `proof.np` from a prover key and inputs | `--prover`/`-p`, `--input`/`-i`, `--out`/`-o` |
| `verify` | Verify a proof against a verifier key | `--verifier`/`-v`, `--proof` |
@@ -52,6 +52,8 @@ Read the table per command: the short `-p` flag changes meaning between `prepare
Available `prepare --hash` choices are `skyscraper`, `sha256`, `keccak`, `blake3`, and `poseidon2`.
+Available `prepare --backend` choices are `whir` (default) and `groth16`.
+
## How It Works
```mermaid
@@ -109,6 +111,7 @@ For larger circuits and integration experiments, see [`noir-examples/`](./noir-e
## Advanced Usage
+- **Mmap-format `.pkp`** (Groth16 only): pass `--mmap` to `prepare` to write an mmap-friendly `.pkp` instead of the zstd-compressed default. Larger artifact (no compression, raw in-memory layout for curve-point and R1CS arrays), but near-instant load — the kernel pages bytes in lazily as the MSM touches them, matching rapidsnark's zkey-loading model. Both layouts share the `.pkp` extension; `prove` auto-detects via the file's `MMAP` sentinel.
- **Direct R1CS frontend:** after generating Mavros artifacts, call `provekit-cli prepare --compiler mavros --r1cs <path>`.
- **Recursive verifier inputs:** `provekit-cli generate-gnark-inputs <path>` writes `params_for_recursive_verifier` and `r1cs.json` by default; use `--params` and `--r1cs` to override those paths.
- **Inspection commands:** use `circuit-stats` for Noir ACIR/R1CS structure, `analyze-pkp` for Noir prover-key size breakdowns, and `show-inputs` for public inputs.
diff --git a/noir-examples/noir_sha256/Prover.toml b/noir-examples/noir_sha256/Prover.toml
index 689c76d15..7c0585c7b 100644
--- a/noir-examples/noir_sha256/Prover.toml
+++ b/noir-examples/noir_sha256/Prover.toml
@@ -1,3 +1,3 @@
input = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-expected = [67, 25, 141, 183, 254, 43, 174, 230, 241, 12, 52, 52, 209, 164, 42, 198, 77, 148, 199, 2, 25, 96, 124, 119, 128, 33, 172, 170, 236, 162, 201, 30]
+expected = [248, 42, 35, 54, 144, 202, 70, 153, 180, 85, 249, 220, 89, 110, 125, 88, 66, 133, 186, 224, 63, 42, 42, 43, 212, 248, 195, 112, 11, 16, 217, 156]
diff --git a/noir-examples/noir_sha256/src/main.nr b/noir-examples/noir_sha256/src/main.nr
index 31f5f3f92..cf5aec930 100644
--- a/noir-examples/noir_sha256/src/main.nr
+++ b/noir-examples/noir_sha256/src/main.nr
@@ -1,7 +1,7 @@
use sha256::sha256_var;
-// Chain 17 SHA-256 rounds over a 32-byte state.
-global NUM_SHA_CALLS: u32 = 17;
+// Change this to control how many SHA256 calls are generated.
+global NUM_SHA_CALLS: u32 = 35;
fn main(input: [u8; 32], expected: pub [u8; 32]) {
let mut data = input;
diff --git a/playground/passport-input-gen/src/bin/passport_cli/main.rs b/playground/passport-input-gen/src/bin/passport_cli/main.rs
index 0a21bb4fa..fe9d96eff 100644
--- a/playground/passport-input-gen/src/bin/passport_cli/main.rs
+++ b/playground/passport-input-gen/src/bin/passport_cli/main.rs
@@ -247,7 +247,7 @@ fn prove_circuit(
"\n [{circuit_name}] Loading prover from: {}",
pkp_path.display()
);
- let prover: provekit_common::Prover = provekit_common::file::read(pkp_path)
+ let prover: provekit_prover::Prover = provekit_prover::read_pkp(pkp_path)
.with_context(|| format!("Reading prover key for {circuit_name}"))?;
let (num_constraints, num_witnesses) = prover.size();
@@ -259,8 +259,9 @@ fn prove_circuit(
let json = serde_json::to_string(inputs)
.with_context(|| format!("Serializing {circuit_name} inputs to JSON"))?;
let abi = match &prover {
- provekit_common::Prover::Noir(p) => p.witness_generator.abi(),
- provekit_common::Prover::Mavros(p) => &p.abi,
+ provekit_prover::Prover::Noir(p) => p.witness_generator.abi(),
+ provekit_prover::Prover::Mavros(p) => &p.abi,
+ provekit_prover::Prover::Groth16(p) => p.witness_generator.abi(),
};
let input_map = Format::Json
.parse(&json, abi)
diff --git a/provekit/common/src/file/binary_format.rs b/provekit/common/src/file/binary_format.rs
index 44ff55717..949d491fd 100644
--- a/provekit/common/src/file/binary_format.rs
+++ b/provekit/common/src/file/binary_format.rs
@@ -15,13 +15,13 @@ pub const XZ_MAGIC: [u8; 6] = [0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00];
// ---------------------------------------------------------------------------
pub const PROVER_FORMAT: [u8; 8] = *b"PrvKitPr";
-pub const PROVER_VERSION: (u16, u16) = (1, 2);
+pub const PROVER_VERSION: (u16, u16) = (1, 5);
pub const VERIFIER_FORMAT: [u8; 8] = *b"PrvKitVr";
-pub const VERIFIER_VERSION: (u16, u16) = (1, 3);
+pub const VERIFIER_VERSION: (u16, u16) = (1, 4);
pub const NOIR_PROOF_SCHEME_FORMAT: [u8; 8] = *b"NrProScm";
pub const NOIR_PROOF_SCHEME_VERSION: (u16, u16) = (1, 2);
pub const NOIR_PROOF_FORMAT: [u8; 8] = *b"NPSProof";
-pub const NOIR_PROOF_VERSION: (u16, u16) = (1, 1);
+pub const NOIR_PROOF_VERSION: (u16, u16) = (1, 2);
diff --git a/provekit/common/src/file/io/bin.rs b/provekit/common/src/file/io/bin.rs
index a092b9462..41da7bff8 100644
--- a/provekit/common/src/file/io/bin.rs
+++ b/provekit/common/src/file/io/bin.rs
@@ -111,12 +111,29 @@ pub fn read_hash_config(
}
/// Read a compressed binary file, auto-detecting zstd or XZ compression.
+///
+/// The decompressed bytes are streamed directly into postcard's deserializer
+/// instead of being materialized into a single `Vec`. This keeps peak
+/// memory close to the size of the deserialized struct, instead of paying
+/// twice (once for the decompressed buffer, once for the parsed value).
+///
+/// `postcard::from_io` needs a scratch buffer sized to fit the largest
+/// `deserialize_bytes` / `deserialize_byte_buf` read it will encounter. For
+/// our types that's bounded by the on-disk file size (the largest single
+/// borrowed-bytes field — currently the Groth16 proving key — encodes
+/// ~1:1 against the compressed file because arkworks-serialized curve points
+/// are essentially random). We size the scratch buffer to the file size with
+/// a small floor for tiny files.
#[instrument(fields(size = path.metadata().map(|m| m.len()).ok()))]
pub fn read_bin<T: for<'a> Deserialize<'a>>(
path: &Path,
format: [u8; 8],
(major, minor): (u16, u16),
) -> Result<T> {
+ use std::io::BufRead;
+
+ let file_size = path.metadata().map(|m| m.len()).unwrap_or(0) as usize;
+
let mut file = BufReader::new(File::open(path).context("while opening input file")?);
let mut buffer = [0; HEADER_SIZE];
@@ -140,9 +157,48 @@ pub fn read_bin Deserialize<'a>>(
// Skip hash_config byte (can be read separately via read_hash_config if needed)
let _hash_config_byte = header.get_u8();
- let uncompressed = decompress_stream(&mut file)?;
+ // Detect compression via magic bytes.
+ let peek = file.fill_buf().context("while peeking compression magic")?;
+ ensure!(
+ peek.len() >= 6,
+ "File too small to detect compression format"
+ );
+ let is_zstd = peek[..4] == ZSTD_MAGIC;
+ let is_xz = peek[..6] == XZ_MAGIC;
+
+ // Scratch buffer for postcard streaming. 1 MB floor handles tiny files
+ // (.np proofs are a few hundred bytes); for large .pkp files we use the
+ // compressed file size, which is a safe upper bound on the largest single
+ // `deserialize_byte_buf` read in our formats.
+ let scratch_size = std::cmp::max(1 << 20, file_size);
+ let mut scratch = vec![0u8; scratch_size];
+
+ // Wrap the streaming decoder in a `BufReader` so postcard's per-byte
+ // `pop()` calls become fast in-memory reads instead of one syscall each.
+ // 256 KB is large enough to amortize syscall overhead without holding more
+ // decompressed data in memory than necessary.
+ const DECODER_BUF: usize = 256 * 1024;
+
+ let value = if is_zstd {
+ let decoder = zstd::Decoder::new(file).context("while initializing zstd decoder")?;
+ let buffered = BufReader::with_capacity(DECODER_BUF, decoder);
+        let (value, _) = postcard::from_io::<T, _>((buffered, &mut scratch))
+ .context("while streaming postcard from zstd")?;
+ value
+ } else if is_xz {
+ let decoder = xz2::read::XzDecoder::new(file);
+ let buffered = BufReader::with_capacity(DECODER_BUF, decoder);
+        let (value, _) = postcard::from_io::<T, _>((buffered, &mut scratch))
+ .context("while streaming postcard from xz")?;
+ value
+ } else {
+ anyhow::bail!(
+ "Unknown compression format (first bytes: {:02X?})",
+ &peek[..peek.len().min(6)]
+ );
+ };
- postcard::from_bytes(&uncompressed).context("while decoding from postcard")
+ Ok(value)
}
/// Serialize a value to bytes in the same format as `write_bin` (header +
@@ -234,40 +290,3 @@ fn decompress_bytes(data: &[u8]) -> Result> {
);
}
}
-
-/// Peek at the first bytes to detect compression format, then
-/// stream-decompress.
-fn decompress_stream(reader: &mut BufReader<File>) -> Result<Vec<u8>> {
- use std::io::BufRead;
-
- let buf = reader
- .fill_buf()
- .context("while peeking compression magic")?;
- ensure!(
- buf.len() >= 6,
- "File too small to detect compression format"
- );
-
- let is_zstd = buf[..4] == ZSTD_MAGIC;
- let is_xz = buf[..6] == XZ_MAGIC;
-
- let mut out = Vec::new();
- if is_zstd {
- let mut decoder = zstd::Decoder::new(reader).context("while initializing zstd decoder")?;
- decoder
- .read_to_end(&mut out)
- .context("while decompressing zstd data")?;
- } else if is_xz {
- let mut decoder = xz2::read::XzDecoder::new(reader);
- decoder
- .read_to_end(&mut out)
- .context("while decompressing XZ data")?;
- } else {
- anyhow::bail!(
- "Unknown compression format (first bytes: {:02X?})",
- &buf[..buf.len().min(6)]
- );
- }
-
- Ok(out)
-}
diff --git a/provekit/common/src/file/io/mod.rs b/provekit/common/src/file/io/mod.rs
index 049c984a7..c2e6ae9aa 100644
--- a/provekit/common/src/file/io/mod.rs
+++ b/provekit/common/src/file/io/mod.rs
@@ -3,17 +3,18 @@ mod buf_ext;
mod counting_writer;
mod json;
+pub use self::bin::Compression;
use {
self::{
bin::{
deserialize_from_bytes, read_bin, read_hash_config as read_hash_config_bin,
- serialize_to_bytes, write_bin, Compression,
+ serialize_to_bytes, write_bin,
},
buf_ext::BufExt,
counting_writer::CountingWriter,
json::{read_json, write_json},
},
- crate::{HashConfig, NoirProof, NoirProofScheme, Prover, Verifier},
+ crate::{HashConfig, NoirProof, NoirProofScheme, Verifier},
anyhow::Result,
serde::{Deserialize, Serialize},
std::{ffi::OsStr, path::Path},
@@ -29,20 +30,13 @@ pub trait FileFormat: Serialize + for<'a> Deserialize<'a> {
}
/// Helper trait to optionally extract hash config.
-pub(crate) trait MaybeHashAware {
+///
+/// `pub` so downstream crates (e.g. `provekit_prover`) can implement it for
+/// types they own. Internal helpers in this module are the only consumers.
+pub trait MaybeHashAware {
fn maybe_hash_config(&self) -> Option<HashConfig>;
}
-/// Impl for Prover (has hash config).
-impl MaybeHashAware for Prover {
-    fn maybe_hash_config(&self) -> Option<HashConfig> {
- match self {
- Prover::Noir(p) => Some(p.hash_config),
- Prover::Mavros(p) => Some(p.hash_config),
- }
- }
-}
-
/// Impl for Verifier (has hash config).
impl MaybeHashAware for Verifier {
fn maybe_hash_config(&self) -> Option<HashConfig> {
@@ -74,13 +68,6 @@ impl FileFormat for NoirProofScheme {
const COMPRESSION: Compression = Compression::Zstd;
}
-impl FileFormat for Prover {
- const FORMAT: [u8; 8] = crate::binary_format::PROVER_FORMAT;
- const EXTENSION: &'static str = "pkp";
- const VERSION: (u16, u16) = crate::binary_format::PROVER_VERSION;
- const COMPRESSION: Compression = Compression::Xz;
-}
-
impl FileFormat for Verifier {
const FORMAT: [u8; 8] = crate::binary_format::VERIFIER_FORMAT;
const EXTENSION: &'static str = "pkv";
diff --git a/provekit/common/src/interner.rs b/provekit/common/src/interner.rs
index 822a6a7dd..413885f95 100644
--- a/provekit/common/src/interner.rs
+++ b/provekit/common/src/interner.rs
@@ -39,4 +39,30 @@ impl Interner {
pub fn get(&self, el: InternedFieldElement) -> Option<FieldElement> {
self.values.get(el.0).copied()
}
+
+ /// Borrow the deduplicated values array. Used by mmap-format writers
+ /// that need the raw bytes.
+ pub fn values_raw(&self) -> &[FieldElement] {
+ &self.values
+ }
+
+ /// Construct an Interner from a pre-built values vector. Bypasses the
+ /// dedup work in `intern()` — used by mmap-format readers that have
+ /// already loaded a deduplicated set of values from disk.
+    pub fn from_values(values: Vec<FieldElement>) -> Self {
+ Self { values }
+ }
+}
+
+impl InternedFieldElement {
+ /// Construct an InternedFieldElement from a raw index. Used by
+ /// mmap-format readers that load the index Vec from raw bytes.
+ pub const fn new(idx: usize) -> Self {
+ Self(idx)
+ }
+
+ /// Inner index value.
+ pub const fn index(&self) -> usize {
+ self.0
+ }
}
diff --git a/provekit/common/src/lib.rs b/provekit/common/src/lib.rs
index 3953207d8..ac04abbd9 100644
--- a/provekit/common/src/lib.rs
+++ b/provekit/common/src/lib.rs
@@ -3,7 +3,7 @@ pub use file::binary_format;
pub mod hash_config;
mod interner;
mod mavros;
-mod noir_proof_scheme;
+pub mod noir_proof_scheme;
pub mod ntt;
pub mod optimize;
pub mod poseidon2;
@@ -19,19 +19,17 @@ mod verifier;
mod whir_r1cs;
pub mod witness;
-use crate::{
- interner::{InternedFieldElement, Interner},
- sparse_matrix::{HydratedSparseMatrix, SparseMatrix},
-};
pub use {
acir::FieldElement as NoirElement,
ark_bn254::Fr as FieldElement,
hash_config::HashConfig,
+ interner::{InternedFieldElement, Interner},
mavros::{MavrosProver, MavrosSchemeData},
noir_proof_scheme::{NoirProof, NoirProofScheme, NoirSchemeData},
prefix_covector::{OffsetCovector, PrefixCovector, SparseCovector},
- prover::{NoirProver, Prover},
+ prover::NoirProver,
r1cs::R1CS,
+ sparse_matrix::{HydratedSparseMatrix, SparseMatrix},
transcript_sponge::TranscriptSponge,
verifier::Verifier,
whir_r1cs::{R1csHash, WhirConfig, WhirR1CSProof, WhirR1CSScheme, WhirZkConfig},
diff --git a/provekit/common/src/noir_proof_scheme.rs b/provekit/common/src/noir_proof_scheme.rs
index 7731d3c47..d084190ed 100644
--- a/provekit/common/src/noir_proof_scheme.rs
+++ b/provekit/common/src/noir_proof_scheme.rs
@@ -27,10 +27,48 @@ pub enum NoirProofScheme {
Mavros(MavrosSchemeData),
}
+// INVARIANT: Variant order is wire-format-critical (postcard uses positional
+// discriminants). Do not reorder, cfg-gate, or insert variants without
+// verifying cross-target deserialization (native <-> WASM).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
-pub struct NoirProof {
- pub public_inputs: PublicInputs,
- pub whir_r1cs_proof: WhirR1CSProof,
+pub enum NoirProof {
+ Whir {
+ public_inputs: PublicInputs,
+ whir_r1cs_proof: WhirR1CSProof,
+ },
+ Groth16 {
+ public_inputs: PublicInputs,
+ /// CanonicalSerialize'd `provekit_groth16::Proof`.
+        groth16_proof: Vec<u8>,
+ },
+}
+
+impl NoirProof {
+ /// Access public inputs regardless of proof variant.
+ pub fn public_inputs(&self) -> &PublicInputs {
+ match self {
+ NoirProof::Whir { public_inputs, .. } => public_inputs,
+ NoirProof::Groth16 { public_inputs, .. } => public_inputs,
+ }
+ }
+
+ /// Mutably access public inputs regardless of proof variant.
+ pub fn public_inputs_mut(&mut self) -> &mut PublicInputs {
+ match self {
+ NoirProof::Whir { public_inputs, .. } => public_inputs,
+ NoirProof::Groth16 { public_inputs, .. } => public_inputs,
+ }
+ }
+
+ /// Access the WHIR proof, panics if this is a Groth16 proof.
+ pub fn whir_r1cs_proof(&self) -> &WhirR1CSProof {
+ match self {
+ NoirProof::Whir {
+ whir_r1cs_proof, ..
+ } => whir_r1cs_proof,
+ NoirProof::Groth16 { .. } => panic!("called whir_r1cs_proof() on a Groth16 proof"),
+ }
+ }
}
impl NoirProofScheme {
diff --git a/provekit/common/src/prover.rs b/provekit/common/src/prover.rs
index 88e2da07b..88b89e6b8 100644
--- a/provekit/common/src/prover.rs
+++ b/provekit/common/src/prover.rs
@@ -1,12 +1,19 @@
+//! Backend-specific prover types that don't introduce a `provekit_groth16`
+//! dependency.
+//!
+//! `NoirProver` lives here because it's referenced by the WHIR pipeline that
+//! is shared by everything in the workspace. The Groth16 prover and the
+//! `Prover` enum live in `provekit_prover::prover_types` so they can hold a
+//! typed `provekit_groth16::ProvingKey` without creating a dependency cycle
+//! (`provekit_groth16` depends on this crate for `R1CS`).
+
use {
crate::{
- noir_proof_scheme::NoirProofScheme,
whir_r1cs::WhirR1CSScheme,
witness::{NoirWitnessGenerator, SplitWitnessBuilders},
- HashConfig, MavrosProver, NoirElement, R1CS,
+ HashConfig, NoirElement, R1CS,
},
acir::circuit::Program,
- noirc_abi::Abi,
serde::{Deserialize, Serialize},
};
@@ -19,68 +26,3 @@ pub struct NoirProver {
pub witness_generator: NoirWitnessGenerator,
pub whir_for_witness: WhirR1CSScheme,
}
-
-/// On-disk **ProveKit Prover** (PKP) — the prover-side scheme that gets
-/// serialized to a `.pkp` file by `prepare` and loaded by `prove`.
-///
-/// Holds the R1CS, witness builders, WHIR config, and frontend-specific
-/// program data needed to produce a proof.
-///
-/// INVARIANT: Variant order is wire-format-critical (postcard uses positional
-/// discriminants). Do not reorder, cfg-gate, or insert variants without
-/// verifying cross-target deserialization (native <-> WASM).
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub enum Prover {
- Noir(NoirProver),
- Mavros(MavrosProver),
-}
-
-impl Prover {
- /// Convert a compilation output into the on-disk prover format.
- pub fn from_noir_proof_scheme(scheme: NoirProofScheme) -> Self {
- match scheme {
- NoirProofScheme::Noir(d) => Prover::Noir(NoirProver {
- hash_config: d.hash_config,
- program: d.program,
- r1cs: d.r1cs,
- split_witness_builders: d.split_witness_builders,
- witness_generator: d.witness_generator,
- whir_for_witness: d.whir_for_witness,
- }),
- NoirProofScheme::Mavros(d) => Prover::Mavros(MavrosProver {
- abi: d.abi,
- num_public_inputs: d.num_public_inputs,
- whir_for_witness: d.whir_for_witness,
- witgen_binary: d.witgen_binary,
- ad_binary: d.ad_binary,
- constraints_layout: d.constraints_layout,
- witness_layout: d.witness_layout,
- hash_config: d.hash_config,
- }),
- }
- }
-
- pub fn abi(&self) -> &Abi {
- match self {
- Prover::Noir(p) => p.witness_generator.abi(),
- Prover::Mavros(p) => &p.abi,
- }
- }
-
- pub fn size(&self) -> (usize, usize) {
- match self {
- Prover::Noir(p) => (p.r1cs.num_constraints(), p.r1cs.num_witnesses()),
- Prover::Mavros(p) => (
- p.constraints_layout.algebraic_size,
- p.witness_layout.algebraic_size,
- ),
- }
- }
-
- pub fn whir_for_witness(&self) -> &WhirR1CSScheme {
- match self {
- Prover::Noir(p) => &p.whir_for_witness,
- Prover::Mavros(p) => &p.whir_for_witness,
- }
- }
-}
diff --git a/provekit/common/src/sparse_matrix.rs b/provekit/common/src/sparse_matrix.rs
index 012a3b447..887fd727a 100644
--- a/provekit/common/src/sparse_matrix.rs
+++ b/provekit/common/src/sparse_matrix.rs
@@ -312,6 +312,45 @@ impl SparseMatrix {
}
}
+ /// Borrow the internal `new_row_indices` array. Used by mmap-format
+ /// writers that need the raw bytes.
+ pub fn new_row_indices_raw(&self) -> &[u32] {
+ &self.new_row_indices
+ }
+
+ /// Borrow the internal `col_indices` array (absolute, not
+ /// delta-encoded). Used by mmap-format writers that need the raw
+ /// bytes.
+ pub fn col_indices_raw(&self) -> &[u32] {
+ &self.col_indices
+ }
+
+ /// Borrow the internal `values` array (interner indices). Used by
+ /// mmap-format writers that need the raw bytes.
+ pub fn values_raw(&self) -> &[InternedFieldElement] {
+ &self.values
+ }
+
+ /// Construct a `SparseMatrix` directly from its three internal arrays.
+ /// Used by mmap-format readers that have just memcpy'd the bytes
+ /// from disk into owned `Vec`s. Skips the per-entry insertion path
+ /// that goes through `set` / `push_row` / delta decoding.
+ pub fn from_raw_parts(
+ num_rows: usize,
+ num_cols: usize,
+        new_row_indices: Vec<u32>,
+        col_indices: Vec<u32>,
+        values: Vec<InternedFieldElement>,
+ ) -> Self {
+ Self {
+ num_rows,
+ num_cols,
+ new_row_indices,
+ col_indices,
+ values,
+ }
+ }
+
pub const fn hydrate<'a>(&'a self, interner: &'a Interner) -> HydratedSparseMatrix<'a> {
HydratedSparseMatrix {
matrix: self,
diff --git a/provekit/common/src/verifier.rs b/provekit/common/src/verifier.rs
index 2663cff61..a82dcf569 100644
--- a/provekit/common/src/verifier.rs
+++ b/provekit/common/src/verifier.rs
@@ -20,6 +20,10 @@ pub struct Verifier {
pub whir_for_witness: Option<WhirR1CSScheme>,
#[serde(with = "serde_jsonify")]
pub abi: Abi,
+ /// CanonicalSerialize'd `provekit_groth16::VerifyingKey` (None for WHIR
+ /// proofs).
+ #[serde(default)]
+ pub groth16_vk: Option<Vec<u8>>,
}
impl Verifier {
@@ -30,12 +34,14 @@ impl Verifier {
whir_for_witness: Some(d.whir_for_witness),
abi: d.witness_generator.abi.clone(),
hash_config: d.hash_config,
+ groth16_vk: None,
},
NoirProofScheme::Mavros(d) => Self {
r1cs: d.r1cs,
whir_for_witness: Some(d.whir_for_witness),
abi: d.abi.clone(),
hash_config: d.hash_config,
+ groth16_vk: None,
},
}
}
diff --git a/provekit/groth16/Cargo.toml b/provekit/groth16/Cargo.toml
new file mode 100644
index 000000000..a61149659
--- /dev/null
+++ b/provekit/groth16/Cargo.toml
@@ -0,0 +1,32 @@
+[package]
+name = "provekit-groth16"
+version = "0.1.0"
+edition.workspace = true
+rust-version.workspace = true
+authors.workspace = true
+license.workspace = true
+
+[dependencies]
+ark-bn254 = { version = "0.5.0", default-features = false, features = ["curve"] }
+ark-ff = { workspace = true }
+ark-ec = { version = "0.5", features = ["parallel"] }
+ark-poly = { workspace = true }
+ark-serialize = { workspace = true }
+ark-std = { version = "0.5", features = ["std"] }
+rayon = { workspace = true }
+anyhow = { workspace = true }
+sha2 = { workspace = true }
+serde = { workspace = true }
+tracing = { workspace = true }
+zeroize = { workspace = true, features = ["derive"] }
+provekit-common = { workspace = true }
+
+# Target-specific: mmap-backed proving key loader is non-WASM only.
+[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
+memmap2 = { workspace = true }
+
+[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies]
+tempfile = { workspace = true }
+
+[lints]
+workspace = true
diff --git a/provekit/groth16/src/lib.rs b/provekit/groth16/src/lib.rs
new file mode 100644
index 000000000..95afe5cdf
--- /dev/null
+++ b/provekit/groth16/src/lib.rs
@@ -0,0 +1,67 @@
+/// Groth16 proof system with BSB22 commitment extension for BN254.
+///
+/// This is a Rust port of gnark's Groth16 BN254 backend, using arkworks
+/// primitives for elliptic curve operations, pairings, FFT, and MSM.
+///
+/// Reference: DIZK paper (Figure 4)
+/// BSB22 extension:
+pub mod pedersen;
+pub mod prover;
+pub mod setup;
+pub mod types;
+pub mod verifier;
+
+#[cfg(not(target_arch = "wasm32"))]
+pub mod mmap_pk;
+
+#[cfg(not(target_arch = "wasm32"))]
+pub use mmap_pk::{MmapProvingKey, MMAP_SENTINEL};
+pub use types::{Proof, ProvingKey, VerifyingKey};
+
+/// Domain separator for BSB22 commitment hashing.
+pub const COMMITMENT_DST: &[u8] = b"bsb22-commitment";
+
+/// Domain separator for folding PoKs.
+pub const BSB22_FOLD_DST: &[u8] = b"G16-BSB22";
+
+/// Field element byte length for BN254.
+pub const FR_BYTES: usize = 32;
+
+/// Information about a single BSB22 commitment within the R1CS.
+///
+/// All wire indices in this struct are **absolute witness indices**: position
+/// 0 is the constant-1 ONE_WIRE, public input `i` lives at index `1 + i`, and
+/// private/challenge wires follow. The verifier subtracts 1 when looking up
+/// values in its `extended_public` vector (which excludes the ONE_WIRE), so
+/// index 0 is never a valid entry in `public_and_commitment_committed`.
+#[derive(Clone, Debug, Default)]
+pub struct CommitmentInfo {
+ /// Indices of public wires and other commitment wires hashed with this
+ /// commitment. See struct-level docs for index convention.
+ pub public_and_commitment_committed: Vec<usize>,
+ /// Indices of private/internal wires committed to.
+ pub private_committed: Vec<usize>,
+ /// Wire index where the commitment challenge value is stored.
+ pub commitment_index: usize,
+ /// Number of entries in `public_and_commitment_committed` that are public
+ /// (as opposed to other commitment indices).
+ pub nb_public_committed: usize,
+}
+
+impl CommitmentInfo {
+ /// Returns the public wire indices committed to.
+ pub fn public_committed(&self) -> &[usize] {
+ &self.public_and_commitment_committed[..self.nb_public_committed]
+ }
+
+ /// Returns the commitment wire indices committed to.
+ pub fn commitment_committed(&self) -> &[usize] {
+ &self.public_and_commitment_committed[self.nb_public_committed..]
+ }
+}
+
+/// Helper to convert arkworks MSM errors (which are just `usize`) into anyhow
+/// errors.
+pub(crate) fn msm_err(e: usize) -> anyhow::Error {
+ anyhow::anyhow!("MSM error: bases/scalars length mismatch ({})", e)
+}
diff --git a/provekit/groth16/src/mmap_pk.rs b/provekit/groth16/src/mmap_pk.rs
new file mode 100644
index 000000000..9554b1916
--- /dev/null
+++ b/provekit/groth16/src/mmap_pk.rs
@@ -0,0 +1,1388 @@
+//! mmap-backed Groth16 proving key.
+//!
+//! Mirrors rapidsnark's zkey loading approach (see
+//! `rapidsnark/src/fileloader.cpp` + `binfile_utils.cpp` + `zkey_utils.cpp`):
+//! the file is mmap'd once, sections are indexed from a small in-file table,
+//! and big curve-point arrays are exposed as `&[G1Affine]` / `&[G2Affine]`
+//! slices that point directly into the mmap'd region. No per-point
+//! deserialization, no copy.
+//!
+//! Coexists with the existing zstd-compressed `.pkp` path
+//! (`provekit_prover::pkp_io`); the on-disk discriminator is the 4-byte
+//! sentinel that follows the 21-byte common header — `MMAP_SENTINEL` here vs.
+//! zstd/xz magic in the legacy path.
+//!
+//! ## On-disk layout (after the 21-byte common header)
+//!
+//! ```text
+//! [ MMAP_SENTINEL 4 bytes ]
+//! [ metadata_len (u64 LE) 8 bytes ]
+//! [ postcard-encoded Prover metadata_len bytes (PK = zero-byte placeholder) ]
+//! [ pad to 8-byte align ]
+//! [ section_count (u32 LE) 4 bytes ]
+//! [ section table (id u32, off u64, len u64) × section_count ]
+//! [ pad to MMAP_ALIGN ]
+//! [ section bodies (raw arkworks in-memory layout for big arrays) ]
+//! ```
+//!
+//! Section IDs are listed in [`SectionId`].
+//!
+//! ## Why this layout assumes raw Montgomery in-memory bytes
+//!
+//! Arkworks `G1Affine` / `G2Affine` for BN254 are repr-Rust structs containing
+//! `Fp` field elements. The bytes stored on disk are produced
+//! by `slice::from_raw_parts(slice.as_ptr() as *const u8, ...)` — i.e. the
+//! exact in-memory representation including Montgomery form. On read, the
+//! mmap'd bytes are reinterpreted via [`std::slice::from_raw_parts`] back into
+//! `&[G1Affine]`. This matches rapidsnark's `(G1PointAffine *)ptr` cast.
+//!
+//! The cost is layout coupling: a future arkworks version that changes the
+//! `Affine` struct layout (or its `Fp` representation) silently breaks the
+//! file format. The format is therefore versioned via the common header's
+//! `PROVER_VERSION`; bump the version when the layout assumption changes.
+
+#![cfg(not(target_arch = "wasm32"))]
+
+use {
+ crate::pedersen,
+ anyhow::{bail, ensure, Context, Result},
+ ark_bn254::{Fr, G1Affine, G2Affine},
+ ark_serialize::{CanonicalDeserialize, CanonicalSerialize},
+ memmap2::Mmap,
+ provekit_common::{InternedFieldElement, Interner, SparseMatrix, R1CS},
+ std::{
+ fs::{File, OpenOptions},
+ io::{Read, Seek, SeekFrom, Write},
+ path::Path,
+ },
+ tracing::info_span,
+};
+
+/// 4-byte sentinel that distinguishes a mmap-format `.pkp` from a
+/// zstd/xz-compressed one. Placed immediately after the 21-byte common
+/// header.
+pub const MMAP_SENTINEL: [u8; 4] = *b"MMAP";
+
+/// Required alignment for the start of every section body. Picked to match
+/// `align_of::()` (which is `align_of::() == 8` on every
+/// supported target). Section bodies for `bool` arrays only need 1-byte
+/// alignment, but we pad them to `MMAP_ALIGN` too for consistency.
+pub const MMAP_ALIGN: usize = 8;
+
+/// Section IDs in the mmap-format `.pkp` file.
+#[repr(u32)]
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum SectionId {
+ G1A = 1,
+ G1B = 2,
+ G1Z = 3,
+ G1K = 4,
+ G2B = 5,
+ InfinityA = 6,
+ InfinityB = 7,
+ /// Small fixed-size data: domain_size, domain_gen, g1_alpha, g1_beta,
+ /// g1_delta, g2_beta, g2_delta, nb_infinity_a, nb_infinity_b. Stored
+ /// arkworks-uncompressed. As of PROVER_VERSION (1, 4) pedersen
+ /// commitment keys are no longer appended here — they live in
+ /// dedicated raw sections (`PedersenIndex`, `PedersenBases`,
+ /// `PedersenBasesExpSigma`) so loading them does not require
+ /// per-point arkworks deserialization.
+ Scalars = 8,
+ /// Per-commitment lengths: `u64 num_commitments` followed by
+ /// `num_commitments × (u64 basis_len, u64 sigma_len)`. Lets the
+ /// reader split the two raw G1Affine sections below into per-commit
+ /// slices.
+ PedersenIndex = 9,
+ /// Raw `G1Affine` bytes for `pedersen::ProvingKey::basis`, concatenated
+ /// across all commitments. Same in-memory Montgomery layout as the
+ /// `G1A` / `G1B` sections — copied (not borrowed) into owned
+ /// `Vec` on load, no arkworks per-point deserialize.
+ PedersenBases = 10,
+ /// Raw `G1Affine` bytes for `pedersen::ProvingKey::basis_exp_sigma`,
+ /// concatenated across all commitments. Same layout as
+ /// `PedersenBases`.
+ PedersenBasesExpSigma = 11,
+ /// R1CS scalars: a small fixed-shape header with `num_public_inputs`,
+ /// `num_virtual`, and per-matrix `num_rows` / `num_cols` for A / B /
+ /// C. Stored as raw `u64` bytes (8 fields × 8 bytes = 64 bytes
+ /// total).
+ R1CSScalars = 12,
+ /// R1CS interner: deduplicated `Vec<Fr>` in raw `Fr` bytes
+ /// (Montgomery layout, like the G1 sections).
+ R1CSInterner = 13,
+ /// `r1cs.a.new_row_indices` raw `u32` bytes.
+ R1CSAMatrixRowIndices = 14,
+ /// `r1cs.a.col_indices` raw `u32` bytes (absolute column indices —
+ /// the mmap format does not delta-encode).
+ R1CSAMatrixColIndices = 15,
+ /// `r1cs.a.values` raw `usize` (`InternedFieldElement` newtype)
+ /// bytes.
+ R1CSAMatrixValues = 16,
+ /// `r1cs.b.new_row_indices` raw `u32` bytes.
+ R1CSBMatrixRowIndices = 17,
+ /// `r1cs.b.col_indices` raw `u32` bytes.
+ R1CSBMatrixColIndices = 18,
+ /// `r1cs.b.values` raw `usize` bytes.
+ R1CSBMatrixValues = 19,
+ /// `r1cs.c.new_row_indices` raw `u32` bytes.
+ R1CSCMatrixRowIndices = 20,
+ /// `r1cs.c.col_indices` raw `u32` bytes.
+ R1CSCMatrixColIndices = 21,
+ /// `r1cs.c.values` raw `usize` bytes.
+ R1CSCMatrixValues = 22,
+ /// Commitment-info index: `u64 num_commitments` followed by
+ /// `num_commitments × (u64 pub_len, u64 priv_len, u64 chal_len)`.
+ /// Lets the reader split the three raw `u64` sections below into
+ /// per-commitment slices.
+ CommitmentInfoIndex = 23,
+ /// `Groth16CommitmentInfo::public_committed` raw `u64` bytes,
+ /// concatenated across all commitments. (`usize` on 64-bit hosts is
+ /// 8 bytes; we always store as `u64` for portability.)
+ CommitmentInfoPublicCommitted = 24,
+ /// `Groth16CommitmentInfo::private_committed` raw `u64` bytes.
+ CommitmentInfoPrivateCommitted = 25,
+ /// `Groth16CommitmentInfo::challenge_indices` raw `u64` bytes.
+ CommitmentInfoChallengeIndices = 26,
+}
+
+impl SectionId {
+ fn from_u32(v: u32) -> Option<Self> {
+ match v {
+ 1 => Some(Self::G1A),
+ 2 => Some(Self::G1B),
+ 3 => Some(Self::G1Z),
+ 4 => Some(Self::G1K),
+ 5 => Some(Self::G2B),
+ 6 => Some(Self::InfinityA),
+ 7 => Some(Self::InfinityB),
+ 8 => Some(Self::Scalars),
+ 9 => Some(Self::PedersenIndex),
+ 10 => Some(Self::PedersenBases),
+ 11 => Some(Self::PedersenBasesExpSigma),
+ 12 => Some(Self::R1CSScalars),
+ 13 => Some(Self::R1CSInterner),
+ 14 => Some(Self::R1CSAMatrixRowIndices),
+ 15 => Some(Self::R1CSAMatrixColIndices),
+ 16 => Some(Self::R1CSAMatrixValues),
+ 17 => Some(Self::R1CSBMatrixRowIndices),
+ 18 => Some(Self::R1CSBMatrixColIndices),
+ 19 => Some(Self::R1CSBMatrixValues),
+ 20 => Some(Self::R1CSCMatrixRowIndices),
+ 21 => Some(Self::R1CSCMatrixColIndices),
+ 22 => Some(Self::R1CSCMatrixValues),
+ 23 => Some(Self::CommitmentInfoIndex),
+ 24 => Some(Self::CommitmentInfoPublicCommitted),
+ 25 => Some(Self::CommitmentInfoPrivateCommitted),
+ 26 => Some(Self::CommitmentInfoChallengeIndices),
+ _ => None,
+ }
+ }
+}
+
+/// Compile-time assertion that arkworks BN254 `G1Affine` / `G2Affine` align to
+/// at most `MMAP_ALIGN`. If a future arkworks version raises alignment, this
+/// trips and the file format must be revisited.
+const _: () = {
+ assert!(std::mem::align_of::<G1Affine>() <= MMAP_ALIGN);
+ assert!(std::mem::align_of::<G2Affine>() <= MMAP_ALIGN);
+};
+
+/// Mmap-backed proving key: identical fields to [`crate::ProvingKey`] but the
+/// large arrays are slices into an mmap'd file rather than owned `Vec`s.
+///
+/// The `_mmap` field keeps the file mapping alive for the lifetime of the
+/// struct; the raw pointer/length pairs index into it. The accessor methods
+/// (`g1_a()` etc.) return slices with the struct's lifetime, so the borrow
+/// checker prevents callers from outliving the mapping.
+///
+/// SAFETY: `*_ptr` fields point into `_mmap`'s mapped region. Constructed
+/// only via [`MmapProvingKey::load`], which validates section bounds and
+/// alignment.
+pub struct MmapProvingKey {
+ /// Holds the file mapping alive. Never accessed after construction.
+ _mmap: Mmap,
+
+ pub domain_size: u64,
+ pub domain_gen: Fr,
+
+ pub g1_alpha: G1Affine,
+ pub g1_beta: G1Affine,
+ pub g1_delta: G1Affine,
+
+ g1_a_ptr: *const G1Affine,
+ g1_a_len: usize,
+ g1_b_ptr: *const G1Affine,
+ g1_b_len: usize,
+ g1_k_ptr: *const G1Affine,
+ g1_k_len: usize,
+ g1_z_ptr: *const G1Affine,
+ g1_z_len: usize,
+
+ pub g2_beta: G2Affine,
+ pub g2_delta: G2Affine,
+ g2_b_ptr: *const G2Affine,
+ g2_b_len: usize,
+
+ infinity_a_ptr: *const bool,
+ infinity_a_len: usize,
+ infinity_b_ptr: *const bool,
+ infinity_b_len: usize,
+
+ pub nb_infinity_a: u64,
+ pub nb_infinity_b: u64,
+
+ /// Raw-pointer descriptors for each Pedersen commitment key. The
+ /// pointers index into the same `_mmap` mapping above. Lifetime is
+ /// implicit through `&self` — accessors return `&[G1Affine]` slices
+ /// bound to `&self`. No memcpy on load, unlike the legacy
+ /// `Vec` field this replaces.
+ pub commitment_keys: Vec<MmapPedersenProvingKey>,
+}
+
+/// Borrowed Pedersen proving key whose basis arrays point into an mmap'd
+/// `.pkp` file. Layout-compatible with [`pedersen::ProvingKey`] (the
+/// underlying `G1Affine` bytes are in the same in-memory Montgomery form
+/// as the `G1A` / `G1B` sections), but no `Vec` is ever
+/// allocated — the pointers reference file pages directly.
+///
+/// SAFETY: the pointers are only valid while the parent `MmapProvingKey`
+/// (and therefore its `_mmap`) is alive. Construction and use are gated
+/// behind that lifetime via the `&self` borrow on the accessors.
+pub struct MmapPedersenProvingKey {
+ basis_ptr: *const G1Affine,
+ basis_len: usize,
+ basis_exp_sigma_ptr: *const G1Affine,
+ basis_exp_sigma_len: usize,
+}
+
+// SAFETY: raw pointers into a read-only `Mmap`, same justification as the
+// `MmapProvingKey` Send / Sync impls below.
+unsafe impl Send for MmapPedersenProvingKey {}
+unsafe impl Sync for MmapPedersenProvingKey {}
+
+impl MmapPedersenProvingKey {
+ pub fn basis(&self) -> &[G1Affine] {
+ // SAFETY: pointer / length validated by `load_pedersen_commitment_keys`
+ // (alignment + bounds against the section); mapping outlives `&self`.
+ unsafe { std::slice::from_raw_parts(self.basis_ptr, self.basis_len) }
+ }
+
+ pub fn basis_exp_sigma(&self) -> &[G1Affine] {
+ // SAFETY: see `basis`.
+ unsafe { std::slice::from_raw_parts(self.basis_exp_sigma_ptr, self.basis_exp_sigma_len) }
+ }
+
+ /// Borrow this mmap-backed key as a `pedersen::ProvingKeyView`, so
+ /// callers can run the same `commit` / `prove_knowledge` logic
+ /// whether the bases are owned or mmap-backed.
+ pub fn view(&self) -> pedersen::ProvingKeyView<'_> {
+ pedersen::ProvingKeyView {
+ basis: self.basis(),
+ basis_exp_sigma: self.basis_exp_sigma(),
+ }
+ }
+}
+
+// SAFETY: `*_ptr` fields point into a read-only `Mmap`. Mmap pages are
+// shareable across threads (the kernel handles paging), and we never mutate
+// through the pointers. `Vec` is already Send + Sync.
+unsafe impl Send for MmapProvingKey {}
+// SAFETY: same as Send — read-only access through aliasable pointers into a
+// shared mapping.
+unsafe impl Sync for MmapProvingKey {}
+
+impl std::fmt::Debug for MmapProvingKey {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ f.debug_struct("MmapProvingKey")
+ .field("domain_size", &self.domain_size)
+ .field("g1_a_len", &self.g1_a_len)
+ .field("g1_b_len", &self.g1_b_len)
+ .field("g1_k_len", &self.g1_k_len)
+ .field("g1_z_len", &self.g1_z_len)
+ .field("g2_b_len", &self.g2_b_len)
+ .field("nb_infinity_a", &self.nb_infinity_a)
+ .field("nb_infinity_b", &self.nb_infinity_b)
+ .field("nb_commitment_keys", &self.commitment_keys.len())
+ .finish()
+ }
+}
+
+impl MmapProvingKey {
+ pub fn g1_a(&self) -> &[G1Affine] {
+ // SAFETY: pointer/len validated in `load`; mapping outlives `&self`.
+ unsafe { std::slice::from_raw_parts(self.g1_a_ptr, self.g1_a_len) }
+ }
+
+ pub fn g1_b(&self) -> &[G1Affine] {
+ // SAFETY: see `g1_a`.
+ unsafe { std::slice::from_raw_parts(self.g1_b_ptr, self.g1_b_len) }
+ }
+
+ pub fn g1_k(&self) -> &[G1Affine] {
+ // SAFETY: see `g1_a`.
+ unsafe { std::slice::from_raw_parts(self.g1_k_ptr, self.g1_k_len) }
+ }
+
+ pub fn g1_z(&self) -> &[G1Affine] {
+ // SAFETY: see `g1_a`.
+ unsafe { std::slice::from_raw_parts(self.g1_z_ptr, self.g1_z_len) }
+ }
+
+ pub fn g2_b(&self) -> &[G2Affine] {
+ // SAFETY: see `g1_a`.
+ unsafe { std::slice::from_raw_parts(self.g2_b_ptr, self.g2_b_len) }
+ }
+
+ pub fn infinity_a(&self) -> &[bool] {
+ // SAFETY: see `g1_a`. `bool` has alignment 1, len validated.
+ unsafe { std::slice::from_raw_parts(self.infinity_a_ptr, self.infinity_a_len) }
+ }
+
+ pub fn infinity_b(&self) -> &[bool] {
+ // SAFETY: see `infinity_a`.
+ unsafe { std::slice::from_raw_parts(self.infinity_b_ptr, self.infinity_b_len) }
+ }
+
+ /// Load a proving key from a mmap-format file. The file's 21-byte common
+ /// header has already been read and validated by the caller; `data_offset`
+ /// is the offset (within the mmap) where the [`MMAP_SENTINEL`] starts.
+ ///
+ /// The caller is responsible for any postcard metadata that lives in the
+ /// same file — this function reads only the section table and section
+ /// bodies for the proving key.
+ pub fn load(mmap: Mmap, sections_start: usize) -> Result<Self> {
+ ensure!(
+ sections_start + 4 <= mmap.len(),
+ "mmap pkp: section_count out of bounds"
+ );
+ let section_count =
+ u32::from_le_bytes(mmap[sections_start..sections_start + 4].try_into().unwrap());
+ let table_start = sections_start + 4;
+ let table_entry_bytes = 4 + 8 + 8;
+ let table_end = table_start + section_count as usize * table_entry_bytes;
+ ensure!(
+ table_end <= mmap.len(),
+ "mmap pkp: section table out of bounds (table_end={}, file_len={})",
+ table_end,
+ mmap.len()
+ );
+
+ // Parse section table.
+ let section_offsets = {
+ let _s = info_span!("section_table_parse", section_count).entered();
+ let mut section_offsets = std::collections::HashMap::<SectionId, (usize, usize)>::new();
+ for i in 0..section_count {
+ let entry = table_start + i as usize * table_entry_bytes;
+ let id = u32::from_le_bytes(mmap[entry..entry + 4].try_into().unwrap());
+ let off =
+ u64::from_le_bytes(mmap[entry + 4..entry + 12].try_into().unwrap()) as usize;
+ let len =
+ u64::from_le_bytes(mmap[entry + 12..entry + 20].try_into().unwrap()) as usize;
+ ensure!(
+ off + len <= mmap.len(),
+ "mmap pkp: section {} body out of bounds",
+ id
+ );
+ let Some(sid) = SectionId::from_u32(id) else {
+ bail!("mmap pkp: unknown section id {}", id);
+ };
+ section_offsets.insert(sid, (off, len));
+ }
+ section_offsets
+ };
+
+ let g1_size = std::mem::size_of::<G1Affine>();
+ let g2_size = std::mem::size_of::<G2Affine>();
+
+ let load_g1_section = |sid: SectionId| -> Result<(*const G1Affine, usize)> {
+ let (off, len) = *section_offsets
+ .get(&sid)
+ .ok_or_else(|| anyhow::anyhow!("missing section {:?}", sid))?;
+ ensure!(
+ off % MMAP_ALIGN == 0,
+ "section {:?} body not aligned (off={})",
+ sid,
+ off
+ );
+ ensure!(
+ len % g1_size == 0,
+ "section {:?} body length {} not a multiple of size_of::()={}",
+ sid,
+ len,
+ g1_size
+ );
+ let count = len / g1_size;
+ let ptr = unsafe { mmap.as_ptr().add(off) } as *const G1Affine;
+ Ok((ptr, count))
+ };
+
+ let load_g2_section = |sid: SectionId| -> Result<(*const G2Affine, usize)> {
+ let (off, len) = *section_offsets
+ .get(&sid)
+ .ok_or_else(|| anyhow::anyhow!("missing section {:?}", sid))?;
+ ensure!(
+ off % MMAP_ALIGN == 0,
+ "section {:?} body not aligned (off={})",
+ sid,
+ off
+ );
+ ensure!(
+ len % g2_size == 0,
+ "section {:?} body length {} not a multiple of size_of::()={}",
+ sid,
+ len,
+ g2_size
+ );
+ let count = len / g2_size;
+ let ptr = unsafe { mmap.as_ptr().add(off) } as *const G2Affine;
+ Ok((ptr, count))
+ };
+
+ let load_bool_section = |sid: SectionId| -> Result<(*const bool, usize)> {
+ let (off, len) = *section_offsets
+ .get(&sid)
+ .ok_or_else(|| anyhow::anyhow!("missing section {:?}", sid))?;
+ let ptr = unsafe { mmap.as_ptr().add(off) } as *const bool;
+ Ok((ptr, len))
+ };
+
+ let (
+ g1_a_ptr,
+ g1_a_len,
+ g1_b_ptr,
+ g1_b_len,
+ g1_z_ptr,
+ g1_z_len,
+ g1_k_ptr,
+ g1_k_len,
+ g2_b_ptr,
+ g2_b_len,
+ infinity_a_ptr,
+ infinity_a_len,
+ infinity_b_ptr,
+ infinity_b_len,
+ ) = {
+ let _s = info_span!("bulk_section_pointers").entered();
+ let (g1_a_ptr, g1_a_len) = load_g1_section(SectionId::G1A)?;
+ let (g1_b_ptr, g1_b_len) = load_g1_section(SectionId::G1B)?;
+ let (g1_z_ptr, g1_z_len) = load_g1_section(SectionId::G1Z)?;
+ let (g1_k_ptr, g1_k_len) = load_g1_section(SectionId::G1K)?;
+ let (g2_b_ptr, g2_b_len) = load_g2_section(SectionId::G2B)?;
+ let (infinity_a_ptr, infinity_a_len) = load_bool_section(SectionId::InfinityA)?;
+ let (infinity_b_ptr, infinity_b_len) = load_bool_section(SectionId::InfinityB)?;
+ (
+ g1_a_ptr,
+ g1_a_len,
+ g1_b_ptr,
+ g1_b_len,
+ g1_z_ptr,
+ g1_z_len,
+ g1_k_ptr,
+ g1_k_len,
+ g2_b_ptr,
+ g2_b_len,
+ infinity_a_ptr,
+ infinity_a_len,
+ infinity_b_ptr,
+ infinity_b_len,
+ )
+ };
+
+ // Scalars: arkworks-uncompressed.
+ let (sp_off, sp_len) = *section_offsets
+ .get(&SectionId::Scalars)
+ .ok_or_else(|| anyhow::anyhow!("missing scalars section"))?;
+ let mut sp = &mmap[sp_off..sp_off + sp_len];
+
+ let (
+ domain_size,
+ domain_gen,
+ g1_alpha,
+ g1_beta,
+ g1_delta,
+ g2_beta,
+ g2_delta,
+ nb_infinity_a,
+ nb_infinity_b,
+ ) = {
+ let _s = info_span!("scalars_deserialize", sp_len).entered();
+ let domain_size =
+ u64::deserialize_uncompressed_unchecked(&mut sp).context("read domain_size")?;
+ let domain_gen =
+ Fr::deserialize_uncompressed_unchecked(&mut sp).context("read domain_gen")?;
+ let g1_alpha =
+ G1Affine::deserialize_uncompressed_unchecked(&mut sp).context("read g1_alpha")?;
+ let g1_beta =
+ G1Affine::deserialize_uncompressed_unchecked(&mut sp).context("read g1_beta")?;
+ let g1_delta =
+ G1Affine::deserialize_uncompressed_unchecked(&mut sp).context("read g1_delta")?;
+ let g2_beta =
+ G2Affine::deserialize_uncompressed_unchecked(&mut sp).context("read g2_beta")?;
+ let g2_delta =
+ G2Affine::deserialize_uncompressed_unchecked(&mut sp).context("read g2_delta")?;
+ let nb_infinity_a =
+ u64::deserialize_uncompressed_unchecked(&mut sp).context("read nb_infinity_a")?;
+ let nb_infinity_b =
+ u64::deserialize_uncompressed_unchecked(&mut sp).context("read nb_infinity_b")?;
+ (
+ domain_size,
+ domain_gen,
+ g1_alpha,
+ g1_beta,
+ g1_delta,
+ g2_beta,
+ g2_delta,
+ nb_infinity_a,
+ nb_infinity_b,
+ )
+ };
+
+ // Pedersen commitment keys: raw G1Affine bytes in three sections.
+ // Layout-compatible with the bulk G1 sections (same in-memory
+ // Montgomery form), but built into owned `Vec` here so
+ // the existing `pedersen::ProvingKey` API stays intact. The copy
+ // is one memcpy per basis/sigma slice — no per-point arkworks
+ // deserialization, no Montgomery rebuild.
+ let commitment_keys = {
+ let _s = info_span!("pedersen_commitment_keys_load").entered();
+ load_pedersen_commitment_keys(&mmap, &section_offsets)?
+ };
+
+ Ok(MmapProvingKey {
+ _mmap: mmap,
+ domain_size,
+ domain_gen,
+ g1_alpha,
+ g1_beta,
+ g1_delta,
+ g1_a_ptr,
+ g1_a_len,
+ g1_b_ptr,
+ g1_b_len,
+ g1_k_ptr,
+ g1_k_len,
+ g1_z_ptr,
+ g1_z_len,
+ g2_beta,
+ g2_delta,
+ g2_b_ptr,
+ g2_b_len,
+ infinity_a_ptr,
+ infinity_a_len,
+ infinity_b_ptr,
+ infinity_b_len,
+ nb_infinity_a,
+ nb_infinity_b,
+ commitment_keys,
+ })
+ }
+}
+
+/// Read the three Pedersen sections and build
+/// `Vec<MmapPedersenProvingKey>` with raw pointers into the mmap. Pure
+/// zero-copy — no `Vec<G1Affine>` is allocated, no per-point arkworks
+/// deserialization, no memcpy of the basis bytes. Allocation cost is one
+/// outer `Vec<MmapPedersenProvingKey>` of `num_commitments` × 32-byte
+/// descriptors (a few hundred bytes for typical circuits).
+///
+/// If there are no commitment keys (circuit without BSB22 commitments)
+/// the index section still exists but encodes zero commitments, and
+/// the two byte sections are empty.
+fn load_pedersen_commitment_keys(
+ mmap: &Mmap,
+ section_offsets: &std::collections::HashMap<SectionId, (usize, usize)>,
+) -> Result<Vec<MmapPedersenProvingKey>> {
+ let g1_size = std::mem::size_of::<G1Affine>();
+
+ let (idx_off, idx_len) = *section_offsets
+ .get(&SectionId::PedersenIndex)
+ .ok_or_else(|| anyhow::anyhow!("missing pedersen index section"))?;
+ let (bases_off, bases_len) = *section_offsets
+ .get(&SectionId::PedersenBases)
+ .ok_or_else(|| anyhow::anyhow!("missing pedersen bases section"))?;
+ let (sigma_off, sigma_len) = *section_offsets
+ .get(&SectionId::PedersenBasesExpSigma)
+ .ok_or_else(|| anyhow::anyhow!("missing pedersen basis_exp_sigma section"))?;
+
+ ensure!(
+ bases_off % MMAP_ALIGN == 0,
+ "PedersenBases body not aligned (off={})",
+ bases_off
+ );
+ ensure!(
+ sigma_off % MMAP_ALIGN == 0,
+ "PedersenBasesExpSigma body not aligned (off={})",
+ sigma_off
+ );
+ ensure!(
+ bases_len % g1_size == 0,
+ "PedersenBases body length {} not a multiple of size_of::()={}",
+ bases_len,
+ g1_size
+ );
+ ensure!(
+ sigma_len % g1_size == 0,
+ "PedersenBasesExpSigma body length {} not a multiple of size_of::()={}",
+ sigma_len,
+ g1_size
+ );
+
+ // Parse the index: u64 num_commitments followed by num × (u64
+ // basis_len, u64 sigma_len). Validate that the sum of per-commit
+ // lengths exactly matches the byte sections.
+ ensure!(idx_len >= 8, "pedersen index too short for num_commitments");
+ let num_commitments =
+ u64::from_le_bytes(mmap[idx_off..idx_off + 8].try_into().unwrap()) as usize;
+ let expected_idx_len = 8 + num_commitments * 16;
+ ensure!(
+ idx_len == expected_idx_len,
+ "pedersen index length mismatch (got {}, expected {})",
+ idx_len,
+ expected_idx_len
+ );
+
+ let mut commitment_keys = Vec::with_capacity(num_commitments);
+ let mut basis_cursor = bases_off;
+ let mut sigma_cursor = sigma_off;
+ let bases_end = bases_off + bases_len;
+ let sigma_end = sigma_off + sigma_len;
+
+ for i in 0..num_commitments {
+ let entry = idx_off + 8 + i * 16;
+ let basis_count = u64::from_le_bytes(mmap[entry..entry + 8].try_into().unwrap()) as usize;
+ let sigma_count =
+ u64::from_le_bytes(mmap[entry + 8..entry + 16].try_into().unwrap()) as usize;
+
+ let basis_bytes = basis_count * g1_size;
+ let sigma_bytes = sigma_count * g1_size;
+ ensure!(
+ basis_cursor + basis_bytes <= bases_end,
+ "pedersen basis #{} runs past PedersenBases section",
+ i
+ );
+ ensure!(
+ sigma_cursor + sigma_bytes <= sigma_end,
+ "pedersen basis_exp_sigma #{} runs past PedersenBasesExpSigma section",
+ i
+ );
+
+ // SAFETY: section offsets validated MMAP_ALIGN-aligned above,
+ // lengths are multiples of size_of::(), pointers stay
+ // within the section bounds we just checked. The raw bytes are
+ // in the same in-memory Montgomery layout written by
+ // `write_pk_sections` (see the `[G1Affine] as &[u8]` cast there
+ // — the inverse cast here is layout-compatible). The pointers
+ // are stored alongside the mmap they index into in
+ // `MmapProvingKey`; accessors are bound to `&self` on that
+ // struct so the pointers can never outlive the mapping.
+ let basis_ptr = unsafe { mmap.as_ptr().add(basis_cursor) as *const G1Affine };
+ let basis_exp_sigma_ptr = unsafe { mmap.as_ptr().add(sigma_cursor) as *const G1Affine };
+ commitment_keys.push(MmapPedersenProvingKey {
+ basis_ptr,
+ basis_len: basis_count,
+ basis_exp_sigma_ptr,
+ basis_exp_sigma_len: sigma_count,
+ });
+
+ basis_cursor += basis_bytes;
+ sigma_cursor += sigma_bytes;
+ }
+
+ ensure!(
+ basis_cursor == bases_end,
+ "PedersenBases section has {} trailing bytes after all commitments",
+ bases_end - basis_cursor
+ );
+ ensure!(
+ sigma_cursor == sigma_end,
+ "PedersenBasesExpSigma section has {} trailing bytes after all commitments",
+ sigma_end - sigma_cursor
+ );
+
+ Ok(commitment_keys)
+}
+
+/// Write the curve-point sections of a [`crate::ProvingKey`] in mmap-friendly
+/// raw layout, plus a small arkworks-encoded scalars+pedersen section.
+///
+/// Writes at the current file position. The 21-byte common header,
+/// [`MMAP_SENTINEL`], and the postcard-encoded prover metadata are written by
+/// the caller (lives in `provekit_prover::pkp_io`); this function appends the
+/// section table and section bodies.
+///
+/// Returns the number of bytes written.
+pub fn write_pk_sections(pk: &crate::ProvingKey, file: &mut File) -> Result {
+ // Build the scalars blob first so we know its length. As of
+ // PROVER_VERSION (1, 4) pedersen `commitment_keys` are no longer
+ // included here — they live in dedicated raw G1Affine sections
+ // (PedersenIndex / PedersenBases / PedersenBasesExpSigma) and are
+ // memcpy'd, not arkworks-deserialized, on load.
+ let mut sp_bytes: Vec = Vec::new();
+ pk.domain_size
+ .serialize_uncompressed(&mut sp_bytes)
+ .context("write domain_size")?;
+ pk.domain_gen
+ .serialize_uncompressed(&mut sp_bytes)
+ .context("write domain_gen")?;
+ pk.g1_alpha
+ .serialize_uncompressed(&mut sp_bytes)
+ .context("write g1_alpha")?;
+ pk.g1_beta
+ .serialize_uncompressed(&mut sp_bytes)
+ .context("write g1_beta")?;
+ pk.g1_delta
+ .serialize_uncompressed(&mut sp_bytes)
+ .context("write g1_delta")?;
+ pk.g2_beta
+ .serialize_uncompressed(&mut sp_bytes)
+ .context("write g2_beta")?;
+ pk.g2_delta
+ .serialize_uncompressed(&mut sp_bytes)
+ .context("write g2_delta")?;
+ pk.nb_infinity_a
+ .serialize_uncompressed(&mut sp_bytes)
+ .context("write nb_infinity_a")?;
+ pk.nb_infinity_b
+ .serialize_uncompressed(&mut sp_bytes)
+ .context("write nb_infinity_b")?;
+
+ // Build the pedersen index: u64 num_commitments, then per-commit
+ // (u64 basis_len, u64 sigma_len). The two body sections store the
+ // raw G1Affine bytes concatenated in the same order.
+ let mut pedersen_index: Vec = Vec::new();
+ pedersen_index.extend_from_slice(&(pk.commitment_keys.len() as u64).to_le_bytes());
+ let mut total_basis_count: u64 = 0;
+ let mut total_sigma_count: u64 = 0;
+ for ck in &pk.commitment_keys {
+ pedersen_index.extend_from_slice(&(ck.basis.len() as u64).to_le_bytes());
+ pedersen_index.extend_from_slice(&(ck.basis_exp_sigma.len() as u64).to_le_bytes());
+ total_basis_count += ck.basis.len() as u64;
+ total_sigma_count += ck.basis_exp_sigma.len() as u64;
+ }
+
+ // Section bodies (in the order they'll be written).
+ let g1_size = std::mem::size_of::<G1Affine>();
+ let g2_size = std::mem::size_of::<G2Affine>();
+
+ // (id, body_byte_len)
+ let sections: [(SectionId, u64); 11] = [
+ (SectionId::G1A, (pk.g1_a.len() * g1_size) as u64),
+ (SectionId::G1B, (pk.g1_b.len() * g1_size) as u64),
+ (SectionId::G1Z, (pk.g1_z.len() * g1_size) as u64),
+ (SectionId::G1K, (pk.g1_k.len() * g1_size) as u64),
+ (SectionId::G2B, (pk.g2_b.len() * g2_size) as u64),
+ (SectionId::InfinityA, pk.infinity_a.len() as u64),
+ (SectionId::InfinityB, pk.infinity_b.len() as u64),
+ (SectionId::Scalars, sp_bytes.len() as u64),
+ (SectionId::PedersenIndex, pedersen_index.len() as u64),
+ (SectionId::PedersenBases, total_basis_count * g1_size as u64),
+ (
+ SectionId::PedersenBasesExpSigma,
+ total_sigma_count * g1_size as u64,
+ ),
+ ];
+
+ // Compute byte offsets for each section body, padding each to MMAP_ALIGN.
+ // Offsets are absolute in the file. We need to know:
+ // table_start = current file pos + 4 (section_count u32)
+ // table_end = table_start + section_count * (4+8+8)
+ // body_start = round_up(table_end, MMAP_ALIGN)
+ let table_start = file.stream_position()? + 4;
+ let table_end = table_start + sections.len() as u64 * 20;
+ let mut cur_off = round_up(table_end, MMAP_ALIGN as u64);
+
+ let mut section_offsets: Vec<(SectionId, u64, u64)> = Vec::with_capacity(sections.len());
+ for &(id, len) in §ions {
+ section_offsets.push((id, cur_off, len));
+ cur_off = round_up(cur_off + len, MMAP_ALIGN as u64);
+ }
+ let total_end = cur_off;
+
+ // Write section count.
+ file.write_all(&(sections.len() as u32).to_le_bytes())?;
+ // Write section table.
+ for &(id, off, len) in §ion_offsets {
+ file.write_all(&(id as u32).to_le_bytes())?;
+ file.write_all(&off.to_le_bytes())?;
+ file.write_all(&len.to_le_bytes())?;
+ }
+ // Pad to body_start.
+ let body_start = section_offsets[0].1;
+ pad_to(file, body_start)?;
+
+ // Write section bodies, each followed by alignment padding for the next.
+ let g1_a_bytes = unsafe {
+ std::slice::from_raw_parts(pk.g1_a.as_ptr() as *const u8, pk.g1_a.len() * g1_size)
+ };
+ write_section_body(file, g1_a_bytes, section_offsets[1].1)?;
+
+ let g1_b_bytes = unsafe {
+ std::slice::from_raw_parts(pk.g1_b.as_ptr() as *const u8, pk.g1_b.len() * g1_size)
+ };
+ write_section_body(file, g1_b_bytes, section_offsets[2].1)?;
+
+ let g1_z_bytes = unsafe {
+ std::slice::from_raw_parts(pk.g1_z.as_ptr() as *const u8, pk.g1_z.len() * g1_size)
+ };
+ write_section_body(file, g1_z_bytes, section_offsets[3].1)?;
+
+ let g1_k_bytes = unsafe {
+ std::slice::from_raw_parts(pk.g1_k.as_ptr() as *const u8, pk.g1_k.len() * g1_size)
+ };
+ write_section_body(file, g1_k_bytes, section_offsets[4].1)?;
+
+ let g2_b_bytes = unsafe {
+ std::slice::from_raw_parts(pk.g2_b.as_ptr() as *const u8, pk.g2_b.len() * g2_size)
+ };
+ write_section_body(file, g2_b_bytes, section_offsets[5].1)?;
+
+ let infinity_a_bytes = unsafe {
+ std::slice::from_raw_parts(pk.infinity_a.as_ptr() as *const u8, pk.infinity_a.len())
+ };
+ write_section_body(file, infinity_a_bytes, section_offsets[6].1)?;
+
+ let infinity_b_bytes = unsafe {
+ std::slice::from_raw_parts(pk.infinity_b.as_ptr() as *const u8, pk.infinity_b.len())
+ };
+ write_section_body(file, infinity_b_bytes, section_offsets[7].1)?;
+
+ // Scalars (small, arkworks-encoded).
+ write_section_body(file, &sp_bytes, section_offsets[8].1)?;
+
+ // Pedersen index (small, hand-rolled).
+ write_section_body(file, &pedersen_index, section_offsets[9].1)?;
+
+ // Pedersen bases: raw G1Affine bytes concatenated. Mirrors the layout
+ // for the G1A/G1B/G1Z/G1K sections so the reader can recover the
+ // bases by memcpy instead of arkworks per-point deserialize.
+ for ck in &pk.commitment_keys {
+ let bytes = unsafe {
+ std::slice::from_raw_parts(ck.basis.as_ptr() as *const u8, ck.basis.len() * g1_size)
+ };
+ file.write_all(bytes)?;
+ }
+ pad_to(file, section_offsets[10].1)?;
+
+ // Pedersen basis_exp_sigma: raw G1Affine bytes concatenated.
+ for ck in &pk.commitment_keys {
+ let bytes = unsafe {
+ std::slice::from_raw_parts(
+ ck.basis_exp_sigma.as_ptr() as *const u8,
+ ck.basis_exp_sigma.len() * g1_size,
+ )
+ };
+ file.write_all(bytes)?;
+ }
+ pad_to(file, total_end)?;
+
+ Ok(total_end - (table_start - 4))
+}
+
+/// Smallest multiple of `align` that is >= `v`. `align` must be
+/// non-zero (every call site passes `MMAP_ALIGN`).
+fn round_up(v: u64, align: u64) -> u64 {
+    let blocks = (v + align - 1) / align;
+    blocks * align
+}
+
+/// Zero-fill `file` from its current position up to the absolute offset
+/// `target`. Errors if the file is already past `target`.
+fn pad_to(file: &mut File, target: u64) -> Result<()> {
+    let pos = file.stream_position()?;
+    ensure!(
+        pos <= target,
+        "pad_to: current position {} is past target {}",
+        pos,
+        target
+    );
+    let gap = (target - pos) as usize;
+    if gap > 0 {
+        file.write_all(&vec![0u8; gap])?;
+    }
+    Ok(())
+}
+
+/// Write one section body, then zero-pad up to the next section's
+/// pre-computed absolute offset (`pad_to` errors if the body overran it).
+fn write_section_body(file: &mut File, body: &[u8], next_section_off: u64) -> Result<()> {
+    file.write_all(body)?;
+    pad_to(file, next_section_off)
+}
+
+// ---------------------------------------------------------------------------
+// R1CS chunk: raw-byte layout for the R1CS struct, appended after the PK
+// section table so the mmap reader can memcpy it back without going through
+// postcard (~70 ms → ~3-5 ms on the noir_sha256 benchmark).
+// ---------------------------------------------------------------------------
+
+/// Per-commitment lengths used by the writer to size the
+/// commitment-info chunk, and returned by the reader after parsing the
+/// chunk. Each triple is `(public_committed, private_committed,
+/// challenge_indices)` as `Vec<u64>` (the prover crate converts to and
+/// from `usize`-indexed vectors at the boundary — see
+/// `read_commitment_info_chunk`, which materializes `&[u64]` slices).
+pub type CommitmentInfoTriple = (Vec<u64>, Vec<u64>, Vec<u64>);
+
+// Chunk magics let the reader sanity-check it is looking at the expected
+// chunk before trusting any of the length fields that follow.
+const R1CS_CHUNK_MAGIC: [u8; 4] = *b"R1CS";
+const CI_CHUNK_MAGIC: [u8; 4] = *b"CINF";
+
+/// Write an R1CS to disk in mmap-friendly raw byte layout. Caller passes
+/// the file at the position where the chunk should start; chunk is
+/// 8-byte aligned. Returns the absolute file position immediately
+/// after the chunk (which is where the next chunk, e.g. commitment_info,
+/// should be written).
+///
+/// Layout:
+/// ```text
+/// [ "R1CS" magic (4) ]
+/// [ pad (4) ]
+/// [ u64 num_public_inputs ]
+/// [ u64 num_virtual ]
+/// [ u64 a_num_rows ] [ u64 a_num_cols ]
+/// [ u64 b_num_rows ] [ u64 b_num_cols ]
+/// [ u64 c_num_rows ] [ u64 c_num_cols ]
+/// [ u64 interner_len ]
+/// [ raw Fr bytes × interner_len ] [ pad to 8 ]
+/// for each matrix (a, b, c):
+/// [ u64 new_row_indices_len ] [ raw u32 bytes ] [ pad to 8 ]
+/// [ u64 col_indices_len ] [ raw u32 bytes ] [ pad to 8 ]
+/// [ u64 values_len ] [ raw usize bytes ] [ pad to 8 ]
+/// ```
+pub fn write_r1cs_chunk(r1cs: &R1CS, file: &mut File) -> Result<u64> {
+    // Align start of chunk to 8 bytes so the raw arrays inside can be
+    // slice-cast.
+    let chunk_start = round_up(file.stream_position()?, MMAP_ALIGN as u64);
+    pad_to(file, chunk_start)?;
+
+    file.write_all(&R1CS_CHUNK_MAGIC)?;
+    file.write_all(&[0u8; 4])?; // pad to 8-byte alignment for the u64s
+    file.write_all(&(r1cs.num_public_inputs as u64).to_le_bytes())?;
+    file.write_all(&(r1cs.num_virtual as u64).to_le_bytes())?;
+    file.write_all(&(r1cs.a.num_rows as u64).to_le_bytes())?;
+    file.write_all(&(r1cs.a.num_cols as u64).to_le_bytes())?;
+    file.write_all(&(r1cs.b.num_rows as u64).to_le_bytes())?;
+    file.write_all(&(r1cs.b.num_cols as u64).to_le_bytes())?;
+    file.write_all(&(r1cs.c.num_rows as u64).to_le_bytes())?;
+    file.write_all(&(r1cs.c.num_cols as u64).to_le_bytes())?;
+
+    // Interner values, written as one raw Fr-byte blob; the reader casts
+    // them straight back (see `read_r1cs_chunk`).
+    let interner_values = r1cs.interner.values_raw();
+    file.write_all(&(interner_values.len() as u64).to_le_bytes())?;
+    let interner_bytes = unsafe {
+        std::slice::from_raw_parts(
+            interner_values.as_ptr() as *const u8,
+            interner_values.len() * std::mem::size_of::<Fr>(),
+        )
+    };
+    file.write_all(interner_bytes)?;
+    {
+        let p = file.stream_position()?;
+        pad_to(file, round_up(p, MMAP_ALIGN as u64))?;
+    }
+
+    for matrix in [&r1cs.a, &r1cs.b, &r1cs.c] {
+        write_sparse_matrix_arrays(matrix, file)?;
+    }
+
+    Ok(file.stream_position()?)
+}
+
+/// Append one sparse matrix's three backing arrays — row indices,
+/// column indices, then interned values — each as a `u64` length prefix
+/// + raw bytes + zero padding to `MMAP_ALIGN`. Inverse of
+/// `read_sparse_matrix_arrays`.
+fn write_sparse_matrix_arrays(matrix: &SparseMatrix, file: &mut File) -> Result<()> {
+    let row_indices = matrix.new_row_indices_raw();
+    file.write_all(&(row_indices.len() as u64).to_le_bytes())?;
+    let row_bytes = unsafe {
+        std::slice::from_raw_parts(row_indices.as_ptr() as *const u8, row_indices.len() * 4)
+    };
+    file.write_all(row_bytes)?;
+    {
+        let p = file.stream_position()?;
+        pad_to(file, round_up(p, MMAP_ALIGN as u64))?;
+    }
+
+    let col_indices = matrix.col_indices_raw();
+    file.write_all(&(col_indices.len() as u64).to_le_bytes())?;
+    let col_bytes = unsafe {
+        std::slice::from_raw_parts(col_indices.as_ptr() as *const u8, col_indices.len() * 4)
+    };
+    file.write_all(col_bytes)?;
+    {
+        let p = file.stream_position()?;
+        pad_to(file, round_up(p, MMAP_ALIGN as u64))?;
+    }
+
+    let values = matrix.values_raw();
+    file.write_all(&(values.len() as u64).to_le_bytes())?;
+    let values_bytes = unsafe {
+        std::slice::from_raw_parts(
+            values.as_ptr() as *const u8,
+            values.len() * std::mem::size_of::<InternedFieldElement>(),
+        )
+    };
+    file.write_all(values_bytes)?;
+    {
+        let p = file.stream_position()?;
+        pad_to(file, round_up(p, MMAP_ALIGN as u64))?;
+    }
+
+    Ok(())
+}
+
+/// Parse the PK section table at `sections_start` and return the
+/// position where the PK section bodies end (max of `offset + len` over
+/// all sections, rounded up to `MMAP_ALIGN`). The R1CS chunk starts at
+/// this position. Does not consume the mmap.
+pub fn pk_sections_end_offset(mmap: &[u8], sections_start: usize) -> Result<usize> {
+    ensure!(
+        sections_start + 4 <= mmap.len(),
+        "section_count out of bounds"
+    );
+    let section_count =
+        u32::from_le_bytes(mmap[sections_start..sections_start + 4].try_into().unwrap());
+    let table_start = sections_start + 4;
+    let table_entry_bytes = 4 + 8 + 8; // u32 id + u64 offset + u64 len
+    let table_end = table_start + section_count as usize * table_entry_bytes;
+    ensure!(table_end <= mmap.len(), "pk section table out of bounds");
+
+    // Even an empty table ends on an aligned boundary.
+    let mut max_end: usize = round_up(table_end as u64, MMAP_ALIGN as u64) as usize;
+    for i in 0..section_count {
+        let entry = table_start + i as usize * table_entry_bytes;
+        let off = u64::from_le_bytes(mmap[entry + 4..entry + 12].try_into().unwrap()) as usize;
+        let len = u64::from_le_bytes(mmap[entry + 12..entry + 20].try_into().unwrap()) as usize;
+        let end_rounded = round_up((off + len) as u64, MMAP_ALIGN as u64) as usize;
+        if end_rounded > max_end {
+            max_end = end_rounded;
+        }
+    }
+    Ok(max_end)
+}
+
+/// Read an R1CS chunk back from mmap bytes via memcpy. `bytes` should be
+/// the mmap slice starting at the chunk's first byte; the chunk consumes
+/// however many bytes its layout requires. Returns the parsed R1CS plus
+/// the number of bytes consumed (so the caller can advance to the next
+/// chunk).
+pub fn read_r1cs_chunk(bytes: &[u8]) -> Result<(R1CS, usize)> {
+    ensure!(bytes.len() >= 8, "r1cs chunk too short for magic");
+    ensure!(bytes[..4] == R1CS_CHUNK_MAGIC, "r1cs chunk magic mismatch");
+    // Skip the 4-byte magic plus the 4-byte alignment pad.
+    let mut pos = 8usize;
+    // NOTE(review): generic parameters appear stripped in this diff
+    // (`-> Result {`); presumably `Result<u64>` upstream — confirm.
+    let read_u64 = |bytes: &[u8], pos: &mut usize| -> Result {
+        ensure!(*pos + 8 <= bytes.len(), "r1cs chunk: short read for u64");
+        let v = u64::from_le_bytes(bytes[*pos..*pos + 8].try_into().unwrap());
+        *pos += 8;
+        Ok(v)
+    };
+
+    // Fixed-size header fields, in the exact order `write_r1cs_chunk`
+    // emits them.
+    let num_public_inputs = read_u64(bytes, &mut pos)? as usize;
+    let num_virtual = read_u64(bytes, &mut pos)? as usize;
+    let a_num_rows = read_u64(bytes, &mut pos)? as usize;
+    let a_num_cols = read_u64(bytes, &mut pos)? as usize;
+    let b_num_rows = read_u64(bytes, &mut pos)? as usize;
+    let b_num_cols = read_u64(bytes, &mut pos)? as usize;
+    let c_num_rows = read_u64(bytes, &mut pos)? as usize;
+    let c_num_cols = read_u64(bytes, &mut pos)? as usize;
+
+    // Interner
+    let interner_len = read_u64(bytes, &mut pos)? as usize;
+    let fr_size = std::mem::size_of::();
+    let interner_bytes_len = interner_len * fr_size;
+    ensure!(
+        pos + interner_bytes_len <= bytes.len(),
+        "r1cs chunk: short read for interner"
+    );
+    // SAFETY: source bytes are in the same in-memory Montgomery layout
+    // written by `write_r1cs_chunk` (Fr-as-raw-bytes cast). Source is
+    // 8-byte aligned because `write_r1cs_chunk` pads after each blob.
+    // NOTE(review): this also assumes `bytes` itself starts 8-byte
+    // aligned (mmap page + aligned chunk offset) — confirm at call sites.
+    let interner_slice: &[Fr] =
+        unsafe { std::slice::from_raw_parts(bytes.as_ptr().add(pos) as *const Fr, interner_len) };
+    let interner = Interner::from_values(interner_slice.to_vec());
+    pos += interner_bytes_len;
+    pos = round_up(pos as u64, MMAP_ALIGN as u64) as usize;
+
+    // Matrices are stored a, b, c — same order as the writer.
+    let a = read_sparse_matrix_arrays(bytes, &mut pos, a_num_rows, a_num_cols)?;
+    let b = read_sparse_matrix_arrays(bytes, &mut pos, b_num_rows, b_num_cols)?;
+    let c = read_sparse_matrix_arrays(bytes, &mut pos, c_num_rows, c_num_cols)?;
+
+    let r1cs = R1CS {
+        num_public_inputs,
+        interner,
+        a,
+        b,
+        c,
+        num_virtual,
+    };
+    Ok((r1cs, pos))
+}
+
+/// Read back one sparse matrix's arrays (rows, cols, values) written by
+/// `write_sparse_matrix_arrays`, advancing `pos` past each
+/// length-prefixed, MMAP_ALIGN-padded blob.
+fn read_sparse_matrix_arrays(
+    bytes: &[u8],
+    pos: &mut usize,
+    num_rows: usize,
+    num_cols: usize,
+) -> Result {
+    // Same little-endian length reader as `read_r1cs_chunk`.
+    let read_u64 = |bytes: &[u8], pos: &mut usize| -> Result {
+        ensure!(*pos + 8 <= bytes.len(), "r1cs chunk: short read for u64");
+        let v = u64::from_le_bytes(bytes[*pos..*pos + 8].try_into().unwrap());
+        *pos += 8;
+        Ok(v)
+    };
+
+    let row_len = read_u64(bytes, pos)? as usize;
+    ensure!(*pos + row_len * 4 <= bytes.len(), "r1cs chunk: short row");
+    // SAFETY: writer cast u32 array to bytes; reader does the inverse.
+    // Source is 8-byte aligned because `write_r1cs_chunk` pads after
+    // each blob and `u32` only needs 4-byte alignment.
+    let row_slice: &[u32] =
+        unsafe { std::slice::from_raw_parts(bytes.as_ptr().add(*pos) as *const u32, row_len) };
+    let new_row_indices = row_slice.to_vec();
+    *pos += row_len * 4;
+    *pos = round_up(*pos as u64, MMAP_ALIGN as u64) as usize;
+
+    let col_len = read_u64(bytes, pos)? as usize;
+    ensure!(*pos + col_len * 4 <= bytes.len(), "r1cs chunk: short cols");
+    // SAFETY: same argument as for the row indices above.
+    let col_slice: &[u32] =
+        unsafe { std::slice::from_raw_parts(bytes.as_ptr().add(*pos) as *const u32, col_len) };
+    let col_indices = col_slice.to_vec();
+    *pos += col_len * 4;
+    *pos = round_up(*pos as u64, MMAP_ALIGN as u64) as usize;
+
+    let val_len = read_u64(bytes, pos)? as usize;
+    let val_size = std::mem::size_of::();
+    ensure!(
+        *pos + val_len * val_size <= bytes.len(),
+        "r1cs chunk: short values"
+    );
+    // SAFETY: writer emitted raw InternedFieldElement bytes in this slot.
+    let val_slice: &[InternedFieldElement] = unsafe {
+        std::slice::from_raw_parts(
+            bytes.as_ptr().add(*pos) as *const InternedFieldElement,
+            val_len,
+        )
+    };
+    let values = val_slice.to_vec();
+    *pos += val_len * val_size;
+    *pos = round_up(*pos as u64, MMAP_ALIGN as u64) as usize;
+
+    Ok(SparseMatrix::from_raw_parts(
+        num_rows,
+        num_cols,
+        new_row_indices,
+        col_indices,
+        values,
+    ))
+}
+
+// ---------------------------------------------------------------------------
+// commitment_info chunk: raw-byte layout for `Vec`,
+// stored as triples of `Vec` (the prover crate converts to/from
+// `Vec` at the boundary).
+// ---------------------------------------------------------------------------
+
+/// Write the commitment-info data in raw byte layout. Returns the
+/// absolute file position after the chunk.
+///
+/// Layout:
+/// ```text
+/// [ "CINF" magic (4) ][ pad (4) ]
+/// [ u64 num_commitments ]
+/// [ for each commitment: u64 pub_len, u64 priv_len, u64 chal_len ]
+/// [ pad to 8 ]
+/// [ raw u64 bytes: all pub_committed concatenated ][ pad to 8 ]
+/// [ raw u64 bytes: all priv_committed concatenated ][ pad to 8 ]
+/// [ raw u64 bytes: all chal_indices concatenated ][ pad to 8 ]
+/// ```
+// NOTE(review): the return type's generic parameter appears stripped in
+// this diff; the final `stream_position()` suggests `Result<u64>`.
+pub fn write_commitment_info_chunk(
+    triples: &[CommitmentInfoTriple],
+    file: &mut File,
+) -> Result {
+    // Align the chunk start so the raw u64 arrays can be slice-cast back.
+    let chunk_start = round_up(file.stream_position()?, MMAP_ALIGN as u64);
+    pad_to(file, chunk_start)?;
+
+    file.write_all(&CI_CHUNK_MAGIC)?;
+    file.write_all(&[0u8; 4])?;
+    file.write_all(&(triples.len() as u64).to_le_bytes())?;
+    for (pub_v, priv_v, chal_v) in triples {
+        file.write_all(&(pub_v.len() as u64).to_le_bytes())?;
+        file.write_all(&(priv_v.len() as u64).to_le_bytes())?;
+        file.write_all(&(chal_v.len() as u64).to_le_bytes())?;
+    }
+    {
+        let p = file.stream_position()?;
+        pad_to(file, round_up(p, MMAP_ALIGN as u64))?;
+    }
+
+    // Three passes: pub_committed, then priv_committed, then chal_indices
+    // — matching the three body sections in the layout doc above.
+    for which in 0..3 {
+        for triple in triples {
+            let v = match which {
+                0 => &triple.0,
+                1 => &triple.1,
+                _ => &triple.2,
+            };
+            let bytes = unsafe { std::slice::from_raw_parts(v.as_ptr() as *const u8, v.len() * 8) };
+            file.write_all(bytes)?;
+        }
+        {
+            let p = file.stream_position()?;
+            pad_to(file, round_up(p, MMAP_ALIGN as u64))?;
+        }
+    }
+
+    Ok(file.stream_position()?)
+}
+
+/// Read the commitment-info chunk back via memcpy. Returns the triples
+/// and the number of bytes consumed.
+pub fn read_commitment_info_chunk(bytes: &[u8]) -> Result<(Vec, usize)> {
+    ensure!(bytes.len() >= 8, "ci chunk too short");
+    ensure!(bytes[..4] == CI_CHUNK_MAGIC, "ci chunk magic mismatch");
+    // Skip the 4-byte magic plus the 4-byte alignment pad.
+    let mut pos = 8usize;
+    let read_u64 = |bytes: &[u8], pos: &mut usize| -> Result {
+        ensure!(*pos + 8 <= bytes.len(), "ci chunk: short read for u64");
+        let v = u64::from_le_bytes(bytes[*pos..*pos + 8].try_into().unwrap());
+        *pos += 8;
+        Ok(v)
+    };
+
+    // Length table: one (pub, priv, chal) triple of lengths per
+    // commitment, then padding to the first body section.
+    let num_commitments = read_u64(bytes, &mut pos)? as usize;
+    let mut lens: Vec<(usize, usize, usize)> = Vec::with_capacity(num_commitments);
+    for _ in 0..num_commitments {
+        let p = read_u64(bytes, &mut pos)? as usize;
+        let pr = read_u64(bytes, &mut pos)? as usize;
+        let ch = read_u64(bytes, &mut pos)? as usize;
+        lens.push((p, pr, ch));
+    }
+    pos = round_up(pos as u64, MMAP_ALIGN as u64) as usize;
+
+    // Bodies follow in the same order the writer emitted them:
+    // pub_committed, priv_committed, chal_indices.
+    let mut pub_vecs = Vec::with_capacity(num_commitments);
+    for &(p, ..) in &lens {
+        ensure!(pos + p * 8 <= bytes.len(), "ci chunk: short pub");
+        let s: &[u64] =
+            unsafe { std::slice::from_raw_parts(bytes.as_ptr().add(pos) as *const u64, p) };
+        pub_vecs.push(s.to_vec());
+        pos += p * 8;
+    }
+    pos = round_up(pos as u64, MMAP_ALIGN as u64) as usize;
+
+    let mut priv_vecs = Vec::with_capacity(num_commitments);
+    for &(_, pr, _) in &lens {
+        ensure!(pos + pr * 8 <= bytes.len(), "ci chunk: short priv");
+        let s: &[u64] =
+            unsafe { std::slice::from_raw_parts(bytes.as_ptr().add(pos) as *const u64, pr) };
+        priv_vecs.push(s.to_vec());
+        pos += pr * 8;
+    }
+    pos = round_up(pos as u64, MMAP_ALIGN as u64) as usize;
+
+    let mut chal_vecs = Vec::with_capacity(num_commitments);
+    for &(_, _, ch) in &lens {
+        ensure!(pos + ch * 8 <= bytes.len(), "ci chunk: short chal");
+        let s: &[u64] =
+            unsafe { std::slice::from_raw_parts(bytes.as_ptr().add(pos) as *const u64, ch) };
+        chal_vecs.push(s.to_vec());
+        pos += ch * 8;
+    }
+    pos = round_up(pos as u64, MMAP_ALIGN as u64) as usize;
+
+    // Re-zip the three parallel streams back into per-commitment triples.
+    let triples: Vec = pub_vecs
+        .into_iter()
+        .zip(priv_vecs.into_iter())
+        .zip(chal_vecs.into_iter())
+        .map(|((p, pr), ch)| (p, pr, ch))
+        .collect();
+
+    Ok((triples, pos))
+}
+
+/// Open a file and validate it is a mmap-format `.pkp` (i.e. has the
+/// [`MMAP_SENTINEL`] following the 21-byte common header). Returns the open
+/// file handle and the offset within it where the postcard metadata starts.
+///
+/// Used by the prover crate to coordinate metadata + section-body reads off
+/// the same file.
+pub fn open_mmap_pkp(path: &Path) -> Result<(File, u64)> {
+    let mut file = OpenOptions::new()
+        .read(true)
+        .open(path)
+        .with_context(|| format!("opening {}", path.display()))?;
+    // Skip the 21-byte common header (caller has already validated it via
+    // `provekit_common::binary_format`).
+    file.seek(SeekFrom::Start(21))?;
+    let mut sentinel = [0u8; 4];
+    file.read_exact(&mut sentinel)?;
+    ensure!(
+        sentinel == MMAP_SENTINEL,
+        "not an mmap-format .pkp (sentinel mismatch)"
+    );
+    // 25 = 21-byte common header + the 4-byte sentinel just consumed.
+    Ok((file, 25))
+}
+
+#[cfg(test)]
+mod tests {
+    use {super::*, ark_ec::AffineRepr, provekit_common::R1CS, tempfile::tempdir};
+
+    /// Round-trip: setup a tiny PK, write its sections via
+    /// `write_pk_sections` into a bare file, then mmap-load and assert the
+    /// big arrays match byte-for-byte (and the small scalars equal their
+    /// originals). This is the format-stability test for the on-disk layout.
+    #[test]
+    fn test_mmap_pk_roundtrip() {
+        // Trivial circuit: x * x = y
+        let mut r1cs = R1CS::new();
+        r1cs.num_public_inputs = 1;
+        r1cs.add_witnesses(3);
+        let one = ark_bn254::Fr::from(1u64);
+        r1cs.add_constraint(&[(one, 2)], &[(one, 2)], &[(one, 1)]);
+        let (pk, _vk) = crate::setup::setup(&r1cs, &[], &[], &[]).unwrap();
+
+        let dir = tempdir().unwrap();
+        let path = dir.path().join("pk_sections.bin");
+
+        // Layout the test file as: [section_count + table + bodies] starting
+        // at offset 0, matching what `MmapProvingKey::load(mmap, 0)` expects.
+        {
+            let mut f = File::create(&path).unwrap();
+            write_pk_sections(&pk, &mut f).unwrap();
+            f.sync_all().unwrap();
+        }
+
+        let file = std::fs::File::open(&path).unwrap();
+        let mmap = unsafe { Mmap::map(&file).unwrap() };
+        let loaded = MmapProvingKey::load(mmap, 0).unwrap();
+
+        // Big arrays: bytewise equality between the live PK and the
+        // mmap-loaded view.
+        assert_eq!(loaded.g1_a().len(), pk.g1_a.len(), "g1_a length");
+        assert_eq!(loaded.g1_a(), pk.g1_a.as_slice(), "g1_a contents");
+        assert_eq!(loaded.g1_b(), pk.g1_b.as_slice(), "g1_b contents");
+        assert_eq!(loaded.g1_z(), pk.g1_z.as_slice(), "g1_z contents");
+        assert_eq!(loaded.g1_k(), pk.g1_k.as_slice(), "g1_k contents");
+        assert_eq!(loaded.g2_b(), pk.g2_b.as_slice(), "g2_b contents");
+        assert_eq!(loaded.infinity_a(), pk.infinity_a.as_slice());
+        assert_eq!(loaded.infinity_b(), pk.infinity_b.as_slice());
+
+        // Small scalars / individual points.
+        assert_eq!(loaded.domain_size, pk.domain_size);
+        assert_eq!(loaded.domain_gen, pk.domain_gen);
+        assert_eq!(loaded.g1_alpha, pk.g1_alpha);
+        assert_eq!(loaded.g1_beta, pk.g1_beta);
+        assert_eq!(loaded.g1_delta, pk.g1_delta);
+        assert_eq!(loaded.g2_beta, pk.g2_beta);
+        assert_eq!(loaded.g2_delta, pk.g2_delta);
+        assert_eq!(loaded.nb_infinity_a, pk.nb_infinity_a);
+        assert_eq!(loaded.nb_infinity_b, pk.nb_infinity_b);
+        assert_eq!(loaded.commitment_keys.len(), pk.commitment_keys.len());
+
+        // Sanity: the points are still on the curve after the mmap cast.
+        // (The identity is checked separately from the curve equation.)
+        for p in loaded.g1_a() {
+            assert!(p.is_on_curve() || p.is_zero());
+        }
+    }
+
+    /// Every SectionId survives a u32 round-trip, and unknown ids map to
+    /// None.
+    #[test]
+    fn test_section_id_roundtrip() {
+        for sid in [
+            SectionId::G1A,
+            SectionId::G1B,
+            SectionId::G1Z,
+            SectionId::G1K,
+            SectionId::G2B,
+            SectionId::InfinityA,
+            SectionId::InfinityB,
+            SectionId::Scalars,
+            SectionId::PedersenIndex,
+            SectionId::PedersenBases,
+            SectionId::PedersenBasesExpSigma,
+        ] {
+            let v = sid as u32;
+            assert_eq!(SectionId::from_u32(v), Some(sid));
+        }
+        // 0 maps to None — discriminants presumably start at 1 (confirm
+        // against the SectionId definition).
+        assert_eq!(SectionId::from_u32(0), None);
+        assert_eq!(SectionId::from_u32(99), None);
+    }
+}
diff --git a/provekit/groth16/src/pedersen.rs b/provekit/groth16/src/pedersen.rs
new file mode 100644
index 000000000..2d96a1d03
--- /dev/null
+++ b/provekit/groth16/src/pedersen.rs
@@ -0,0 +1,334 @@
+//! Pedersen commitment scheme for BSB22 extension.
+//!
+//! Ported from gnark-crypto's `ecc/bn254/fr/pedersen/pedersen.go`.
+//!
+//! A Pedersen commitment C = Σ vᵢ·Gᵢ binds the prover to values v₁..vₖ
+//! using bases G₁..Gₖ from the trusted setup. The proof of knowledge (PoK)
+//! proves the prover knows the committed values without revealing them.
+use anyhow::{ensure, Result};
+use {
+ ark_bn254::{Fr, G1Affine, G1Projective, G2Affine, G2Projective},
+ ark_ec::{AffineRepr, CurveGroup, VariableBaseMSM},
+ ark_ff::{One, UniformRand, Zero},
+ ark_serialize::{CanonicalDeserialize, CanonicalSerialize},
+ zeroize::Zeroizing,
+};
+
+/// Pedersen proving key: bases for commitment and PoK generation.
+#[derive(Clone, Debug, CanonicalSerialize, CanonicalDeserialize)]
+pub struct ProvingKey {
+    /// Original bases [G₁, G₂, ..., Gₖ] from trusted setup.
+    pub basis: Vec<G1Affine>,
+    /// Bases raised to secret sigma: [G₁^σ, G₂^σ, ..., Gₖ^σ].
+    pub basis_exp_sigma: Vec<G1Affine>,
+}
+
+/// Pedersen verifying key: G2 elements for pairing-based verification.
+///
+/// Consumed by `batch_verify_multi_vk`, which checks
+/// e(C, g_sigma_neg) · e(PoK, g) == 1.
+#[derive(Clone, Debug, CanonicalSerialize, CanonicalDeserialize)]
+pub struct VerifyingKey {
+    /// Random G2 generator chosen during setup.
+    pub g: G2Affine,
+    /// G^(-σ) where σ is the secret from setup.
+    pub g_sigma_neg: G2Affine,
+}
+
+/// Generate Pedersen commitment keys from bases.
+///
+/// `bases_per_commitment` is a slice of slices — one set of bases per
+/// commitment. `g2_point` is an optional pre-chosen G2 point (if None, a random
+/// one is sampled).
+///
+/// Returns one proving key per entry in `bases_per_commitment` plus a
+/// single shared verifying key.
+///
+/// Ported from gnark-crypto `pedersen.Setup()`.
+pub fn setup(
+    bases_per_commitment: &[&[G1Affine]],
+    g2_point: Option<G2Affine>,
+) -> Result<(Vec<ProvingKey>, VerifyingKey)> {
+    let mut rng = ark_std::rand::thread_rng();
+
+    // Choose G2 generator
+    let g = g2_point.unwrap_or_else(|| G2Projective::rand(&mut rng).into_affine());
+
+    // Sample secret sigma. `Zeroizing` wipes the field element when it drops,
+    // so the toxic Pedersen secret can't be recovered from freed memory after
+    // setup returns.
+    let sigma = Zeroizing::new(Fr::rand(&mut rng));
+    ensure!(!sigma.is_zero(), "sigma must be non-zero");
+
+    // Compute G^(-sigma)
+    let g_sigma_neg: G2Affine = (-(G2Projective::from(g) * *sigma)).into_affine();
+
+    let vk = VerifyingKey { g, g_sigma_neg };
+
+    let pks: Vec<ProvingKey> = bases_per_commitment
+        .iter()
+        .map(|bases| {
+            // BasisExpSigma[i] = Basis[i] * sigma
+            let basis_exp_sigma: Vec<G1Affine> = bases
+                .iter()
+                .map(|b| (G1Projective::from(*b) * *sigma).into_affine())
+                .collect();
+
+            ProvingKey {
+                basis: bases.to_vec(),
+                basis_exp_sigma,
+            }
+        })
+        .collect();
+
+    Ok((pks, vk))
+}
+
+/// Chunk size for Pedersen MSMs. arkworks' `VariableBaseMSM` keeps a
+/// projective copy of every base plus per-thread bucket state, so a single
+/// 1M-element call holds hundreds of MB of transient memory. Splitting into
+/// 100k-element chunks caps that to ~tens of MB at the cost of ~10% wall
+/// clock.
+const PEDERSEN_MSM_CHUNK: usize = 100_000;
+
+/// Chunked MSM over G1: `Σ valuesᵢ · basesᵢ`, accumulated per chunk to
+/// bound peak memory. Errors if the slices' lengths differ.
+fn chunked_g1_msm(bases: &[G1Affine], values: &[Fr]) -> Result<G1Projective> {
+    ensure!(
+        bases.len() == values.len(),
+        "chunked_g1_msm length mismatch: {} bases vs {} values",
+        bases.len(),
+        values.len()
+    );
+    let mut acc = G1Projective::zero();
+    for (b_chunk, v_chunk) in bases
+        .chunks(PEDERSEN_MSM_CHUNK)
+        .zip(values.chunks(PEDERSEN_MSM_CHUNK))
+    {
+        acc += G1Projective::msm(b_chunk, v_chunk).map_err(crate::msm_err)?;
+    }
+    Ok(acc)
+}
+
+/// Borrowed view over a Pedersen `ProvingKey`'s bases. Same `commit` /
+/// `prove_knowledge` API as [`ProvingKey`], but the basis slices can point
+/// at either owned `Vec`s (legacy path) or mmap'd file pages
+/// (rapidsnark-style raw layout). Lets callers be polymorphic over the
+/// backing store without a runtime indirection or memcpy.
+#[derive(Clone, Copy)]
+pub struct ProvingKeyView<'a> {
+    /// Bases consumed by `commit` (Σ vᵢ·basis[i]).
+    pub basis: &'a [G1Affine],
+    /// σ-scaled bases consumed by `prove_knowledge`.
+    pub basis_exp_sigma: &'a [G1Affine],
+}
+
+impl<'a> ProvingKeyView<'a> {
+    /// Compute Pedersen commitment: `C = Σ vᵢ · Basis[i]`.
+    ///
+    /// Errors if `values.len()` differs from the number of bases; an
+    /// empty value set commits to the identity point.
+    pub fn commit(&self, values: &[Fr]) -> Result<G1Affine> {
+        ensure!(
+            values.len() == self.basis.len(),
+            "commit: got {} values, expected {}",
+            values.len(),
+            self.basis.len()
+        );
+
+        if values.is_empty() {
+            return Ok(G1Affine::zero());
+        }
+
+        let commitment = chunked_g1_msm(self.basis, values)?;
+        Ok(commitment.into_affine())
+    }
+
+    /// Generate proof of knowledge: `PoK = Σ vᵢ · BasisExpSigma[i]`.
+    ///
+    /// Errors on length mismatch; empty input yields the identity point.
+    pub fn prove_knowledge(&self, values: &[Fr]) -> Result<G1Affine> {
+        ensure!(
+            values.len() == self.basis_exp_sigma.len(),
+            "prove_knowledge: got {} values, expected {}",
+            values.len(),
+            self.basis_exp_sigma.len()
+        );
+
+        if values.is_empty() {
+            return Ok(G1Affine::zero());
+        }
+
+        let pok = chunked_g1_msm(self.basis_exp_sigma, values)?;
+        Ok(pok.into_affine())
+    }
+}
+
+impl ProvingKey {
+    /// Borrow this owned key as a view. Cheap — just two slice references.
+    pub fn view(&self) -> ProvingKeyView<'_> {
+        ProvingKeyView {
+            basis: &self.basis,
+            basis_exp_sigma: &self.basis_exp_sigma,
+        }
+    }
+
+    /// Compute Pedersen commitment: `C = Σ vᵢ · Basis[i]`.
+    ///
+    /// Ported from gnark-crypto `ProvingKey.Commit()`.
+    pub fn commit(&self, values: &[Fr]) -> Result<G1Affine> {
+        self.view().commit(values)
+    }
+
+    /// Generate proof of knowledge: `PoK = Σ vᵢ · BasisExpSigma[i]`.
+    ///
+    /// Proves the prover knows the values inside the commitment without
+    /// revealing them. The verifier checks e(C, G^(-σ)) · e(PoK, G) == 1.
+    ///
+    /// Ported from gnark-crypto `ProvingKey.ProveKnowledge()`.
+    pub fn prove_knowledge(&self, values: &[Fr]) -> Result<G1Affine> {
+        self.view().prove_knowledge(values)
+    }
+}
+
+/// Fold multiple G1 points into one using a random linear combination.
+///
+/// Returns: `points[0] + coeff·points[1] + coeff²·points[2] + ...`
+/// (empty input folds to the identity point).
+///
+/// Ported from gnark-crypto `G1Affine.Fold()`.
+pub fn fold(points: &[G1Affine], coeff: Fr) -> Result<G1Affine> {
+    if points.is_empty() {
+        return Ok(G1Affine::zero());
+    }
+    if points.len() == 1 {
+        return Ok(points[0]);
+    }
+
+    // Build scalars: [1, coeff, coeff², coeff³, ...]
+    let mut scalars = Vec::with_capacity(points.len());
+    let mut power = Fr::one();
+    for _ in 0..points.len() {
+        scalars.push(power);
+        power *= coeff;
+    }
+
+    let result = G1Projective::msm(points, &scalars).map_err(crate::msm_err)?;
+    Ok(result.into_affine())
+}
+
+/// Batch verify multiple commitments against multiple verifying keys.
+///
+/// Checks that for each commitment Cᵢ with PoKᵢ and verifying key VKᵢ:
+/// e(Cᵢ, VKᵢ.GSigmaNeg) · e(PoKᵢ, VKᵢ.G) == 1
+///
+/// All PoKs are expected to have already been folded into a single point.
+///
+/// Ported from gnark-crypto `pedersen.BatchVerifyMultiVk()`.
+pub fn batch_verify_multi_vk(
+    vks: &[VerifyingKey],
+    commitments: &[G1Affine],
+    folded_pok: G1Affine,
+    folding_challenge: Fr,
+) -> Result<()> {
+    use {ark_bn254::Bn254, ark_ec::pairing::Pairing};
+
+    ensure!(
+        vks.len() == commitments.len(),
+        "batch_verify: {} vks vs {} commitments",
+        vks.len(),
+        commitments.len()
+    );
+
+    if vks.is_empty() {
+        return Ok(());
+    }
+
+    // All VKs must share the same G point. `setup()` always emits a single G,
+    // but a deserialized batch could mix VKs whose `g` differs — folding
+    // `g_sigma_neg` against `vks[0].g` would then quietly check the wrong
+    // pairing equation, so reject the batch outright.
+    let g = vks[0].g;
+    ensure!(
+        vks.iter().all(|v| v.g == g),
+        "batch_verify: all verifying keys must share the same G point"
+    );
+
+    // Fold commitments: C_folded = C₀ + challenge·C₁ + challenge²·C₂ + ...
+    let folded_commitment = fold(commitments, folding_challenge)?;
+
+    // Fold GSigmaNeg: we need Σ rⁱ·VKᵢ.GSigmaNeg
+    // Since all G points are the same, this simplifies to:
+    // GSigmaNeg_folded = Σ rⁱ · GSigmaNeg_i
+    let g_sigma_negs: Vec<G2Affine> = vks.iter().map(|vk| vk.g_sigma_neg).collect();
+    let fold_scalars: Vec<Fr> = {
+        let mut s = Vec::with_capacity(vks.len());
+        let mut power = Fr::one();
+        for _ in 0..vks.len() {
+            s.push(power);
+            power *= folding_challenge;
+        }
+        s
+    };
+    // `VariableBaseMSM` is already imported at module scope, so no local
+    // `use` is needed for this qualified call.
+    let g_sigma_neg_folded: G2Affine =
+        <G2Projective as VariableBaseMSM>::msm(&g_sigma_negs, &fold_scalars)
+            .map_err(crate::msm_err)?
+            .into_affine();
+
+    // Pairing check: e(folded_commitment, g_sigma_neg_folded) · e(folded_pok, g) ==
+    // 1
+    let result = Bn254::multi_pairing([folded_commitment, folded_pok], [g_sigma_neg_folded, g]);
+
+    ensure!(
+        result.0.is_one(),
+        "pedersen batch verification failed: pairing check did not pass"
+    );
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use {super::*, ark_ff::UniformRand};
+
+    /// Commit + PoK over random values verifies under the matching VK.
+    #[test]
+    fn test_commit_and_verify() {
+        let mut rng = ark_std::test_rng();
+
+        // Generate random bases
+        let bases: Vec<G1Affine> = (0..5)
+            .map(|_| G1Projective::rand(&mut rng).into_affine())
+            .collect();
+
+        let (pks, vk) = setup(&[&bases], None).unwrap();
+        let pk = &pks[0];
+
+        // Commit to random values
+        let values: Vec<Fr> = (0..5).map(|_| Fr::rand(&mut rng)).collect();
+        let commitment = pk.commit(&values).unwrap();
+        let pok = pk.prove_knowledge(&values).unwrap();
+
+        // Verify
+        batch_verify_multi_vk(
+            &[vk],
+            &[commitment],
+            pok,
+            Fr::one(), // trivial challenge for single commitment
+        )
+        .unwrap();
+    }
+
+    /// Folding a single point is the identity operation.
+    #[test]
+    fn test_fold_single() {
+        let mut rng = ark_std::test_rng();
+        let p = G1Projective::rand(&mut rng).into_affine();
+        let result = fold(&[p], Fr::rand(&mut rng)).unwrap();
+        assert_eq!(result, p);
+    }
+
+    /// A PoK over different values than the commitment must be rejected.
+    #[test]
+    fn test_commit_wrong_values_fails() {
+        let mut rng = ark_std::test_rng();
+        let bases: Vec<G1Affine> = (0..3)
+            .map(|_| G1Projective::rand(&mut rng).into_affine())
+            .collect();
+        let (pks, vk) = setup(&[&bases], None).unwrap();
+        let pk = &pks[0];
+
+        let values: Vec<Fr> = (0..3).map(|_| Fr::rand(&mut rng)).collect();
+        let commitment = pk.commit(&values).unwrap();
+
+        // Generate PoK with WRONG values
+        let wrong_values: Vec<Fr> = (0..3).map(|_| Fr::rand(&mut rng)).collect();
+        let wrong_pok = pk.prove_knowledge(&wrong_values).unwrap();
+
+        let result = batch_verify_multi_vk(&[vk], &[commitment], wrong_pok, Fr::one());
+        assert!(result.is_err());
+    }
+}
diff --git a/provekit/groth16/src/prover.rs b/provekit/groth16/src/prover.rs
new file mode 100644
index 000000000..af66e1288
--- /dev/null
+++ b/provekit/groth16/src/prover.rs
@@ -0,0 +1,479 @@
+//! Groth16+BSB22 prover building blocks: generates proofs from R1CS + witness.
+//!
+//! Ported from gnark's `backend/groth16/bn254/prove.go`.
+//!
+//! The end-to-end proving flow (orchestrated by `provekit_prover::Prove for
+//! Groth16Prover` in `provekit/prover/src/lib.rs`) is:
+//! 1. (BSB22) Commit to pre-challenge witness values via Pedersen.
+//! 2. (BSB22) Derive challenges from commitment hashes.
+//! 3. Compute quotient polynomial H via FFT (see [`compute_h`]).
+//! 4. Compute proof elements Ar, Bs, Krs via MSM (see [`prove_ar_bs_bs1`] and
+//! [`prove_krs`]).
+//! 5. (BSB22) Generate and fold proofs of knowledge (see [`bsb22_pok`]).
+//!
+//! The caller owns the BSB22 witness-splitting flow (solve w1 → commit →
+//! derive challenges → solve w2). Functions in this module take the completed
+//! witness and commitments as inputs.
+
+use {
+ crate::{pedersen, CommitmentInfo, BSB22_FOLD_DST, COMMITMENT_DST, FR_BYTES},
+ anyhow::{ensure, Result},
+ ark_bn254::{Fr, G1Affine, G1Projective, G2Affine, G2Projective},
+ ark_ec::{AffineRepr, CurveGroup, VariableBaseMSM},
+ ark_ff::{FftField, Field, One, PrimeField, Zero},
+ ark_poly::{EvaluationDomain, Radix2EvaluationDomain},
+ rayon::{self, prelude::*},
+ tracing::{info_span, instrument},
+};
+
+/// BSB22 batched proof of knowledge over all commitments, folded into a
+/// single G1 element. Independent of `H`, so callers can run this in
+/// parallel with [`compute_h`].
+#[instrument(skip_all)]
+pub fn bsb22_pok(
+    commitment_keys: &[pedersen::ProvingKeyView<'_>],
+    committed_values: &[Vec<Fr>],
+    challenge_wire_indices: &[usize],
+    wire_values: &[Fr],
+) -> Result<G1Affine> {
+    let poks: Vec<G1Affine> = commitment_keys
+        .iter()
+        .zip(committed_values.iter())
+        .map(|(ck, vals)| ck.prove_knowledge(vals))
+        .collect::<Result<Vec<_>>>()?;
+
+    if poks.is_empty() {
+        return Ok(G1Affine::zero());
+    }
+
+    // Serialize the challenge-wire values in index order; this byte string
+    // seeds the fold challenge so it binds all commitments at once.
+    let mut commitments_serialized = vec![0u8; FR_BYTES * challenge_wire_indices.len()];
+    for (j, &wire_idx) in challenge_wire_indices.iter().enumerate() {
+        let val = wire_values.get(wire_idx).ok_or_else(|| {
+            anyhow::anyhow!(
+                "challenge wire index {wire_idx} out of bounds (witness len = {})",
+                wire_values.len()
+            )
+        })?;
+        let bytes = fr_to_bytes(val)?;
+        commitments_serialized[FR_BYTES * j..FR_BYTES * (j + 1)].copy_from_slice(&bytes);
+    }
+
+    let challenge = hash_to_fr(&commitments_serialized, BSB22_FOLD_DST)?;
+    pedersen::fold(&poks, challenge)
+}
+
+/// Compute `A_r`, `B_s`, and `Bs1` (the G1 form of `B_s` needed later in the
+/// `Krs` cross-term). Independent of `H`, so callers can run this in
+/// parallel with `compute_h`.
+#[allow(clippy::too_many_arguments)]
+#[instrument(skip_all)]
+pub fn prove_ar_bs_bs1(
+    g1_a: &[G1Affine],
+    g1_b: &[G1Affine],
+    g2_b: &[G2Affine],
+    infinity_a: &[bool],
+    infinity_b: &[bool],
+    wire_values: &[Fr],
+    g1_alpha: G1Affine,
+    g1_beta: G1Affine,
+    g2_beta: G2Affine,
+    g2_delta: G2Affine,
+    r_delta: G1Affine,
+    s_delta: G1Affine,
+    s_scalar: Fr,
+) -> Result<(G1Affine, G2Affine, G1Projective)> {
+    // Drop wires whose A/B bases are the point at infinity so the scalar
+    // slices line up with the pre-filtered base slices from setup.
+    let (wire_values_a, wire_values_b) = {
+        let _s = info_span!("filter_wires_ab").entered();
+        rayon::join(
+            || {
+                wire_values
+                    .iter()
+                    .enumerate()
+                    .filter(|(i, _)| !infinity_a[*i])
+                    .map(|(_, v)| *v)
+                    .collect::<Vec<_>>()
+            },
+            || {
+                wire_values
+                    .iter()
+                    .enumerate()
+                    .filter(|(i, _)| !infinity_b[*i])
+                    .map(|(_, v)| *v)
+                    .collect::<Vec<_>>()
+            },
+        )
+    };
+
+    let _s = info_span!("msm_ar_bs").entered();
+    // Sequential, not nested-rayon::join: arkworks' MSM is already rayon-
+    // parallel internally, so concurrent MSMs would just stack bucket
+    // allocators (~3×) without speeding up wall-clock. Sequential keeps one
+    // bucket set alive at a time — important when this whole function runs
+    // in parallel with `compute_h`.
+    let ar = {
+        let msm = G1Projective::msm(g1_a, &wire_values_a).map_err(crate::msm_err)?;
+        let mut result = msm;
+        result += G1Projective::from(g1_alpha);
+        result += G1Projective::from(r_delta);
+        result.into_affine()
+    };
+    let bs = {
+        let msm =
+            <G2Projective as VariableBaseMSM>::msm(g2_b, &wire_values_b).map_err(crate::msm_err)?;
+        let mut result = msm;
+        result += G2Projective::from(g2_beta);
+        result += G2Projective::from(g2_delta) * s_scalar;
+        result.into_affine()
+    };
+    let bs1 = {
+        let msm = G1Projective::msm(g1_b, &wire_values_b).map_err(crate::msm_err)?;
+        let mut result = msm;
+        result += G1Projective::from(g1_beta);
+        result += G1Projective::from(s_delta);
+        result
+    };
+    Ok((ar, bs, bs1))
+}
+
+/// Compute `Krs`, the final Groth16 group element. Depends on the quotient
+/// polynomial `H` and the `(A_r, Bs1)` outputs of [`prove_ar_bs_bs1`].
+#[allow(clippy::too_many_arguments)]
+#[instrument(skip_all)]
+pub fn prove_krs(
+    g1_k: &[G1Affine],
+    g1_z: &[G1Affine],
+    h: &[Fr],
+    wire_values: &[Fr],
+    r1cs_nb_public: usize,
+    commitment_info: &[CommitmentInfo],
+    challenge_wire_indices: &[usize],
+    domain_size: u64,
+    ar: G1Affine,
+    bs1: G1Projective,
+    kr_delta: G1Affine,
+    r_scalar: Fr,
+    s_scalar: Fr,
+) -> Result<G1Affine> {
+    // Private wires = everything after the public prefix, minus committed
+    // wires and challenge wires (those get their own bases elsewhere).
+    let private_wire_values: Vec<Fr> = {
+        let _s = info_span!("filter_private_wires").entered();
+        let mut to_remove: Vec<usize> = Vec::new();
+        for ci in commitment_info {
+            to_remove.extend_from_slice(&ci.private_committed);
+        }
+        to_remove.extend_from_slice(challenge_wire_indices);
+        to_remove.sort_unstable();
+        to_remove.dedup();
+        filter_by_sorted_indices(&wire_values[r1cs_nb_public..], &to_remove, r1cs_nb_public)
+    };
+
+    ensure!(
+        private_wire_values.len() == g1_k.len(),
+        "private wire count mismatch: got {}, expected {}",
+        private_wire_values.len(),
+        g1_k.len()
+    );
+
+    let _s = info_span!("msm_krs").entered();
+    let size_h = domain_size as usize - 1;
+
+    let (krs1_result, krs2_result) = rayon::join(
+        || G1Projective::msm(g1_k, &private_wire_values).map_err(crate::msm_err),
+        || {
+            if !h.is_empty() && !g1_z.is_empty() {
+                let h_slice = &h[..size_h.min(h.len())];
+                let z_slice = &g1_z[..size_h.min(g1_z.len())];
+                let min_len = h_slice.len().min(z_slice.len());
+                G1Projective::msm(&z_slice[..min_len], &h_slice[..min_len]).map_err(crate::msm_err)
+            } else {
+                Ok(G1Projective::zero())
+            }
+        },
+    );
+
+    let mut result = krs1_result? + krs2_result?;
+    result += G1Projective::from(kr_delta);
+
+    // Cross-terms: s·Ar + r·Bs1
+    let (s_ar, r_bs1) = rayon::join(|| G1Projective::from(ar) * s_scalar, || bs1 * r_scalar);
+    result += s_ar;
+    result += r_bs1;
+
+    Ok(result.into_affine())
+}
+
+/// Filter a slice by removing elements at sorted absolute indices.
+///
+/// `slice` starts at absolute index `base_offset`. `sorted_indices` contains
+/// absolute indices to remove (must be sorted and deduplicated).
+/// Returns a new Vec with the matching elements removed.
+///
+/// Uses a merge-scan which is O(n + k) for pre-sorted indices.
+fn filter_by_sorted_indices(slice: &[Fr], sorted_indices: &[usize], base_offset: usize) -> Vec<Fr> {
+    if sorted_indices.is_empty() {
+        return slice.to_vec();
+    }
+    let mut result = Vec::with_capacity(slice.len());
+    let mut remove_idx = 0;
+    for (i, val) in slice.iter().enumerate() {
+        let abs_idx = i + base_offset;
+        // Advance past any indices below current position
+        while remove_idx < sorted_indices.len() && sorted_indices[remove_idx] < abs_idx {
+            remove_idx += 1;
+        }
+        // Skip this element if it's in the removal list
+        if remove_idx < sorted_indices.len() && sorted_indices[remove_idx] == abs_idx {
+            remove_idx += 1;
+            continue;
+        }
+        result.push(*val);
+    }
+    result
+}
+
+/// Compute quotient polynomial H from the R1CS solution vectors.
+///
+/// Given the wire-level evaluations of A·w, B·w, C·w for each constraint,
+/// compute H such that A·B - C = H·Z where Z is the vanishing polynomial.
+///
+/// The buffers are consumed: the `a_evals` allocation is reused in-place
+/// for the returned H coefficients (avoiding an extra domain-sized
+/// allocation), and `b_evals`/`c_evals` are dropped at the end of the call.
+/// Buffers shorter than `domain.size()` are zero-padded internally.
+#[instrument(skip_all)]
+pub fn compute_h(
+    mut a_evals: Vec<Fr>,
+    mut b_evals: Vec<Fr>,
+    mut c_evals: Vec<Fr>,
+    domain: &Radix2EvaluationDomain<Fr>,
+) -> Result<Vec<Fr>> {
+    let n = domain.size();
+
+    // Pad to domain size
+    a_evals.resize(n, Fr::zero());
+    b_evals.resize(n, Fr::zero());
+    c_evals.resize(n, Fr::zero());
+
+    // IFFT → coset FFT for each buffer. The three pipelines are independent
+    // (separate buffers, immutable domain refs), so run them in parallel.
+    let coset_domain = domain
+        .get_coset(Fr::GENERATOR)
+        .ok_or_else(|| anyhow::anyhow!("failed to construct coset domain"))?;
+    rayon::join(
+        || {
+            domain.ifft_in_place(&mut a_evals);
+            coset_domain.fft_in_place(&mut a_evals);
+        },
+        || {
+            rayon::join(
+                || {
+                    domain.ifft_in_place(&mut b_evals);
+                    coset_domain.fft_in_place(&mut b_evals);
+                },
+                || {
+                    domain.ifft_in_place(&mut c_evals);
+                    coset_domain.fft_in_place(&mut c_evals);
+                },
+            )
+        },
+    );
+
+    // Pointwise: a[i] = (a[i] * b[i] - c[i]) / Z(coset), computed in parallel.
+    // Reuses a_evals in-place to avoid an extra domain-sized allocation.
+    // Z(g·ωⁱ) = (g·ωⁱ)^N - 1 = g^N - 1 (constant on coset)
+    let z_inv = {
+        let gen_n = Fr::GENERATOR.pow([n as u64]);
+        (gen_n - Fr::one())
+            .inverse()
+            .ok_or_else(|| anyhow::anyhow!("Z(coset) is zero, cannot invert"))?
+    };
+
+    a_evals
+        .par_iter_mut()
+        .zip(b_evals.par_iter())
+        .zip(c_evals.par_iter())
+        .for_each(|((a, b), c)| {
+            *a = (*a * b - c) * z_inv;
+        });
+
+    // IFFT on coset: evaluation on coset → coefficient form
+    coset_domain.ifft_in_place(&mut a_evals);
+
+    Ok(a_evals)
+}
+
+/// Convert a field element to its canonical compressed byte form
+/// (`FR_BYTES` bytes; errors only if serialization itself fails).
+pub fn fr_to_bytes(val: &Fr) -> Result<Vec<u8>> {
+    use ark_serialize::CanonicalSerialize;
+    let mut bytes = vec![0u8; FR_BYTES];
+    val.serialize_compressed(&mut bytes[..])
+        .map_err(|e| anyhow::anyhow!("failed to serialize Fr: {e}"))?;
+    Ok(bytes)
+}
+
+/// RFC 9380 Section 5.3: expand_message_xmd using SHA-256.
+///
+/// Expands a message and DST into `len_in_bytes` pseudorandom bytes.
+/// This is the core building block for hash-to-field.
+fn expand_message_xmd(msg: &[u8], dst: &[u8], len_in_bytes: usize) -> Result<Vec<u8>> {
+    use sha2::{Digest, Sha256};
+
+    let b_in_bytes = 32usize; // SHA-256 output size
+    let r_in_bytes = 64usize; // SHA-256 block size
+
+    ensure!(dst.len() <= 255, "DST must be at most 255 bytes");
+    let ell = len_in_bytes.div_ceil(b_in_bytes);
+    ensure!(ell <= 255, "expand_message_xmd: output too large");
+
+    // DST_prime = DST || I2OSP(len(DST), 1)
+    let mut dst_prime = Vec::with_capacity(dst.len() + 1);
+    dst_prime.extend_from_slice(dst);
+    dst_prime.push(dst.len() as u8);
+
+    // Z_pad = I2OSP(0, r_in_bytes) — 64 zero bytes
+    let z_pad = vec![0u8; r_in_bytes];
+
+    // l_i_b_str = I2OSP(len_in_bytes, 2) — 2-byte big-endian
+    let l_i_b_str = [(len_in_bytes >> 8) as u8, (len_in_bytes & 0xff) as u8];
+
+    // b_0 = H(Z_pad || msg || l_i_b_str || I2OSP(0, 1) || DST_prime)
+    let mut h = Sha256::new();
+    h.update(&z_pad);
+    h.update(msg);
+    h.update(l_i_b_str);
+    h.update([0u8]); // I2OSP(0, 1)
+    h.update(&dst_prime);
+    let b_0: [u8; 32] = h.finalize().into();
+
+    // b_1 = H(b_0 || I2OSP(1, 1) || DST_prime)
+    let mut h = Sha256::new();
+    h.update(b_0);
+    h.update([1u8]);
+    h.update(&dst_prime);
+    let mut b_prev: [u8; 32] = h.finalize().into();
+
+    let mut output = Vec::with_capacity(len_in_bytes);
+    output.extend_from_slice(&b_prev);
+
+    // b_i = H(strxor(b_0, b_(i-1)) || I2OSP(i, 1) || DST_prime)
+    for i in 2..=ell {
+        let mut xored = [0u8; 32];
+        for j in 0..32 {
+            xored[j] = b_0[j] ^ b_prev[j];
+        }
+        let mut h = Sha256::new();
+        h.update(xored);
+        h.update([i as u8]);
+        h.update(&dst_prime);
+        b_prev = h.finalize().into();
+        output.extend_from_slice(&b_prev);
+    }
+
+    output.truncate(len_in_bytes);
+    Ok(output)
+}
+
+/// Hash bytes with a domain separator to produce a field element.
+///
+/// Matches gnark's `fr.Hash(msg, dst, 1)`: uses expand_message_xmd (RFC 9380)
+/// with L = 48 bytes (32 byte field + 16 byte security parameter) to produce
+/// an unbiased field element.
+pub fn hash_to_fr(msg: &[u8], dst: &[u8]) -> Result<Fr> {
+    // L = ceil((ceil(log2(p)) + k) / 8) where k=128 (security parameter)
+    // For BN254: ceil((254 + 128) / 8) = ceil(382/8) = 48
+    const L: usize = 48;
+
+    let pseudo_random_bytes = expand_message_xmd(msg, dst, L)?;
+
+    // Interpret as big-endian integer and reduce mod p
+    Ok(Fr::from_be_bytes_mod_order(&pseudo_random_bytes))
+}
+
+/// Hash bytes with a domain separator to produce multiple field elements.
+///
+/// Matches gnark's `fr.Hash(msg, dst, count)`: one expand_message_xmd call
+/// of `count * L` bytes, split into independent 48-byte windows.
+pub fn hash_to_fr_multi(msg: &[u8], dst: &[u8], count: usize) -> Result<Vec<Fr>> {
+    const L: usize = 48;
+
+    let pseudo_random_bytes = expand_message_xmd(msg, dst, count * L)?;
+
+    let result = (0..count)
+        .map(|i| Fr::from_be_bytes_mod_order(&pseudo_random_bytes[i * L..(i + 1) * L]))
+        .collect();
+    Ok(result)
+}
+
+/// Hash a Pedersen commitment to derive a BSB22 challenge.
+///
+/// Used during witness solving: Hash(C || public_values) → challenge.
+/// Matches gnark's commitment hashing with
+/// `hash_to_field.New("bsb22-commitment")`.
+pub fn derive_commitment_challenge(commitment: &G1Affine, public_values: &[Fr]) -> Result<Fr> {
+    use ark_serialize::CanonicalSerialize;
+
+    let mut data = Vec::new();
+
+    // Serialize commitment point
+    let mut commitment_bytes = Vec::new();
+    commitment.serialize_uncompressed(&mut commitment_bytes)?;
+    data.extend_from_slice(&commitment_bytes);
+
+    // Serialize public values
+    for val in public_values {
+        let bytes = fr_to_bytes(val)?;
+        data.extend_from_slice(&bytes);
+    }
+
+    hash_to_fr(&data, COMMITMENT_DST)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_hash_to_fr_deterministic() {
+        let data = b"test data";
+        let dst = b"test dst";
+        let h1 = hash_to_fr(data, dst).unwrap();
+        let h2 = hash_to_fr(data, dst).unwrap();
+        assert_eq!(h1, h2);
+    }
+
+    #[test]
+    fn test_hash_to_fr_different_inputs() {
+        let h1 = hash_to_fr(b"input1", b"dst").unwrap();
+        let h2 = hash_to_fr(b"input2", b"dst").unwrap();
+        assert_ne!(h1, h2);
+    }
+
+    #[test]
+    fn test_expand_message_xmd_basic() {
+        // Same (msg, dst, len) must yield identical bytes of the requested length
+        let out1 = expand_message_xmd(b"hello", b"dst", 48).unwrap();
+        let out2 = expand_message_xmd(b"hello", b"dst", 48).unwrap();
+        assert_eq!(out1, out2);
+        assert_eq!(out1.len(), 48);
+    }
+
+    #[test]
+    fn test_expand_message_xmd_different_inputs() {
+        let out1 = expand_message_xmd(b"hello", b"dst", 48).unwrap();
+        let out2 = expand_message_xmd(b"world", b"dst", 48).unwrap();
+        assert_ne!(out1, out2);
+    }
+
+    #[test]
+    fn test_hash_to_fr_produces_nonzero() {
+        let h = hash_to_fr(b"test", b"dst").unwrap();
+        assert!(!h.is_zero());
+    }
+
+    #[test]
+    fn test_hash_to_fr_multi() {
+        let results = hash_to_fr_multi(b"test", b"dst", 3).unwrap();
+        assert_eq!(results.len(), 3);
+        // Each 48-byte window should reduce to a distinct field element
+        assert_ne!(results[0], results[1]);
+        assert_ne!(results[1], results[2]);
+    }
+}
diff --git a/provekit/groth16/src/setup.rs b/provekit/groth16/src/setup.rs
new file mode 100644
index 000000000..a8f479528
--- /dev/null
+++ b/provekit/groth16/src/setup.rs
@@ -0,0 +1,422 @@
+/// Groth16 trusted setup: generates ProvingKey and VerifyingKey from R1CS.
+///
+/// Ported from gnark's `backend/groth16/bn254/setup.go`.
+/// Notation follows DIZK paper Figure 4.
+use anyhow::Result;
+use {
+ crate::{pedersen, CommitmentInfo},
+ ark_bn254::{Fr, G1Affine, G1Projective, G2Affine, G2Projective},
+ ark_ec::{AffineRepr, CurveGroup},
+ ark_ff::{Field, One, UniformRand, Zero},
+ ark_poly::{EvaluationDomain, Radix2EvaluationDomain},
+ ark_std::rand::Rng,
+ provekit_common::R1CS,
+ rayon::prelude::*,
+};
+
+/// Toxic waste: secret random values used during setup and then destroyed.
+///
+/// `ZeroizeOnDrop` wipes every secret field when the value goes out of scope,
+/// so the trusted-setup secrets can't be recovered from freed memory.
+#[derive(zeroize::Zeroize, zeroize::ZeroizeOnDrop)]
+struct ToxicWaste {
+    t: Fr,         // secret evaluation point τ for the CRS polynomials
+    alpha: Fr,     // Groth16 α (used in the K-value formula)
+    beta: Fr,      // Groth16 β (used in the K-value formula)
+    gamma: Fr,     // Groth16 γ — its inverse scales public-wire bases
+    delta: Fr,     // Groth16 δ — its inverse scales private-wire and Z bases
+    gamma_inv: Fr, // cached γ⁻¹, computed once in `sample`
+    delta_inv: Fr, // cached δ⁻¹, computed once in `sample`
+}
+
+impl ToxicWaste {
+    /// Sample fresh, non-zero setup secrets from `rng` and precompute the
+    /// γ/δ inverses (both guaranteed invertible since they are non-zero).
+    fn sample<R: Rng>(rng: &mut R) -> Result<Self> {
+        let sample_nonzero = |rng: &mut R| -> Fr {
+            loop {
+                let v = Fr::rand(rng);
+                if !v.is_zero() {
+                    return v;
+                }
+            }
+        };
+
+        let t = sample_nonzero(rng);
+        let alpha = sample_nonzero(rng);
+        let beta = sample_nonzero(rng);
+        let gamma = sample_nonzero(rng);
+        let delta = sample_nonzero(rng);
+
+        Ok(ToxicWaste {
+            t,
+            alpha,
+            beta,
+            gamma,
+            delta,
+            gamma_inv: gamma
+                .inverse()
+                .ok_or_else(|| anyhow::anyhow!("gamma is zero, cannot invert"))?,
+            delta_inv: delta
+                .inverse()
+                .ok_or_else(|| anyhow::anyhow!("delta is zero, cannot invert"))?,
+        })
+    }
+}
+
+/// Run the Groth16 trusted setup.
+///
+/// Generates a ProvingKey and VerifyingKey from the given R1CS.
+/// The toxic waste is sampled internally and dropped at the end of this
+/// function. For production use, this should be replaced by an MPC ceremony.
+///
+/// `challenge_wire_indices` lists ALL wire indices that hold challenge values
+/// (treated as public).
+///
+/// CONTRACT: `challenge_wire_indices` must be flattened across commitments
+/// in the same order as `commitment_info`/`num_challenges_per_commitment`,
+/// i.e. `[commit0_wire0, commit0_wire1, ..., commit1_wire0, ...]`. Within
+/// each commitment, the order must match the order the verifier inserts
+/// derived challenges into `extended_public` (the `hash_to_fr_multi` output
+/// order for multi-challenge commitments). Violating this contract causes
+/// `vk.g1_k` to be paired with the wrong public-input scalars at verify
+/// time, producing a silent miscompute for multi-commitment circuits.
+pub fn setup(
+    r1cs: &R1CS,
+    commitment_info: &[CommitmentInfo],
+    num_challenges_per_commitment: &[usize],
+    challenge_wire_indices: &[usize],
+) -> Result<(crate::ProvingKey, crate::VerifyingKey)> {
+    let mut rng = ark_std::rand::thread_rng();
+    let toxic = ToxicWaste::sample(&mut rng)?;
+
+    let nb_wires = r1cs.num_witnesses();
+    // nb_public_variables includes constant-1 wire
+    let nb_public_variables = 1 + r1cs.num_public_inputs;
+    let private_committed: Vec<Vec<usize>> = commitment_info
+        .iter()
+        .map(|c| c.private_committed.clone())
+        .collect();
+    let nb_private_committed: usize = private_committed.iter().map(|v| v.len()).sum();
+    let total_challenge_wires = challenge_wire_indices.len();
+
+    // All challenge wire indices are treated as public on the Groth16 level.
+    let nb_public = nb_public_variables + total_challenge_wires;
+    let nb_private = nb_wires - nb_public_variables - nb_private_committed - total_challenge_wires;
+
+    // FFT domain
+    let domain = Radix2EvaluationDomain::<Fr>::new(r1cs.num_constraints())
+        .ok_or_else(|| anyhow::anyhow!("failed to create FFT domain"))?;
+    let domain_size = domain.size() as u64;
+
+    // Evaluate A, B, C at the toxic waste point t using Lagrange basis.
+    let (a_at_t, b_at_t, c_at_t) = evaluate_abc_at_t(r1cs, &domain, &toxic)?;
+
+    // Compute K values: K(i) = (β·A(i) + α·B(i) + C(i)) / γ or / δ
+    let mut pk_k = Vec::with_capacity(nb_private); // private wires → divided by δ
+    let mut vk_k = Vec::with_capacity(nb_public); // public wires → divided by γ
+    let mut ck_k: Vec<Vec<Fr>> = commitment_info
+        .iter()
+        .map(|c| Vec::with_capacity(c.private_committed.len()))
+        .collect();
+
+    // Track which wires are committed (using a merged iterator approach)
+    let mut committed_map: std::collections::HashMap<usize, usize> =
+        std::collections::HashMap::new();
+    for (ci, info) in commitment_info.iter().enumerate() {
+        for &wire_id in &info.private_committed {
+            committed_map.insert(wire_id, ci);
+        }
+    }
+
+    let commitment_wire_set: std::collections::HashSet<usize> =
+        challenge_wire_indices.iter().copied().collect();
+
+    let k_at = |i: usize| -> Fr {
+        // K(i) = β·A(i) + α·B(i) + C(i)
+        toxic.beta * a_at_t[i] + toxic.alpha * b_at_t[i] + c_at_t[i]
+    };
+
+    // Pass 1: public wires (constant + Noir public inputs), in wire-index
+    // order. `vk.g1_k[0]` corresponds to the constant-1 wire and is paired
+    // with the implicit `1` term in the verifier; `vk.g1_k[1..1+num_public]`
+    // is paired with `public_witness` in the same order Noir emits public
+    // inputs.
+    for i in 0..nb_public_variables {
+        vk_k.push(k_at(i) * toxic.gamma_inv);
+    }
+
+    // Pass 2: challenge wires in commitment-iteration order. The verifier
+    // appends derived challenges to `extended_public` in this same order
+    // (`for (i, _) in vk.public_and_commitment_committed.iter().enumerate()`
+    // → `extended_public.extend_from_slice(&challenges)`), so the bases
+    // emitted here line up with the scalars the verifier produces.
+    for &wire_idx in challenge_wire_indices {
+        vk_k.push(k_at(wire_idx) * toxic.gamma_inv);
+    }
+
+    // Pass 3: private wires. Each goes either to a commitment bucket (if
+    // it's in `private_committed` for some commitment) or to `pk_k`.
+    // Challenge wires that landed in the private range are skipped — they
+    // were already pushed to `vk_k` in pass 2.
+    for i in nb_public_variables..nb_wires {
+        if commitment_wire_set.contains(&i) {
+            continue;
+        }
+        let k_val = k_at(i);
+        if let Some(&ci) = committed_map.get(&i) {
+            ck_k[ci].push(k_val * toxic.gamma_inv);
+        } else {
+            pk_k.push(k_val * toxic.delta_inv);
+        }
+    }
+
+    // Z(τ) scalars: Z(t)/δ · t^i for i in 0..domain_size
+    let z_at_t: Fr = {
+        let t_n = toxic.t.pow([domain_size]);
+        (t_n - Fr::one()) * toxic.delta_inv
+    };
+    let mut z_scalars = Vec::with_capacity(domain_size as usize);
+    let mut z_cur = z_at_t;
+    for _ in 0..domain_size {
+        z_scalars.push(z_cur);
+        z_cur *= toxic.t;
+    }
+
+    // Mark infinity points (where A(τ) or B(τ) is zero)
+    let mut infinity_a = vec![false; nb_wires];
+    let mut infinity_b = vec![false; nb_wires];
+    let mut a_scalars_filtered = Vec::new();
+    let mut b_scalars_filtered = Vec::new();
+
+    for i in 0..nb_wires {
+        if a_at_t[i] == Fr::zero() {
+            infinity_a[i] = true;
+        } else {
+            a_scalars_filtered.push(a_at_t[i]);
+        }
+        if b_at_t[i] == Fr::zero() {
+            infinity_b[i] = true;
+        } else {
+            b_scalars_filtered.push(b_at_t[i]);
+        }
+    }
+
+    let nb_infinity_a = infinity_a.iter().filter(|&&x| x).count() as u64;
+    let nb_infinity_b = infinity_b.iter().filter(|&&x| x).count() as u64;
+
+    // Scalar multiplication for G1 points — parallelized via rayon
+    let g1_gen = G1Affine::generator();
+
+    let g1_alpha = scalar_mul_g1(&g1_gen, &toxic.alpha);
+    let g1_beta = scalar_mul_g1(&g1_gen, &toxic.beta);
+    let g1_delta = scalar_mul_g1(&g1_gen, &toxic.delta);
+
+    let g1_a: Vec<G1Affine> = a_scalars_filtered
+        .par_iter()
+        .map(|s| scalar_mul_g1(&g1_gen, s))
+        .collect();
+
+    let g1_b: Vec<G1Affine> = b_scalars_filtered
+        .par_iter()
+        .map(|s| scalar_mul_g1(&g1_gen, s))
+        .collect();
+
+    let mut g1_z: Vec<G1Affine> = z_scalars
+        .par_iter()
+        .map(|s| scalar_mul_g1(&g1_gen, s))
+        .collect();
+    // No bit-reverse permutation: arkworks' IFFT outputs H in natural order,
+    // so Z points must also be in natural order for the MSM Σ h[i]·Z[i].
+    // deg(H) = (n-1)+(n-1)-n = n-2, so we need n-1 Z points
+    let size_z = domain_size as usize - 1;
+    g1_z.truncate(size_z);
+
+    let g1_vk_k: Vec<G1Affine> = vk_k.par_iter().map(|s| scalar_mul_g1(&g1_gen, s)).collect();
+    let g1_pk_k: Vec<G1Affine> = pk_k.par_iter().map(|s| scalar_mul_g1(&g1_gen, s)).collect();
+
+    // Commitment bases in G1
+    let g1_ck_k: Vec<Vec<G1Affine>> = ck_k
+        .iter()
+        .map(|ck| ck.par_iter().map(|s| scalar_mul_g1(&g1_gen, s)).collect())
+        .collect();
+
+    // Scalar multiplication for G2 points — parallelized via rayon
+    let g2_gen = G2Affine::generator();
+    let g2_beta = scalar_mul_g2(&g2_gen, &toxic.beta);
+    let g2_delta = scalar_mul_g2(&g2_gen, &toxic.delta);
+    let g2_gamma = scalar_mul_g2(&g2_gen, &toxic.gamma);
+
+    let g2_b: Vec<G2Affine> = b_scalars_filtered
+        .par_iter()
+        .map(|s| scalar_mul_g2(&g2_gen, s))
+        .collect();
+
+    // Pedersen commitment setup
+    let g2_random = G2Projective::rand(&mut rng).into_affine();
+    let mut pk_commitment_keys = Vec::new();
+    let mut vk_commitment_keys = Vec::new();
+
+    for ck_bases in &g1_ck_k {
+        if ck_bases.is_empty() {
+            continue;
+        }
+        let (pks, vk) = pedersen::setup(&[ck_bases], Some(g2_random))?;
+        let pk = pks
+            .into_iter()
+            .next()
+            .ok_or_else(|| anyhow::anyhow!("pedersen::setup returned empty proving key vector"))?;
+        pk_commitment_keys.push(pk);
+        vk_commitment_keys.push(vk);
+    }
+
+    // Public and commitment committed indices for verification
+    let public_and_commitment_committed: Vec<Vec<usize>> = commitment_info
+        .iter()
+        .map(|c| c.public_and_commitment_committed.clone())
+        .collect();
+
+    // Build VerifyingKey
+    let mut vk = crate::VerifyingKey {
+        g1_alpha,
+        g1_k: g1_vk_k,
+        g2_beta,
+        g2_delta,
+        g2_gamma,
+        g2_delta_neg: G2Affine::zero(), // will be set by precompute
+        g2_gamma_neg: G2Affine::zero(),
+        e_alpha_beta: ark_ff::AdditiveGroup::ZERO,
+        commitment_keys: vk_commitment_keys,
+        public_and_commitment_committed,
+        num_challenges_per_commitment: num_challenges_per_commitment.to_vec(),
+    };
+    vk.precompute()?;
+
+    // Build ProvingKey
+    let pk = crate::ProvingKey {
+        domain_size,
+        domain_gen: domain.group_gen(),
+        g1_alpha,
+        g1_beta,
+        g1_delta,
+        g1_a,
+        g1_b,
+        g1_k: g1_pk_k,
+        g1_z,
+        g2_beta,
+        g2_delta,
+        g2_b,
+        infinity_a,
+        infinity_b,
+        nb_infinity_a,
+        nb_infinity_b,
+        commitment_keys: pk_commitment_keys,
+    };
+
+    // toxic waste is dropped here — in production this is the MPC ceremony's job.
+    // `ToxicWaste` is `ZeroizeOnDrop`, so the secret field elements are wiped
+    // from memory when this drop runs.
+    drop(toxic);
+
+    Ok((pk, vk))
+}
+
+/// Evaluate A(τ), B(τ), C(τ) for each wire using Lagrange interpolation at τ.
+///
+/// Ported from gnark's `setupABC()`.
+fn evaluate_abc_at_t(
+    r1cs: &R1CS,
+    domain: &Radix2EvaluationDomain<Fr>,
+    toxic: &ToxicWaste,
+) -> Result<(Vec<Fr>, Vec<Fr>, Vec<Fr>)> {
+    let nb_wires = r1cs.num_witnesses();
+    let mut a = vec![Fr::zero(); nb_wires];
+    let mut b = vec![Fr::zero(); nb_wires];
+    let mut c = vec![Fr::zero(); nb_wires];
+
+    let w = domain.group_gen();
+    let n = r1cs.num_constraints();
+
+    // Precompute [τ - ω^i] and their inverses
+    let mut t_minus_wi = Vec::with_capacity(n + 1);
+    let mut wi = Fr::one();
+    for _ in 0..=n {
+        t_minus_wi.push(toxic.t - wi);
+        wi *= w;
+    }
+    let t_minus_wi_inv = {
+        let mut inv = t_minus_wi.clone();
+        ark_ff::batch_inversion(&mut inv);
+        inv
+    };
+
+    // L₀(τ) = (τⁿ - 1) / (n · (τ - ω⁰))
+    let t_n = toxic.t.pow([domain.size() as u64]);
+    let n_inv = Fr::from(domain.size() as u64)
+        .inverse()
+        .ok_or_else(|| anyhow::anyhow!("FFT domain size is zero, cannot invert"))?;
+    let mut lagrange = (t_n - Fr::one()) * t_minus_wi_inv[0] * n_inv;
+
+    // Accumulate: for each constraint row, add coeff * Lⱼ(τ) to the appropriate
+    // wire. Iterates directly over SparseMatrix rows instead of gnark's Term
+    // lists.
+    let lookup_coeff = |interned| {
+        r1cs.interner
+            .get(interned)
+            .ok_or_else(|| anyhow::anyhow!("R1CS interner missing value for matrix entry"))
+    };
+    for j in 0..n {
+        for (col, interned) in r1cs.a.iter_row(j) {
+            a[col] += lookup_coeff(interned)? * lagrange;
+        }
+        for (col, interned) in r1cs.b.iter_row(j) {
+            b[col] += lookup_coeff(interned)? * lagrange;
+        }
+        for (col, interned) in r1cs.c.iter_row(j) {
+            c[col] += lookup_coeff(interned)? * lagrange;
+        }
+
+        // Lⱼ₊₁(τ) = ω · Lⱼ(τ) · (τ - ω^j) / (τ - ω^(j+1))
+        if j + 1 < n {
+            lagrange *= w;
+            lagrange *= t_minus_wi[j];
+            lagrange *= t_minus_wi_inv[j + 1];
+        }
+    }
+
+    Ok((a, b, c))
+}
+
+/// Multiply a G1 base point by a scalar, returning the affine result.
+fn scalar_mul_g1(base: &G1Affine, scalar: &Fr) -> G1Affine {
+    (base.into_group() * scalar).into_affine()
+}
+
+/// Multiply a G2 base point by a scalar, returning the affine result.
+fn scalar_mul_g2(base: &G2Affine, scalar: &Fr) -> G2Affine {
+    (base.into_group() * scalar).into_affine()
+}
+
+#[cfg(test)]
+mod tests {
+    use {super::*, provekit_common::FieldElement};
+
+    /// Smoke test: setup on a one-constraint R1CS (x·x = y) completes.
+    #[test]
+    fn test_setup_trivial() {
+        // x * x = y (where wire 0=constant, wire 1=public output y, wire 2=secret x)
+        let mut r1cs = R1CS::new();
+        r1cs.num_public_inputs = 1; // one public input (y), excludes constant wire
+        r1cs.add_witnesses(3); // wire 0 (const), wire 1 (y), wire 2 (x)
+
+        let one = FieldElement::from(1u64);
+        // A: x (wire 2), B: x (wire 2), C: y (wire 1)
+        r1cs.add_constraint(
+            &[(one, 2)], // A: 1·x
+            &[(one, 2)], // B: 1·x
+            &[(one, 1)], // C: 1·y
+        );
+
+        let (pk, vk) = setup(&r1cs, &[], &[], &[]).unwrap(); // no BSB22 commitments
+        assert!(!pk.g1_a.is_empty());
+        assert!(!vk.g1_k.is_empty());
+    }
+}
diff --git a/provekit/groth16/src/types.rs b/provekit/groth16/src/types.rs
new file mode 100644
index 000000000..cb8799cb6
--- /dev/null
+++ b/provekit/groth16/src/types.rs
@@ -0,0 +1,339 @@
+/// Core Groth16+BSB22 types: Proof, ProvingKey, VerifyingKey.
+///
+/// Ported from gnark's `backend/groth16/bn254/setup.go` and `prove.go`.
+/// Notation follows Figure 4 in the DIZK paper.
+use ark_bn254::{Bn254, Fr, G1Affine, G2Affine, G2Projective};
+use {
+ crate::pedersen,
+ ark_ec::{pairing::Pairing, AffineRepr},
+ ark_ff::Zero,
+ ark_serialize::{CanonicalDeserialize, CanonicalSerialize},
+ serde::{Deserialize, Deserializer, Serialize, Serializer},
+};
+
+/// A Groth16+BSB22 proof.
+///
+/// Contains the standard Groth16 elements (Ar, Bs, Krs) plus
+/// BSB22 Pedersen commitments and a batched proof of knowledge.
+#[derive(Clone, Debug, CanonicalSerialize, CanonicalDeserialize)]
+pub struct Proof {
+    /// `[A]₁ = Σ wᵢ·[Aᵢ(τ)]₁ + [α]₁ + r·[δ]₁`
+    pub ar: G1Affine,
+    /// `[B]₂ = Σ wᵢ·[Bᵢ(τ)]₂ + [β]₂ + s·[δ]₂`
+    pub bs: G2Affine,
+    /// `[C]₁ = Σ wᵢ·[Kᵢ(τ)]₁ + Σ hⱼ·[Zⱼ(τ)]₁ + s·[A]₁ + r·[B]₁ - rs·[δ]₁`
+    pub krs: G1Affine,
+    /// Pedersen commitments (BSB22 extension), one G1 point per commitment.
+    // NOTE(review): element type was stripped in transit; reconstructed as
+    // `G1Affine` because `is_valid()` runs G1 on-curve checks on each entry
+    // and the verifier folds them into a G1 MSM sum.
+    pub commitments: Vec<G1Affine>,
+    /// Batched proof of knowledge for all commitments.
+    pub commitment_pok: G1Affine,
+}
+
+impl Proof {
+    /// Checks that proof elements are on the curve and in the correct subgroup.
+    ///
+    /// Returns `false` on the first failing check (no error detail), so
+    /// callers get a single boolean gate before doing any pairing work.
+    pub fn is_valid(&self) -> bool {
+        // Ar must be a non-zero G1 point on the curve.
+        // G1 has cofactor 1 on BN254, so on-curve implies in-subgroup.
+        if !self.ar.is_on_curve() || self.ar.is_zero() {
+            return false;
+        }
+
+        // Bs is a G2 point. BN254 G2 has a non-trivial cofactor, so
+        // on-curve does NOT imply in-subgroup. Explicit check required.
+        if !self.bs.is_on_curve()
+            || self.bs.is_zero()
+            || !self.bs.is_in_correct_subgroup_assuming_on_curve()
+        {
+            return false;
+        }
+
+        // Krs must be a non-zero G1 point on the curve. A zero Krs is
+        // overwhelmingly unlikely for an honest prover (`r`/`s` are sampled
+        // uniformly), and accepting it widens the surface for malformed or
+        // malicious proofs.
+        if !self.krs.is_on_curve() || self.krs.is_zero() {
+            return false;
+        }
+
+        // Commitment points (G1) must be on the curve.
+        // (Zero is permitted here, unlike Ar/Krs above.)
+        for c in &self.commitments {
+            if !c.is_on_curve() {
+                return false;
+            }
+        }
+        if !self.commitment_pok.is_on_curve() {
+            return false;
+        }
+
+        true
+    }
+}
+
+/// Groth16 proving key.
+///
+/// Contains all curve points needed by the prover to generate a proof.
+/// These are computed during trusted setup from the toxic waste.
+//
+// NOTE(review): the element types of the `Vec` fields below were stripped in
+// transit; reconstructed from usage (G1/G2 naming, infinity bitmaps).
+#[derive(Clone, Debug, CanonicalSerialize, CanonicalDeserialize)]
+pub struct ProvingKey {
+    /// FFT domain cardinality (number of constraints rounded up to power of 2).
+    pub domain_size: u64,
+    /// Generator of the FFT domain.
+    pub domain_gen: Fr,
+
+    // -- G1 elements --
+    /// `[α]₁`
+    pub g1_alpha: G1Affine,
+    /// `[β]₁`
+    pub g1_beta: G1Affine,
+    /// `[δ]₁`
+    pub g1_delta: G1Affine,
+    /// `[Aᵢ(τ)]₁` for each wire (excluding infinity points).
+    pub g1_a: Vec<G1Affine>,
+    /// `[Bᵢ(τ)]₁` for each wire (excluding infinity points).
+    pub g1_b: Vec<G1Affine>,
+    /// `[Kᵢ(τ)]₁` for private wires only.
+    pub g1_k: Vec<G1Affine>,
+    /// `[τⁱ · Z(τ)/δ]₁` for i in `0..domain_size-1`.
+    pub g1_z: Vec<G1Affine>,
+
+    // -- G2 elements --
+    /// `[β]₂`
+    pub g2_beta: G2Affine,
+    /// `[δ]₂`
+    pub g2_delta: G2Affine,
+    /// `[Bᵢ(τ)]₂` for each wire (excluding infinity points).
+    pub g2_b: Vec<G2Affine>,
+
+    // -- Infinity tracking --
+    /// `infinity_a[i] == true` means wire `i` has `A(τ) == 0`.
+    pub infinity_a: Vec<bool>,
+    /// `infinity_b[i] == true` means wire `i` has `B(τ) == 0`.
+    pub infinity_b: Vec<bool>,
+    /// Count of infinity points in A.
+    pub nb_infinity_a: u64,
+    /// Count of infinity points in B.
+    pub nb_infinity_b: u64,
+
+    /// Pedersen commitment proving keys (one per BSB22 commitment).
+    // NOTE(review): reconstructed type name — confirm the Pedersen
+    // proving-key type exported by `crate::pedersen`.
+    pub commitment_keys: Vec<pedersen::ProvingKey>,
+}
+
+/// Groth16 verifying key.
+///
+/// Contains the minimal curve points needed by the verifier.
+/// Note: precomputed fields (g2_delta_neg, g2_gamma_neg, e_alpha_beta)
+/// are not serialized — call `precompute()` after deserialization.
+//
+// NOTE(review): several generic parameters below were stripped in transit
+// and have been reconstructed from how `verifier::verify` and the
+// (de)serialization impls use the fields.
+#[derive(Clone, Debug)]
+pub struct VerifyingKey {
+    // -- G1 elements --
+    /// `[α]₁`
+    pub g1_alpha: G1Affine,
+    /// `[Kᵢ(τ)]₁` for public wires (including commitment wires).
+    pub g1_k: Vec<G1Affine>,
+
+    // -- G2 elements --
+    /// `[β]₂`
+    pub g2_beta: G2Affine,
+    /// `[δ]₂`
+    pub g2_delta: G2Affine,
+    /// `[γ]₂`
+    pub g2_gamma: G2Affine,
+
+    // -- Precomputed (set by precompute(), not serialized) --
+    /// `-[δ]₂`
+    pub g2_delta_neg: G2Affine,
+    /// `-[γ]₂`
+    pub g2_gamma_neg: G2Affine,
+    /// `e([α]₁, [β]₂)` — the pairing's target-field element; assigned from
+    /// `Bn254::pairing(..).0` during deserialization/precompute.
+    pub e_alpha_beta: <Bn254 as Pairing>::TargetField,
+
+    /// Pedersen commitment verifying keys (one per BSB22 commitment).
+    // NOTE(review): reconstructed type name — confirm against
+    // `crate::pedersen`.
+    pub commitment_keys: Vec<pedersen::VerifyingKey>,
+    /// For each commitment, the indices of public/commitment-committed wires.
+    ///
+    /// Indices are **absolute witness indices**: position 0 is the constant-1
+    /// ONE_WIRE, public input `i` lives at index `1 + i`, and challenge wires
+    /// follow the public range. Index 0 is therefore never a valid entry —
+    /// the verifier maps `idx` → `extended_public[idx - 1]` to strip the
+    /// ONE_WIRE offset (see [`crate::verifier::verify`]). Producers (e.g.
+    /// `cli/src/cmd/prepare.rs`) populate this with `(1..num_public)`.
+    pub public_and_commitment_committed: Vec<Vec<usize>>,
+    /// Number of challenges derived from each commitment.
+    /// Single-challenge: all 1s. Multi-challenge: `[N]` for one commitment
+    /// producing N challenges.
+    // `usize`: the verifier sums these into a `usize` total and indexes with
+    // them directly.
+    pub num_challenges_per_commitment: Vec<usize>,
+}
+
+impl CanonicalSerialize for VerifyingKey {
+    /// Serializes only the durable fields, in declaration order; the
+    /// precomputed fields are intentionally omitted (recomputed on load).
+    // NOTE(review): the `W` generic was stripped in transit; reconstructed
+    // per the `CanonicalSerialize` trait signature.
+    fn serialize_with_mode<W: ark_serialize::Write>(
+        &self,
+        writer: W,
+        compress: ark_serialize::Compress,
+    ) -> Result<(), ark_serialize::SerializationError> {
+        let mut w = writer;
+        self.g1_alpha.serialize_with_mode(&mut w, compress)?;
+        self.g1_k.serialize_with_mode(&mut w, compress)?;
+        self.g2_beta.serialize_with_mode(&mut w, compress)?;
+        self.g2_delta.serialize_with_mode(&mut w, compress)?;
+        self.g2_gamma.serialize_with_mode(&mut w, compress)?;
+        self.commitment_keys.serialize_with_mode(&mut w, compress)?;
+        self.public_and_commitment_committed
+            .serialize_with_mode(&mut w, compress)?;
+        self.num_challenges_per_commitment
+            .serialize_with_mode(&mut w, compress)?;
+        Ok(())
+    }
+
+    /// Size of the byte stream `serialize_with_mode` produces — must stay in
+    /// lockstep with the field list above.
+    fn serialized_size(&self, compress: ark_serialize::Compress) -> usize {
+        self.g1_alpha.serialized_size(compress)
+            + self.g1_k.serialized_size(compress)
+            + self.g2_beta.serialized_size(compress)
+            + self.g2_delta.serialized_size(compress)
+            + self.g2_gamma.serialized_size(compress)
+            + self.commitment_keys.serialized_size(compress)
+            + self
+                .public_and_commitment_committed
+                .serialized_size(compress)
+            + self.num_challenges_per_commitment.serialized_size(compress)
+    }
+}
+
+impl ark_serialize::Valid for VerifyingKey {
+    /// Structural validation run by arkworks when deserializing with
+    /// `Validate::Yes`: per-point checks plus cross-field length consistency.
+    fn check(&self) -> Result<(), ark_serialize::SerializationError> {
+        // Per-element validity for every serialized group element.
+        self.g1_alpha.check()?;
+        for pt in &self.g1_k {
+            pt.check()?;
+        }
+        self.g2_beta.check()?;
+        self.g2_delta.check()?;
+        self.g2_gamma.check()?;
+        for ck in &self.commitment_keys {
+            ck.check()?;
+        }
+        // The three per-commitment vectors must agree in length, otherwise
+        // the verifier's per-commitment loop would index out of sync.
+        if self.commitment_keys.len() != self.public_and_commitment_committed.len() {
+            return Err(ark_serialize::SerializationError::InvalidData);
+        }
+        if self.num_challenges_per_commitment.len() != self.commitment_keys.len() {
+            return Err(ark_serialize::SerializationError::InvalidData);
+        }
+        Ok(())
+    }
+}
+
+impl CanonicalDeserialize for VerifyingKey {
+    /// Reads the durable fields in the same order `serialize_with_mode`
+    /// wrote them, then fills in the precomputed fields inline.
+    // NOTE(review): the `R` generic, return type, and turbofish element
+    // types were stripped in transit; reconstructed per the
+    // `CanonicalDeserialize` trait signature and the struct's field types.
+    fn deserialize_with_mode<R: ark_serialize::Read>(
+        reader: R,
+        compress: ark_serialize::Compress,
+        validate: ark_serialize::Validate,
+    ) -> Result<Self, ark_serialize::SerializationError> {
+        let mut r = reader;
+        let g1_alpha = G1Affine::deserialize_with_mode(&mut r, compress, validate)?;
+        let g1_k = Vec::<G1Affine>::deserialize_with_mode(&mut r, compress, validate)?;
+        let g2_beta = G2Affine::deserialize_with_mode(&mut r, compress, validate)?;
+        let g2_delta = G2Affine::deserialize_with_mode(&mut r, compress, validate)?;
+        let g2_gamma = G2Affine::deserialize_with_mode(&mut r, compress, validate)?;
+        let commitment_keys =
+            Vec::<pedersen::VerifyingKey>::deserialize_with_mode(&mut r, compress, validate)?;
+        let public_and_commitment_committed =
+            Vec::<Vec<usize>>::deserialize_with_mode(&mut r, compress, validate)?;
+        let num_challenges_per_commitment =
+            Vec::<usize>::deserialize_with_mode(&mut r, compress, validate)?;
+
+        // Compute cached values inline so a freshly deserialized VK is
+        // immediately ready to verify. Equivalent to calling `precompute()`
+        // — kept here so callers cannot forget that step.
+        let e_alpha_beta = Bn254::pairing(g1_alpha, g2_beta).0;
+        let g2_delta_neg: G2Affine = (-G2Projective::from(g2_delta)).into();
+        let g2_gamma_neg: G2Affine = (-G2Projective::from(g2_gamma)).into();
+
+        Ok(Self {
+            g1_alpha,
+            g1_k,
+            g2_beta,
+            g2_delta,
+            g2_gamma,
+            g2_delta_neg,
+            g2_gamma_neg,
+            e_alpha_beta,
+            commitment_keys,
+            public_and_commitment_committed,
+            num_challenges_per_commitment,
+        })
+    }
+}
+
+impl VerifyingKey {
+    /// Precompute cached values: e(α,β), -δ₂, -γ₂.
+    /// Must be called after deserialization (unless the VK came through
+    /// `CanonicalDeserialize`, which precomputes inline).
+    pub fn precompute(&mut self) -> anyhow::Result<()> {
+        use ark_ec::pairing::Pairing;
+        self.e_alpha_beta = Bn254::pairing(self.g1_alpha, self.g2_beta).0;
+
+        self.g2_delta_neg = (-G2Projective::from(self.g2_delta)).into();
+
+        self.g2_gamma_neg = (-G2Projective::from(self.g2_gamma)).into();
+
+        Ok(())
+    }
+
+    /// Number of public witness elements expected (excluding the constant 1
+    /// wire).
+    ///
+    /// Uses `saturating_sub` so a malformed VK with an empty `g1_k` reports
+    /// 0 instead of panicking on `usize` underflow in debug builds.
+    pub fn nb_public_witness(&self) -> usize {
+        self.g1_k.len().saturating_sub(1)
+    }
+}
+
+// Serde adapters for ProvingKey.
+//
+// The proving key is large (hundreds of MB) and arkworks-serialized bytes are
+// best read/written outside postcard's wire format to avoid materializing the
+// full byte stream in memory. The .pkp file layout treats the PK as an
+// out-of-band section appended after the postcard-encoded `Prover` (see
+// `provekit_prover::pkp_io`), so the serde impls here are no-ops:
+// * `Serialize` writes `()` (postcard emits zero bytes).
+// * `Deserialize` ignores the input and yields `ProvingKey::empty()`.
+//
+// In practice these impls only run for `Groth16Prover` round-trips; the file
+// I/O layer fills in the real PK after postcard returns.
+impl Serialize for ProvingKey {
+    /// Deliberate no-op serializer (see the module comment above): the PK is
+    /// written out-of-band by the .pkp I/O layer, not through postcard.
+    // NOTE(review): the serde generics were stripped in transit;
+    // reconstructed per the `serde::Serialize` trait signature.
+    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
+        // Emit a unit value: postcard encodes `()` as zero bytes, leaving the
+        // PK out of the postcard stream entirely.
+        serializer.serialize_unit()
+    }
+}
+
+impl<'de> Deserialize<'de> for ProvingKey {
+    /// Counterpart of the no-op `Serialize`: consumes the unit the
+    /// serializer wrote and returns a placeholder; the real PK is loaded by
+    /// the .pkp I/O path afterwards.
+    // NOTE(review): the serde generics were stripped in transit;
+    // reconstructed per the `serde::Deserialize` trait signature.
+    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
+        let _: () = Deserialize::deserialize(deserializer)?;
+        Ok(ProvingKey::empty())
+    }
+}
+
+impl ProvingKey {
+    /// A zero-state placeholder used while a `Groth16Prover` is being
+    /// reconstituted out of band. The actual proving key is loaded separately
+    /// by the .pkp I/O path and replaces this placeholder before any
+    /// cryptographic operations occur.
+    pub fn empty() -> Self {
+        // Every group element at the identity, scalars zero, vectors empty —
+        // deliberately unusable so accidental use fails loudly downstream.
+        ProvingKey {
+            domain_size: 0,
+            domain_gen: Fr::zero(),
+            g1_alpha: G1Affine::zero(),
+            g1_beta: G1Affine::zero(),
+            g1_delta: G1Affine::zero(),
+            g1_a: Vec::new(),
+            g1_b: Vec::new(),
+            g1_k: Vec::new(),
+            g1_z: Vec::new(),
+            g2_beta: G2Affine::zero(),
+            g2_delta: G2Affine::zero(),
+            g2_b: Vec::new(),
+            infinity_a: Vec::new(),
+            infinity_b: Vec::new(),
+            nb_infinity_a: 0,
+            nb_infinity_b: 0,
+            commitment_keys: Vec::new(),
+        }
+    }
+}
diff --git a/provekit/groth16/src/verifier.rs b/provekit/groth16/src/verifier.rs
new file mode 100644
index 000000000..90f3538de
--- /dev/null
+++ b/provekit/groth16/src/verifier.rs
@@ -0,0 +1,181 @@
+/// Groth16+BSB22 verifier: verifies proofs against a verifying key.
+///
+/// Ported from gnark's `backend/groth16/bn254/verify.go`.
+///
+/// Verification steps:
+/// 1. Subgroup check on proof elements
+/// 2. Recompute BSB22 commitment challenges from proof commitments
+/// 3. Verify Pedersen commitment PoKs via batch verification
+/// 4. Compute public input contribution via MSM
+/// 5. Check the Groth16 pairing equation
+use anyhow::{ensure, Context, Result};
+use {
+ crate::{
+ pedersen,
+ prover::{derive_commitment_challenge, hash_to_fr, hash_to_fr_multi},
+ types::{Proof, VerifyingKey},
+ BSB22_FOLD_DST, COMMITMENT_DST, FR_BYTES,
+ },
+ ark_bn254::{Bn254, Fr, G1Projective},
+ ark_ec::{pairing::Pairing, CurveGroup, VariableBaseMSM},
+};
+
+/// Verify a Groth16+BSB22 proof.
+///
+/// # Arguments
+/// * `proof` - The proof to verify.
+/// * `vk` - The verifying key (must have `precompute()` called).
+/// * `public_witness` - Public input values (excluding the constant 1 wire).
+pub fn verify(proof: &Proof, vk: &VerifyingKey, public_witness: &[Fr]) -> Result<()> {
+    let total_challenges: usize = vk.num_challenges_per_commitment.iter().sum();
+    // Guard the subtraction below: a malformed VK with more declared
+    // challenges than g1_k entries would otherwise underflow `usize` (panic
+    // in debug, wrap in release — release still rejects via the size-check
+    // a few lines down, but the panic in debug is a DoS surface and the
+    // wrap masks the real problem).
+    ensure!(
+        vk.g1_k.len() >= total_challenges + 1,
+        "invalid verifying key: g1_k has {} entries but {} challenges + ONE_WIRE were declared",
+        vk.g1_k.len(),
+        total_challenges,
+    );
+    let nb_public_vars = vk.g1_k.len() - total_challenges;
+    let expected_commitments = vk.public_and_commitment_committed.len();
+
+    ensure!(
+        vk.commitment_keys.len() == expected_commitments,
+        "invalid verifying key: got {} commitment keys, expected {}",
+        vk.commitment_keys.len(),
+        expected_commitments
+    );
+    ensure!(
+        proof.commitments.len() == expected_commitments,
+        "invalid proof: got {} commitments, expected {}",
+        proof.commitments.len(),
+        expected_commitments
+    );
+    ensure!(
+        vk.num_challenges_per_commitment.len() == expected_commitments,
+        "invalid verifying key: got {} challenge counts, expected {}",
+        vk.num_challenges_per_commitment.len(),
+        expected_commitments
+    );
+    ensure!(
+        public_witness.len() == nb_public_vars - 1,
+        "invalid witness size: got {}, expected {} (public - ONE_WIRE)",
+        public_witness.len(),
+        nb_public_vars - 1
+    );
+
+    // Step 1: Subgroup check
+    ensure!(proof.is_valid(), "proof elements not in correct subgroup");
+
+    // Step 2: Recompute commitment challenges and verify BSB22
+    let mut extended_public = public_witness.to_vec();
+    let mut commitments_serialized = vec![0u8; total_challenges * FR_BYTES];
+    let mut serial_offset = 0usize;
+
+    for (i, committed_indices) in vk.public_and_commitment_committed.iter().enumerate() {
+        let num_challenges = vk.num_challenges_per_commitment[i];
+
+        // NOTE(review): the `Vec<Fr>` annotation and the `collect` turbofish
+        // below were garbled in transit; reconstructed from the surrounding
+        // anyhow::Result flow and `extended_public: Vec<Fr>`.
+        let public_vals: Vec<Fr> = committed_indices
+            .iter()
+            .map(|&idx| {
+                ensure!(
+                    idx > 0 && idx - 1 < extended_public.len(),
+                    "commitment public index {} out of bounds (extended_public len = {})",
+                    idx,
+                    extended_public.len()
+                );
+                Ok(extended_public[idx - 1])
+            })
+            .collect::<Result<Vec<Fr>>>()?;
+
+        if num_challenges <= 1 {
+            let challenge = derive_commitment_challenge(&proof.commitments[i], &public_vals)?;
+            extended_public.push(challenge);
+            let bytes = crate::prover::fr_to_bytes(&challenge)?;
+            commitments_serialized[FR_BYTES * serial_offset..FR_BYTES * (serial_offset + 1)]
+                .copy_from_slice(&bytes);
+            serial_offset += 1;
+        } else {
+            let challenge_data = {
+                use ark_serialize::CanonicalSerialize;
+                let mut data = Vec::new();
+                let mut commitment_bytes = Vec::new();
+                proof.commitments[i]
+                    .serialize_uncompressed(&mut commitment_bytes)
+                    .map_err(|e| anyhow::anyhow!("serialize commitment: {e}"))?;
+                data.extend_from_slice(&commitment_bytes);
+                for val in &public_vals {
+                    let bytes = crate::prover::fr_to_bytes(val)?;
+                    data.extend_from_slice(&bytes);
+                }
+                data
+            };
+
+            let challenges = hash_to_fr_multi(&challenge_data, COMMITMENT_DST, num_challenges)?;
+
+            for ch in &challenges {
+                let bytes = crate::prover::fr_to_bytes(ch)?;
+                commitments_serialized[FR_BYTES * serial_offset..FR_BYTES * (serial_offset + 1)]
+                    .copy_from_slice(&bytes);
+                serial_offset += 1;
+            }
+
+            extended_public.extend_from_slice(&challenges);
+        }
+    }
+
+    // Step 3: Verify BSB22 Pedersen commitments
+    if !vk.commitment_keys.is_empty() {
+        let folding_challenge = hash_to_fr(&commitments_serialized, BSB22_FOLD_DST)?;
+
+        pedersen::batch_verify_multi_vk(
+            &vk.commitment_keys,
+            &proof.commitments,
+            proof.commitment_pok,
+            folding_challenge,
+        )
+        .context("Pedersen batch verification failed")?;
+    }
+
+    // Step 4: Compute public input contribution
+    let k_sum = {
+        // g1_k[0] is safe: the first ensure! guarantees g1_k is non-empty.
+        let mut sum = G1Projective::from(vk.g1_k[0]);
+
+        if !extended_public.is_empty() {
+            let msm_bases = &vk.g1_k[1..1 + extended_public.len()];
+            let msm = G1Projective::msm(msm_bases, &extended_public).map_err(crate::msm_err)?;
+            sum += msm;
+        }
+
+        for c in &proof.commitments {
+            sum += G1Projective::from(*c);
+        }
+
+        sum.into_affine()
+    };
+
+    // Step 5: Pairing check
+    let left = Bn254::multi_pairing([proof.krs, proof.ar, k_sum], [
+        vk.g2_delta_neg,
+        proof.bs,
+        vk.g2_gamma_neg,
+    ]);
+
+    ensure!(
+        left.0 == vk.e_alpha_beta,
+        "pairing check failed: proof is invalid"
+    );
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // No unit tests yet: exercising `verify` needs a (vk, proof, witness)
+    // triple, which requires the full setup → prove → verify cycle from the
+    // sibling modules. Integration tests would go here.
+}
diff --git a/provekit/prover/Cargo.toml b/provekit/prover/Cargo.toml
index 82f848326..fd017ee3f 100644
--- a/provekit/prover/Cargo.toml
+++ b/provekit/prover/Cargo.toml
@@ -16,13 +16,18 @@ parallel = ["provekit-common/parallel"]
[dependencies]
# Workspace crates
provekit-common.workspace = true
+provekit-groth16.workspace = true
# Noir language
acir.workspace = true
noirc_abi.workspace = true
# Cryptography and proof systems
+ark-bn254 = { version = "0.5.0", default-features = false, features = ["curve"] }
+ark-ec = { version = "0.5", features = ["parallel"] }
ark-ff.workspace = true
+ark-poly.workspace = true
+ark-serialize.workspace = true
ark-std.workspace = true
whir.workspace = true
@@ -30,15 +35,23 @@ whir.workspace = true
anyhow.workspace = true
num-bigint.workspace = true
postcard.workspace = true
+rayon.workspace = true
+serde.workspace = true
tracing.workspace = true
-# Target-specific dependencies: only on non-WASM targets
+# Target-specific dependencies: only on non-WASM targets.
+# `xz2` and `zstd` wrap C libraries and don't build for wasm32; `bytes` is only
+# used by `pkp_io`, which is itself non-wasm.
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
bn254_blackbox_solver = { workspace = true, optional = true }
nargo = { workspace = true, optional = true }
noir_artifact_cli = { workspace = true, optional = true }
mavros-vm.workspace = true
mavros-artifacts.workspace = true
+bytes.workspace = true
+memmap2 = { workspace = true }
+xz2.workspace = true
+zstd.workspace = true
[lints]
workspace = true
diff --git a/provekit/prover/src/lib.rs b/provekit/prover/src/lib.rs
index 28af714d9..dbdd987eb 100644
--- a/provekit/prover/src/lib.rs
+++ b/provekit/prover/src/lib.rs
@@ -8,8 +8,8 @@ use {
acir::native_types::{Witness, WitnessMap},
anyhow::{Context, Result},
provekit_common::{
- utils::noir_to_native, FieldElement, NoirElement, NoirProof, NoirProver, Prover,
- PublicInputs, TranscriptSponge,
+ utils::noir_to_native, FieldElement, NoirElement, NoirProof, NoirProver, PublicInputs,
+ TranscriptSponge,
},
std::mem::size_of,
tracing::{debug, info_span, instrument},
@@ -30,12 +30,28 @@ pub(crate) mod bigint_mod;
pub(crate) mod ec_arith;
#[cfg(not(target_arch = "wasm32"))]
pub mod input_utils;
+// `pkp_io` depends on `xz2`/`zstd`/`bytes`, none of which build on wasm32.
+#[cfg(not(target_arch = "wasm32"))]
+pub mod pkp_io;
+// Mmap-backed `.pkp` I/O (rapidsnark-style). Same extension as legacy `.pkp`,
+// distinguished by an in-file sentinel; see `pkp_mmap_io` module docs.
+#[cfg(not(target_arch = "wasm32"))]
+pub mod pkp_mmap_io;
+pub mod prover_types;
pub(crate) mod r1cs;
mod whir_r1cs;
mod witness;
// Public re-exports for items used by integration tests and benchmarks.
-pub use {ec_arith::ec_scalar_mul, r1cs::solve_witness_vec};
+#[cfg(not(target_arch = "wasm32"))]
+pub use pkp_io::{deserialize_pkp, read_pkp, serialize_pkp, write_pkp};
+#[cfg(not(target_arch = "wasm32"))]
+pub use pkp_mmap_io::{is_mmap_pkp, read_pkp_mmap, write_pkp_mmap};
+pub use {
+ ec_arith::ec_scalar_mul,
+ prover_types::{Groth16CommitmentInfo, Groth16PkSource, Groth16Prover, Prover},
+ r1cs::solve_witness_vec,
+};
/// `prove` and `prove_with_toml` are native-only (cfg-gated out on wasm32).
/// `prove_with_witness` is available on all targets. `MavrosProver` does not
@@ -82,6 +98,38 @@ fn generate_noir_witness(
.witness)
}
+#[instrument(skip_all)]
+#[cfg(all(feature = "witness-generation", not(target_arch = "wasm32")))]
+/// Executes the Noir program held by `prover` against `input_map` and
+/// returns the solved witness map.
+///
+/// Mocks and foreign-call resolution are disabled: this path is for real
+/// proving, not testing.
+// NOTE(review): the return generic was stripped in transit; reconstructed as
+// `WitnessMap<NoirElement>` — the values are later converted with
+// `noir_to_native` in `prove_with_witness`. Confirm against the sibling
+// `generate_noir_witness` signature.
+fn generate_noir_witness_for_groth16(
+    prover: &mut Groth16Prover,
+    input_map: InputMap,
+) -> Result<WitnessMap<NoirElement>> {
+    let solver = Bn254BlackBoxSolver::default();
+    let mut output_buffer = Vec::new();
+    let mut foreign_call_executor = DefaultForeignCallBuilder {
+        output: &mut output_buffer,
+        enable_mocks: false,
+        resolver_url: None,
+        root_path: None,
+        package_name: None,
+    }
+    .build();
+
+    // Encode the ABI inputs into the initial (partial) witness assignment.
+    let initial_witness = prover.witness_generator.abi().encode(&input_map, None)?;
+
+    let mut witness_stack = nargo::ops::execute_program(
+        &prover.program,
+        initial_witness,
+        &solver,
+        &mut foreign_call_executor,
+    )?;
+
+    // The last stack entry holds the fully-solved witness for the program.
+    Ok(witness_stack
+        .pop()
+        .context("Missing witness results")?
+        .witness)
+}
+
impl Prove for NoirProver {
#[cfg(all(feature = "witness-generation", not(target_arch = "wasm32")))]
#[instrument(skip_all)]
@@ -260,7 +308,7 @@ impl Prove for NoirProver {
.prove_noir(merlin, r1cs, commitments, full_witness, &public_inputs)
.context("While proving R1CS instance")?;
- Ok(NoirProof {
+ Ok(NoirProof::Whir {
public_inputs,
whir_r1cs_proof,
})
@@ -354,7 +402,7 @@ impl Prove for MavrosProver {
)
.context("While proving R1CS instance")?;
- Ok(NoirProof {
+ Ok(NoirProof::Whir {
public_inputs,
whir_r1cs_proof,
})
@@ -380,12 +428,329 @@ impl Prove for MavrosProver {
}
}
+impl Prove for Groth16Prover {
+    #[cfg(all(feature = "witness-generation", not(target_arch = "wasm32")))]
+    #[instrument(skip_all)]
+    /// Solve the witness from ABI inputs, then delegate to
+    /// `prove_with_witness`.
+    // NOTE(review): the return generic was stripped in transit; reconstructed
+    // as `Result<NoirProof>` to match the other `Prove` impls in this file,
+    // which return `NoirProof::Whir { .. }`.
+    fn prove(mut self, input_map: InputMap) -> Result<NoirProof> {
+        let witness = generate_noir_witness_for_groth16(&mut self, input_map)?;
+        self.prove_with_witness(witness)
+    }
+
+    #[cfg(all(feature = "witness-generation", not(target_arch = "wasm32")))]
+    #[instrument(skip_all)]
+    /// Read inputs from a Prover.toml file and prove.
+    // NOTE(review): both generics on this signature were stripped in transit;
+    // reconstructed as `impl AsRef<Path>` (the value is passed via
+    // `.as_ref()` to a file reader) and `Result<NoirProof>` to match the
+    // other `Prove` impls. `Path` is assumed already in scope in this file's
+    // imports (not fully visible in this chunk) — confirm.
+    fn prove_with_toml(self, prover_toml: impl AsRef<Path>) -> Result<NoirProof> {
+        let (input_map, _return_value) =
+            read_inputs_from_file(prover_toml.as_ref(), self.witness_generator.abi())?;
+        self.prove(input_map)
+    }
+
+ #[instrument(skip_all)]
+ fn prove_with_witness(
+ self,
+ acir_witness_idx_to_value_map: WitnessMap,
+ ) -> Result {
+ use ark_serialize::CanonicalSerialize;
+
+ // Take ownership of each field so we can drop the large ones the
+ // moment they stop being used.
+ let Groth16Prover {
+ program,
+ r1cs,
+ split_witness_builders,
+ witness_generator,
+ groth16_pk: pk,
+ commitment_info,
+ } = self;
+
+ let mut public_input_indices = program.functions[0].public_inputs().indices();
+ public_input_indices.sort_unstable();
+ let public_inputs = if public_input_indices.is_empty() {
+ PublicInputs::new()
+ } else {
+ let values = public_input_indices
+ .iter()
+ .map(|&idx| {
+ let noir_val = acir_witness_idx_to_value_map
+ .get(&Witness::from(idx))
+ .ok_or_else(|| anyhow::anyhow!("Missing public input at index {idx}"))?;
+ Ok(noir_to_native(*noir_val))
+ })
+ .collect::>>()?;
+ PublicInputs::from_vec(values)
+ };
+
+ // ABI / circuit metadata aren't touched after public-input extraction.
+ // Dropping them shrinks resident memory before witness solving — the
+ // current peak phase.
+ drop(program);
+ drop(witness_generator);
+
+ let num_witnesses = r1cs.num_witnesses();
+
+ let has_commitments = !commitment_info.is_empty();
+
+ // Allocate witness vector
+ let mut witness: Vec