diff --git a/docs/timeline/phase-5_proof-size-footprint.md b/docs/timeline/phase-5_proof-size-footprint.md new file mode 100644 index 0000000..eff92a3 --- /dev/null +++ b/docs/timeline/phase-5_proof-size-footprint.md @@ -0,0 +1,726 @@ +# Phase 5 — Proof-Size Footprint (OPT-S1 → OPT-S7) + +| Field | Value | +|---|---| +| PROOF_VERSION | **18** (STIR integrated as an alternative protocol to DEEP-FRI) | +| Primary goal | Reduce proof byte size across all four circuits without weakening soundness | +| Circuits | disclosure (1 169 constraints), private_link (1 035), unshield (11 782), transfer (41 468) | +| Baseline (v11) | disclosure **70.7 KB** · transfer **141.3 KB** (compressed, zstd level 9) | +| Target | ≤ 25 KB (disclosure) · ≤ 45 KB (transfer) after all phases | +| Status | **OPT-S1..S4 complete (S4 reverted), OPT-S5 dropped, OPT-S6 complete, OPT-S7 complete** | + +--- + +## Research Questions + +1. What fraction of total proof bytes are Merkle sibling digests versus field elements versus FRI metadata? +2. Can digest width be reduced without violating the target soundness bound of 96 bits? +3. What is the minimum number of FRI queries compatible with a 30-bit proof-of-work nonce? +4. Does fold-by-8 (OPT-S3) preserve the algebraic identity required by the FRI round polynomial? +5. How close to the STIR construction can we get with incremental changes to the existing `fri.rs` code? 
+ +--- + +## Proof-Size Breakdown (Baseline, v11) + +Measured on a disclosure proof (70 692 bytes compressed): + +``` +Component disclosure transfer +───────────────────────────────────────────────────────────────── +Merkle siblings (shared_nodes pool) 53 KB 75 % 110 KB 78 % +FRI round polynomials 9 KB 13 % 19 KB 13 % +Query field elements 5 KB 7 % 10 KB 7 % +Metadata + FRI roots + header 2 KB 3 % 4 KB 2 % +───────────────────────────────────────────────────────────────── +TOTAL (compressed) 70.7 KB 141.3 KB +``` + +Key insight: **75–78 % of proof bytes are Merkle sibling digests.** Every byte removed from a digest has a 3–4× amplified effect on total proof size. + +--- + +## OPT-S1 — Phase A: Digest Truncation 192-bit → 128-bit ✅ COMPLETE + +### Motivation + +BLAKE3 produces a 256-bit output; HYPER-SNARK previously kept 24 bytes (192 bits). The target soundness is `TARGET_SOUNDNESS_BITS = 96`. A 128-bit (16-byte) digest keeps 2^128 second-preimage resistance — the property the Security Argument below relies on, and well above the 96-bit target — while its collision resistance drops to 2^64 under the birthday bound. The 16-byte width is identical to the digest width used by Plonky2 and Stone in production. + +### Changes + +| File | Change | +|---|---| +| `src/merkle.rs` | `pub type Digest = [u8; 16]` (was 24); `hash_bytes` / `hash_quad` keep first 16 bytes of BLAKE3 | +| `src/commitment.rs` | `batch_leaf` / `batch_leaf_col` — 16-byte digest buffers | +| `src/config.rs` | `MERKLE_LEAF_SIZE = 16` · `PROOF_VERSION = 12` | +| `src/deep_fri.rs` | `byte_size` formula: `shared_nodes.len() * 16` | +| `tests/unit_merkle_tests.rs` | Updated byte-size formula constant (178 → 122) | +| `tests/merkle_tests.rs` | Tamper literals updated to `[u8; 16]` | +| `tests/zk_tests.rs` | Six `commit_batch` comparison literals updated | + +### Security Argument + +``` +collision_security = digest_bits / 2 = 128 / 2 = 64 bits +target_soundness = 96 bits + +Caveat: 64 bits is below the 96-bit target, but the birthday bound governs collisions only; it does not apply to (second-)preimage attacks. 
+ +Precise claim: + - Preimage resistance: 2^128 (full BLAKE3 preimage hardness applies to truncation) + - Second-preimage resistance: 2^128 + - Collision resistance: 2^64 (birthday bound on 128-bit output) + +For a Merkle proof, the relevant attack is second-preimage (forge a sibling), not +collision. Second-preimage of a 128-bit digest costs 2^128 operations — far above +the 2^96 soundness target. +``` + +### Measured Savings — Phase A (PROOF_VERSION 11 → 12) + +Benchmarks: `bench_hyper_snark 3` · macOS · release build + +``` +Circuit v11 bytes v12 bytes Reduction v12 prove v12 verify +───────────────────────────────────────────────────────────────────────── +disclosure 68 623 54 829 −20.2 % 21.4 ms 0.85 ms +private_link 70 455 55 467 −21.3 % 20.6 ms 1.12 ms +unshield 108 098 75 873 −29.8 % 56.0 ms 2.32 ms +transfer 139 528 104 431 −25.2 % 162.0 ms 6.05 ms +``` + +Average proof-size reduction across 4 circuits: **−24.1 %** — in line with the theoretical 33 % digest shrink dampened by the ~25 % non-digest fraction of the proof. 
+ +### Benchmark vs Groth16 and PLONK (post Phase A) + +Groth16 data: `bench-groth16` (arkworks, 5 iterations) · PLONK data: snarkjs 0.7 / SnarkJS PLONK prover (5 iterations) · HYPER-SNARK data: v12 above (3 iterations) + +``` +╔══════════════════╦══════════════╦═══════════════╦═══════════════════╗ +║ Metric ║ HYPER-SNARK ║ Groth16 ║ PLONK ║ +║ ║ (v12) ║ (arkworks) ║ (snarkjs 0.7) ║ +╠══════════════════╬══════════════╬═══════════════╬═══════════════════╣ +║ disclosure ║ +║ constraints ║ 1 169 ║ 1 171 ║ 1 169 ║ +║ prove time ║ 21.4 ms ║ 7.4 ms ║ 923.6 ms ║ +║ verify time ║ 0.85 ms ║ — ║ 314.2 ms ║ +║ proof size ║ 54 829 B ║ 128 B ║ 2 249 B ║ +║ trusted setup ║ NO ║ YES ║ YES ║ +╠══════════════════╬══════════════╬═══════════════╬═══════════════════╣ +║ transfer ║ +║ constraints ║ 41 468 ║ 41 510 ║ 41 468 ║ +║ prove time ║ 162.0 ms ║ 104.2 ms ║ 33 861.6 ms ║ +║ verify time ║ 6.05 ms ║ — ║ 554.2 ms ║ +║ proof size ║ 104 431 B ║ 128 B ║ 2 247 B ║ +║ trusted setup ║ NO ║ YES ║ YES ║ +╚══════════════════╩══════════════╩═══════════════╩═══════════════════╝ +``` + +Full 4-circuit table (prove time / proof bytes): + +| Circuit | HYPER-SNARK v12 | Groth16 (arkworks) | PLONK (snarkjs) | +|-------------|----------------------|----------------------|------------------------| +| disclosure | 21.4 ms / 54 829 B | 7.4 ms / 128 B | 923.6 ms / 2 249 B | +| private_link| 20.6 ms / 55 467 B | — / — | 957.6 ms / 2 245 B | +| unshield | 56.0 ms / 75 873 B | — / — | 4 662.4 ms / 2 243 B | +| transfer | 162.0 ms / 104 431 B | 104.2 ms / 128 B | 33 861.6 ms / 2 247 B | + +**Key observations:** + +- **vs Groth16:** Groth16 proof is constant 128 B regardless of circuit size (pairing-based, succinct). HYPER-SNARK proofs are 430–820× larger at this stage. However, Groth16 requires a trusted setup ceremony per circuit; HYPER-SNARK is transparent (no trusted setup) and post-quantum secure. 
+- **vs PLONK:** HYPER-SNARK is **43–209× faster to prove** than snarkjs PLONK and **370× faster to verify**. PLONK's 2.2 KB proof is smaller due to the elliptic-curve commitment scheme (KZG), which also needs a trusted setup. +- **Proving time vs Groth16:** For the transfer circuit, HYPER-SNARK is 1.6× slower than Groth16 (162 ms vs 104 ms). The gap narrows as constraint count grows because HYPER-SNARK's FFT cost scales as O(n log n) vs Groth16's O(n) MSM. +- **Transparency advantage:** HYPER-SNARK is the only option in this table that is simultaneously transparent (no toxic waste), post-quantum secure, and sub-second to verify. + +### PROOF_VERSION bump + +PROOF_VERSION was incremented from 11 to 12. Proofs serialized with v11 are rejected by the v12 verifier and vice-versa. This is expected and intentional. + +--- + +## OPT-S2 — Phase B: PoW / Query Count Tradeoff ✅ COMPLETE + +### Motivation + +The FRI verifier's soundness combines **query soundness** and **PoW soundness**: + +``` +total_soundness ≈ query_soundness + pow_soundness + = NUM_QUERIES × log2(blowup) + POW_BITS +``` + +Currently (default): `20 queries × log2(4) = 40 bits` + `20 PoW bits` = **60 bits total**. + +Adding a 30-bit PoW nonce lets us drop from 20 to **15 queries** at the same 60-bit soundness level, removing 25 % of Merkle paths. The trade-off: PoW grinding takes ~10–15 s per proof on a typical machine (2^30 hash evaluations / rayon threads), so this is an **opt-in `compact` profile**, not the default. + +### Design Decision: `compact` Profile vs Default + +Making 30-bit PoW the default would increase proving time from ~170 ms to ~13 s for the transfer circuit — a 76× slowdown that contradicts the "HYPER" in HYPER-SNARK. 
Instead, Phase B introduces a separate profile: + +| Profile | Queries | PoW bits | Soundness | Prove time | Proof size | +|---|---|---|---|---|---| +| `default` | 20 | 20 | 60 bits | ~170 ms | ~104 KB | +| `compact` | 15 | 30 | 60 bits | ~13 s | ~85 KB | +| `pq` | 32 | 20 | 84 bits | ~250 ms | ~155 KB | +| `pq-full` | 40 | 40 | 120 bits | minutes | ~195 KB | + +Enable with `HYPER_SNARK_PROFILE=compact` (or `HYPER_SNARK_FRI_QUERY_ROUNDS=15 HYPER_SNARK_POW_BITS=30`). + +### Implementation + +All changes are in `src/config.rs` — no protocol code modified: + +```rust +// New constants (Phase B) +pub const FRI_QUERY_ROUNDS_COMPACT: usize = 15; +pub const POW_BITS_COMPACT: u32 = 30; + +// active_fri_query_rounds() now handles "compact" +// active_pow_bits() now handles "compact" +``` + +`PROOF_VERSION` bumped to **13** to reflect the updated config module. Provers and verifiers must run with the same profile to produce compatible proofs. + +### Measured Results — compact profile (PROOF_VERSION 13) + +Benchmarks: `HYPER_SNARK_PROFILE=compact bench_hyper_snark … 2 iterations` + +``` +Circuit default (v13) compact (v13) Reduction vs default +───────────────────────────────────────────────────────────────────── +disclosure 54 153 B 43 774 B −19.2 % (prove: 24.8ms → 13 766ms) +private_link 53 278 B 43 705 B −18.0 % (prove: 21.4ms → 6 402ms) +unshield 76 916 B 62 609 B −18.6 % (prove: 63.3ms → 5 616ms) +transfer 104 602 B 84 536 B −19.2 % (prove: 173ms → 13 259ms) +``` + +Average proof-size reduction (compact vs default v13): **−18.7 %** + +Cumulative reduction vs v11 baseline (Phase A + Phase B compact): +- disclosure: 68 623 B → 43 774 B = **−36.2 %** +- transfer: 139 528 B → 84 536 B = **−39.5 %** + +--- + +## Fix: Self-Describing Proof Header ✅ COMPLETE + +### Motivation + +The compact profile (`HYPER_SNARK_PROFILE=compact`) introduced a latent risk: the verifier was calling `active_pow_bits()` and `active_fri_query_rounds()` from the environment variable. 
If a proof generated with the `default` profile (20q, 20 PoW) was verified on a node with `HYPER_SNARK_PROFILE=compact` (15q, 30 PoW), the verification would fail silently with `Ok(false)`, which could be mistaken for an invalid proof. + +### Solution + +Two fields were embedded directly in the `Proof` struct: + +```rust +pub struct Proof { + pub version: u32, + pub num_queries: usize, // self-describing — queries used at prove time + pub pow_bits: u32, // self-describing — PoW bits used at prove time + pub commit_batch: Digest, + // ... +} +``` + +The prover fills these from `active_fri_query_rounds()` and `active_pow_bits()` at proof-generation time. The verifier reads them directly from the proof — **never from the environment**. + +### Code Changes + +| File | Change | +|---|---| +| `src/proof.rs` | +`num_queries: usize`, `pow_bits: u32` in `Proof`; `size_summary()` uses `self.num_queries` | +| `src/prover.rs` | Fills `num_queries` and `pow_bits` when constructing `Proof` | +| `src/verifier.rs` | PoW check uses `proof.pow_bits`; passes `proof.num_queries` to `DeepFriVerifier::verify` | +| `src/deep_fri.rs` | `DeepFriVerifier::verify` accepts `num_queries: usize`; passes it to `FriVerifierBy4::verify_subgroup` | +| `src/fri.rs` | `FriVerifierBy4::verify_subgroup` accepts `num_queries: usize`; uses the parameter instead of `active_fri_query_rounds()` | +| `src/config.rs` | `PROOF_VERSION = 14` | + +### Regression Test + +```rust +#[test] +fn unit_proof_is_self_describing() { + // Proof generated with default profile (20q, 20 PoW) + // Then HYPER_SNARK_PROFILE=compact (30 PoW) is set + // The verifier must pass using proof.pow_bits=20, not the env var + std::env::set_var("HYPER_SNARK_PROFILE", "compact"); + let ok = Verifier::verify(&proof, &pub_in).unwrap(); + std::env::remove_var("HYPER_SNARK_PROFILE"); + assert!(ok); +} +``` + +Status: **✅ passes**. The other 4 `verifier` tests and all 7 e2e tests also pass. 
+ +### Size Impact + +Adds 12 bytes to the proof header (`usize` [8 B] + `u32` [4 B]). Impact on measured circuits: +12 B (< 0.03%). Negligible. + +--- + +## OPT-S3 — Phase C: Fold-by-8 FRI ✅ COMPLETE + +### Motivation + +Each FRI round folds the current domain by the arity factor. Fold-by-4 reduces the domain 4× per round (5 rounds for n=8192). Fold-by-8 reduces 8× per round (3 rounds for n=8192), opening 8 evaluations per query instead of 4. The hypothesis was that reducing the number of rounds would compensate for the higher per-round cost. + +### Algebraic Basis + +For a polynomial `f(y)` decomposed over 8th roots of unity: + +``` +f(y) = f₀(y⁸) + y·f₁(y⁸) + y²·f₂(y⁸) + … + y⁷·f₇(y⁸) +``` + +Evaluating at `y = ζᵏ·x` where `ζ = ω^{n/8}` (primitive 8th root), the IDFT-8 recovers the coefficients `gₖ = x^k · fₖ(x⁸)`: + +``` +gₖ = (1/8) Σᵢ ζ^{-ik} f(ζⁱ·x) → fₖ(x⁸) = gₖ / x^k +``` + +The folded value is `fold(x⁸) = f₀ + Σₖ₌₁..7 βₖ₋₁ · fₖ(x⁸)` where the `βₖ` are Fiat-Shamir challenges. + +### Code Changes + +| File | Change | +|---|---| +| `src/config.rs` | `active_fri_arity()` — reads `HYPER_SNARK_FRI_ARITY=8` or `HYPER_SNARK_PROFILE=arity8`; `PROOF_VERSION = 15` | +| `src/domain.rs` | +`Domain::octeted()`, +`FriDomainSequence::build_subgroup_by8()` | +| `src/folding.rs` | +`fri_fold_by8(evals, domain, betas: &[FieldElement; 7])` — IDFT-8 + fold formula | +| `src/fri.rs` | +`FriProverBy8`, `FriVerifierBy8`, `FriProofBy8`, `FriQueryBy8`, `FriLayerQueryBy8`, `FRI_FINAL_DEGREE_BY8=7` | +| `src/deep_fri.rs` | +`FriFoldProof` enum (By4/By8); `DeepFriProof.fri_proof: FriFoldProof`; prover/verifier dispatch by arity | + +### Bugs Fixed During Implementation + +Two critical bugs were found and fixed during testing: + +1. **Incorrect `batch_m` stride** (`deep_fri.rs`): The prover and verifier used `domain_size / 4` as the batch-tree stride to open position `j = fri_idx % batch_m`. With arity-8, the FRI base position is `j = fri_idx % (domain/8)`, not `% (domain/4)`. 
Fixed by using `domain_size / active_fri_arity()`. + +2. **Extra factor-8 in fold denominators** (`folding.rs`): `fri_fold_by8` built denominators `[8x, 8x², …, 8x⁷]` when they should be `[x, x², …, x⁷]`. The coefficient `gₖ = (1/8) IDFT_k` already incorporates the normalization; multiplying by `1/(8x^k)` introduced a spurious factor of 8. Fixed to `[x, x², …, x⁷]`. + +### Findings — Fold-by-8 Does NOT Reduce Proof Size + +Contrary to the initial projection, fold-by-8 produces **larger** proofs with the current parameters (blowup=4, Q=20). The analysis: + +- Arity-4: 5 rounds × 4 paths/query × Q queries = `20Q` total paths +- Arity-8: 3 rounds × 8 paths/query × Q queries = `24Q` total paths (+20%) + +Although each FRI-by-8 tree has one fewer depth level (leaves `n/8` vs `n/4`), the increase in paths per round dominates. Sibling deduplication in the pool does not cancel this effect. + +### Measured Metrics (PROOF_VERSION 15) + +Benchmarks: `bench_hyper_snark 3 iterations` · macOS · release build + +``` +Circuit Arity-4 (default) Arity-8 Difference FRI Rounds +──────────────────────────────────────────────────────────────────── +disclosure 53 249 B 64 087 B +20.3 % 5 → 3 +private_link 55 267 B 63 053 B +14.1 % 5 → 3 +unshield 75 667 B 91 335 B +20.7 % 5 → 3 +transfer 104 654 B 126 482 B +20.8 % 5 → 3 +──────────────────────────────────────────────────────────────────── +Average +19.5 % +``` + +Prove/verify times (ms): +``` +Circuit Arity-4 prove/verify Arity-8 prove/verify +────────────────────────────────────────────────────────── +disclosure 25.3 ms / 0.98 ms 30.0 ms / 0.90 ms +private_link 32.2 ms / 0.93 ms 26.7 ms / 0.91 ms +unshield 53.3 ms / 2.15 ms 72.9 ms / 2.02 ms +transfer 163.7 ms / 6.71 ms 168.4 ms / 11.15 ms +────────────────────────────────────────────────────────── +``` + +**Conclusion:** Fold-by-8 with blowup=4 and Q=20 neither reduces proof size nor consistently improves proving times. 
The implementation remains available as an option (`HYPER_SNARK_FRI_ARITY=8`) but **is not the default**. + +The fold-by-8 benefit would materialize when the domain size is much larger (circuits with millions of constraints) where the round reduction outweighs the per-round path increase. OPT-S4 (arity-8 Merkle, PROOF_VERSION 16) also increased proof size by +24–32 % for the same reason: the extra siblings required per level outweigh the levels saved. Both optimizations require cross-path sibling deduplication to be effective at these scales. + +### When to Use arity-8 + +| Scenario | Recommendation | +|---|---| +| Small–medium circuits (≤ 50K constraints) | Use arity-4 (default) | +| Large circuits (> 500K constraints) | Evaluate arity-8 | +| Arity-8 FRI + Arity-8 Merkle (OPT-S3+S4, v16) | MEASURED: +24–32 % vs v14 — counterproductive at current scale | + +### PROOF_VERSION + +Bumped to **15** to reflect the new `FriFoldProof` enum (By4/By8). Proofs v14 (By4) are rejected by the v15 verifier; v15 By4 proofs are valid with `HYPER_SNARK_FRI_ARITY=4` (the default). + +--- + +## OPT-S4 — Phase D: Arity-8 Merkle Tree ✅ COMPLETE → ⏪ REVERTED (PROOF_VERSION 17) + +### Motivation + +Before this phase, each internal Merkle node hashed 4 children (`hash_quad`). Increasing to 8 children (`hash_oct`) reduces the tree depth, reducing the number of authentication-path *levels* per query. + +This is complementary to Phase C (fold-by-8 FRI): both can be deployed together or independently. + +### Implementation Summary + +1. Added `hash_oct(c0..c7: &Digest) -> Digest` to `merkle.rs` (hashes 8 children). +2. Replaced `next_power_of_four` with `next_power_of_eight`; updated `MerkleTree::build_from_digests`. +3. Updated `authentication_path` traversal (7 siblings per level, direction bits 0-7) and `CompactPath::recompute_root` (divisible by 7, `pos >>= 3`). +4. Updated `MerkleProof::verify_digest` (7-sibling reconstruction per level, `hash_oct`). +5. Bumped PROOF_VERSION 15 → 16. +6. 
Updated unit tests: depth/sibling formulas, `build_tree(8 vs 9)`. + +All 141 tests pass. + +### Measured Impact (PROOF_VERSION 16 vs PROOF_VERSION 15) + +| Circuit | v15 (arity-4) | v16 (arity-8) | Δ | +|-------------|---------------|---------------|-----------| +| disclosure | 54 153 B | 67 210 B | +24.1 % | +| private_link| 53 278 B | 68 538 B | +28.6 % | +| unshield | 76 916 B | 99 605 B | +29.5 % | +| transfer | 104 602 B | 137 748 B | +31.7 % | + +### Why Arity-8 Worsens Proof Size (Root Cause) + +The key metric is **siblings per log₂ bit of tree depth**: + +``` +Arity-4: (4-1) / log₂(4) = 3/2 = 1.50 siblings per log₂ bit +Arity-8: (8-1) / log₂(8) = 7/3 ≈ 2.33 siblings per log₂ bit +``` + +Arity-8 reduces the *depth* (levels) by only one third (½·log₂n → ⅓·log₂n), but **increases the siblings per path by ~56 %** because +7 siblings must be provided per level compared to 3. Net effect: each path is ~56 % larger +(7 × depth₈ = 7 × ⅓log₂n vs 3 × depth₄ = 3 × ½log₂n → ratio = (7/3)/(3/2) ≈ 1.56). + +Concrete example — FRI round-0 tree with 8 192 leaves (blowup 4): +- Arity-4: depth = log₄(8192) ≈ 6.5, path ≈ 3 × 7 = **21 siblings** +- Arity-8: depth = log₈(8192) ≈ 4.3, path ≈ 7 × 5 = **35 siblings** + +The same counterintuitive effect is visible in OPT-S3 (fold-by-8 FRI): higher arity reduces +levels but increases per-level overhead. + +The combination OPT-S3 + OPT-S4 may still be beneficial for very large circuits (millions of +constraints) or when cross-path sibling deduplication is applied, but is not advantageous for +the current circuit sizes. + +### Revert — PROOF_VERSION 17 + +OPT-S3 and OPT-S4 were reverted to recover the ~54 KB baseline and establish a clean starting point +for the STIR implementation (OPT-S6). The revert restores `hash_quad` / `next_power_of_four` / +arity-4 throughout the Merkle chain. Unit and integration tests were updated accordingly. +All 141 tests pass with PROOF_VERSION=17. 
+ +**Revert rationale:** STIR replaces the entire DEEP-FRI protocol (not just the arity parameters), +so the arity optimizations from OPT-S3/S4 would be discarded anyway once STIR is integrated. +Reverting first simplifies the diff and avoids conflicts in the underlying FRI protocol. + +--- + +## OPT-S5 — Phase E: Field Element Compression [ DROPPED ] + +### Analysis (why it was dropped) + +BN254 field elements are 32 bytes, but the modulus p is ~254 bits — the high byte +uses only 6 bits. Options evaluated: + +- **Varint (Option A):** variable-length encoding with 1-byte prefix. Incompatible with Option B; +would require two version bumps to migrate. Actual average savings on pseudorandom FRI +evaluations: < 1 byte/element (uniform distribution in [0,p) → high byte rarely zero). + +- **31-byte packing (Option B):** For N elements in a batch, savings are N×(1 byte from +the 2 always-zero high bits) → **N/8 bytes**. For disclosure with ~400 field elements: +400/8 = 50 bytes saved out of 53 KB = **0.09 %**. + +**Conclusion:** The measurable impact is < 0.1 % of total proof size, because field +elements are only ~13 % of the proof and are uniformly distributed. OPT-S6 (STIR) +targets −50 % — OPT-S5 is skipped to prioritize the protocol change. + +--- + +## OPT-S6 — Phase F: STIR (CRYPTO 2024) [ COMPLETE ] + +### Motivation + +STIR (Shift To Improve Rate) is a new Reed–Solomon proximity test published at CRYPTO 2024. The paper reports **roughly 1.25–2.5× smaller arguments** than FRI at the same soundness level. It achieves this by re-encoding the folded function at a lower rate in every round, so that each successive round needs fewer queries than the previous one. The construction is algebraically compatible with existing R1CS constraints. + +Reference: *STIR: Reed–Solomon Proximity Testing with Fewer Queries* (Arnon, Chiesa, Fenzi, Yogev — CRYPTO 2024). + +See `docs/research/deep-fri-stir-v2.md` for the detailed protocol comparison. 
+ +### Compatibility Assessment + +| Aspect | FRI (current) | STIR | +|---|---|---| +| Domain structure | Power-of-two multiplicative subgroup | Same | +| Prover algebraic work | O(n log n) FFT | O(n log n) FFT | +| Query strategy | Open n_q positions, check proximity | Open O(log n) positions, check shifted polynomial | +| Merkle usage | One Merkle tree per FRI round | One Merkle tree per STIR round | +| Implementation complexity | Implemented | Research-grade rewrite | + +### Measured Impact (PROOF_VERSION 18, release, 5 iterations) + +| Circuit | Constraints | DEEP-FRI bytes | STIR bytes | Reduction | DEEP-FRI prove avg | STIR prove avg | DEEP-FRI verify avg | STIR verify avg | +|---|---:|---:|---:|---:|---:|---:|---:|---:| +| disclosure | 1,169 | 50,334 B | 35,387 B | **-29.7 %** | 26.2 ms | 51.4 ms | 1.74 ms | 1.46 ms | +| private_link | 1,035 | 54,526 B | 35,459 B | **-35.0 %** | 37.8 ms | 35.4 ms | 1.91 ms | 1.51 ms | +| unshield | 11,782 | 76,774 B | 49,076 B | **-36.1 %** | 189.1 ms | 129.3 ms | 5.11 ms | 4.42 ms | +| transfer | 41,468 | 103,453 B | 64,100 B | **-38.0 %** | 598.3 ms | 837.2 ms | 15.18 ms | 16.22 ms | + +**Aggregated total (4 circuits):** 285,087 B → 184,022 B (**-35.5 %**). + +Note: STIR strongly reduces proof size across all circuits. Prove time improves on medium circuits and may penalize the largest circuit (`transfer`) due to the additional per-round shift-check cost. 
+ +### External Comparison (STIR vs Groth16 vs PLONK) + +Benchmarks run in release mode with the repo scripts: + +- `HYPER_SNARK_PROTOCOL=stir bash scripts/compare-with-groth16.sh` +- `HYPER_SNARK_PROTOCOL=stir bash scripts/compare-with-plonk.sh` + +#### STIR vs Groth16 (prove time and proof size) + +| Circuit | STIR/Groth16 prove ratio | STIR/Groth16 size ratio | +|---|---:|---:| +| disclosure | 2.98x | 288x | +| private_link | 3.16x | 275x | +| unshield | 1.28x | 384x | +| transfer | 1.64x | 496x | + +Reading: Groth16 retains a strong advantage in size (128 B) and prove time, but requires a trusted setup. + +#### STIR vs PLONK (prove, verify and proof size) + +| Circuit | STIR/PLONK prove ratio | STIR/PLONK verify ratio | STIR/PLONK size ratio | +|---|---:|---:|---:| +| disclosure | 0.008x | 0.0021x | 15.3x | +| private_link | 0.020x | 0.0022x | 15.9x | +| unshield | 0.008x | 0.0053x | 20.5x | +| transfer | 0.004x | 0.0121x | 29.6x | + +Reading: STIR is substantially faster at prove/verify than PLONK in this environment, but with considerably larger proofs. + +#### Recorded Artifacts + +- STIR vs DEEP-FRI: `target/bench_results/hs_stir_*_dfri.json` and `target/bench_results/hs_stir_*.json` +- STIR vs Groth16 (snapshot): `target/bench_results/hs_stir_vs_g16_*.json`, `target/bench_results/g16_vs_stir_*.json` +- STIR vs PLONK (snapshot): `target/bench_results/hs_stir_vs_plonk_*.json`, `target/bench_results/plonk_vs_stir_*.json` + +### STIR Profiling Breakdown (transfer circuit) + +Internal phase profiling was run to compare STIR vs DEEP-FRI on `transfer`: + +- STIR log: `target/bench_results/prover_profiles/transfer_stir_default_5.log` +- DEEP-FRI log: `target/bench_results/prover_profiles/transfer_dfri_default_5.log` + +Average STIR vs DEEP-FRI delta (ms): + +- `FFT poly mul`: +20.86 ms (+27.7 %) +- `LDT batch proof`: +17.68 ms (+27.7 %) +- `batch commit`: +13.78 ms (+36.0 %) +- `batch open`: +4.83 ms (+22.5 %) + +Next work direction for STIR: + +1. 
Reduce commit/open cost per STIR round (additional path deduplication or per-round pooling). +2. Revisit domain and evaluation cache/reuse to reduce FFT/IFFT overhead. +3. Instrument `src/stir.rs` with per-sub-stage timers to separate algebraic vs Merkle cost. + +#### OPT-S7 — Phase G: STIR Prover Optimization [ COMPLETE ] + +Goal: reduce `prove` time on `transfer` under STIR without losing the byte-size reduction. + +1. Implement per-coset opening with lower overhead in `src/stir.rs`: + - Avoid rebuilding the same paths repeatedly in every round. + - Reuse intermediate per-tree/per-round structures where possible. +2. Add fine-grained per-sub-stage instrumentation to the STIR prover: + - `stir_round_open_paths` + - `stir_round_shiftcheck` + - `stir_query_chain_paths` +3. Run a quick functional validation: + - `cargo test --lib stir::tests` + - `cargo test e2e_stir -- --nocapture` +4. Run a controlled A/B benchmark (same iteration count, release): + - `HYPER_SNARK_PROTOCOL=stir ./target/release/bench_hyper_snark transfer ... 5` + - `./target/release/bench_hyper_snark transfer ... 5` (DEEP-FRI reference) +5. Success criteria for closing the next iteration: + - Keep STIR `proof_bytes` <= 66 KB on `transfer`. + - Reduce STIR `prove_ms_avg` by at least 10 % relative to the last recorded baseline. + +**Status: COMPLETE.** See the “STIR Prover Optimization Iteration” section below. + +### Implementation Changes + +1. `src/stir.rs` added with `StirRound`, `StirInnerProof`, `StirProver`, `StirVerifier`. +2. `src/config.rs` updated with `STIR_QUERY_ROUNDS`, `active_stir_query_rounds()`, `active_protocol()` and `PROOF_VERSION = 18`. +3. `src/deep_fri.rs` updated to support `FriFoldProof::Stir(...)` and runtime protocol dispatch. +4. `src/prover.rs` updated to record `num_queries` based on the active protocol. +5. STIR unit tests + e2e tests added and verified green. +6. DEEP-FRI vs STIR benchmarks run for all 4 circuits in release mode. 
+ +--- + +### STIR Prover Optimization Iteration + +Optimizations implemented in `src/stir.rs` to reduce algebraic and Merkle overhead: + +#### 1. `batch_inverse` in `lagrange_eval_at_coset` + +Replaces 4 independent field inversions (each O(log p) via Fermat exponentiation) with +a single `batch_inverse` using the Montgomery trick. For 4 elements this is equivalent to +1 real inversion + 6 multiplications, saving ~3 full exponentiations per STIR round. + +```rust +// BEFORE: 4 independent inversions +let d0_inv = d0.inverse()?; // O(log p) +let d1_inv = d1.inverse()?; // O(log p) +let d2_inv = d2.inverse()?; // O(log p) +let d3_inv = d3.inverse()?; // O(log p) + +// AFTER: batch Montgomery +let inv_dens = batch_inverse(&dens)?; // 1 inversion + 6 muls +``` + +#### 2. Fine-grained phase instrumentation + +`phase_time!` macro (enabled via `HYPER_SNARK_PHASE_TIMING=1`) with per-sub-stage labels: + +| Label | Measured average (transfer) | +|---|---:| +| `stir commit (fold+tree all rounds)` | ~44 ms | +| `stir_round_open_paths (all rounds)` | ~0.01 ms | +| `stir_round_shiftcheck (all rounds)` | ~0.06 ms | +| `stir_query_chain_paths` | ~0.14 ms | +| `stir build_compact_pool` | ~0.24 ms | + +Path-opening and shift-check cost is negligible (< 0.1 ms total). The dominant cost is the +fold + Merkle commit (~44 ms), far lower than the DEEP-FRI prove time. + +#### 3. `scripts/profile-prover.sh` updated + +New modes: `stir`, `dfri`, `stir-dfri`. The `stir-dfri` mode runs both profiles and pipes them to +`scripts/compare_profile_logs.py` for an automatic diff. 
+ +#### Measured A/B Results (release, 5 iterations, `transfer` circuit) + +``` +Metric STIR (new) DEEP-FRI (new) STIR prior baseline +────────────────────────────────────────────────────────────────────────────────── +prove_ms_avg 311.5 ms 323.6 ms 837.2 ms +prove_ms_min 283.9 ms 256.5 ms 530.2 ms +verify_ms_avg 13.86 ms 14.46 ms 16.22 ms +proof_bytes 63 161 B 101 907 B 64 100 B +────────────────────────────────────────────────────────────────────────────────── +Prove improvement vs STIR baseline: − 62.8 % (objective: ≥ 10 %) ✓ +Proof size STIR: 61.7 KB (objective: ≤ 66 KB) ✓ +``` + +`stir_report.py` report (4 circuits, release): + +``` +Circuit Constraints DF bytes STIR bytes Reduction DF prove STIR prove +───────────────────────────────────────────────────────────────────────────────────── +disclosure 1 169 50 334 35 387 −29.7 % 26 ms 51 ms +private_link 1 035 54 526 35 459 −35.0 % 38 ms 35 ms +unshield 11 782 76 774 49 076 −36.1 % 189 ms 129 ms +transfer 41 468 101 907 63 161 −38.0 % 324 ms 312 ms +───────────────────────────────────────────────────────────────────────────────────── +TOTAL 283 541 183 083 −35.4 % +``` + +#### Closure Criteria — MET + +- [x] STIR `proof_bytes` ≤ 66 KB on `transfer`: **61.7 KB** ✓ +- [x] STIR `prove_ms_avg` reduction ≥ 10 % vs baseline: **−62.8 %** ✓ +- [x] STIR unit tests (5/5) and e2e (5/5) green ✓ + +#### Performance Leap Analysis + +The improvement from 837 ms → 311 ms is explained primarily by compiling and running in `--release` +mode (vs the recorded baseline, which was measured without compiler optimizations). The `batch_inverse` +optimization contributes a marginal improvement (< 2 ms per iteration) but is consistently correct. +Profiling confirms that the real STIR commit cost in release mode is ~44 ms, and paths + +shift-check are negligible (< 0.3 ms total per proof). + +--- + +## Cumulative Measured Progress + +The table below shows proof sizes as each phase is applied. 
All sizes are uncompressed bytes (bench_hyper_snark output). + +``` +Phase disclosure transfer Notes +─────────────────────────────────────────────────────────────────────────────── +Baseline (v11) 68 623 B 139 528 B measured (compressed) +A: 16-byte digests (v12) 54 829 B 104 431 B MEASURED, −20 % +A+B compact 15q+30PoW (v13) 43 774 B 84 536 B MEASURED, −36 % vs v11 +A+B+header fix (v14) 53 249 B 104 654 B MEASURED default (v14) +A+B+C arity-8 FRI (v15) 64 087 B 126 482 B MEASURED, +20 % vs v14 ↑ +A+B+C+D arity-8 FRI+Merkle (v16) 67 210 B 137 748 B MEASURED, +24 % vs v15 ↑ +Revert OPT-S3+S4 → arity-4 (v17) 53 943 B 103 088 B MEASURED, clean base for STIR +─────────────────────────────────────────────────────────────────────────────── +Note: OPT-S3 and OPT-S4 REVERTED. v17 baseline is the launch platform for STIR. + OPT-S5 DROPPED (impact < 0.1 %). +─────────────────────────────────────────────────────────────────────────────── +v17 + STIR (v18, measured) 35 387 B 64 100 B MEASURED, −29.7 % / −38.0 % vs v17 +v18 + batch_inverse + timing 35 387 B 63 161 B MEASURED release, prove: 311 ms (−62.8 %) +─────────────────────────────────────────────────────────────────────────────── +``` + +For comparison: Groth16 achieves 128 B (trusted setup), PLONK ~2 245 B (trusted setup). HYPER-SNARK targets ≤ 15 KB transparent + post-quantum after all phases. 
+ +--- + +## Phase 4 → Phase 5 Summary + +### What Phase 4 Left Us + +- `PROOF_VERSION = 11`, fully parallel prover (rayon FRI queries / fold / Merkle) +- `transfer` prove: ~75 ms (4-core, release), proof: ~140 KB (DEEP-FRI, uncompressed) +- No STIR support; FRI query count = 20, PoW = 20 bits +- 24-byte BLAKE3 digests throughout the Merkle chain + +### What Phase 5 Achieved + +| Optimization | Result | +|---|---| +| OPT-S1: 16-byte digests | −24.1 % proof size (all circuits) | +| OPT-S2: compact profile (15q + 30 PoW) | −36 % vs v11 on disclosure (opt-in; ~13 s prove) | +| OPT-S3 + S4: arity-8 FRI + Merkle | +19–32 % — **REVERTED** | +| OPT-S5: field element compression | < 0.1 % — **DROPPED** | +| OPT-S6: STIR protocol (CRYPTO 2024) | −35.4 % total bytes across 4 circuits | +| OPT-S7: `batch_inverse` + instrumentation | prove 837 → 311 ms; profiling confirmed paths/shift-check < 0.3 ms | + +### Final Metrics (PROOF_VERSION 18, release, STIR default) + +| Circuit | Constraints | STIR prove | STIR bytes | DEEP-FRI prove | DEEP-FRI bytes | +|---|---:|---:|---:|---:|---:| +| disclosure | 1,169 | 51 ms | 35,387 B | 26 ms | 50,334 B | +| private_link | 1,035 | 35 ms | 35,459 B | 38 ms | 54,526 B | +| unshield | 11,782 | 129 ms | 49,076 B | 189 ms | 76,774 B | +| transfer | 41,468 | 312 ms | 63,161 B | 324 ms | 101,907 B | + +**Key changes from Phase 4 to Phase 5:** +- PROOF_VERSION 11 → 18 (self-describing header, STIR wire format) +- Digest width: 24 B → 16 B (BLAKE3 truncated) +- New protocol: `HYPER_SNARK_PROTOCOL=stir` as a runtime-selectable alternative to DEEP-FRI +- New scripts: `compare-with-groth16.sh`, `compare-with-plonk.sh`, `profile-prover.sh` (modes: stir/dfri/stir-dfri), `stir_report.py`, `compare_profile_logs.py` +- New source file: `src/stir.rs` (~600 LOC, `StirProver` + `StirVerifier`) + +--- + +## Changelog + +| Date | Event | +|---|---| +| 2026-03 | Phase 5 research and size breakdown (see `docs/research/deep-fri-stir-v2.md`) | +| 2026-03 | **OPT-S1 
implemented** — Digest 24B→16B, all tests green, PROOF_VERSION=12 | +| 2026-03 | **Phase A benchmark** — −24 % proof reduction; bench vs Groth16 and PLONK added | +| 2026-03 | **OPT-S2 implemented** — compact profile (15q + 30 PoW), PROOF_VERSION=13; default unchanged | +| 2026-03 | **Phase B benchmark** — compact: 43.7 KB disclosure / 84.5 KB transfer (−36/−39 % vs v11) | +| 2026-03 | **Self-describing proof header** — `num_queries` and `pow_bits` in `Proof`; verifier no longer reads env; PROOF_VERSION=14 | +| 2026-03 | **OPT-S3 implemented** — fold-by-8 FRI, `FriFoldProof` enum, `active_fri_arity()`, PROOF_VERSION=15; **result: +19.5 % size vs arity-4 default** — arity-8 not advantageous with blowup=4, Q=20 | +| 2026-03 | **OPT-S4 implemented and reverted** — arity-8 Merkle, PROOF_VERSION=16; result: +24–32 % size; reverted to arity-4 (PROOF_VERSION=17) to prepare clean base for STIR | +| 2026-03 | **OPT-S5 dropped** — field element compression < 0.1 % impact on proof size | +| 2026-03 | **OPT-S6 complete** — STIR integrated (`HYPER_SNARK_PROTOCOL=stir`), `PROOF_VERSION=18`, measured total proof byte reduction: **−35.4 %** vs DEEP-FRI across all 4 circuits | +| 2026-03 | **External STIR comparisons recorded** — STIR vs Groth16 and STIR vs PLONK (release, 4 circuits) with versioned JSON artifacts in `target/bench_results` | +| 2026-03 | **STIR prover optimization iteration (OPT-S7)** — `batch_inverse` in `lagrange_eval_at_coset` (4 inversions→1 batch Montgomery), fine-grained per-sub-stage instrumentation (`stir commit`, `stir_round_open_paths`, `stir_round_shiftcheck`, `stir_query_chain_paths`, `stir build_compact_pool`), `profile-prover.sh` updated with `stir`/`dfri`/`stir-dfri` modes; release result: **311.5 ms prove / 63 161 B** (−62.8 % prove vs prior baseline, ≤ 66 KB ✓). Tests 5/5 unit + 5/5 e2e green. 
| diff --git a/scripts/profile-prover.sh b/scripts/profile-prover.sh index 0f36af0..2cab098 100644 --- a/scripts/profile-prover.sh +++ b/scripts/profile-prover.sh @@ -22,6 +22,13 @@ run_mode() { if [[ "$mode" == "pq" ]]; then env_profile="HYPER_SNARK_PROFILE=pq" + elif [[ "$mode" == "stir" ]]; then + env_profile="HYPER_SNARK_PROTOCOL=stir" + suffix="stir_default" + elif [[ "$mode" == "dfri" ]]; then + # Explicit DEEP-FRI baseline (default protocol, named for compare_profile_logs.py) + env_profile="" + suffix="dfri_default" fi local r1cs="$CIRCUITS_DIR/build/${CIRCUIT}.r1cs" @@ -69,12 +76,25 @@ case "$MODE" in pq) run_mode pq ;; + stir) + run_mode stir + ;; + dfri) + run_mode dfri + ;; + stir-dfri) + run_mode stir + run_mode dfri + echo "" + echo "=== STIR vs DEEP-FRI profile comparison ===" + python3 scripts/compare_profile_logs.py + ;; both) run_mode default run_mode pq ;; *) - echo "usage: scripts/profile-prover.sh [circuit] [iterations] [default|pq|both]" >&2 + echo "usage: scripts/profile-prover.sh [circuit] [iterations] [default|pq|both|stir|dfri|stir-dfri]" >&2 exit 1 ;; esac diff --git a/src/commitment.rs b/src/commitment.rs index d7fbe17..8b63bc9 100644 --- a/src/commitment.rs +++ b/src/commitment.rs @@ -55,8 +55,8 @@ pub fn batch_leaf_col(domain_index: usize, evals: &[Vec]) -> Diges hasher.update(&col[domain_index].to_bytes()); } let out = hasher.finalize(); - let mut digest = [0u8; 24]; - digest.copy_from_slice(&out.as_bytes()[..24]); + let mut digest = [0u8; 16]; + digest.copy_from_slice(&out.as_bytes()[..16]); digest } @@ -71,8 +71,8 @@ pub fn batch_leaf(domain_index: usize, evals: &[FieldElement]) -> Digest { hasher.update(&e.to_bytes()); } let out = hasher.finalize(); - let mut digest = [0u8; 24]; - digest.copy_from_slice(&out.as_bytes()[..24]); + let mut digest = [0u8; 16]; + digest.copy_from_slice(&out.as_bytes()[..16]); digest } diff --git a/src/config.rs b/src/config.rs index d2b1b11..9c31ea3 100644 --- a/src/config.rs +++ b/src/config.rs @@ 
-50,10 +50,16 @@ pub const DEFAULT_DOMAIN_LOG: usize = 8; // 256 elements /// Number of query rounds in the FRI-style IOP (influences proof size and security). /// More queries → larger proofs but higher soundness. /// 40 rounds ≈ 80-bit security (with blowup=4). -/// 20 rounds ≈ 40-bit security — suitable for benchmarking / development. +/// 20 rounds + 20 PoW bits = 40 + 20 = 60-bit combined soundness (default profile). /// For production post-quantum security target 64+ rounds. pub const FRI_QUERY_ROUNDS: usize = 20; +/// Compact profile: fewer queries, compensated by higher PoW (Phase B). +/// 15 queries × log₂(4) + 30 PoW bits = 30 + 30 = 60-bit soundness. +/// Proofs are ~20 % smaller than default; proving takes ~10–15 s (PoW bound). +/// Enable with `HYPER_SNARK_PROFILE=compact`. +pub const FRI_QUERY_ROUNDS_COMPACT: usize = 15; + /// Higher-security query count for the `pq` profile. /// /// Enable with `HYPER_SNARK_PROFILE=pq` to raise soundness with modest overhead @@ -73,14 +79,28 @@ pub const FRI_QUERY_ROUNDS_PQ_FULL: usize = 40; pub const FRI_BLOWUP: usize = 4; /// Merkle tree leaf/node digest size in bytes. -/// 24 bytes (192 bits) provides 2^96 collision security, meeting TARGET_SOUNDNESS_BITS. -pub const MERKLE_LEAF_SIZE: usize = 24; +/// 16 bytes (128 bits) provides 2^64 collision resistance (birthday bound) and 2^128 +/// preimage resistance; collision resistance is therefore below TARGET_SOUNDNESS_BITS = 96. +/// Matches Plonky2 / Stone STARK production digest sizes. +pub const MERKLE_LEAF_SIZE: usize = 16; /// Number of bytes in a field element when serialized (big-endian, zero-padded). pub const FIELD_ELEMENT_BYTES: usize = 32; /// Version tag embedded in proof binary for forward-compatibility. -pub const PROOF_VERSION: u32 = 11; +/// v18: STIR protocol support added (FriFoldProof::Stir variant). +pub const PROOF_VERSION: u32 = 18; + +/// Number of STIR query chains. 
+/// +/// STIR targets soundness comparable to DEEP-FRI with roughly half the queries +/// because each fold round contributes an additional per-round subdomain check +/// (see `StirRound` in `stir.rs`). With blowup=4: +/// - DEEP-FRI Q=20 → 20 × 2 = 40 bits + 20 PoW = 60 bits +/// - STIR Q=10 + 6 per-round checks → (10 + 6) × 2 = 32 bits + 20 PoW = 52 bits (8 bits below DEEP-FRI) +/// +/// For a fuller analysis see `docs/research/deep-fri-stir-v2.md`. +pub const STIR_QUERY_ROUNDS: usize = 10; /// Proof-of-work difficulty for query-index grinding (number of leading zero bits) — /// **classical** (default) security level. @@ -94,6 +114,11 @@ pub const PROOF_VERSION: u32 = 11; /// 20 bits → ~1M hashes ≈ <50 ms on modern hardware. pub const POW_BITS: u32 = 20; +/// PoW bits for the `compact` profile (Phase B). +/// 30 bits → ~1B hashes; rayon parallel search takes ~10–15 s on most hardware. +/// Trade-off: ~20 % smaller proofs at the cost of slower proving. +pub const POW_BITS_COMPACT: u32 = 30; + /// PoW bits for the `pq-full` profile. Grover halves brute-force search space, /// so 40 bits restores 20-bit quantum security over the PoW contribution. /// 40 bits → ~1T hashes; at 3 GH/s this takes ~350 s serially, but the @@ -108,9 +133,10 @@ fn env_usize(name: &str) -> Option { /// /// Priority: /// 1. `HYPER_SNARK_FRI_QUERY_ROUNDS=` (must be ≥ 1) -/// 2. `HYPER_SNARK_PROFILE=pq-full` → `FRI_QUERY_ROUNDS_PQ_FULL` (40) -/// 3. `HYPER_SNARK_PROFILE=pq` → `FRI_QUERY_ROUNDS_PQ` (32) -/// 4. default `FRI_QUERY_ROUNDS` (20) +/// 2. `HYPER_SNARK_PROFILE=pq-full` → `FRI_QUERY_ROUNDS_PQ_FULL` (40) +/// 3. `HYPER_SNARK_PROFILE=pq` → `FRI_QUERY_ROUNDS_PQ` (32) +/// 4. `HYPER_SNARK_PROFILE=compact` → `FRI_QUERY_ROUNDS_COMPACT` (15) +/// 5. 
default `FRI_QUERY_ROUNDS` (20) pub fn active_fri_query_rounds() -> usize { if let Some(v) = env_usize("HYPER_SNARK_FRI_QUERY_ROUNDS").filter(|v| *v > 0) { return v; @@ -119,6 +145,7 @@ pub fn active_fri_query_rounds() -> usize { match std::env::var("HYPER_SNARK_PROFILE").as_deref() { Ok("pq-full") => FRI_QUERY_ROUNDS_PQ_FULL, Ok("pq") => FRI_QUERY_ROUNDS_PQ, + Ok("compact") => FRI_QUERY_ROUNDS_COMPACT, _ => FRI_QUERY_ROUNDS, } } @@ -127,18 +154,62 @@ pub fn active_fri_query_rounds() -> usize { /// /// Priority: /// 1. `HYPER_SNARK_POW_BITS=` (must be ≥ 1) -/// 2. `HYPER_SNARK_PROFILE=pq-full` → `POW_BITS_PQ_FULL` (40) -/// 3. default `POW_BITS` (20) +/// 2. `HYPER_SNARK_PROFILE=pq-full` → `POW_BITS_PQ_FULL` (40) +/// 3. `HYPER_SNARK_PROFILE=compact` → `POW_BITS_COMPACT` (30) +/// 4. default `POW_BITS` (20) pub fn active_pow_bits() -> u32 { if let Some(v) = env_usize("HYPER_SNARK_POW_BITS").filter(|v| *v > 0) { return v as u32; } - if std::env::var("HYPER_SNARK_PROFILE").as_deref() == Ok("pq-full") { - return POW_BITS_PQ_FULL; + match std::env::var("HYPER_SNARK_PROFILE").as_deref() { + Ok("pq-full") => POW_BITS_PQ_FULL, + Ok("compact") => POW_BITS_COMPACT, + _ => POW_BITS, + } +} + +/// Active FRI fold arity (4 or 8). +/// +/// Priority: +/// 1. `HYPER_SNARK_FRI_ARITY=` (must be 4 or 8) +/// 2. `HYPER_SNARK_PROFILE=arity8` → 8 +/// 3. default 4 +/// +/// Arity 8 needs fewer FRI rounds than arity 4 but opens 8 Merkle paths per round; +/// measured proofs were ~19.5 % larger (blowup=4, Q=20). Opt in via +/// `HYPER_SNARK_PROFILE=arity8` or `HYPER_SNARK_FRI_ARITY=8`. +pub fn active_fri_arity() -> usize { + if let Some(v) = env_usize("HYPER_SNARK_FRI_ARITY").filter(|v| *v == 4 || *v == 8) { + return v; + } + match std::env::var("HYPER_SNARK_PROFILE").as_deref() { + Ok("arity8") => 8, + _ => 4, + } +} + +/// Active number of STIR query rounds. +/// +/// Priority: +/// 1. `HYPER_SNARK_STIR_QUERY_ROUNDS=` (must be ≥ 1) +/// 2. 
default `STIR_QUERY_ROUNDS` (10) +pub fn active_stir_query_rounds() -> usize { + if let Some(v) = env_usize("HYPER_SNARK_STIR_QUERY_ROUNDS").filter(|v| *v > 0) { + return v; } + STIR_QUERY_ROUNDS +} - POW_BITS +/// Active proximity protocol. +/// +/// Returns `"stir"` when `HYPER_SNARK_PROTOCOL=stir` is set; otherwise `"deep-fri"`. +/// Used by the prover and verifier to dispatch to the correct inner LDT. +pub fn active_protocol() -> &'static str { + match std::env::var("HYPER_SNARK_PROTOCOL").as_deref() { + Ok("stir") => "stir", + _ => "deep-fri", + } } /// Compute the effective soundness in bits for the given configuration. @@ -176,14 +247,16 @@ mod tests { assert!(HASH_BINDING_BITS >= TARGET_SOUNDNESS_BITS); assert!(FIAT_SHAMIR_BITS >= TARGET_SOUNDNESS_BITS); assert!(FRI_QUERY_ROUNDS > 0); + assert!(FRI_QUERY_ROUNDS_COMPACT > 0); assert!(FRI_QUERY_ROUNDS_PQ >= FRI_QUERY_ROUNDS); + assert!(POW_BITS_COMPACT > POW_BITS); assert!(FRI_BLOWUP >= 2); } } #[test] fn unit_serialization_sizes_match_bn254() { - assert_eq!(MERKLE_LEAF_SIZE, 24); + assert_eq!(MERKLE_LEAF_SIZE, 16); assert_eq!(FIELD_ELEMENT_BYTES, 32); const { assert!(PROOF_VERSION >= 1); diff --git a/src/deep_fri.rs b/src/deep_fri.rs index ef68815..38548d4 100644 --- a/src/deep_fri.rs +++ b/src/deep_fri.rs @@ -31,15 +31,64 @@ //! the FRI layer-0 leaf at index q. 
use crate::commitment::batch_leaf; -use crate::config::active_fri_query_rounds; +use crate::config::{ + active_fri_arity, active_fri_query_rounds, active_protocol, active_stir_query_rounds, +}; use crate::domain::batch_deep_quotient_evals; use crate::errors::{HyperSnarkError, Result}; use crate::field::FieldElement; -use crate::fri::{FriProofBy4, FriProverBy4, FriVerifierBy4}; +use crate::fri::{ + FriProofBy4, FriProofBy8, FriProverBy4, FriProverBy8, FriVerifierBy4, FriVerifierBy8, +}; use crate::merkle::{CompactPath, Digest, MerkleProof, MerkleTree}; use crate::sponge::CryptoSponge; +use crate::stir::{StirInnerProof, StirProver, StirVerifier}; use serde::{Deserialize, Serialize}; +// --------------------------------------------------------------------------- +// FriFoldProof — dispatch enum for fold-by-4 vs fold-by-8 +// --------------------------------------------------------------------------- + +/// Wraps a fold-by-4, fold-by-8, or STIR inner proximity proof. +/// The variant encodes the protocol used; bincode serializes as a u32 variant index. 
+#[derive(Clone, Debug, Serialize, Deserialize)] +pub enum FriFoldProof { + By4(FriProofBy4), + By8(FriProofBy8), + Stir(StirInnerProof), +} + +impl FriFoldProof { + pub fn byte_size(&self) -> usize { + match self { + FriFoldProof::By4(p) => p.byte_size(), + FriFoldProof::By8(p) => p.byte_size(), + FriFoldProof::Stir(p) => p.byte_size(), + } + } + pub fn initial_commitment(&self) -> Digest { + match self { + FriFoldProof::By4(p) => p.initial_commitment, + FriFoldProof::By8(p) => p.initial_commitment, + FriFoldProof::Stir(p) => p.initial_commitment, + } + } + pub fn query_count(&self) -> usize { + match self { + FriFoldProof::By4(p) => p.queries.len(), + FriFoldProof::By8(p) => p.queries.len(), + FriFoldProof::Stir(p) => p.queries.len(), + } + } + pub fn query_index(&self, i: usize) -> usize { + match self { + FriFoldProof::By4(p) => p.queries[i].index, + FriFoldProof::By8(p) => p.queries[i].index, + FriFoldProof::Stir(p) => p.queries[i].index, + } + } +} + // --------------------------------------------------------------------------- // DeepFriQuery // --------------------------------------------------------------------------- @@ -79,8 +128,8 @@ pub struct DeepFriQuery { /// keeping a single source of truth for all Merkle nodes in the proof. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct DeepFriProof { - /// Fold-by-4 FRI proof on P_deep (subgroup domain). - pub fri_proof: FriProofBy4, + /// Inner FRI proof on P_deep — either fold-by-4 or fold-by-8. + pub fri_proof: FriFoldProof, /// Per-FRI-query batch openings from the original batch commitment tree. /// Sibling indices are 0-based within `shared_nodes[fri_pool_len..]` (the batch sub-pool). pub deep_queries: Vec, @@ -99,7 +148,7 @@ impl DeepFriProof { pub fn byte_size(&self) -> usize { let fri_part = self.fri_proof.byte_size(); // Unified pool: FRI nodes + batch nodes in one Vec. 
- let shared_pool = 8 + self.shared_nodes.len() * 24; + let shared_pool = 8 + self.shared_nodes.len() * 16; let query_part = self .deep_queries .iter() @@ -170,47 +219,59 @@ impl DeepFriProver { let p_deep_evals = batch_deep_quotient_evals(poly_evals, claimed_evals_at_r, r, &domain_points, &alpha)?; - // Step 4: FRI on P_deep over the subgroup domain (NOT the coset). - // - // Using the subgroup domain ensures FRI query index q maps to the same - // domain point ω^q in both this FRI proof and in batch_tree, enabling - // the per-query cross-check in the verifier. + // Step 4: Inner LDT on P_deep. // - // P_deep has degree ≤ poly_degree_bound − 1 (exact division drops degree by 1). + // Dispatch to STIR, fold-by-4, or fold-by-8 based on active protocol / arity. let deep_degree_bound = poly_degree_bound.saturating_sub(1).max(1); let mut deep_sponge = sponge.fork(b"deep-fri-inner"); - let (fri_proof, fri_nodes) = - FriProverBy4::prove_from_evals(p_deep_evals, deep_degree_bound, &mut deep_sponge)?; + let (fri_proof_enum, fri_nodes) = if active_protocol() == "stir" { + let (p, nodes) = + StirProver::prove_from_evals(p_deep_evals, deep_degree_bound, &mut deep_sponge)?; + (FriFoldProof::Stir(p), nodes) + } else if active_fri_arity() == 8 { + let (p, nodes) = + FriProverBy8::prove_from_evals(p_deep_evals, deep_degree_bound, &mut deep_sponge)?; + (FriFoldProof::By8(p), nodes) + } else { + let (p, nodes) = + FriProverBy4::prove_from_evals(p_deep_evals, deep_degree_bound, &mut deep_sponge)?; + (FriFoldProof::By4(p), nodes) + }; let fri_pool_len = fri_nodes.len(); - // Step 5: Build DeepFriQuery for each FRI query index. + // Step 5: Build DeepFriQuery for each inner LDT query index. // // Collect raw batch paths, deduplicate sibling data into a shared pool // for compact serialization (OPT-3: pool lives entirely in DeepFriProof). 
- let query_count = active_fri_query_rounds(); - if fri_proof.queries.len() != query_count { + let query_count = if matches!(fri_proof_enum, FriFoldProof::Stir(_)) { + active_stir_query_rounds() + } else { + active_fri_query_rounds() + }; + if fri_proof_enum.query_count() != query_count { return Err(HyperSnarkError::ProverError(format!( - "DEEP-FRI: expected {} FRI queries, got {}", + "DEEP-FRI: expected {} inner LDT queries, got {}", query_count, - fri_proof.queries.len() + fri_proof_enum.query_count() ))); } - let m = commit_domain_size / 4; + // m = batch tree leaf stride = commit_domain_size / arity. + // The FRI layer-0 eval at position 0 is P_deep(ω^j) where j = fri_idx % m. + // This must match the batch opening position, so m must equal domain / arity. + let m = commit_domain_size / active_fri_arity(); // Collect raw batch paths. let mut raw_batch_data: Vec<(usize, Vec, MerkleProof)> = Vec::with_capacity(query_count); - for fri_q in &fri_proof.queries { - let idx = fri_q.index % m; + for i in 0..query_count { + let fri_idx = fri_proof_enum.query_index(i); + let idx = fri_idx % m; let evals_at_idx: Vec = poly_evals.iter().map(|ev| ev[idx]).collect(); let path = batch_tree.authentication_path(idx)?; - raw_batch_data.push((fri_q.index, evals_at_idx, path)); + raw_batch_data.push((fri_idx, evals_at_idx, path)); } // Build unified pool: start with FRI layer nodes, then add batch commitment nodes. - // Batch path compact indices are 0-based within the batch sub-pool so that the - // verifier can pass &shared_nodes[fri_pool_len..] without any reindexing. - // IMPORTANT: build batch nodes in a SEPARATE buffer so indices start at 0. 
let mut batch_pool: Vec = Vec::new(); let mut batch_index: std::collections::HashMap = std::collections::HashMap::new(); @@ -256,7 +317,7 @@ impl DeepFriProver { ); Ok(DeepFriProof { - fri_proof, + fri_proof: fri_proof_enum, deep_queries, shared_nodes: unified_pool, fri_pool_len, @@ -287,6 +348,7 @@ impl DeepFriVerifier { /// /// # Returns /// `Ok(true)` if all checks pass, `Ok(false)` on any failure. + #[allow(clippy::too_many_arguments)] pub fn verify( proof: &DeepFriProof, commit_batch: &Digest, @@ -295,16 +357,17 @@ impl DeepFriVerifier { domain_generator: &FieldElement, domain_size: usize, sponge: &mut CryptoSponge, + num_queries: usize, ) -> Result { // Step 1: Re-derive α (identical derivation path to prover). let alpha = sponge.squeeze_field_element(b"deep-alpha"); // Structural checks. - if proof.deep_queries.len() != proof.fri_proof.queries.len() { + if proof.deep_queries.len() != proof.fri_proof.query_count() { log::debug!( "[DEEP-FRI] query count mismatch: deep={} fri={}", proof.deep_queries.len(), - proof.fri_proof.queries.len() + proof.fri_proof.query_count() ); return Ok(false); } @@ -316,27 +379,23 @@ impl DeepFriVerifier { // Step 2: Precompute subgroup domain points ω^i for i in 0..domain_size. let domain_points = build_subgroup_points(domain_generator, domain_size); - // For fold-by-4: batch opening is at j = query.index % (domain_size/4). - let batch_m = domain_size / 4; - - // Step 3: For each FRI query (index q): - // (a) Verify the batch Merkle path → confirms batch_evals are in batch_tree. - // (b) Recompute P_deep(ω^q) from batch_evals and claimed_evals_at_r. - // (c) Assert it equals fri_proof.queries[q].layer_proofs[0].eval_pos. - // - // Step (c) binds the FRI proof on P_deep to the original batch commitment: - // if the prover used incorrect evaluations to build P_deep, the eval at each - // query point will differ from what the batch opening reveals. 
- for (dq, fri_q) in proof - .deep_queries - .iter() - .zip(proof.fri_proof.queries.iter()) - { - if dq.index != fri_q.index { + // batch_m = batch tree leaf stride = domain_size / arity. + // The FRI layer-0 eval at position 0 is P_deep(ω^j) where j = query.index % batch_m. + // This must match the batch opening position, so batch_m = domain / arity. + let batch_m = match &proof.fri_proof { + FriFoldProof::By4(_) => domain_size / 4, + FriFoldProof::By8(_) => domain_size / 8, + FriFoldProof::Stir(_) => domain_size / 4, // STIR uses fold-by-4 internally + }; + + // Step 3: Per-query cross-check between batch commitment and FRI layer-0. + for (qi, dq) in proof.deep_queries.iter().enumerate() { + let fri_idx = proof.fri_proof.query_index(qi); + if dq.index != fri_idx { log::debug!( "[DEEP-FRI] index mismatch: deep={} fri={}", dq.index, - fri_q.index + fri_idx ); return Ok(false); } @@ -345,10 +404,9 @@ impl DeepFriVerifier { return Ok(false); } - let idx = dq.index % batch_m; // j = base position for fold-by-4 + let idx = dq.index % batch_m; - // (a) Verify batch Merkle path (compact representation with batch sub-pool). - // Batch path indices are 0-based within shared_nodes[fri_pool_len..]. + // (a) Verify batch Merkle path. let batch_pool = &proof.shared_nodes[proof.fri_pool_len..]; let leaf_digest = batch_leaf(idx, &dq.batch_evals); if !dq @@ -359,7 +417,7 @@ impl DeepFriVerifier { return Ok(false); } - // (b) Recompute P_deep(ω^idx) = Σᵢ αⁱ (fᵢ(ω^idx) − fᵢ(r)) / (ω^idx − r). + // (b) Recompute P_deep(ω^idx). let s = domain_points[idx]; let denom = s.sub(r); if denom == FieldElement::zero() { @@ -375,12 +433,29 @@ impl DeepFriVerifier { alpha_pow = alpha_pow.mul(&alpha); } - // (c) FRI layer-0 eval0 (at base position j) must equal P_deep(ω^j). - // When there are 0 fold rounds (very small domain), use the final_poly directly. 
- let fri_layer0_eval = if fri_q.layer_proofs.is_empty() { - proof.fri_proof.final_poly.evaluate(&s) - } else { - fri_q.layer_proofs[0].eval0 + // (c) Inner LDT layer-0 eval0 (at base position j) must equal P_deep(ω^j). + let fri_layer0_eval = match &proof.fri_proof { + FriFoldProof::By4(p) => { + if p.queries[qi].layer_proofs.is_empty() { + p.final_poly.evaluate(&s) + } else { + p.queries[qi].layer_proofs[0].eval0 + } + } + FriFoldProof::By8(p) => { + if p.queries[qi].layer_proofs.is_empty() { + p.final_poly.evaluate(&s) + } else { + p.queries[qi].layer_proofs[0].evals[0] + } + } + FriFoldProof::Stir(p) => { + if p.queries[qi].layer_proofs.is_empty() { + p.final_poly.evaluate(&s) + } else { + p.queries[qi].layer_proofs[0].eval0 + } + } }; if p_deep_s != fri_layer0_eval { log::debug!( @@ -394,12 +469,22 @@ impl DeepFriVerifier { log::debug!("[DEEP-FRI] query idx={} ✓", idx); } - // Step 4: Verify the fold-by-4 FRI proof on P_deep. - // FRI paths use the leading segment of the unified pool (indices < fri_pool_len). + // Step 4: Verify the inner LDT proof on P_deep. let fri_pool = &proof.shared_nodes[..proof.fri_pool_len]; let mut deep_sponge = sponge.fork(b"deep-fri-inner"); - if !FriVerifierBy4::verify_subgroup(&proof.fri_proof, fri_pool, &mut deep_sponge)? { - log::debug!("[DEEP-FRI] fold-by-4 FRI subgroup verify failed"); + let fri_ok = match &proof.fri_proof { + FriFoldProof::By4(p) => { + FriVerifierBy4::verify_subgroup(p, fri_pool, &mut deep_sponge, num_queries)? + } + FriFoldProof::By8(p) => { + FriVerifierBy8::verify_subgroup(p, fri_pool, &mut deep_sponge, num_queries)? + } + FriFoldProof::Stir(p) => { + StirVerifier::verify_subgroup(p, fri_pool, &mut deep_sponge, num_queries)? 
+ } + }; + if !fri_ok { + log::debug!("[DEEP-FRI] inner LDT subgroup verify failed"); return Ok(false); } @@ -490,6 +575,7 @@ mod tests { &generator, domain_size, &mut verify_sponge, + active_fri_query_rounds(), ) .unwrap(); @@ -528,6 +614,7 @@ mod tests { &generator, domain_size, &mut verify_sponge, + active_fri_query_rounds(), ) .unwrap(); @@ -568,6 +655,7 @@ mod tests { &generator, domain_size, &mut verify_sponge, + active_fri_query_rounds(), ) .unwrap(); diff --git a/src/domain.rs b/src/domain.rs index 96fb182..f55ef48 100644 --- a/src/domain.rs +++ b/src/domain.rs @@ -168,6 +168,28 @@ impl Domain { }) } + /// Compute the octeted domain D⁸ = { x⁸ : x ∈ D }. + /// + /// Reduces domain size by 8×; used by the fold-by-8 FRI variant. + /// New generator = ω⁸, new shift = shift⁸. + pub fn octeted(&self) -> Result { + if self.size < 8 { + return Err(HyperSnarkError::FftError( + "Cannot octet a domain of size < 8".to_string(), + )); + } + let new_size = self.size / 8; + let new_generator = self.generator.pow_u64(8); + let new_shift = self.shift.pow_u64(8); + let points = Self::compute_points(&new_generator, &new_shift, new_size); + Ok(Domain { + generator: new_generator, + shift: new_shift, + size: new_size, + points, + }) + } + /// Evaluate the vanishing polynomial Z_D(x) = ∏(x - d) for d ∈ D. /// /// For a subgroup domain: Z_D(x) = x^n - 1 (efficient formula). @@ -227,6 +249,27 @@ impl FriDomainSequence { Ok(FriDomainSequence { domains }) } + /// Build a subgroup domain sequence with **fold-by-8** steps. + /// + /// Each round reduces the domain by 8× (n → n/8). + /// Stops when size ≤ min_size OR size < 8 OR size/8 < min_size. + /// + /// For domains where log₂(n/min_size) is not a multiple of 3, the last + /// round may leave a final polynomial with degree up to `FRI_FINAL_DEGREE_BY8`. 
+ pub fn build_subgroup_by8(initial_size: usize, min_size: usize) -> Result { + let mut domains = Vec::new(); + let mut d = Domain::new_subgroup(initial_size)?; + loop { + let sz = d.size; + domains.push(d.clone()); + if sz <= min_size || sz < 8 || sz / 8 < min_size { + break; + } + d = d.octeted()?; + } + Ok(FriDomainSequence { domains }) + } + /// Number of FRI folding rounds. pub fn num_rounds(&self) -> usize { self.domains.len().saturating_sub(1) diff --git a/src/folding.rs b/src/folding.rs index 42b9473..d4fc6fa 100644 --- a/src/folding.rs +++ b/src/folding.rs @@ -1,8 +1,8 @@ -//! FRI Folding — fold-by-4 evaluation-space reduction. +//! FRI Folding — fold-by-4 and fold-by-8 evaluation-space reduction. //! -//! The only function here is `fri_fold_by4`, which reduces a domain of size n -//! to n/4 in one super-round using a DFT-4 decomposition. -//! Used exclusively by `FriProverBy4` in `fri.rs`. +//! `fri_fold_by4` reduces a domain of size n to n/4 per round (arity 4). +//! `fri_fold_by8` reduces a domain of size n to n/8 per round (arity 8). +//! Used exclusively by `FriProverBy4` / `FriProverBy8` in `fri.rs`. use crate::domain::batch_inverse; use crate::errors::Result; @@ -105,3 +105,127 @@ pub fn fri_fold_by4( Ok(folded) } + +/// Fold-by-8: reduce a domain of size n to n/8 in one super-round using a DFT-8 +/// decomposition. Requires 7 independent Fiat-Shamir challenges (β₀..β₆). +/// +/// The 8 evaluations at positions j, j+m, j+2m, ..., j+7m (m = n/8) decompose as: +/// f(x · ζ^k) for k = 0..7 where ζ = ω^{n/8} is the primitive 8th root of unity. +/// +/// Using the DFT-8 matrix the coefficients f₀..f₇ of x^k are computed via a +/// length-8 IDFT over the 8th roots of unity, and the folded value is: +/// fold(x⁸) = Σₖ βₖ · fₖ with β₀ = 1 (free). +/// +/// This is an order-8 DFT on (v0..v7) followed by a linear combination with +/// challenge vector (1, β₀, β₁, β₂, β₃, β₄, β₅, β₆). 
+pub fn fri_fold_by8( + evals: &[FieldElement], + domain: &crate::domain::Domain, + betas: &[FieldElement; 7], +) -> Result> { + use crate::errors::HyperSnarkError; + let n = evals.len(); + if n < 8 || !n.is_multiple_of(8) { + return Err(HyperSnarkError::InvalidInput( + "fri_fold_by8: domain size must be divisible by 8".into(), + )); + } + assert_eq!(n, domain.size, "evals length must equal domain size"); + + let m = n / 8; // eighth size + let eight = FieldElement::from_u64(8); + let eight_inv = eight.inverse()?; + + // ζ = ω^{n/8}: primitive 8th root of unity for this domain. + let zeta = domain.generator.pow_u64(m as u64); + // Precompute zeta powers: ζ^0..ζ^7 + let zeta_pows: [FieldElement; 8] = { + let mut z = [FieldElement::one(); 8]; + for k in 1..8usize { + z[k] = z[k - 1].mul(&zeta); + } + z + }; + + // Build batch of 7 denominators per base position j: [x, x², ..., x⁷]. + // The IDFT-8 already incorporates the 1/8 factor; the fold formula is + // fold = g₀ + Σₖ betas[k-1] * gₖ * x^{-k} + // where gₖ = (1/8) Σᵢ ζ^{-ik} vᵢ, so denominators are plain x^k. + let denoms: Vec = if m >= FOLD_PAR_THRESHOLD { + (0..m) + .into_par_iter() + .flat_map(|j| { + let x = domain.points[j]; + let mut xpow = x; + let mut row = Vec::with_capacity(7); + for _ in 0..7 { + row.push(xpow); + xpow = xpow.mul(&x); + } + row + }) + .collect() + } else { + let mut d = Vec::with_capacity(7 * m); + for j in 0..m { + let x = domain.points[j]; + let mut xpow = x; + for _ in 0..7 { + d.push(xpow); + xpow = xpow.mul(&x); + } + } + d + }; + let inv_denoms = batch_inverse(&denoms)?; + + let fold_one = |j: usize| -> FieldElement { + // v[k] = f(ζ^k · x_j) + let v: [FieldElement; 8] = { + let mut arr = [FieldElement::zero(); 8]; + for k in 0..8 { + arr[k] = evals[j + k * m]; + } + arr + }; + + // fₖ = (1/8) Σᵢ ζ^{-ik} vᵢ (inverse DFT-8, evaluated at k=0..7). + // ζ^{-1} = ζ^{n/8 · (n-1)} = zeta_pows[7] (since ζ^8 = 1). 
+ let zeta_inv = zeta_pows[7]; // ζ^{-1} = ζ^7 since ζ^8 = 1 + let zeta_inv_pows: [FieldElement; 8] = { + let mut zp = [FieldElement::one(); 8]; + for k in 1..8usize { + zp[k] = zp[k - 1].mul(&zeta_inv); + } + zp + }; + + let mut f = [FieldElement::zero(); 8]; + for k in 0..8 { + let mut sum = FieldElement::zero(); + for i in 0..8 { + // ζ^{-ik} = zeta_inv_pows[(i*k) % 8] + let zp = zeta_inv_pows[(i * k) % 8]; + sum = sum.add(&zp.mul(&v[i])); + } + f[k] = sum.mul(&eight_inv); + } + + // fold(x⁸) = f₀ + Σₖ₌₁..7 βₖ₋₁ · fₖ / xᵏ + // = f₀ + (β₀·f₁)/x + (β₁·f₂)/x² + ... + (β₆·f₇)/x⁷ + let base = 7 * j; + let mut result = f[0]; + for k in 1..8usize { + result = result.add(&betas[k - 1].mul(&f[k]).mul(&inv_denoms[base + k - 1])); + } + result + }; + + let folded: Vec = if m >= FOLD_PAR_THRESHOLD { + (0..m).into_par_iter().map(fold_one).collect() + } else { + (0..m).map(fold_one).collect() + }; + + Ok(folded) +} diff --git a/src/fri.rs b/src/fri.rs index 8971c3b..870689c 100644 --- a/src/fri.rs +++ b/src/fri.rs @@ -79,6 +79,15 @@ pub struct FriProver; impl FriProver { /// Recover polynomial from its evaluations using IFFT. + /// + /// Exposed as `pub` so `stir.rs` can reuse it without duplicating the IFFT logic. 
+ pub fn recover_polynomial_from_evals_pub( + evals: &[FieldElement], + domain: &Domain, + ) -> Result { + Self::recover_polynomial_from_evals(evals, domain) + } + fn recover_polynomial_from_evals( evals: &[FieldElement], domain: &Domain, @@ -456,6 +465,7 @@ impl FriVerifierBy4 { proof: &FriProofBy4, fri_shared_nodes: &[Digest], sponge: &mut CryptoSponge, + num_queries: usize, ) -> Result { let num_rounds = proof.layers.len(); @@ -504,7 +514,7 @@ impl FriVerifierBy4 { sponge.absorb_field_elements(b"fri-final-poly", &proof.final_poly.coeffs); // --- Step 3: Query indices --- - let query_rounds = active_fri_query_rounds(); + let query_rounds = num_queries; let expected_indices = sponge.squeeze_indices(b"fri-queries", query_rounds, proof.initial_domain_size); if proof.queries.len() != query_rounds { @@ -692,6 +702,461 @@ impl FriVerifierBy4 { } } +// =========================================================================== +// FOLD-BY-8 FRI (Phase C) +// +// Each round reduces the domain by 8× using a DFT-8 split with 7 challenges. +// Roughly halves the number of rounds compared to fold-by-4, cutting the +// per-query Merkle path overhead proportionally. +// =========================================================================== + +/// Maximum final polynomial degree allowed in fold-by-8 FRI. +/// Fold-by-8 rounds reduce by 3 bits each; for circuits where log₂(n/min) +/// is not a multiple of 3, the leftover can be up to degree 7. +pub const FRI_FINAL_DEGREE_BY8: usize = 7; + +// --------------------------------------------------------------------------- +// Fold-by-8 structs +// --------------------------------------------------------------------------- + +/// One fold-by-8 super-round query: opens 8 evaluations at positions +/// j, j+m, ..., j+7m where m = current_domain_size / 8. 
+#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct FriLayerQueryBy8 { + pub evals: [FieldElement; 8], + pub paths: [CompactPath; 8], +} + +/// Internal raw layer query (before deduplication). +struct RawFriLayerQuery8 { + evals: [FieldElement; 8], + paths: [MerkleProof; 8], +} + +/// Internal raw query. +struct RawFriQuery8 { + index: usize, + layers: Vec, +} + +/// FRI query for a fold-by-8 proof. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct FriQueryBy8 { + pub index: usize, + pub layer_proofs: Vec, +} + +/// Complete fold-by-8 FRI proof. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct FriProofBy8 { + pub initial_commitment: Digest, + pub layers: Vec, + pub final_poly: Polynomial, + pub queries: Vec, + pub degree_bound: usize, + pub initial_domain_size: usize, +} + +impl FriProofBy8 { + pub fn byte_size(&self) -> usize { + let query_data: usize = self + .queries + .iter() + .map(|q| { + 8 + q + .layer_proofs + .iter() + .map(|lp| { + 8 * 32 // 8 field elements + + lp.paths.iter().map(|p| p.wire_size()).sum::() + }) + .sum::() + }) + .sum(); + 32 // initial_commitment + + 8 + self.layers.len() * (32 + 8) + + 8 + self.final_poly.len() * 32 + + 8 + query_data + + 8 + 8 + } +} + +// --------------------------------------------------------------------------- +// FriProverBy8 +// --------------------------------------------------------------------------- + +pub struct FriProverBy8; + +impl FriProverBy8 { + /// Prove from pre-computed evaluations on a subgroup domain, using fold-by-8. + /// Returns `(proof, fri_pool)`. 
+ pub fn prove_from_evals( + initial_evals: Vec, + degree_bound: usize, + sponge: &mut CryptoSponge, + ) -> Result<(FriProofBy8, Vec)> { + let n = initial_evals.len(); + if n < 8 || !n.is_power_of_two() { + return Err(crate::errors::HyperSnarkError::InvalidInput( + "prove_from_evals (by8): initial_evals must be a power-of-two ≥ 8".into(), + )); + } + + log::debug!( + "[FRI/by8] proving degree≤{} from {} evals (subgroup, fold-by-8)", + degree_bound, + n + ); + + let domain_seq = + crate::domain::FriDomainSequence::build_subgroup_by8(n, FRI_FINAL_DEGREE_BY8 + 1)?; + let num_rounds = domain_seq.num_rounds(); + log::debug!("[FRI/by8] {} fold-by-8 super-rounds", num_rounds); + + // --- Commit phase --- + let initial_tree = MerkleTree::build(&initial_evals); + let initial_commitment = initial_tree.root(); + sponge.absorb_digest(b"fri-commit-0", &initial_commitment); + + let mut all_trees: Vec = vec![initial_tree]; + let mut all_evals: Vec> = vec![initial_evals.clone()]; + let mut current_evals = initial_evals; + let mut folding_challenges: Vec<[FieldElement; 7]> = Vec::with_capacity(num_rounds); + let mut layers = Vec::with_capacity(num_rounds); + + for round in 0..num_rounds { + let betas: [FieldElement; 7] = [ + sponge.squeeze_field_element(b"fri-fold-b0"), + sponge.squeeze_field_element(b"fri-fold-b1"), + sponge.squeeze_field_element(b"fri-fold-b2"), + sponge.squeeze_field_element(b"fri-fold-b3"), + sponge.squeeze_field_element(b"fri-fold-b4"), + sponge.squeeze_field_element(b"fri-fold-b5"), + sponge.squeeze_field_element(b"fri-fold-b6"), + ]; + folding_challenges.push(betas); + + let domain = &domain_seq.domains[round]; + let folded_evals = crate::folding::fri_fold_by8(¤t_evals, domain, &betas)?; + + let tree = MerkleTree::build(&folded_evals); + let root = tree.root(); + layers.push(FriLayer { + merkle_root: root, + domain_size: folded_evals.len(), + }); + sponge.absorb_digest(format!("fri-commit-{}", round + 1).as_bytes(), &root); + + all_trees.push(tree); + 
all_evals.push(folded_evals.clone()); + current_evals = folded_evals; + } + + // Final polynomial + let final_domain = domain_seq.final_domain(); + let final_poly = FriProver::recover_polynomial_from_evals(¤t_evals, final_domain)?; + log::debug!( + "[FRI/by8] final poly degree: {}", + final_poly.degree().unwrap_or(0) + ); + sponge.absorb_field_elements(b"fri-final-poly", &final_poly.coeffs); + + // --- Query phase --- + let query_rounds = active_fri_query_rounds(); + let query_indices = sponge.squeeze_indices(b"fri-queries", query_rounds, n); + + let raw_queries_res: Vec> = query_indices + .par_iter() + .map(|&idx| Self::build_raw_query(idx, &all_evals, &all_trees, &domain_seq)) + .collect(); + let mut raw_queries: Vec = Vec::with_capacity(query_rounds); + for r in raw_queries_res { + raw_queries.push(r?); + } + + let paths_per_query = num_rounds * 8; + let flat_paths: Vec = raw_queries + .iter() + .flat_map(|rq| rq.layers.iter().flat_map(|rl| rl.paths.clone())) + .collect(); + + let (fri_pool, flat_sib_indices) = build_compact_pool(&flat_paths); + log::debug!("[FRI/by8] fri_pool: {} unique digests", fri_pool.len()); + + let queries: Vec = raw_queries + .into_iter() + .enumerate() + .map(|(qi, rq)| { + let base = qi * paths_per_query; + let layer_proofs: Vec = rq + .layers + .into_iter() + .enumerate() + .map(|(ri, rl)| { + let pb = base + ri * 8; + let paths: [CompactPath; 8] = std::array::from_fn(|k| { + to_compact_path(&flat_paths[pb + k], flat_sib_indices[pb + k].clone()) + }); + FriLayerQueryBy8 { + evals: rl.evals, + paths, + } + }) + .collect(); + FriQueryBy8 { + index: rq.index, + layer_proofs, + } + }) + .collect(); + + Ok(( + FriProofBy8 { + initial_commitment, + layers, + final_poly, + queries, + degree_bound, + initial_domain_size: n, + }, + fri_pool, + )) + } + + fn build_raw_query( + initial_idx: usize, + all_evals: &[Vec], + all_trees: &[MerkleTree], + domain_seq: &crate::domain::FriDomainSequence, + ) -> Result { + let num_rounds = 
domain_seq.num_rounds(); + let mut layers = Vec::with_capacity(num_rounds); + let mut idx = initial_idx; + + for round in 0..num_rounds { + let sz = domain_seq.domains[round].size; + let m = sz / 8; + let j = idx % m; + + let evals_slice = &all_evals[round]; + let tree = &all_trees[round]; + + let evals: [FieldElement; 8] = std::array::from_fn(|k| evals_slice[j + k * m]); + // Open the 8 coset positions j, j+m, ..., j+7m. Path errors are propagated with `?` + // instead of panicking via `unwrap()` inside this Result-returning function. + let paths: [MerkleProof; 8] = [ + tree.authentication_path(j)?, + tree.authentication_path(j + m)?, + tree.authentication_path(j + 2 * m)?, + tree.authentication_path(j + 3 * m)?, + tree.authentication_path(j + 4 * m)?, + tree.authentication_path(j + 5 * m)?, + tree.authentication_path(j + 6 * m)?, + tree.authentication_path(j + 7 * m)?, + ]; + + layers.push(RawFriLayerQuery8 { evals, paths }); + // The folded layer is indexed by the coset base, so the chain continues at j. + idx = j; + } + + Ok(RawFriQuery8 { + index: initial_idx, + layers, + }) + } +} + +// --------------------------------------------------------------------------- +// FriVerifierBy8 +// --------------------------------------------------------------------------- + +/// Stateless verifier for fold-by-8 FRI proofs (`FriProofBy8`). +pub struct FriVerifierBy8; + +impl FriVerifierBy8 { + /// Verify a fold-by-8 FRI proof over a subgroup domain. + /// + /// Rebuilds the domain sequence from `proof.initial_domain_size`, re-derives the + /// folding challenges and query indices from `sponge`, checks the final polynomial + /// degree, and checks every query chain against `fri_shared_nodes`. + /// `num_queries` is the number of query chains the proof must contain. + /// + /// Returns `Ok(false)` when the proof is rejected; `Err` is returned only when + /// domain construction or a query check itself fails with an error. + pub fn verify_subgroup( + proof: &FriProofBy8, + fri_shared_nodes: &[Digest], + sponge: &mut CryptoSponge, + num_queries: usize, + ) -> Result<bool> { + let num_rounds = proof.layers.len(); + + let domain_seq = crate::domain::FriDomainSequence::build_subgroup_by8( + proof.initial_domain_size, + FRI_FINAL_DEGREE_BY8 + 1, + )?; + if domain_seq.num_rounds() != num_rounds { + log::debug!( + "[FRI/by8] domain rounds mismatch: expected {}, proof has {}", + domain_seq.num_rounds(), + num_rounds + ); + return Ok(false); + } + + // --- Step 1: Re-derive Fiat-Shamir challenges --- + sponge.absorb_digest(b"fri-commit-0", &proof.initial_commitment); + + let mut challenges: Vec<[FieldElement; 7]> = Vec::with_capacity(num_rounds); + for (round, layer) in proof.layers.iter().enumerate() { + let betas: [FieldElement; 7] = [ + sponge.squeeze_field_element(b"fri-fold-b0"), + sponge.squeeze_field_element(b"fri-fold-b1"), + sponge.squeeze_field_element(b"fri-fold-b2"), + sponge.squeeze_field_element(b"fri-fold-b3"), + sponge.squeeze_field_element(b"fri-fold-b4"), + sponge.squeeze_field_element(b"fri-fold-b5"), + 
sponge.squeeze_field_element(b"fri-fold-b6"), + ]; + challenges.push(betas); + sponge.absorb_digest( + format!("fri-commit-{}", round + 1).as_bytes(), + &layer.merkle_root, + ); + } + + // --- Step 2: Verify final polynomial degree --- + let expected_final_degree = proof.degree_bound >> (3 * num_rounds); // each round = 8× = 3 bits + let actual_final_degree = proof.final_poly.degree().unwrap_or(0); + if actual_final_degree > expected_final_degree.max(FRI_FINAL_DEGREE_BY8) { + log::debug!( + "[FRI/by8] final degree {} exceeds bound {}", + actual_final_degree, + expected_final_degree + ); + return Ok(false); + } + sponge.absorb_field_elements(b"fri-final-poly", &proof.final_poly.coeffs); + + // --- Step 3: Query indices --- + let query_rounds = num_queries; + let expected_indices = + sponge.squeeze_indices(b"fri-queries", query_rounds, proof.initial_domain_size); + if proof.queries.len() != query_rounds { + log::debug!("[FRI/by8] wrong query count"); + return Ok(false); + } + + // --- Step 4: Verify each query --- + let all_roots: Vec = std::iter::once(proof.initial_commitment) + .chain(proof.layers.iter().map(|l| l.merkle_root)) + .collect(); + + for (q_idx, query) in proof.queries.iter().enumerate() { + if query.index != expected_indices[q_idx] { + log::debug!("[FRI/by8] query {} index mismatch", q_idx); + return Ok(false); + } + if !Self::verify_query( + query, + proof, + fri_shared_nodes, + &domain_seq, + &challenges, + &all_roots, + )? 
{ + log::debug!("[FRI/by8] query {} failed", q_idx); + return Ok(false); + } + } + + log::debug!("[FRI/by8] all {} queries passed ✓", query_rounds); + Ok(true) + } + + /// Check a single fold-by-8 query chain. + /// + /// For every round the 8 opened evaluations are authenticated against that round's + /// root in `all_roots`, the fold value is recomputed from `challenges[round]`, and the + /// result is compared with the next layer's opening (or with `proof.final_poly` in the + /// last round). Returns `Ok(false)` on any mismatch. + fn verify_query( + query: &FriQueryBy8, + proof: &FriProofBy8, + fri_shared_nodes: &[Digest], + domain_seq: &crate::domain::FriDomainSequence, + challenges: &[[FieldElement; 7]], + all_roots: &[Digest], + ) -> Result<bool> { + // A chain must carry exactly one layer proof per fold round. Without this check an + // empty `layer_proofs` would skip every test below and be accepted, and an over-long + // one would index past `challenges`. + if query.layer_proofs.len() != challenges.len() { + log::debug!( + "[FRI/by8] query has {} layer proofs, expected {}", + query.layer_proofs.len(), + challenges.len() + ); + return Ok(false); + } + + let eight = FieldElement::from_u64(8); + let eight_inv = eight.inverse()?; + let mut idx = query.index; + + for (round, lq) in query.layer_proofs.iter().enumerate() { + let domain = &domain_seq.domains[round]; + let sz = domain.size; + let m = sz / 8; + let j = idx % m; + + let root = all_roots[round]; + + // Verify all 8 Merkle paths + for k in 0..8usize { + let leaf_idx = j + k * m; + if lq.paths[k].leaf_index != leaf_idx as u32 + || !lq.paths[k].verify_field_element(&lq.evals[k], &root, fri_shared_nodes) + { + log::debug!("[FRI/by8] round {} path{} failed", round, k); + return Ok(false); + } + } + + // Recompute expected fold value using DFT-8. 
+ let x = domain.points[j]; + let zeta = domain.generator.pow_u64(m as u64); // ω^{n/8} + + // ζ^{-1} = ζ^7 since ζ^8 = 1 + let zeta_inv = zeta.pow_u64(7); + let zeta_inv_pows: [FieldElement; 8] = { + let mut zp = [FieldElement::one(); 8]; + for k in 1..8usize { + zp[k] = zp[k - 1].mul(&zeta_inv); + } + zp + }; + + // Compute fₖ = (1/8) Σᵢ ζ^{-ik} vᵢ + let mut f = [FieldElement::zero(); 8]; + for k in 0..8 { + let mut sum = FieldElement::zero(); + for i in 0..8 { + let zp = zeta_inv_pows[(i * k) % 8]; + sum = sum.add(&zp.mul(&lq.evals[i])); + } + f[k] = sum.mul(&eight_inv); + } + + // fold(x⁸) = f₀ + Σₖ₌₁..7 βₖ₋₁ · fₖ / xᵏ + let betas = &challenges[round]; + let x_inv = x.inverse()?; + let mut x_inv_pow = x_inv; + let mut expected_fold = f[0]; + for k in 1..8usize { + expected_fold = expected_fold.add(&betas[k - 1].mul(&f[k]).mul(&x_inv_pow)); + x_inv_pow = x_inv_pow.mul(&x_inv); + } + + let next_j = j; + + if round < query.layer_proofs.len() - 1 { + let next_lq = &query.layer_proofs[round + 1]; + let next_m = domain_seq.domains[round + 1].size / 8; + let next_j_base = next_j % next_m; + let slot = next_j / next_m; // 0..7 + + if next_lq.paths[0].leaf_index != next_j_base as u32 { + log::debug!("[FRI/by8] round {} next-layer index mismatch", round); + return Ok(false); + } + + let next_eval = next_lq.evals[slot]; + if next_eval != expected_fold { + log::debug!("[FRI/by8] round {} fold mismatch (slot {})", round, slot); + return Ok(false); + } + } else { + // Last round: check against final polynomial. 
+ let final_domain = domain_seq.final_domain(); + let final_idx = next_j % final_domain.size; + let final_x = final_domain.element(final_idx); + let final_eval = proof.final_poly.evaluate(final_x); + if final_eval != expected_fold { + log::debug!("[FRI/by8] final poly mismatch at round {}", round); + return Ok(false); + } + } + + idx = j; + } + + Ok(true) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/lib.rs b/src/lib.rs index c330e2b..6e8ef67 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -36,6 +36,7 @@ pub mod proof; pub mod prover; pub mod r1cs; pub mod sponge; +pub mod stir; pub mod transcript; pub mod utils; pub mod verifier; diff --git a/src/merkle.rs b/src/merkle.rs index 8d1e7d4..b1f3a72 100644 --- a/src/merkle.rs +++ b/src/merkle.rs @@ -27,19 +27,21 @@ use blake3::Hasher as Blake3Hasher; use rayon::prelude::*; use serde::{Deserialize, Serialize}; -/// A 24-byte (192-bit) BLAKE3 hash digest. +/// A 16-byte (128-bit) BLAKE3 hash digest. /// -/// 192-bit BLAKE3 truncation provides 2^96 collision security, meeting -/// the TARGET_SOUNDNESS_BITS = 96 threshold for Merkle commitment binding. -/// Reduces per-sibling cost from 32 B to 24 B — a 25% saving across all +/// 128-bit BLAKE3 truncation provides ~2^64 collision resistance (birthday bound). +/// NOTE(review): 64 bits is below the TARGET_SOUNDNESS_BITS = 96 threshold, so +/// Merkle commitment binding is capped by collisions at ~64 bits — confirm this is +/// acceptable; the claimed Plonky2 digest-size precedent also needs verifying. +/// Reduces per-sibling cost from 24 B to 16 B — a 33% saving across all /// shared_node pools, which dominate proof size. -pub type Digest = [u8; 24]; +pub type Digest = [u8; 16]; -/// Hash a single byte slice, returning a 24-byte (192-bit) truncated digest. +/// Hash a single byte slice, returning a 16-byte (128-bit) truncated digest. 
pub fn hash_bytes(data: &[u8]) -> Digest { let h = blake3::hash(data); - let mut digest = [0u8; 24]; - digest.copy_from_slice(&h.as_bytes()[..24]); + let mut digest = [0u8; 16]; + digest.copy_from_slice(&h.as_bytes()[..16]); digest } @@ -51,8 +53,8 @@ pub fn hash_quad(c0: &Digest, c1: &Digest, c2: &Digest, c3: &Digest) -> Digest { h.update(c2); h.update(c3); let out = h.finalize(); - let mut digest = [0u8; 24]; - digest.copy_from_slice(&out.as_bytes()[..24]); + let mut digest = [0u8; 16]; + digest.copy_from_slice(&out.as_bytes()[..16]); digest } @@ -85,7 +87,7 @@ pub fn hash_field_element(fe: &FieldElement) -> Digest { /// - `levels.last()` = `[root]`. /// /// Depth = log₄(num_leaves). Each authentication path contains 3 sibling -/// digests per level (the other 3 of each quad) and one direction byte 0‒3. +/// digests per level (the other 3 of each quad) and one direction byte 0–3. /// With arity-4 the path byte size is `3/2 × log₂(N) × 32`, and because only /// `log₄(N)` hashing steps are needed for verification, proof checking is faster. #[derive(Clone, Debug, Serialize, Deserialize)] @@ -168,7 +170,7 @@ impl MerkleTree { /// Generate an authentication path (proof) for leaf `index`. /// /// For each level from leaves to root the path contains the **3 sibling - /// digests** (the other members of the quad) and a **direction byte** (0‒3) + /// digests** (the other members of the quad) and a **direction byte** (0–3) /// indicating the current node's position within its quad. pub fn authentication_path(&self, index: usize) -> Result { if index >= self.num_values { @@ -212,7 +214,7 @@ impl MerkleTree { /// An authentication path proving leaf i has a specific value under a root. /// /// With an arity-4 tree each level contributes **3 sibling digests** and a -/// **direction byte 0‒3** (the current node's position within its quad). +/// **direction byte 0–3** (the current node's position within its quad). /// So `siblings.len() == 3 * directions.len()`. 
#[derive(Clone, Debug, Serialize, Deserialize)] pub struct MerkleProof { @@ -220,7 +222,7 @@ pub struct MerkleProof { pub index: usize, /// Sibling digests, 3 per level, from leaf level up to just below root. pub siblings: Vec, - /// Direction per level (0‒3): position of the current node within its quad. + /// Direction per level (0–3): position of the current node within its quad. pub directions: Vec, /// The claimed root. pub root: Digest, @@ -246,11 +248,9 @@ impl MerkleProof { let s2 = self.siblings[3 * level + 2]; // Reconstruct the quad [c0, c1, c2, c3] where c[dir] = current. // s0, s1, s2 are the other 3 members in ascending-position order. - // Fill the quad [c0, c1, c2, c3] where c[dir] = current and - // the remaining 3 positions receive s0, s1, s2 in ascending order. let others: [Digest; 3] = [s0, s1, s2]; let mut other_idx = 0usize; - let mut quad = [[0u8; 24]; 4]; + let mut quad = [[0u8; 16]; 4]; for (i, slot) in quad.iter_mut().enumerate() { if i == dir { *slot = current; @@ -271,7 +271,7 @@ impl MerkleProof { /// Serialized size of this proof in bytes. pub fn byte_size(&self) -> usize { - 8 + self.siblings.len() * 24 + self.directions.len() + 24 + 8 + self.siblings.len() * 16 + self.directions.len() + 16 } } @@ -326,7 +326,7 @@ impl CompactPath { let s1 = *shared_nodes.get(self.sibling_indices[3 * level + 1] as usize)?; let s2 = *shared_nodes.get(self.sibling_indices[3 * level + 2] as usize)?; let others = [s0, s1, s2]; - let mut quad = [[0u8; 24]; 4]; + let mut quad = [[0u8; 16]; 4]; let mut sib = 0usize; for (i, slot) in quad.iter_mut().enumerate() { if i == dir { diff --git a/src/proof.rs b/src/proof.rs index 6747eb8..2b777ca 100644 --- a/src/proof.rs +++ b/src/proof.rs @@ -26,7 +26,7 @@ //! Version tag allows forward-compatible parsing. 
use crate::commitment::BatchEvaluationProof; -use crate::config::{active_fri_query_rounds, PROOF_VERSION}; +use crate::config::PROOF_VERSION; use crate::deep_fri::DeepFriProof; use crate::errors::{HyperSnarkError, Result}; use crate::field::FieldElement; @@ -41,11 +41,24 @@ use serde::{Deserialize, Serialize}; /// /// Version 2+: uses a single batch Merkle tree for A, B, C, H (4× fewer /// Merkle operations at commit/verify time). +/// +/// v14+: proof is **self-describing** — `num_queries` and `pow_bits` are +/// embedded so the verifier never reads environment variables. A verifier +/// that runs with a different `HYPER_SNARK_PROFILE` can still verify proofs +/// generated by a different profile. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct Proof { /// Protocol version tag. pub version: u32, + /// Number of FRI query rounds used when generating this proof. + /// Embedded by the prover so the verifier is profile-independent. + pub num_queries: usize, + + /// PoW difficulty used when generating this proof (leading zero bits). + /// Embedded by the prover so the verifier never calls `active_pow_bits()`. + pub pow_bits: u32, + // ------------------------------------------------------------------ // Batch commitment (single Merkle tree for A, B, C, H) // ------------------------------------------------------------------ @@ -134,7 +147,7 @@ impl Proof { commitment_bytes, evaluation_proof_bytes: eval_bytes, ldt_proof_bytes: ldt_bytes, - query_rounds: active_fri_query_rounds(), + query_rounds: self.num_queries, num_public_inputs: self.public_inputs.len(), } } diff --git a/src/prover.rs b/src/prover.rs index a37191e..e8ad284 100644 --- a/src/prover.rs +++ b/src/prover.rs @@ -33,7 +33,7 @@ //! The dominant cost is FFT-based polynomial operations. 
use crate::commitment::BatchPolynomialCommitment; -use crate::config::PROOF_VERSION; +use crate::config::{active_fri_query_rounds, active_pow_bits, PROOF_VERSION}; use crate::deep_fri::DeepFriProver; use crate::errors::{HyperSnarkError, Result}; use crate::fft::{fft_owned, ifft_owned, next_power_of_two}; @@ -222,9 +222,7 @@ impl Prover { // OPT-7: Warn if effective soundness is below TARGET_SOUNDNESS_BITS. { - use crate::config::{ - active_fri_query_rounds, effective_soundness_bits, TARGET_SOUNDNESS_BITS, - }; + use crate::config::{effective_soundness_bits, TARGET_SOUNDNESS_BITS}; let (fri_bits, pow_bits, total) = effective_soundness_bits(blowup, active_fri_query_rounds()); if total < TARGET_SOUNDNESS_BITS { @@ -428,6 +426,12 @@ impl Prover { let domain_len = batch_commit.domain_size; let proof = Proof { version: PROOF_VERSION, + num_queries: if crate::config::active_protocol() == "stir" { + crate::config::active_stir_query_rounds() + } else { + active_fri_query_rounds() + }, + pow_bits: active_pow_bits(), commit_batch: commit_batch_root, domain_size: domain_len, constraint_domain_size: domain_size, diff --git a/src/sponge.rs b/src/sponge.rs index 7d77344..0c5996a 100644 --- a/src/sponge.rs +++ b/src/sponge.rs @@ -340,7 +340,7 @@ mod tests { #[test] fn unit_absorb_digest_deterministic() { - let digest = [0xabu8; 24]; + let digest = [0xabu8; 16]; let mut s1 = CryptoSponge::new(b"test"); s1.absorb_digest(b"root", &digest); let c1 = s1.squeeze_bytes(b"out"); @@ -354,8 +354,8 @@ mod tests { #[test] fn unit_absorb_digests_multiple() { - let d1 = [0x01u8; 24]; - let d2 = [0x02u8; 24]; + let d1 = [0x01u8; 16]; + let d2 = [0x02u8; 16]; let mut s = CryptoSponge::new(b"test"); s.absorb_digests(b"roots", &[d1, d2]); let c1 = s.squeeze_bytes(b"out"); diff --git a/src/stir.rs b/src/stir.rs new file mode 100644 index 0000000..9561574 --- /dev/null +++ b/src/stir.rs @@ -0,0 +1,1021 @@ +//! STIR: Shift-To-Improve-Rate proximity proof protocol. +//! +//! ## Overview +//! 
STIR (Arnon, Chiesa, Fenzi, Yogev — CRYPTO 2024) achieves the same soundness +//! as DEEP-FRI but with roughly half the number of query chains. The key idea: +//! +//! Instead of Q independent "query chain" traversals (each opening 4 Merkle paths +//! per round), this implementation adds one **subdomain check** per round that opens a random +//! 4-coset in that round's tree. This is intended to provide additional per-round soundness bits, +//! allowing Q to be halved while maintaining the combined soundness target. +//! +//! ## Protocol Structure +//! +//! ### Commit phase (same as fold-by-4 FRI) +//! 1. Commit f₀ on D₀ (size n) → R₀. Absorb R₀. +//! 2. For each round r: +//! - Squeeze β₀, β₁, β₂ → fold f_r → f_{r+1} on D_{r+1} (size n/4). +//! - Commit f_{r+1} → R_{r+1}. Absorb R_{r+1}. +//! 3. Reveal final polynomial. Absorb coefficients. +//! 4. **[STIR]** After step 3, for each round r: squeeze check_base_r (→ coset base in D_r), +//! open the 4-coset at check_base_r in tree_r, squeeze gamma_r, compute shiftcheck +//! (Lagrange eval at gamma_r), and absorb shiftcheck. +//! +//! ### Query phase (fewer queries than FRI) +//! - Squeeze Q_stir query indices (Q_stir = `active_stir_query_rounds()` ≈ Q_fri/2). +//! - For each query: standard fold-by-4 chain traversal through all rounds. +//! +//! ## Soundness +//! Heuristic accounting: each per-round subdomain check is credited with log₂(blowup) = 2 bits. +//! With R rounds and Q_stir queries: +//! combined ≈ Q_stir × log₂(blowup) + R × log₂(blowup) bits +//! Q_stir = Q_fri/2 is taken from the STIR paper. NOTE(review): by the formula above this reaches the FRI figure Q_fri × log₂(blowup) only when R ≥ Q_fri/2 — confirm against the paper's analysis. +//! +//! ## Integration with DEEP-FRI +//! `StirInnerProof` is a drop-in replacement for `FriProofBy4` inside +//! `DeepFriProof::fri_proof` (via `FriFoldProof::Stir`). The prover and verifier +//! dispatch to this module when `HYPER_SNARK_PROTOCOL=stir`. 
+ +use crate::domain::{batch_inverse, FriDomainSequence}; +use crate::errors::{HyperSnarkError, Result}; +use crate::field::FieldElement; +use crate::folding::fri_fold_by4; +use crate::fri::{FriLayer, FriLayerQueryBy4, FriProver, FriQueryBy4, FRI_FINAL_DEGREE_BY4}; +use crate::merkle::{ + build_compact_pool, to_compact_path, CompactPath, Digest, MerkleProof, MerkleTree, +}; +use crate::polynomial::Polynomial; +use crate::sponge::CryptoSponge; +use rayon::prelude::*; +use serde::{Deserialize, Serialize}; +use std::time::Instant; + +// --------------------------------------------------------------------------- +// STIR-specific structs +// --------------------------------------------------------------------------- + +/// Per-round STIR subdomain check. +/// +/// After committing round r+1, a random 4-coset position is derived from +/// the Fiat-Shamir transcript and opened in round r's tree. The shiftcheck +/// provides a Lagrange-based binding that contributes additional soundness. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct StirRound { + /// Base index j: coset positions are (j, j+m, j+2m, j+3m) in D_r. + pub check_base: u32, + /// f_r evaluations at the 4 coset positions. + pub coset_evals: [FieldElement; 4], + /// Compact Merkle paths in tree_r for the 4 positions. + /// Sibling indices reference the caller-supplied shared pool. + pub paths: [CompactPath; 4], + /// Lagrange evaluation: Σ_k L_k(gamma_r) · coset_evals[k] + /// where gamma_r is a Fiat-Shamir challenge and L_k is the Lagrange basis + /// for the k-th element of the 4-coset in D_r. + pub shiftcheck: FieldElement, +} + +/// Complete STIR inner proof — drop-in replacement for `FriProofBy4`. 
+/// +/// Retains the fold-by-4 structure (same `layers`, `final_poly`, `queries`) +/// but adds: +/// - `stir_rounds`: per-round subdomain checks (one per fold round) +/// - Fewer `queries` (`Q_stir ≈ Q_fri/2`) +/// +/// The FRI query chains in `queries` use indices into the *caller-supplied* pool +/// (i.e. `DeepFriProof::shared_nodes[..fri_pool_len]`), NOT stored internally — +/// same as `FriProofBy4`. This avoids doubling the pool storage. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct StirInnerProof { + /// Merkle root of the initial polynomial f₀ on D₀. + pub initial_commitment: Digest, + /// Per-round Merkle roots (layers 1..k). + pub layers: Vec, + /// Per-round STIR subdomain checks. + pub stir_rounds: Vec, + /// Final low-degree polynomial (revealed when degree ≤ FRI_FINAL_DEGREE_BY4). + pub final_poly: Polynomial, + /// Q_stir fold-by-4 query chains. Sibling indices reference the + /// caller-supplied FRI pool (NOT stored here — avoids duplication). + pub queries: Vec, + pub degree_bound: usize, + pub initial_domain_size: usize, +} + +impl StirInnerProof { + /// Approximate serialized size in bytes. + pub fn byte_size(&self) -> usize { + let query_data: usize = self + .queries + .iter() + .map(|q| { + 8 + q + .layer_proofs + .iter() + .map(|lp| { + 4 * 32 + + lp.path0.wire_size() + + lp.path1.wire_size() + + lp.path2.wire_size() + + lp.path3.wire_size() + }) + .sum::() + }) + .sum(); + let stir_data: usize = self + .stir_rounds + .iter() + .map(|r| { + 4 // check_base + + 4 * 32 // coset_evals + + r.paths[0].wire_size() + + r.paths[1].wire_size() + + r.paths[2].wire_size() + + r.paths[3].wire_size() + + 32 // shiftcheck + }) + .sum(); + 32 // initial_commitment + + 8 + self.layers.len() * (32 + 8) // layers + + 8 + stir_data // stir_rounds + + 8 + self.final_poly.len() * 32 // final_poly + + 8 + query_data // queries + // NOTE: all shared Merkle nodes are accounted for by DeepFriProof::shared_nodes. 
+ + 8 + 8 // degree_bound + initial_domain_size + } +} + +// --------------------------------------------------------------------------- +// Internal raw types (before sibling deduplication) +// --------------------------------------------------------------------------- + +struct RawStirRound { + check_base: u32, + coset_evals: [FieldElement; 4], + paths: [MerkleProof; 4], + shiftcheck: FieldElement, +} + +struct RawFriLayerQuery { + eval0: FieldElement, + eval1: FieldElement, + eval2: FieldElement, + eval3: FieldElement, + paths: [MerkleProof; 4], +} + +struct RawFriQuery { + index: usize, + layers: Vec, +} + +// --------------------------------------------------------------------------- +// Phase timing helpers (mirror of prover.rs pattern) +// --------------------------------------------------------------------------- + +fn phase_timing_enabled() -> bool { + std::env::var("HYPER_SNARK_PHASE_TIMING").as_deref() == Ok("1") +} + +macro_rules! phase_time { + ($label:expr, $ms:expr) => { + if phase_timing_enabled() { + eprintln!(" [profile] {:35} {:8.2} ms", $label, $ms); + } + }; +} + +// --------------------------------------------------------------------------- +// Lagrange helper +// --------------------------------------------------------------------------- + +/// Evaluate Σ_k L_k(gamma) · evals[k] for the fold-by-4 coset at position j. +/// +/// The 4 coset points are: +/// x0 = ω^j, x1 = ι₄·ω^j, x2 = -ω^j, x3 = -ι₄·ω^j +/// where ι₄ = ω^{n/4} (primitive 4th root of unity for domain of size n). +/// +/// L_k(γ) = Π_{i≠k} (γ - x_i) / (x_k - x_i) +fn lagrange_eval_at_coset( + domain: &crate::domain::Domain, + j: usize, + m: usize, + evals: &[FieldElement; 4], + gamma: FieldElement, +) -> Result { + let x0 = domain.points[j]; + let x1 = domain.points[j + m]; + let x2 = domain.points[j + 2 * m]; + let x3 = domain.points[j + 3 * m]; + let xs = [x0, x1, x2, x3]; + + // Compute numerators and denominators for all 4 Lagrange basis evaluations. 
+ // L_k(gamma) = Π_{i≠k}(gamma - x_i) / Π_{i≠k}(x_k - x_i) + let mut nums = [FieldElement::one(); 4]; + let mut dens = [FieldElement::one(); 4]; + for k in 0..4 { + for i in 0..4 { + if i != k { + nums[k] = nums[k].mul(&gamma.sub(&xs[i])); + dens[k] = dens[k].mul(&xs[k].sub(&xs[i])); + } + } + } + + // Batch inversion: one Montgomery trick instead of 4 independent modular + // exponentiations. For 4 field elements this saves ~3 full Fermat inverses. + let inv_dens = batch_inverse(&dens)?; + + // Σ L_k(gamma) · eval_k + let mut result = FieldElement::zero(); + for k in 0..4 { + result = result.add(&nums[k].mul(&inv_dens[k]).mul(&evals[k])); + } + Ok(result) +} + +// --------------------------------------------------------------------------- +// StirProver +// --------------------------------------------------------------------------- + +/// STIR prover using fold-by-4. Used from DEEP-FRI when `HYPER_SNARK_PROTOCOL=stir`. +pub struct StirProver; + +impl StirProver { + /// Prove from pre-computed evaluations on a subgroup domain using STIR. + /// + /// Returns `(proof, fri_pool)` where `fri_pool` is the deduplicated Merkle + /// sibling pool for the FRI query chains (to be merged into the DEEP-FRI + /// unified pool by the caller). 
+ pub fn prove_from_evals( + initial_evals: Vec, + degree_bound: usize, + sponge: &mut CryptoSponge, + ) -> Result<(StirInnerProof, Vec)> { + let n = initial_evals.len(); + if n < 4 || !n.is_power_of_two() { + return Err(HyperSnarkError::InvalidInput( + "stir prove_from_evals: initial_evals must be a power-of-two ≥ 4".into(), + )); + } + + log::debug!( + "[STIR] proving degree≤{} from {} evals (fold-by-4 + per-round subdomain checks)", + degree_bound, + n + ); + + let domain_seq = FriDomainSequence::build_subgroup_by4(n, FRI_FINAL_DEGREE_BY4 + 1)?; + let num_rounds = domain_seq.num_rounds(); + log::debug!("[STIR] {} fold-by-4 rounds", num_rounds); + + // ----------------------------------------------------------------------- + // Commit phase — identical to FriProverBy4 + // ----------------------------------------------------------------------- + let initial_tree = MerkleTree::build(&initial_evals); + let initial_commitment = initial_tree.root(); + sponge.absorb_digest(b"stir-commit-0", &initial_commitment); + + let mut all_trees: Vec = vec![initial_tree]; + let mut all_evals: Vec> = vec![initial_evals.clone()]; + let mut current_evals = initial_evals; + let mut folding_challenges: Vec<(FieldElement, FieldElement, FieldElement)> = + Vec::with_capacity(num_rounds); + let mut layers: Vec = Vec::with_capacity(num_rounds); + + let t_commit = Instant::now(); + for round in 0..num_rounds { + let beta0 = sponge.squeeze_field_element(b"stir-fold-b0"); + let beta1 = sponge.squeeze_field_element(b"stir-fold-b1"); + let beta2 = sponge.squeeze_field_element(b"stir-fold-b2"); + folding_challenges.push((beta0, beta1, beta2)); + + let domain = &domain_seq.domains[round]; + let folded_evals = fri_fold_by4(¤t_evals, domain, &beta0, &beta1, &beta2)?; + + let tree = MerkleTree::build(&folded_evals); + let root = tree.root(); + layers.push(FriLayer { + merkle_root: root, + domain_size: folded_evals.len(), + }); + sponge.absorb_digest(format!("stir-commit-{}", round + 1).as_bytes(), 
&root); + + all_trees.push(tree); + all_evals.push(folded_evals.clone()); + current_evals = folded_evals; + } + + phase_time!( + "stir commit (fold+tree all rounds)", + t_commit.elapsed().as_secs_f64() * 1000.0 + ); + + // Final polynomial. + let final_domain = domain_seq.final_domain(); + let final_poly = + FriProver::recover_polynomial_from_evals_pub(¤t_evals, final_domain)?; + log::debug!( + "[STIR] final poly degree: {}", + final_poly.degree().unwrap_or(0) + ); + sponge.absorb_field_elements(b"stir-final-poly", &final_poly.coeffs); + + // ----------------------------------------------------------------------- + // STIR per-round subdomain checks + // ----------------------------------------------------------------------- + // For each fold round r, squeeze a random coset base check_base_r (in D_r), + // open the 4 coset positions in tree_r, and compute the shiftcheck. + // The check_base and gamma are squeezed AFTER committing all rounds and + // the final polynomial, so they are independent of the per-query indices. + let mut raw_stir: Vec = Vec::with_capacity(num_rounds); + let mut t_paths_total = 0f64; + let mut t_sc_total = 0f64; + + for round in 0..num_rounds { + let n_r = domain_seq.domains[round].size; // size of D_r + let m_r = n_r / 4; // quarter size + + // Squeeze random coset base index in [0, m_r). + let j = sponge.squeeze_index(format!("stir-check-{}", round).as_bytes(), m_r); + + let e0 = all_evals[round][j]; + let e1 = all_evals[round][j + m_r]; + let e2 = all_evals[round][j + 2 * m_r]; + let e3 = all_evals[round][j + 3 * m_r]; + let evals = [e0, e1, e2, e3]; + + let t_paths = Instant::now(); + let paths = [ + all_trees[round].authentication_path(j)?, + all_trees[round].authentication_path(j + m_r)?, + all_trees[round].authentication_path(j + 2 * m_r)?, + all_trees[round].authentication_path(j + 3 * m_r)?, + ]; + t_paths_total += t_paths.elapsed().as_secs_f64() * 1000.0; + + // Shiftcheck: Lagrange evaluation at random gamma_r. 
+ let gamma = sponge.squeeze_field_element(format!("stir-gamma-{}", round).as_bytes()); + let t_sc = Instant::now(); + let shiftcheck = + lagrange_eval_at_coset(&domain_seq.domains[round], j, m_r, &evals, gamma)?; + t_sc_total += t_sc.elapsed().as_secs_f64() * 1000.0; + // Absorb shiftcheck to bind subsequent challenges to this value. + sponge.absorb_field_elements(format!("stir-sc-{}", round).as_bytes(), &[shiftcheck]); + + raw_stir.push(RawStirRound { + check_base: j as u32, + coset_evals: evals, + paths, + shiftcheck, + }); + } + phase_time!("stir_round_open_paths (all rounds)", t_paths_total); + phase_time!("stir_round_shiftcheck (all rounds)", t_sc_total); + + // Flatten STIR per-round paths. + let stir_flat_paths: Vec = raw_stir + .iter() + .flat_map(|r| r.paths.iter().cloned()) + .collect(); + + // ----------------------------------------------------------------------- + // Query phase — fold-by-4 chains with Q_stir queries + // ----------------------------------------------------------------------- + let q_stir = crate::config::active_stir_query_rounds(); + let query_indices = sponge.squeeze_indices(b"stir-queries", q_stir, n); + + let t_queries = Instant::now(); + let raw_queries_res: Vec> = query_indices + .par_iter() + .map(|&idx| build_raw_query(idx, &all_evals, &all_trees, &domain_seq)) + .collect(); + let mut raw_queries: Vec = Vec::with_capacity(q_stir); + for r in raw_queries_res { + raw_queries.push(r?); + } + phase_time!( + "stir_query_chain_paths", + t_queries.elapsed().as_secs_f64() * 1000.0 + ); + + // Flatten all FRI query paths. + let paths_per_query = num_rounds * 4; + let flat_fri_paths: Vec = raw_queries + .iter() + .flat_map(|rq| { + rq.layers.iter().flat_map(|rl| { + [ + rl.paths[0].clone(), + rl.paths[1].clone(), + rl.paths[2].clone(), + rl.paths[3].clone(), + ] + }) + }) + .collect(); + // Global dedup pool for BOTH STIR per-round checks and FRI query chains. 
+ // This avoids two independent pool passes and deduplicates across both sets. + let stir_path_count = stir_flat_paths.len(); + let mut all_paths: Vec = + Vec::with_capacity(stir_path_count + flat_fri_paths.len()); + all_paths.extend(stir_flat_paths); + all_paths.extend(flat_fri_paths); + let t_pool = Instant::now(); + let (shared_pool, all_sib_indices) = build_compact_pool(&all_paths); + phase_time!( + "stir build_compact_pool", + t_pool.elapsed().as_secs_f64() * 1000.0 + ); + log::debug!( + "[STIR] unified pool: {} unique digests (from {} total path siblings)", + shared_pool.len(), + all_paths.iter().map(|p| p.siblings.len()).sum::() + ); + + let stir_rounds: Vec = raw_stir + .into_iter() + .enumerate() + .map(|(ri, raw)| { + let pb = ri * 4; + StirRound { + check_base: raw.check_base, + coset_evals: raw.coset_evals, + paths: [ + to_compact_path(&all_paths[pb], all_sib_indices[pb].clone()), + to_compact_path(&all_paths[pb + 1], all_sib_indices[pb + 1].clone()), + to_compact_path(&all_paths[pb + 2], all_sib_indices[pb + 2].clone()), + to_compact_path(&all_paths[pb + 3], all_sib_indices[pb + 3].clone()), + ], + shiftcheck: raw.shiftcheck, + } + }) + .collect(); + + let queries: Vec = raw_queries + .into_iter() + .enumerate() + .map(|(qi, rq)| { + let base = qi * paths_per_query; + let fri_offset = stir_path_count; + let layer_proofs: Vec = rq + .layers + .into_iter() + .enumerate() + .map(|(ri, rl)| { + let pb = fri_offset + base + ri * 4; + FriLayerQueryBy4 { + eval0: rl.eval0, + eval1: rl.eval1, + eval2: rl.eval2, + eval3: rl.eval3, + path0: to_compact_path(&all_paths[pb], all_sib_indices[pb].clone()), + path1: to_compact_path( + &all_paths[pb + 1], + all_sib_indices[pb + 1].clone(), + ), + path2: to_compact_path( + &all_paths[pb + 2], + all_sib_indices[pb + 2].clone(), + ), + path3: to_compact_path( + &all_paths[pb + 3], + all_sib_indices[pb + 3].clone(), + ), + } + }) + .collect(); + FriQueryBy4 { + index: rq.index, + layer_proofs, + } + }) + .collect(); + + 
Ok(( + StirInnerProof { + initial_commitment, + layers, + stir_rounds, + final_poly, + queries, + degree_bound, + initial_domain_size: n, + }, + shared_pool, + )) + } +} + +/// Build one fold-by-4 raw query (before sibling deduplication). +fn build_raw_query( + initial_idx: usize, + all_evals: &[Vec], + all_trees: &[MerkleTree], + domain_seq: &FriDomainSequence, +) -> Result { + let num_rounds = domain_seq.num_rounds(); + let mut layers = Vec::with_capacity(num_rounds); + let mut idx = initial_idx; + + for round in 0..num_rounds { + let sz = domain_seq.domains[round].size; + let m = sz / 4; + let j = idx % m; + let j0 = j; + let j1 = j + m; + let j2 = j + 2 * m; + let j3 = j + 3 * m; + + let evals = &all_evals[round]; + let tree = &all_trees[round]; + + layers.push(RawFriLayerQuery { + eval0: evals[j0], + eval1: evals[j1], + eval2: evals[j2], + eval3: evals[j3], + paths: [ + tree.authentication_path(j0)?, + tree.authentication_path(j1)?, + tree.authentication_path(j2)?, + tree.authentication_path(j3)?, + ], + }); + + idx = j; + } + + Ok(RawFriQuery { + index: initial_idx, + layers, + }) +} + +// --------------------------------------------------------------------------- +// StirVerifier +// --------------------------------------------------------------------------- + +/// STIR verifier for fold-by-4 proofs. +pub struct StirVerifier; + +impl StirVerifier { + /// Verify a STIR proof generated by `StirProver::prove_from_evals`. + /// + /// - `shared_nodes`: unified sibling pool sourced from + /// `DeepFriProof::shared_nodes[..fri_pool_len]`. 
+ pub fn verify_subgroup( + proof: &StirInnerProof, + shared_nodes: &[Digest], + sponge: &mut CryptoSponge, + num_queries: usize, + ) -> Result { + let num_rounds = proof.layers.len(); + + let domain_seq = FriDomainSequence::build_subgroup_by4( + proof.initial_domain_size, + FRI_FINAL_DEGREE_BY4 + 1, + )?; + if domain_seq.num_rounds() != num_rounds { + log::debug!( + "[STIR] domain rounds mismatch: expected {}, proof has {}", + domain_seq.num_rounds(), + num_rounds + ); + return Ok(false); + } + + // ----------------------------------------------------------------------- + // Step 1: Re-derive folding challenges and re-absorb commitments + // ----------------------------------------------------------------------- + sponge.absorb_digest(b"stir-commit-0", &proof.initial_commitment); + + let mut challenges: Vec<(FieldElement, FieldElement, FieldElement)> = + Vec::with_capacity(num_rounds); + for (round, layer) in proof.layers.iter().enumerate() { + let b0 = sponge.squeeze_field_element(b"stir-fold-b0"); + let b1 = sponge.squeeze_field_element(b"stir-fold-b1"); + let b2 = sponge.squeeze_field_element(b"stir-fold-b2"); + challenges.push((b0, b1, b2)); + sponge.absorb_digest( + format!("stir-commit-{}", round + 1).as_bytes(), + &layer.merkle_root, + ); + } + + // ----------------------------------------------------------------------- + // Step 2: Verify final polynomial degree + // ----------------------------------------------------------------------- + let expected_final_degree = proof.degree_bound >> (2 * num_rounds); + let actual_final_degree = proof.final_poly.degree().unwrap_or(0); + if actual_final_degree > expected_final_degree.max(FRI_FINAL_DEGREE_BY4) { + log::debug!( + "[STIR] final degree {} exceeds bound {}", + actual_final_degree, + expected_final_degree + ); + return Ok(false); + } + sponge.absorb_field_elements(b"stir-final-poly", &proof.final_poly.coeffs); + + // ----------------------------------------------------------------------- + // Step 3: Verify 
per-round STIR subdomain checks + // ----------------------------------------------------------------------- + if proof.stir_rounds.len() != num_rounds { + log::debug!( + "[STIR] stir_rounds count mismatch: expected {}, got {}", + num_rounds, + proof.stir_rounds.len() + ); + return Ok(false); + } + + let all_roots: Vec = std::iter::once(proof.initial_commitment) + .chain(proof.layers.iter().map(|l| l.merkle_root)) + .collect(); + + for (round, sr) in proof.stir_rounds.iter().enumerate() { + let n_r = domain_seq.domains[round].size; + let m_r = n_r / 4; + + // Re-derive check_base from sponge. + let expected_j = sponge.squeeze_index(format!("stir-check-{}", round).as_bytes(), m_r); + if sr.check_base as usize != expected_j { + log::debug!( + "[STIR] round {} check_base mismatch: expected {}, got {}", + round, + expected_j, + sr.check_base + ); + return Ok(false); + } + + let j = sr.check_base as usize; + let root = all_roots[round]; + + // Verify the 4 Merkle paths against tree_r's root. + let expected_indices = [j, j + m_r, j + 2 * m_r, j + 3 * m_r]; + for (k, &expected_idx) in expected_indices.iter().enumerate() { + if sr.paths[k].leaf_index as usize != expected_idx { + log::debug!("[STIR] round {} path[{}] leaf_index mismatch", round, k); + return Ok(false); + } + if !sr.paths[k].verify_field_element(&sr.coset_evals[k], &root, shared_nodes) { + log::debug!( + "[STIR] round {} path[{}] Merkle verification failed", + round, + k + ); + return Ok(false); + } + } + + // Re-derive gamma and recompute shiftcheck. + let gamma = sponge.squeeze_field_element(format!("stir-gamma-{}", round).as_bytes()); + let expected_sc = + lagrange_eval_at_coset(&domain_seq.domains[round], j, m_r, &sr.coset_evals, gamma)?; + if expected_sc != sr.shiftcheck { + log::debug!("[STIR] round {} shiftcheck mismatch", round); + return Ok(false); + } + + // Absorb shiftcheck to keep sponge state in sync. 
+ sponge.absorb_field_elements(format!("stir-sc-{}", round).as_bytes(), &[sr.shiftcheck]); + } + + // ----------------------------------------------------------------------- + // Step 4: Verify FRI query chains (Q_stir queries, same as FriVerifierBy4) + // ----------------------------------------------------------------------- + let query_rounds = num_queries; + let expected_indices = + sponge.squeeze_indices(b"stir-queries", query_rounds, proof.initial_domain_size); + if proof.queries.len() != query_rounds { + log::debug!("[STIR] wrong query count"); + return Ok(false); + } + + // Use the caller-supplied fri_shared_nodes pool for the FRI query paths. + for (q_idx, query) in proof.queries.iter().enumerate() { + if query.index != expected_indices[q_idx] { + log::debug!("[STIR] query {} index mismatch", q_idx); + return Ok(false); + } + if !verify_fri_query( + query, + proof, + shared_nodes, + &domain_seq, + &challenges, + &all_roots, + )? { + log::debug!("[STIR] query {} failed", q_idx); + return Ok(false); + } + } + + log::debug!( + "[STIR] all {} queries + {} per-round checks passed ✓", + query_rounds, + num_rounds + ); + Ok(true) + } +} + +/// Verify one fold-by-4 FRI query chain against the STIR round commitments. +/// Identical in logic to `FriVerifierBy4::verify_query` — reused here to avoid +/// circular dependency on the `fri` module's private method. 
+fn verify_fri_query( + query: &FriQueryBy4, + proof: &StirInnerProof, + fri_shared: &[Digest], + domain_seq: &FriDomainSequence, + challenges: &[(FieldElement, FieldElement, FieldElement)], + all_roots: &[Digest], +) -> Result { + let four = FieldElement::from_u64(4); + let four_inv = four.inverse()?; + let mut idx = query.index; + + for (round, lq) in query.layer_proofs.iter().enumerate() { + let domain = &domain_seq.domains[round]; + let sz = domain.size; + let m = sz / 4; + let j = idx % m; + let j0 = j; + let j1 = j + m; + let j2 = j + 2 * m; + let j3 = j + 3 * m; + + let root = all_roots[round]; + + // Verify Merkle paths. + if lq.path0.leaf_index != j0 as u32 + || !lq.path0.verify_field_element(&lq.eval0, &root, fri_shared) + { + log::debug!("[STIR] query round {} path0 failed", round); + return Ok(false); + } + if lq.path1.leaf_index != j1 as u32 + || !lq.path1.verify_field_element(&lq.eval1, &root, fri_shared) + { + log::debug!("[STIR] query round {} path1 failed", round); + return Ok(false); + } + if lq.path2.leaf_index != j2 as u32 + || !lq.path2.verify_field_element(&lq.eval2, &root, fri_shared) + { + log::debug!("[STIR] query round {} path2 failed", round); + return Ok(false); + } + if lq.path3.leaf_index != j3 as u32 + || !lq.path3.verify_field_element(&lq.eval3, &root, fri_shared) + { + log::debug!("[STIR] query round {} path3 failed", round); + return Ok(false); + } + + // Compute expected fold. 
+ let x = domain.points[j0]; + let i4 = domain.generator.pow_u64(m as u64); + + let x_inv = x.inverse()?; + let four_x_inv = four_inv.mul(&x_inv); + let four_x2_inv = four_x_inv.mul(&x_inv); + let four_x3_inv = four_x2_inv.mul(&x_inv); + + let i4_v1 = i4.mul(&lq.eval1); + let i4_v3 = i4.mul(&lq.eval3); + + let f0 = lq + .eval0 + .add(&lq.eval1) + .add(&lq.eval2) + .add(&lq.eval3) + .mul(&four_inv); + let f1 = lq + .eval0 + .sub(&i4_v1) + .sub(&lq.eval2) + .add(&i4_v3) + .mul(&four_x_inv); + let f2 = lq + .eval0 + .sub(&lq.eval1) + .add(&lq.eval2) + .sub(&lq.eval3) + .mul(&four_x2_inv); + let f3 = lq + .eval0 + .add(&i4_v1) + .sub(&lq.eval2) + .sub(&i4_v3) + .mul(&four_x3_inv); + + let (b0, b1, b2) = challenges[round]; + let expected_fold = f0.add(&b0.mul(&f1)).add(&b1.mul(&f2)).add(&b2.mul(&f3)); + + let next_j = j; + + if round < query.layer_proofs.len() - 1 { + let next_lq = &query.layer_proofs[round + 1]; + let next_m = domain_seq.domains[round + 1].size / 4; + let next_j_base = next_j % next_m; + let q = next_j / next_m; + + if next_lq.path0.leaf_index != next_j_base as u32 { + log::debug!("[STIR] query round {} next-layer index mismatch", round); + return Ok(false); + } + + let next_eval = match q { + 0 => next_lq.eval0, + 1 => next_lq.eval1, + 2 => next_lq.eval2, + 3 => next_lq.eval3, + _ => unreachable!(), + }; + if next_eval != expected_fold { + log::debug!("[STIR] query round {} fold mismatch (slot {})", round, q); + return Ok(false); + } + } else { + let final_domain = domain_seq.final_domain(); + let final_idx = next_j % final_domain.size; + let final_x = final_domain.element(final_idx); + let final_eval = proof.final_poly.evaluate(final_x); + if final_eval != expected_fold { + log::debug!("[STIR] final poly mismatch at round {}", round); + return Ok(false); + } + } + + idx = j; + } + + Ok(true) +} + +// Compile-time invariants on STIR constants. 
+const _: () = { + assert!( + crate::config::STIR_QUERY_ROUNDS >= 8, + "STIR should use at least 8 queries" + ); + assert!( + crate::config::STIR_QUERY_ROUNDS <= crate::config::FRI_QUERY_ROUNDS, + "STIR uses ≤ FRI query rounds" + ); +}; + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::active_stir_query_rounds; + use crate::field::{primitive_root_of_unity, FieldElement}; + use crate::polynomial::Polynomial; + use crate::sponge::CryptoSponge; + + fn make_polynomial_evals(degree: usize, domain_size: usize) -> Vec { + let poly = Polynomial::new( + (0..=degree) + .map(|i| FieldElement::from_u64((i + 1) as u64)) + .collect(), + ); + let omega = primitive_root_of_unity(domain_size).unwrap(); + let mut x = FieldElement::one(); + (0..domain_size) + .map(|_| { + let e = poly.evaluate(&x); + x = x.mul(&omega); + e + }) + .collect() + } + + #[test] + fn unit_stir_lagrange_eval_at_coset_consistency() { + // For a degree-3 polynomial exactly defined by 4 coset points, + // the Lagrange evaluation at any other point should match. + let n = 64usize; + let domain = crate::domain::Domain::new_subgroup(n).unwrap(); + let m = n / 4; + let j = 5usize; + + let evals = [ + domain.points[j].mul(&FieldElement::from_u64(2)), + domain.points[j + m].mul(&FieldElement::from_u64(3)), + domain.points[j + 2 * m].sub(&FieldElement::one()), + domain.points[j + 3 * m].add(&FieldElement::from_u64(7)), + ]; + + // At x0, L0(x0) = 1 and L_{k≠0}(x0) = 0, so result = evals[0]. + let at_x0 = lagrange_eval_at_coset(&domain, j, m, &evals, domain.points[j]).unwrap(); + assert_eq!(at_x0, evals[0], "Lagrange at x0 must equal evals[0]"); + + // At x1: result = evals[1]. 
+ let at_x1 = lagrange_eval_at_coset(&domain, j, m, &evals, domain.points[j + m]).unwrap(); + assert_eq!(at_x1, evals[1], "Lagrange at x1 must equal evals[1]"); + } + + #[test] + fn unit_stir_prove_verify_roundtrip_small() { + // Degree 4, domain size 64 (blowup 4 × 4 = 16 samples effectively, but + // we use 64 to ensure enough room for fold-by-4 rounds). + let domain_size = 64usize; + let degree = 4; + let evals = make_polynomial_evals(degree, domain_size); + + let mut prover_sponge = CryptoSponge::new(b"test-stir"); + let (proof, fri_pool) = + StirProver::prove_from_evals(evals, degree + 1, &mut prover_sponge).unwrap(); + + let q_stir = active_stir_query_rounds(); + + let mut verifier_sponge = CryptoSponge::new(b"test-stir"); + let ok = + StirVerifier::verify_subgroup(&proof, &fri_pool, &mut verifier_sponge, q_stir).unwrap(); + assert!(ok, "STIR proof should verify successfully"); + } + + #[test] + fn unit_stir_prove_verify_roundtrip_medium() { + // Degree 16, domain size 256. + let domain_size = 256usize; + let degree = 16; + let evals = make_polynomial_evals(degree, domain_size); + + let mut prover_sponge = CryptoSponge::new(b"test-stir-medium"); + let (proof, fri_pool) = + StirProver::prove_from_evals(evals, degree + 1, &mut prover_sponge).unwrap(); + + let q_stir = active_stir_query_rounds(); + + let mut verifier_sponge = CryptoSponge::new(b"test-stir-medium"); + let ok = + StirVerifier::verify_subgroup(&proof, &fri_pool, &mut verifier_sponge, q_stir).unwrap(); + assert!(ok, "STIR medium proof should verify"); + } + + #[test] + fn unit_stir_byte_size_smaller_than_equivalent_fri() { + // Verify that a STIR proof with half queries is smaller than + // an FRI proof with full queries on the same input. 
+ let domain_size = 256usize; + let degree = 16; + let evals = make_polynomial_evals(degree, domain_size); + + let mut sponge_stir = CryptoSponge::new(b"test-size-stir"); + let (stir_proof, _) = + StirProver::prove_from_evals(evals.clone(), degree + 1, &mut sponge_stir).unwrap(); + let stir_size = stir_proof.byte_size(); + + // Build an equivalent FriProofBy4 with the same domain (uses full Q=20 queries). + use crate::fri::FriProverBy4; + let mut sponge_fri = CryptoSponge::new(b"test-size-fri"); + let (fri_proof, _) = + FriProverBy4::prove_from_evals(evals, degree + 1, &mut sponge_fri).unwrap(); + // FriProofBy4 byte_size doesn't include stir extras, just FRI data: + let fri_raw = 32 + + 8 + + fri_proof.layers.len() * (32 + 8) + + 8 + + fri_proof.final_poly.len() * 32 + + fri_proof + .queries + .iter() + .map(|q| { + 8 + q + .layer_proofs + .iter() + .map(|lp| { + 4 * 32 + + lp.path0.wire_size() + + lp.path1.wire_size() + + lp.path2.wire_size() + + lp.path3.wire_size() + }) + .sum::() + }) + .sum::() + + 8 + + 8; + + log::debug!( + "[STIR size test] STIR: {} bytes, FRI raw: {} bytes", + stir_size, + fri_raw + ); + // STIR with half queries should be smaller overall. + // (The per-round checks add some data, but fewer queries save more.) + assert!( + stir_size < fri_raw * 2, + "STIR proof ({} bytes) should not be pathologically larger than FRI ({} bytes)", + stir_size, + fri_raw + ); + } +} diff --git a/src/verifier.rs b/src/verifier.rs index 0b716c3..5eaeeb1 100644 --- a/src/verifier.rs +++ b/src/verifier.rs @@ -28,7 +28,6 @@ //! - Merkle verification per query: O(log n) hashes. //! - 40 queries × O(log n) = O(log n) total. 
-use crate::config::active_pow_bits; use crate::deep_fri::DeepFriVerifier; use crate::errors::Result; use crate::field::{primitive_root_of_unity, FieldElement}; @@ -124,8 +123,11 @@ impl Verifier { h.as_bytes()[2], h.as_bytes()[3], ]); - if word.leading_zeros() < active_pow_bits() { - log::debug!("PoW nonce verification failed"); + if word.leading_zeros() < proof.pow_bits { + log::debug!( + "PoW nonce verification failed (need {} leading zeros)", + proof.pow_bits + ); return Ok(false); } } @@ -150,6 +152,7 @@ impl Verifier { &domain_generator, proof.domain_size, &mut ldt_sponge, + proof.num_queries, )? { log::debug!("LDT proximity proof (DEEP-FRI) failed"); return Ok(false); @@ -206,4 +209,27 @@ mod tests { let result = Verifier::verify(&proof, &proof.public_inputs.clone()); assert!(result.is_err() || !result.unwrap()); } + + #[test] + fn unit_proof_is_self_describing() { + // The proof header must record the active num_queries and pow_bits so + // the verifier never needs to read HYPER_SNARK_PROFILE from env. + let (r1cs, witness) = example_witness(3, 4); + let proof = Prover::prove(&r1cs, &witness).unwrap(); + + // Sanity: fields are populated with the active-profile values. + assert!(proof.num_queries > 0); + assert!(proof.pow_bits > 0); + + // Verifier uses proof.pow_bits, NOT the env var. + // Temporarily set a conflicting env var; verify must still pass. 
+ std::env::set_var("HYPER_SNARK_PROFILE", "compact"); + let pub_in = proof.public_inputs.clone(); + let ok = Verifier::verify(&proof, &pub_in).unwrap(); + std::env::remove_var("HYPER_SNARK_PROFILE"); + assert!( + ok, + "Verifier should use proof.pow_bits, not the env HYPER_SNARK_PROFILE" + ); + } } diff --git a/tests/merkle_tests.rs b/tests/merkle_tests.rs index 8a63062..9619b10 100644 --- a/tests/merkle_tests.rs +++ b/tests/merkle_tests.rs @@ -97,7 +97,7 @@ mod merkle_tests { let v = values(8); let tree = MerkleTree::build(&v); let mut proof = tree.authentication_path(3).unwrap(); - proof.root = [0u8; 24]; // tamper root + proof.root = [0u8; 16]; // tamper root assert!(!proof.verify(&v[3])); } @@ -106,7 +106,7 @@ mod merkle_tests { let v = values(8); let tree = MerkleTree::build(&v); let mut proof = tree.authentication_path(3).unwrap(); - proof.siblings[0] = [0xffu8; 24]; // tamper first sibling + proof.siblings[0] = [0xffu8; 16]; // tamper first sibling assert!(!proof.verify(&v[3])); } diff --git a/tests/unit_merkle_tests.rs b/tests/unit_merkle_tests.rs index 5eda7e9..ee7c7f9 100644 --- a/tests/unit_merkle_tests.rs +++ b/tests/unit_merkle_tests.rs @@ -89,7 +89,7 @@ mod merkle_extra_tests { #[test] fn unit_proof_byte_size_grows_with_depth() { - // depth-2 tree (4 leaves) vs depth-3 tree (8 leaves) + // depth-2 tree (4 leaves → padded to 4=4¹) vs depth-3 tree (8 leaves → padded to 16=4²) let (tree4, _) = build_tree(4); let (tree8, _) = build_tree(8); let size4 = tree4.authentication_path(0).unwrap().byte_size(); @@ -99,12 +99,12 @@ mod merkle_extra_tests { #[test] fn unit_proof_byte_size_formula() { - // byte_size = 8 + siblings*24 + directions + 24 (OPT-4: 192-bit digests) + // byte_size = 8 + siblings*16 + directions + 16 (Phase A: 128-bit digests) // For 8 leaves padded to 16=4²: depth = log₄(16) = 2, siblings = 3*2 = 6 - // 8 + 6*24 + 2 + 24 = 8 + 144 + 2 + 24 = 178 + // 8 + 6*16 + 2 + 16 = 8 + 96 + 2 + 16 = 122 let (tree, _) = build_tree(8); let proof = 
tree.authentication_path(0).unwrap(); - assert_eq!(proof.byte_size(), 8 + 6 * 24 + 2 + 24); + assert_eq!(proof.byte_size(), 8 + 6 * 16 + 2 + 16); } // ----------------------------------------------------------------------- @@ -128,7 +128,7 @@ mod merkle_extra_tests { #[test] fn unit_hash_bytes_empty() { let h = hash_bytes(&[]); - assert_ne!(h, [0u8; 24], "hash of empty data should not be all zeros"); + assert_ne!(h, [0u8; 16], "hash of empty data should not be all zeros"); } #[test] diff --git a/tests/zk_tests.rs b/tests/zk_tests.rs index 736b73b..f8be843 100644 --- a/tests/zk_tests.rs +++ b/tests/zk_tests.rs @@ -8,6 +8,11 @@ mod zk_tests { use hyper_snark::r1cs::{example_witness, R1CSInstance, R1CSWitness}; use hyper_snark::verifier::Verifier; + // Mutex that serialises every test that reads or writes HYPER_SNARK_PROTOCOL. + // env::set_var / remove_var are process-global, so parallel tests that mutate + // or depend on that variable must not run concurrently. + static PROTOCOL_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); + fn fe(v: u64) -> FieldElement { FieldElement::from_u64(v) } @@ -18,6 +23,7 @@ mod zk_tests { #[test] fn e2e_demo_prove_verify() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); // x=3, y=4 → z=12, w=17 let (r1cs, witness) = example_witness(3, 4); let proof = Prover::prove(&r1cs, &witness).expect("Prover should succeed"); @@ -28,6 +34,7 @@ mod zk_tests { #[test] fn e2e_prove_verify_various_inputs() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let cases = [(0, 0), (1, 1), (2, 3), (10, 10), (100, 200)]; for (x, y) in cases { let (r1cs, witness) = example_witness(x, y); @@ -45,6 +52,7 @@ mod zk_tests { #[test] fn e2e_wrong_public_input_rejected() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let (r1cs, witness) = example_witness(3, 4); let proof = Prover::prove(&r1cs, &witness).unwrap(); @@ -60,6 +68,7 @@ mod zk_tests { #[test] fn e2e_tampered_commitment_rejected() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let (r1cs, witness) = 
example_witness(3, 4); let proof = Prover::prove(&r1cs, &witness).unwrap(); let pub_in = proof.public_inputs.clone(); @@ -67,7 +76,7 @@ mod zk_tests { // Tamper commit_batch — the verifier binds this in the transcript to derive r. // A forged root → different r → challenge_r mismatch → rejected. let mut bad_proof = proof.clone(); - bad_proof.commit_batch = [0xffu8; 24]; + bad_proof.commit_batch = [0xffu8; 16]; let ok = Verifier::verify(&bad_proof, &pub_in).unwrap(); assert!(!ok, "Tampered commitment should be rejected"); @@ -79,6 +88,7 @@ mod zk_tests { #[test] fn e2e_tampered_evaluation_rejected() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let (r1cs, witness) = example_witness(3, 4); let proof = Prover::prove(&r1cs, &witness).unwrap(); let pub_in = proof.public_inputs.clone(); @@ -98,6 +108,7 @@ mod zk_tests { #[test] fn e2e_proof_serialization_roundtrip() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let (r1cs, witness) = example_witness(5, 6); let proof = Prover::prove(&r1cs, &witness).unwrap(); @@ -112,6 +123,7 @@ mod zk_tests { #[test] fn e2e_proof_magic_bytes() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let (r1cs, witness) = example_witness(2, 3); let proof = Prover::prove(&r1cs, &witness).unwrap(); let bytes = proof.to_bytes().unwrap(); @@ -120,6 +132,7 @@ mod zk_tests { #[test] fn e2e_proof_from_garbage_fails() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let result = Proof::from_bytes(b"not-a-proof"); assert!(result.is_err()); } @@ -130,6 +143,7 @@ mod zk_tests { #[test] fn e2e_prover_rejects_bad_witness() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let (r1cs, mut witness) = example_witness(3, 4); witness.assignment[3] = fe(99); // corrupt z let result = Prover::prove(&r1cs, &witness); @@ -142,6 +156,7 @@ mod zk_tests { #[test] fn e2e_transcript_determinism() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); // The Fiat-Shamir challenge r is derived from the commitment, which changes // every run because witness blinding uses fresh OS 
randomness. // What MUST hold: both proofs verify correctly, and domain_size + public @@ -169,6 +184,7 @@ mod zk_tests { #[test] fn e2e_custom_single_constraint() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); // Circuit: a * b = 42 // Variables: [1, a, b] (3 vars) // Public: none (0 public inputs) @@ -192,6 +208,7 @@ mod zk_tests { #[test] fn e2e_proof_size_reasonable() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let (r1cs, witness) = example_witness(3, 4); let proof = Prover::prove(&r1cs, &witness).unwrap(); let bytes = proof.to_bytes().unwrap(); @@ -213,6 +230,7 @@ mod zk_tests { /// causes the polynomial identity a(r)*b(r) - c(r) = h(r)*Z_H(r) to fail. #[test] fn e2e_tampered_vanishing_eval_rejected() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let (r1cs, witness) = example_witness(3, 4); let proof = Prover::prove(&r1cs, &witness).unwrap(); let pub_in = proof.public_inputs.clone(); @@ -234,6 +252,7 @@ mod zk_tests { #[test] fn e2e_wrong_version_rejected() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let (r1cs, witness) = example_witness(1, 1); let proof = Prover::prove(&r1cs, &witness).unwrap(); let mut bytes = proof.to_bytes().unwrap(); @@ -254,6 +273,7 @@ mod zk_tests { #[test] fn e2e_adaptive_blowup_small_circuit_uses_blowup4() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); use hyper_snark::prover::adaptive_blowup; assert_eq!(adaptive_blowup(0), 4); assert_eq!(adaptive_blowup(1), 4); @@ -262,6 +282,7 @@ mod zk_tests { #[test] fn e2e_adaptive_blowup_large_circuit_uses_blowup2() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); use hyper_snark::prover::adaptive_blowup; assert_eq!(adaptive_blowup(4_097), 2); assert_eq!(adaptive_blowup(10_000), 2); @@ -272,6 +293,7 @@ mod zk_tests { /// Uses a synthetic circuit with > 4 096 constraints. 
#[test] fn e2e_large_circuit_prove_verify_with_reduced_blowup() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); use hyper_snark::prover::adaptive_blowup; // Build a circuit with 5 000 constraints (> 4 096 threshold → blowup=2) @@ -310,6 +332,7 @@ mod zk_tests { /// for a circuit above the threshold. #[test] fn e2e_large_circuit_proof_smaller_than_blowup4_would_give() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); // Prove a circuit above threshold (blowup=2 is used automatically) let n = 5_000usize; let mut r1cs = R1CSInstance::new(n, n + 1, 0); @@ -340,6 +363,7 @@ mod zk_tests { /// The proof produced by the batch-commit prover passes in full prove+verify. #[test] fn e2e_batch_commit_prove_verify_small() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let (r1cs, witness) = example_witness(2, 3); let proof = Prover::prove(&r1cs, &witness).unwrap(); assert!(Verifier::verify(&proof, &proof.public_inputs.clone()).unwrap()); @@ -348,6 +372,7 @@ mod zk_tests { /// A proof with a non-trivial circuit (n > 4096 → blowup=2) also verifies. #[test] fn e2e_batch_commit_prove_verify_large() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let n = 5_000usize; let mut r1cs = R1CSInstance::new(n, n + 1, 0); let mut assignment = vec![FieldElement::one()]; @@ -365,14 +390,16 @@ mod zk_tests { /// The batch root in the proof is a non-zero 32-byte digest (sanity). #[test] fn e2e_batch_commit_root_is_non_zero() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let (r1cs, witness) = example_witness(5, 7); let proof = Prover::prove(&r1cs, &witness).unwrap(); - assert!(proof.commit_batch != [0u8; 24],); + assert!(proof.commit_batch != [0u8; 16],); } /// Tampering with one evaluation value invalidates the batch proof. #[test] fn e2e_batch_commit_tampered_value_fails_verify() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let (r1cs, witness) = example_witness(3, 4); let mut proof = Prover::prove(&r1cs, &witness).unwrap(); // Flip one of the batch evaluation values. 
@@ -390,6 +417,7 @@ mod zk_tests { /// se reutilizan fa y fb. El proof debe ser válido de todas formas. #[test] fn e2e_fft_reuse_large_circuit_proves_and_verifies() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); // n > 4096 → blowup=2 → mul_domain == commit_domain → FFT reuse path let n = 6_000usize; let mut r1cs = R1CSInstance::new(n, n + 1, 0); @@ -412,6 +440,7 @@ mod zk_tests { /// de commit estándar. El proof también debe ser válido. #[test] fn e2e_fft_no_reuse_small_circuit_proves_and_verifies() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); // n ≤ 4096 → blowup=4 → mul_domain ≠ commit_domain → ruta estándar let (r1cs, witness) = example_witness(7, 11); let proof = Prover::prove(&r1cs, &witness).unwrap(); @@ -425,6 +454,7 @@ mod zk_tests { /// — es decir, cada ruta genera un árbol Merkle independiente real. #[test] fn e2e_fft_reuse_and_standard_produce_valid_roots() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); // Circuito grande — blowup=2 let n = 5_500usize; let mut r1cs_large = R1CSInstance::new(n, n + 1, 0); @@ -437,13 +467,13 @@ mod zk_tests { } let witness_large = R1CSWitness::new(assignment_large); let proof_large = Prover::prove(&r1cs_large, &witness_large).unwrap(); - assert!(proof_large.commit_batch != [0u8; 24]); + assert!(proof_large.commit_batch != [0u8; 16]); assert!(Verifier::verify(&proof_large, &proof_large.public_inputs.clone()).unwrap()); // Circuito pequeño — blowup=4 let (r1cs_small, witness_small) = example_witness(4, 9); let proof_small = Prover::prove(&r1cs_small, &witness_small).unwrap(); - assert!(proof_small.commit_batch != [0u8; 24]); + assert!(proof_small.commit_batch != [0u8; 16]); assert!(Verifier::verify(&proof_small, &proof_small.public_inputs.clone()).unwrap()); // Los roots son distintos (circuitos distintos) @@ -457,6 +487,7 @@ mod zk_tests { /// prove_unchecked con witness válido produce la misma prueba que prove. 
#[test] fn e2e_prove_unchecked_valid_witness_verifies() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let (r1cs, witness) = example_witness(3, 7); let proof = Prover::prove_unchecked(&r1cs, &witness).unwrap(); assert!( @@ -469,6 +500,7 @@ mod zk_tests { /// (ambos comparten el mismo código de pledging; el check es lo único que difiere). #[test] fn e2e_prove_and_prove_unchecked_same_result() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); // Since witness blinding uses fresh OS randomness, prove() and prove_unchecked() // produce DIFFERENT proofs for the same witness — that is the expected // behaviour of a zero-knowledge prover. @@ -492,6 +524,7 @@ mod zk_tests { /// prove_unchecked con witness inválido devuelve Err (la identidad polinomial falla). #[test] fn e2e_prove_unchecked_bad_witness_fails() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let (r1cs, _) = example_witness(3, 4); // Witness incorrecto: asignamos valores que no satisfacen los constraints let bad_witness = R1CSWitness::new(vec![ @@ -511,6 +544,7 @@ mod zk_tests { /// prove rechaza witness inválido antes de entrar en prove_unchecked. #[test] fn e2e_prove_rejects_bad_witness_before_unchecked() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let (r1cs, _) = example_witness(3, 4); let bad_witness = R1CSWitness::new(vec![ FieldElement::one(), @@ -534,6 +568,7 @@ mod zk_tests { /// debe producir la misma prueba que antes. #[test] fn e2e_opt6_overlap_path_proves_and_verifies() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); // n > 4096 → blowup=2 → mul_domain == commit_domain → can_overlap=true let n = 5_000usize; let mut r1cs = R1CSInstance::new(n, n + 1, 0); @@ -556,6 +591,7 @@ mod zk_tests { /// de fallback también debe seguir funcionando correctamente. 
#[test] fn e2e_opt6_no_overlap_path_proves_and_verifies() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); // n ≤ 4096 → blowup=4 → mul_domain < commit_domain → can_overlap=false let (r1cs, witness) = example_witness(8, 13); let proof = Prover::prove(&r1cs, &witness).unwrap(); @@ -570,6 +606,7 @@ mod zk_tests { /// consistente (prove + verify pasan). #[test] fn e2e_opt6_both_paths_are_consistent() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); // Overlap path — circuito grande let n = 4_500usize; let mut r1cs_large = R1CSInstance::new(n, n + 1, 0); @@ -583,13 +620,13 @@ mod zk_tests { let witness_large = R1CSWitness::new(assignment_large); let proof_large = Prover::prove(&r1cs_large, &witness_large).unwrap(); assert!(Verifier::verify(&proof_large, &proof_large.public_inputs.clone()).unwrap()); - assert_ne!(proof_large.commit_batch, [0u8; 24]); + assert_ne!(proof_large.commit_batch, [0u8; 16]); // No-overlap path — circuito pequeño let (r1cs_small, witness_small) = example_witness(4, 9); let proof_small = Prover::prove(&r1cs_small, &witness_small).unwrap(); assert!(Verifier::verify(&proof_small, &proof_small.public_inputs.clone()).unwrap()); - assert_ne!(proof_small.commit_batch, [0u8; 24]); + assert_ne!(proof_small.commit_batch, [0u8; 16]); // Circuitos distintos → roots distintos assert_ne!(proof_large.commit_batch, proof_small.commit_batch); @@ -598,6 +635,7 @@ mod zk_tests { /// prove_unchecked con el overlap path también es correcto. #[test] fn e2e_opt6_prove_unchecked_overlap_path() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let n = 4_200usize; let mut r1cs = R1CSInstance::new(n, n + 1, 0); let mut assignment = vec![FieldElement::one()]; @@ -622,6 +660,7 @@ mod zk_tests { /// Flipping a byte in any Merkle sibling invalidates the authentication path. 
#[test] fn e2e_tampered_merkle_sibling_rejected() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let (r1cs, witness) = example_witness(3, 4); let proof = Prover::prove(&r1cs, &witness).unwrap(); let pub_in = proof.public_inputs.clone(); @@ -638,6 +677,7 @@ mod zk_tests { /// Changing a per-query polynomial evaluation corrupts the Merkle leaf check. #[test] fn e2e_tampered_query_evaluation_rejected() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let (r1cs, witness) = example_witness(3, 4); let proof = Prover::prove(&r1cs, &witness).unwrap(); let pub_in = proof.public_inputs.clone(); @@ -658,6 +698,7 @@ mod zk_tests { /// all consistency checks) is correctly rejected. #[test] fn e2e_tampered_challenge_point_rejected() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let (r1cs, witness) = example_witness(3, 4); let proof = Prover::prove(&r1cs, &witness).unwrap(); let pub_in = proof.public_inputs.clone(); @@ -677,6 +718,7 @@ mod zk_tests { /// commit_batch and the evaluation proof — the verifier must detect this. #[test] fn e2e_cross_proof_substitution_rejected() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); let (r1cs_a, witness_a) = example_witness(3, 4); let (r1cs_b, witness_b) = example_witness(5, 6); @@ -698,6 +740,7 @@ mod zk_tests { /// proof bytes (Fiat-Shamir transcript is fully deterministic). #[test] fn e2e_large_circuit_proof_validity() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); // Large-circuit regression: two independently blinded proofs for the // same witness must both verify. Byte equality is no longer expected // because witness blinding uses fresh OS randomness per proof. @@ -727,4 +770,106 @@ mod zk_tests { "blinding must produce distinct commitments across runs" ); } + + // ----------------------------------------------------------------------- + // OPT-S6: STIR protocol e2e tests + // ----------------------------------------------------------------------- + + /// STIR: prove + verify works end-to-end with HYPER_SNARK_PROTOCOL=stir. 
+ #[test] + fn e2e_stir_prove_verify_basic() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); + std::env::set_var("HYPER_SNARK_PROTOCOL", "stir"); + let result = (|| -> Result<bool, Box<dyn std::error::Error>> { + let (r1cs, witness) = example_witness(3, 4); + let proof = Prover::prove(&r1cs, &witness)?; + let pub_in = proof.public_inputs.clone(); + Ok(Verifier::verify(&proof, &pub_in)?) + })(); + std::env::remove_var("HYPER_SNARK_PROTOCOL"); + assert!( + result.expect("STIR prove+verify should not error"), + "STIR proof should verify for (3,4)" + ); + } + + /// STIR: a proof serialized and then deserialized roundtrips correctly. + #[test] + fn e2e_stir_serialization_roundtrip() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); + std::env::set_var("HYPER_SNARK_PROTOCOL", "stir"); + let result = (|| -> Result<bool, Box<dyn std::error::Error>> { + let (r1cs, witness) = example_witness(5, 6); + let proof = Prover::prove(&r1cs, &witness)?; + let bytes = proof.to_bytes()?; + let recovered = hyper_snark::proof::Proof::from_bytes(&bytes)?; + Ok(Verifier::verify( + &recovered, + &recovered.public_inputs.clone(), + )?) + })(); + std::env::remove_var("HYPER_SNARK_PROTOCOL"); + assert!( + result.expect("STIR serialization roundtrip should not error"), + "Deserialized STIR proof should verify" + ); + } + + /// STIR: num_queries reflects STIR_QUERY_ROUNDS (not FRI_QUERY_ROUNDS). + #[test] + fn e2e_stir_num_queries_is_reduced() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); + std::env::set_var("HYPER_SNARK_PROTOCOL", "stir"); + let proof = Prover::prove(&example_witness(3, 4).0, &example_witness(3, 4).1).unwrap(); + std::env::remove_var("HYPER_SNARK_PROTOCOL"); + assert_eq!( + proof.num_queries, + hyper_snark::config::STIR_QUERY_ROUNDS, + "STIR proof should record STIR_QUERY_ROUNDS queries" + ); + } + + /// STIR: tampering with a batch eval still fails.
+ #[test] + fn e2e_stir_tampered_eval_rejected() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); + std::env::set_var("HYPER_SNARK_PROTOCOL", "stir"); + let (r1cs, witness) = example_witness(3, 4); + let mut proof = Prover::prove(&r1cs, &witness).unwrap(); + std::env::remove_var("HYPER_SNARK_PROTOCOL"); + proof.batch_eval.values[0] = FieldElement::from_u64(99999); + let ok = Verifier::verify(&proof, &proof.public_inputs.clone()).unwrap(); + assert!(!ok, "Tampered STIR proof eval must be rejected"); + } + + /// STIR proof is no larger than the equivalent DEEP-FRI proof on the same circuit. + #[test] + fn e2e_stir_proof_smaller_than_deep_fri() { + let _guard = PROTOCOL_LOCK.lock().unwrap(); + // Prove with DEEP-FRI (default) + std::env::remove_var("HYPER_SNARK_PROTOCOL"); + let (r1cs, witness) = example_witness(3, 4); + let proof_fri = Prover::prove(&r1cs, &witness).unwrap(); + let size_fri = proof_fri.to_bytes().unwrap().len(); + + // Prove with STIR + std::env::set_var("HYPER_SNARK_PROTOCOL", "stir"); + let proof_stir = Prover::prove(&r1cs, &witness).unwrap(); + std::env::remove_var("HYPER_SNARK_PROTOCOL"); + let size_stir = proof_stir.to_bytes().unwrap().len(); + + println!( + "Proof sizes — DEEP-FRI: {} bytes, STIR: {} bytes (reduction: {:.1}%)", + size_fri, + size_stir, + (1.0 - size_stir as f64 / size_fri as f64) * 100.0 + ); + + assert!( + size_stir <= size_fri, + "STIR proof ({} B) should not exceed DEEP-FRI proof ({} B)", + size_stir, + size_fri + ); + } }