|
| 1 | +//! Morton 2×2 cascade probe — non-materialized Z-order quadtree over the |
| 2 | +//! gridlake SoA carrier, with Belichtungsmesser-style min/max early-exit. |
| 3 | +//! |
| 4 | +//! ## What this validates (probe-first, codec-agnostic) |
| 5 | +//! |
| 6 | +//! - **4×4 Morton leaf tile = one `F32x16`** ("2bit×2bit": 2 bits X, 2 bits Y = |
| 7 | +//! 16 cells = 64 bytes = one AVX-512 register loaded from `MultiLaneColumn`). |
| 8 | +//! - **Quadtree over T×T tiles** (2×2 per level): total grid `(4T)²` for |
| 9 | +//! `T = 2^k` gives the ladder 64, 256, 1024, 4096, 16384, 64k, 256k. |
| 10 | +//! - **Morton order ⇒ every quadtree node is a contiguous index range**, so the |
| 11 | +//! aggregate (min/max) pyramid is a flat bottom-up reduction (the |
| 12 | +//! Belichtungsmesser "calibrated bands" = per-node value range). |
| 13 | +//! - **Cascade early-exit**: descend the pyramid; if a node's [min,max] can't |
| 14 | +//! intersect the query band [q−r, q+r], prune the whole subtree (the 3-stroke |
| 15 | +//! band-miss generalized). Leaf tile → `F32x16` load + test 16 cells. |
| 16 | +//! |
| 17 | +//! The cell value is a plain `f32` stand-in for the eventual per-cell codec |
| 18 | +//! (palette256 / helix Fisher-2z / Belichtungsmesser band) — wiring that is the |
| 19 | +//! next step; this probe proves the *substrate* (addressing + cascade) is |
| 20 | +//! correct and that the prune actually skips work. |
| 21 | +//! |
| 22 | +//! RUSTFLAGS="-C target-cpu=native" cargo run --release --example morton_cascade_probe |
| 23 | +//! |
| 24 | +//! PASS: cascade count == brute-force count for every (size, query); the |
| 25 | +//! reported prune-rate is the "boost". |
| 26 | +
|
| 27 | +use std::sync::Arc; |
| 28 | + |
| 29 | +use ndarray::simd::{F32x16, MultiLaneColumn}; |
| 30 | + |
| 31 | +/// Interleave the low `bits` of `x` and `y` into a Z-order (Morton) index. |
| 32 | +/// x occupies even output bits, y the odd bits. |
| 33 | +fn morton2d(x: u32, y: u32, bits: u32) -> u32 { |
| 34 | + let mut m = 0u32; |
| 35 | + for b in 0..bits { |
| 36 | + m |= ((x >> b) & 1) << (2 * b); |
| 37 | + m |= ((y >> b) & 1) << (2 * b + 1); |
| 38 | + } |
| 39 | + m |
| 40 | +} |
| 41 | + |
| 42 | +/// Deterministic SplitMix64 → f32 in [0, 1). |
| 43 | +fn splitmix(state: &mut u64) -> f32 { |
| 44 | + *state = state.wrapping_add(0x9E37_79B9_7F4A_7C15); |
| 45 | + let mut z = *state; |
| 46 | + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); |
| 47 | + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); |
| 48 | + z ^= z >> 31; |
| 49 | + ((z >> 40) as f32) / (1u32 << 24) as f32 |
| 50 | +} |
| 51 | + |
| 52 | +/// Build the field in Morton-tile order: |
| 53 | +/// cell(x,y) → idx = morton(tx,ty)·16 + morton_in_tile(ix,iy) |
| 54 | +/// where tile (tx,ty) = (x>>2, y>>2), in-tile (ix,iy) = (x&3, y&3). |
| 55 | +/// Each 4×4 tile is therefore a contiguous 16-f32 (64-byte) `F32x16` chunk. |
| 56 | +fn build_field(t: u32, seed: u64) -> Vec<f32> { |
| 57 | + let side = 4 * t; // grid side |
| 58 | + let n = (side * side) as usize; |
| 59 | + let mut field = vec![0.0f32; n]; |
| 60 | + let k = t.trailing_zeros(); // T = 2^k tiles per side |
| 61 | + let mut st = seed; |
| 62 | + for y in 0..side { |
| 63 | + for x in 0..side { |
| 64 | + let (tx, ty) = (x >> 2, y >> 2); |
| 65 | + let (ix, iy) = (x & 3, y & 3); |
| 66 | + let idx = (morton2d(tx, ty, k) as usize) * 16 + morton2d(ix, iy, 2) as usize; |
| 67 | + // A smooth-ish field + noise so neighbouring tiles share value ranges |
| 68 | + // (otherwise every node spans the full range and nothing prunes). |
| 69 | + let base = ((x as f32) / side as f32 + (y as f32) / side as f32) * 0.5; |
| 70 | + field[idx] = 0.85 * base + 0.15 * splitmix(&mut st); |
| 71 | + } |
| 72 | + } |
| 73 | + field |
| 74 | +} |
| 75 | + |
| 76 | +/// Aggregate (min,max) pyramid over the T² tiles in Morton order. Level 0 = |
| 77 | +/// per-tile range (over its 16 cells); level l = range of 4 level-(l−1) nodes. |
| 78 | +/// A node at level l covers `4^l` contiguous tiles starting at `base`. |
| 79 | +struct Pyramid { |
| 80 | + levels: Vec<Vec<(f32, f32)>>, // levels[0] = per-tile, levels[K] = root |
| 81 | + k: u32, // number of quadtree levels (T = 2^k) |
| 82 | +} |
| 83 | + |
| 84 | +impl Pyramid { |
| 85 | + fn build(field: &[f32], t: u32) -> Self { |
| 86 | + let k = t.trailing_zeros(); |
| 87 | + let n_tiles = (t * t) as usize; |
| 88 | + // Level 0: min/max over each tile's 16 cells. |
| 89 | + let mut lvl0 = Vec::with_capacity(n_tiles); |
| 90 | + for tile in 0..n_tiles { |
| 91 | + let s = &field[tile * 16..tile * 16 + 16]; |
| 92 | + let mut mn = f32::INFINITY; |
| 93 | + let mut mx = f32::NEG_INFINITY; |
| 94 | + for &v in s { |
| 95 | + mn = mn.min(v); |
| 96 | + mx = mx.max(v); |
| 97 | + } |
| 98 | + lvl0.push((mn, mx)); |
| 99 | + } |
| 100 | + let mut levels = vec![lvl0]; |
| 101 | + for l in 1..=k as usize { |
| 102 | + let prev = &levels[l - 1]; |
| 103 | + let mut cur = Vec::with_capacity(prev.len() / 4); |
| 104 | + for node in prev.chunks_exact(4) { |
| 105 | + let mn = node.iter().map(|p| p.0).fold(f32::INFINITY, f32::min); |
| 106 | + let mx = node.iter().map(|p| p.1).fold(f32::NEG_INFINITY, f32::max); |
| 107 | + cur.push((mn, mx)); |
| 108 | + } |
| 109 | + levels.push(cur); |
| 110 | + } |
| 111 | + Pyramid { levels, k } |
| 112 | + } |
| 113 | +} |
| 114 | + |
| 115 | +/// Cascade query: count cells with |value − q| ≤ r, descending the quadtree and |
| 116 | +/// pruning any node whose [min,max] can't intersect [q−r, q+r]. Returns |
| 117 | +/// (count, cells_visited) — cells_visited only counts leaf cells actually tested. |
| 118 | +fn cascade_count(field: &[f32], col: &MultiLaneColumn, pyr: &Pyramid, q: f32, r: f32) -> (usize, usize) { |
| 119 | + let (lo, hi) = (q - r, q + r); |
| 120 | + let mut count = 0usize; |
| 121 | + let mut visited = 0usize; |
| 122 | + // Stack of (level, node_index_within_level). |
| 123 | + let mut stack = vec![(pyr.k as usize, 0usize)]; |
| 124 | + let bytes = col.as_bytes(); |
| 125 | + while let Some((level, node)) = stack.pop() { |
| 126 | + let (mn, mx) = pyr.levels[level][node]; |
| 127 | + if mx < lo || mn > hi { |
| 128 | + continue; // band miss → prune whole subtree (early-exit) |
| 129 | + } |
| 130 | + if level == 0 { |
| 131 | + // Leaf tile = 16 cells = one F32x16 chunk in the SoA column. |
| 132 | + let off = node * 64; // 16 f32 × 4 bytes |
| 133 | + let chunk: [u8; 64] = bytes[off..off + 64].try_into().unwrap(); |
| 134 | + let arr = f32x16_from_bytes(&chunk).to_array(); |
| 135 | + for &v in arr.iter() { |
| 136 | + if (v - q).abs() <= r { |
| 137 | + count += 1; |
| 138 | + } |
| 139 | + } |
| 140 | + visited += 16; |
| 141 | + } else { |
| 142 | + let base = node * 4; |
| 143 | + for c in 0..4 { |
| 144 | + stack.push((level - 1, base + c)); |
| 145 | + } |
| 146 | + } |
| 147 | + } |
| 148 | + let _ = field; // field kept for the brute-force reference; cascade reads the SoA column |
| 149 | + (count, visited) |
| 150 | +} |
| 151 | + |
| 152 | +/// Build an `F32x16` from 64 little-endian bytes (one 4×4 Morton tile). |
| 153 | +fn f32x16_from_bytes(chunk: &[u8; 64]) -> F32x16 { |
| 154 | + let arr: [f32; 16] = core::array::from_fn(|i| { |
| 155 | + let o = i * 4; |
| 156 | + f32::from_le_bytes([chunk[o], chunk[o + 1], chunk[o + 2], chunk[o + 3]]) |
| 157 | + }); |
| 158 | + F32x16::from_array(arr) |
| 159 | +} |
| 160 | + |
| 161 | +fn brute_count(field: &[f32], q: f32, r: f32) -> usize { |
| 162 | + field.iter().filter(|&&v| (v - q).abs() <= r).count() |
| 163 | +} |
| 164 | + |
| 165 | +fn run(t: u32) { |
| 166 | + let side = 4 * t; |
| 167 | + let n = (side * side) as usize; |
| 168 | + let field = build_field(t, 0xC0FFEE ^ t as u64); |
| 169 | + // Wrap the Morton-ordered field bytes in the gridlake SoA carrier. |
| 170 | + let raw: Vec<u8> = field.iter().flat_map(|v| v.to_le_bytes()).collect(); |
| 171 | + let col = MultiLaneColumn::new(Arc::from(raw.into_boxed_slice())).unwrap(); |
| 172 | + let pyr = Pyramid::build(&field, t); |
| 173 | + |
| 174 | + // Three query bands: tight (high prune), medium, broad (low prune). |
| 175 | + let queries = [(0.5f32, 0.02f32), (0.25, 0.10), (0.5, 0.5)]; |
| 176 | + let mut all_ok = true; |
| 177 | + let mut report = String::new(); |
| 178 | + for (q, r) in queries { |
| 179 | + let exp = brute_count(&field, q, r); |
| 180 | + let (got, visited) = cascade_count(&field, &col, &pyr, q, r); |
| 181 | + let ok = got == exp; |
| 182 | + all_ok &= ok; |
| 183 | + let prune = 100.0 * (1.0 - visited as f64 / n as f64); |
| 184 | + report.push_str(&format!( |
| 185 | + " q={q:.2} r={r:.2}: count {got:>7} (exp {exp:>7}) {} visited {visited:>8}/{n} → prune {prune:5.1}%\n", |
| 186 | + if ok { "OK " } else { "MISMATCH" } |
| 187 | + )); |
| 188 | + } |
| 189 | + println!( |
| 190 | + " T={t:>3} tiles/side grid {side}×{side} = {n:>7} cells ({})\n{}", |
| 191 | + if all_ok { "CORRECT" } else { "WRONG" }, |
| 192 | + report |
| 193 | + ); |
| 194 | +} |
| 195 | + |
| 196 | +fn main() { |
| 197 | + println!("== Morton 2×2 quadtree cascade probe (4×4 tile = F32x16, gridlake SoA) ==\n"); |
| 198 | + // T = 2^k → grid (4T)² = the ladder 64, 256, 1024, 4096, 16384, 64k, 256k. |
| 199 | + for t in [2u32, 4, 8, 16, 32, 64, 128] { |
| 200 | + run(t); |
| 201 | + } |
| 202 | +} |
0 commit comments