Skip to content

Commit f6deff8

Browse files
committed
feat(hpc/framebuffer): pyramid shader — heat diffusion through cache-aligned levels
The inverse Stufenpyramide IS a GPU shader pipeline, made visible: L1 (64²) → 4 KB → registers/L0 ← inject here L2 (256²) → 64 KB → L1 data cache ← cascade up L3 (1024²) → 1 MB → L2 cache ← cascade up L4 (2048²) → 4 MB → L3 cache ← output surface PyramidShader::inject(x, y, intensity) drops heat at L1. PyramidShader::tick() runs one 3×3 box-blur diffusion at each level, then upscales L1→L2→L3→L4 via nearest-neighbor 2× with additive blend. Global decay on L4 prevents saturation. The viewer watches a single perturbation ripple through the hardware cache hierarchy. compose_quad_view() renders all four levels simultaneously in a 2×2 panel framebuffer — the cognitive shader, visualized. Also: diffuse_step (3×3 box blur), upscale_2x, blit_scaled. Tests: 6 new pyramid_tests (inject+tick, decay, quad view, memory footprint, upscale, diffusion). 30 total framebuffer tests. Module is now 1303 LOC. Total session this module: 1303 LOC framebuffer (tier-adaptive palette, MRI/Neo4j/Cloud views, wobble, fire, glyphs, Amiga flyby, pyramid shader) + 766 LOC renderer (double-buffer, SIMD FMA, foveated, adaptive FPS). 2069 LOC total rendering pipeline. 57 tests pass. https://claude.ai/code/session_01SbYsmmbPf9YQuYbHZN52Zh
1 parent 79a864d commit f6deff8

1 file changed

Lines changed: 271 additions & 0 deletions

File tree

src/hpc/framebuffer.rs

Lines changed: 271 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1030,3 +1030,274 @@ mod visual_tests {
10301030
assert!(label_count > 0, "labels should render");
10311031
}
10321032
}
1033+
1034+
// ─────────────────────────────────────────────────────────────────────
1035+
// Pyramid shader — heat diffusion through the cache-aligned pyramid.
1036+
//
1037+
// The inverse Stufenpyramide IS a GPU shader pipeline:
1038+
// L1 (64²) → 4 KB → registers / L0 cache
1039+
// L2 (256²) → 64 KB → L1 data cache
1040+
//   L3 (4K²)    →  2 MB  → L2 cache (bit) / 16 MB (byte); implemented below as 1024² = 1 MB
1041+
//   L4 (16K²)   → 32 MB  → L3 cache; implemented below as 2048² = 4 MB (output surface)
1042+
//
1043+
// A perturbation enters at L1, diffuses at each level, then upscales
1044+
// into the next level (4× per side for L1→L2 and L2→L3, 2× for L3→L4). Each level is sized for its matching CPU cache tier.
1045+
// The viewer sees cognition ripple through the hardware.
1046+
// ─────────────────────────────────────────────────────────────────────
1047+
1048+
/// Applies one 3×3 box-blur pass, reading from `src` and writing to `dst`.
///
/// Each output pixel is the truncated integer mean of the source pixel and
/// whichever of its eight neighbours fall inside the `width × height` grid,
/// so border pixels average over fewer samples. The mean is then capped at
/// `max_palette`. Both buffers are row-major; `src` is never modified, and
/// any copy-back or buffer swap is left to the caller.
///
/// # Panics
///
/// Panics if `src` or `dst` holds fewer than `width * height` elements.
pub fn diffuse_step(
    src: &[u8], dst: &mut [u8],
    width: usize, height: usize,
    max_palette: u8,
) {
    for row in 0..height {
        // Clamp the vertical window to the grid instead of testing each offset.
        let row_lo = row.saturating_sub(1);
        let row_hi = (row + 1).min(height - 1);
        for col in 0..width {
            let col_lo = col.saturating_sub(1);
            let col_hi = (col + 1).min(width - 1);

            // At most 9 samples of 255 each, so the total fits easily in u16.
            let mut total: u16 = 0;
            for ny in row_lo..=row_hi {
                for nx in col_lo..=col_hi {
                    total += src[ny * width + nx] as u16;
                }
            }
            let samples = ((row_hi - row_lo + 1) * (col_hi - col_lo + 1)) as u16;
            dst[row * width + col] = ((total / samples) as u8).min(max_palette);
        }
    }
}
1074+
1075+
/// Doubles an image in both dimensions with nearest-neighbour sampling.
///
/// `src` is read as a row-major `src_w × src_h` image. Every source pixel is
/// replicated into a 2×2 block of the result. Returns the new buffer together
/// with its width (`src_w * 2`) and height (`src_h * 2`).
///
/// # Panics
///
/// Panics if `src` holds fewer than `src_w * src_h` elements.
pub fn upscale_2x(src: &[u8], src_w: usize, src_h: usize) -> (Vec<u8>, usize, usize) {
    let (out_w, out_h) = (src_w * 2, src_h * 2);
    let mut out = vec![0u8; out_w * out_h];
    if out.is_empty() {
        // Zero-sized input: nothing to replicate (and no zero-length chunking).
        return (out, out_w, out_h);
    }

    // Each band is the pair of output rows produced by one source row.
    for (sy, band) in out.chunks_exact_mut(out_w * 2).enumerate() {
        let src_row = &src[sy * src_w..(sy + 1) * src_w];
        let (upper, lower) = band.split_at_mut(out_w);
        for (sx, &value) in src_row.iter().enumerate() {
            let left = sx * 2;
            upper[left] = value;
            upper[left + 1] = value;
            lower[left] = value;
            lower[left + 1] = value;
        }
    }
    (out, out_w, out_h)
}
1093+
1094+
/// Four-level pyramid shader state.
1095+
///
1096+
/// Each level is a framebuffer at its native resolution. `tick()` runs
1097+
/// one diffusion step at each level, then upscales L1→L2→L3→L4.
1098+
/// Inject heat at L1 via `inject(x, y, intensity)`.
1099+
pub struct PyramidShader {
1100+
/// L1: 64×64 (4 KB).
1101+
pub l1: Vec<u8>,
1102+
/// L2: 256×256 (64 KB).
1103+
pub l2: Vec<u8>,
1104+
/// L3: 1024×1024 (1 MB) — scaled down from 4K for practical display.
1105+
pub l3: Vec<u8>,
1106+
/// L4: 2048×2048 (4 MB) — the output surface.
1107+
pub l4: Vec<u8>,
1108+
/// Scratch buffer for double-buffer diffusion (same size as L4).
1109+
scratch: Vec<u8>,
1110+
/// Palette max (from tier).
1111+
pub palette_max: u8,
1112+
/// Tick counter.
1113+
pub tick: u64,
1114+
}
1115+
1116+
impl PyramidShader {
1117+
pub fn new(palette_max: u8) -> Self {
1118+
Self {
1119+
l1: vec![0u8; 64 * 64],
1120+
l2: vec![0u8; 256 * 256],
1121+
l3: vec![0u8; 1024 * 1024],
1122+
l4: vec![0u8; 2048 * 2048],
1123+
scratch: vec![0u8; 2048 * 2048],
1124+
palette_max,
1125+
tick: 0,
1126+
}
1127+
}
1128+
1129+
/// Inject heat at L1 coordinates (0..64, 0..64).
1130+
pub fn inject(&mut self, x: usize, y: usize, intensity: u8) {
1131+
if x < 64 && y < 64 {
1132+
self.l1[y * 64 + x] = self.l1[y * 64 + x].saturating_add(intensity).min(self.palette_max);
1133+
}
1134+
}
1135+
1136+
/// One shader tick: diffuse each level, then cascade upward.
1137+
///
1138+
/// This IS the cognitive shader made visible. Each level physically
1139+
/// fits its CPU cache tier. The 4× widening at each step IS the
1140+
/// cache hierarchy doubling pattern.
1141+
pub fn tick(&mut self) {
1142+
// 1. Diffuse at each level independently.
1143+
// L1: 64² = 4 KB → runs in registers / L0.
1144+
let mut scratch_l1 = vec![0u8; 64 * 64];
1145+
diffuse_step(&self.l1, &mut scratch_l1, 64, 64, self.palette_max);
1146+
self.l1.copy_from_slice(&scratch_l1);
1147+
1148+
// L2: 256² = 64 KB → runs in L1 data cache.
1149+
let mut scratch_l2 = vec![0u8; 256 * 256];
1150+
diffuse_step(&self.l2, &mut scratch_l2, 256, 256, self.palette_max);
1151+
self.l2.copy_from_slice(&scratch_l2);
1152+
1153+
// L3: 1024² = 1 MB → runs in L2 cache.
1154+
let mut scratch_l3 = vec![0u8; 1024 * 1024];
1155+
diffuse_step(&self.l3, &mut scratch_l3, 1024, 1024, self.palette_max);
1156+
self.l3.copy_from_slice(&scratch_l3);
1157+
1158+
// 2. Cascade: L1 upscales into L2, L2 into L3, L3 into L4.
1159+
// Additive blend (saturating) so existing diffusion + upscaled signal combine.
1160+
let (up1, _, _) = upscale_2x(&self.l1, 64, 64); // 128²
1161+
let (up1b, _, _) = upscale_2x(&up1, 128, 128); // 256²
1162+
for (dst, src) in self.l2.iter_mut().zip(up1b.iter()) {
1163+
*dst = dst.saturating_add(*src).min(self.palette_max);
1164+
}
1165+
1166+
let (up2, _, _) = upscale_2x(&self.l2, 256, 256); // 512²
1167+
let (up2b, _, _) = upscale_2x(&up2, 512, 512); // 1024²
1168+
for (dst, src) in self.l3.iter_mut().zip(up2b.iter()) {
1169+
*dst = dst.saturating_add(*src).min(self.palette_max);
1170+
}
1171+
1172+
let (up3, _, _) = upscale_2x(&self.l3, 1024, 1024); // 2048²
1173+
for (dst, src) in self.l4.iter_mut().zip(up3.iter()) {
1174+
*dst = dst.saturating_add(*src).min(self.palette_max);
1175+
}
1176+
1177+
// 3. Global decay on L4 (prevents saturation).
1178+
for v in self.l4.iter_mut() {
1179+
*v = v.saturating_sub(1);
1180+
}
1181+
1182+
self.tick += 1;
1183+
}
1184+
1185+
/// Compose a 2×2 panel view of all four levels into a framebuffer.
1186+
///
1187+
/// Top-left = L1 (upscaled to panel size), top-right = L2,
1188+
/// bottom-left = L3, bottom-right = L4. Each panel is `pw × ph`.
1189+
pub fn compose_quad_view(&self, fb: &mut Framebuffer) {
1190+
let pw = fb.width / 2;
1191+
let ph = fb.height / 2;
1192+
1193+
// L1 → top-left (upscale from 64² to pw×ph)
1194+
blit_scaled(&self.l1, 64, 64, fb, 0, 0, pw, ph);
1195+
// L2 → top-right (upscale from 256² to pw×ph)
1196+
blit_scaled(&self.l2, 256, 256, fb, pw, 0, pw, ph);
1197+
// L3 → bottom-left (downscale from 1024² to pw×ph)
1198+
blit_scaled(&self.l3, 1024, 1024, fb, 0, ph, pw, ph);
1199+
// L4 → bottom-right (downscale from 2048² to pw×ph)
1200+
blit_scaled(&self.l4, 2048, 2048, fb, pw, ph, pw, ph);
1201+
1202+
fb.dirty = (0, 0, fb.width, fb.height);
1203+
}
1204+
1205+
/// Memory footprint across all levels.
1206+
pub fn memory_bytes(&self) -> usize {
1207+
self.l1.len() + self.l2.len() + self.l3.len() + self.l4.len() + self.scratch.len()
1208+
}
1209+
}
1210+
1211+
/// Nearest-neighbor scale-blit from src (src_w × src_h) into a region
1212+
/// of the framebuffer at (dst_x, dst_y) with size (dst_w × dst_h).
1213+
fn blit_scaled(
1214+
src: &[u8], src_w: usize, src_h: usize,
1215+
fb: &mut Framebuffer,
1216+
dst_x: usize, dst_y: usize,
1217+
dst_w: usize, dst_h: usize,
1218+
) {
1219+
for dy in 0..dst_h {
1220+
let sy = (dy * src_h) / dst_h;
1221+
for dx in 0..dst_w {
1222+
let sx = (dx * src_w) / dst_w;
1223+
let px = dst_x + dx;
1224+
let py = dst_y + dy;
1225+
if px < fb.width && py < fb.height && sy < src_h && sx < src_w {
1226+
fb.pixels[py * fb.width + px] = src[sy * src_w + sx];
1227+
}
1228+
}
1229+
}
1230+
}
1231+
1232+
#[cfg(test)]
mod pyramid_tests {
    use super::*;

    /// Injected heat must blur within L1 and reach L2 and L3 in a single tick.
    #[test]
    fn pyramid_shader_inject_and_tick() {
        let mut ps = PyramidShader::new(15);
        ps.inject(32, 32, 15);
        assert_eq!(ps.l1[32 * 64 + 32], 15);
        ps.tick();
        // After one tick, heat should have diffused to neighbors at L1
        // and cascaded to L2 and L3. L4 is not asserted here: the global
        // decay is applied to it in the same tick.
        assert!(ps.l1[32 * 64 + 33] > 0, "L1 should diffuse right");
        assert!(ps.l2[128 * 256 + 128] > 0, "L2 should receive cascade");
        assert!(ps.l3[512 * 1024 + 512] > 0, "L3 should receive cascade");
    }

    /// With no further injection the output surface must return to zero.
    #[test]
    fn pyramid_shader_decays_to_zero() {
        let mut ps = PyramidShader::new(15);
        ps.inject(32, 32, 15);
        for _ in 0..200 {
            ps.tick();
        }
        let l4_max = ps.l4.iter().copied().max().unwrap_or(0);
        assert_eq!(l4_max, 0, "L4 should decay to zero after enough ticks");
    }

    /// The top-left quadrant of the quad view must show the L1 level.
    #[test]
    fn pyramid_shader_compose_quad_view() {
        let mut ps = PyramidShader::new(15);
        ps.inject(32, 32, 15);
        ps.tick();
        let mut fb = Framebuffer::with_tier(128, 128, PaletteTier::Full16);
        ps.compose_quad_view(&mut fb);
        // Sum only the top-left 64×64 quadrant (rows 0..64, columns 0..64),
        // so the L2 panel to its right cannot make this pass on its own.
        let tl_sum: u32 = fb
            .pixels
            .chunks(128)
            .take(64)
            .map(|row| row[..64].iter().map(|&v| v as u32).sum::<u32>())
            .sum();
        assert!(tl_sum > 0, "L1 panel should show the injection");
    }

    /// Total buffer size stays in the expected ballpark.
    #[test]
    fn pyramid_shader_memory_footprint() {
        let ps = PyramidShader::new(15);
        // L1=4K + L2=64K + L3=1M + L4=4M + scratch=4M ≈ 9.07 MB
        assert!(ps.memory_bytes() > 5_000_000);
        assert!(ps.memory_bytes() < 20_000_000);
    }

    /// Output dimensions double and a uniform image stays uniform.
    #[test]
    fn upscale_2x_doubles_dimensions() {
        let src = vec![5u8; 8 * 8];
        let (dst, w, h) = upscale_2x(&src, 8, 8);
        assert_eq!(w, 16);
        assert_eq!(h, 16);
        assert!(dst.iter().all(|&v| v == 5));
    }

    /// Each source pixel must land in its own 2×2 block (a uniform image
    /// cannot detect misplaced pixels).
    #[test]
    fn upscale_2x_replicates_each_pixel() {
        let (dst, w, h) = upscale_2x(&[1, 2, 3, 4], 2, 2);
        assert_eq!((w, h), (4, 4));
        assert_eq!(
            dst,
            vec![1, 1, 2, 2, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 4, 4]
        );
    }

    /// A single hot pixel must lose heat and spread it to its neighbours.
    #[test]
    fn diffuse_step_smooths_spike() {
        let mut src = vec![0u8; 16 * 16];
        src[8 * 16 + 8] = 15; // single hot pixel
        let mut dst = vec![0u8; 16 * 16];
        diffuse_step(&src, &mut dst, 16, 16, 15);
        // Center should have decreased (averaged with zero neighbors).
        assert!(dst[8 * 16 + 8] < 15);
        // At least one neighbor should be nonzero.
        let neighbor_sum: u16 = [
            dst[7 * 16 + 8], dst[9 * 16 + 8],
            dst[8 * 16 + 7], dst[8 * 16 + 9],
        ].iter().map(|&v| v as u16).sum();
        assert!(neighbor_sum > 0, "diffusion should spread to neighbors");
    }
}

0 commit comments

Comments
 (0)