From 99049564953dae26a8e79928c623b957877bde68 Mon Sep 17 00:00:00 2001 From: Francisco Freitas Date: Mon, 30 Mar 2026 14:23:29 +0000 Subject: [PATCH 1/5] feat: Add whole_values domain and unify constrained-domain selector policy --- src/Invariant.zig | 59 ++++++++++++ src/cgen/emit.zig | 226 ++++++++++++++++++++++++++++++++++++---------- src/cgen/ir.zig | 134 ++++++++++++++++++++++++++- src/main.zig | 3 + src/root.zig | 1 + src/seed.zig | 69 +++++++++++--- 6 files changed, 429 insertions(+), 63 deletions(-) diff --git a/src/Invariant.zig b/src/Invariant.zig index 5f48ae6..c9b4a69 100644 --- a/src/Invariant.zig +++ b/src/Invariant.zig @@ -133,6 +133,65 @@ fn uniqueKey(alloc: std.mem.Allocator, name: []const u8, source_file: []const u8 return name; } +test "applyToGlobals accepts whole_values domain" { + const allocator = std.testing.allocator; + + var globals = std.ArrayList(Parser.Global).empty; + defer Parser.freeGlobals(allocator, &globals); + + const parser_dims = try allocator.alloc(ir.Dimension, 1); + parser_dims[0] = .{ .len = 1, .stride_bytes = 2 }; + + const global_fields = try allocator.alloc(Parser.Field, 1); + global_fields[0] = .{ + .name = try allocator.dupe(u8, "."), + .bit_width = 16, + .is_padding = false, + .dims = parser_dims, + .domain = .top, + }; + + try globals.append(allocator, .{ + .name = try allocator.dupe(u8, "g"), + .source_file = try allocator.dupe(u8, ""), + .size_bytes = 2, + .is_static = false, + .dims = &.{}, + .fields = global_fields, + }); + + var inv_arena = std.heap.ArenaAllocator.init(allocator); + const inv_alloc = inv_arena.allocator(); + const inv_dims = try inv_alloc.alloc(ir.Dimension, 1); + inv_dims[0] = .{ .len = 1, .stride_bytes = 2 }; + const inv_fields = try inv_alloc.alloc(ir.Field, 1); + inv_fields[0] = .{ + .name = try inv_alloc.dupe(u8, "."), + .bit_width = 16, + .dims = inv_dims, + .domain = .{ .whole_values = &.{ &[_]u8{ 0xAA, 0xBB }, &[_]u8{ 0xCC, 0xDD } } }, + .is_padding = false, + }; + const inv_globals = try inv_alloc.alloc(ir.Global, 1); + inv_globals[0] = .{ + .name = try inv_alloc.dupe(u8, "g"), + .source_file = try inv_alloc.dupe(u8, ""), + .size_bytes = 2, + .is_static = false, + .dims = &.{}, + .fields = inv_fields, + }; + var inv = Invariant{ .globals = inv_globals, .arena = inv_arena }; + defer inv.deinit(); + + var apply_arena = std.heap.ArenaAllocator.init(allocator); + defer apply_arena.deinit(); + const result = try inv.applyToGlobals(allocator, apply_arena.allocator(), globals); + defer allocator.free(result.func_symbols); + try std.testing.expect(globals.items[0].fields[0].domain == .whole_values); + try std.testing.expectEqual(@as(usize, 2), globals.items[0].fields[0].domain.whole_values.len); +} + test "applyToGlobals updates domains" { const allocator = std.testing.allocator; diff --git a/src/cgen/emit.zig b/src/cgen/emit.zig index b1170c3..9668514 100644 --- a/src/cgen/emit.zig +++ b/src/cgen/emit.zig @@ -126,6 +126,7 @@ fn emitDomainTables(globals: []const Parser.Global, file: *std.fs.File) !void { var num_buf: [64]u8 = undefined; var bytes_buf: [64]u8 = undefined; var value_idx: usize = 0; + var wval_idx: usize = 0; var ptr_idx: usize = 0; for (globals) |g| { @@ -160,6 +161,32 @@ fn emitDomainTables(globals: []const Parser.Global, file: *std.fs.File) !void { const bytes_def = try std.fmt.bufPrint(&num_buf, "#define {s}_BYTES {s}\n", .{ label, bytes_str }); try file.writeAll(bytes_def); }, + .whole_values => |vals| { + if (vals.len == 0) continue; + const blob_bytes = ir.wholeFieldBytes(f); + const blob_str = try std.fmt.bufPrint(&bytes_buf, "{d}", .{blob_bytes}); + var label_buf: [64]u8 = undefined; + const label = try std.fmt.bufPrint(&label_buf, "FM_WVAL_{d}", .{wval_idx}); + wval_idx += 1; + + try file.writeAll("static const uint8_t "); + try file.writeAll(label); + try file.writeAll("[] = { "); + for (vals, 0..) |v, vi| { + if (vi > 0) try file.writeAll(", "); + for (0..blob_bytes) |bi| { + if (bi > 0) try file.writeAll(", "); + const byte_val: u8 = if (bi < v.len) v[bi] else 0; + const byte_str = try std.fmt.bufPrint(&num_buf, "0x{X:0>2}", .{byte_val}); + try file.writeAll(byte_str); + } + } + try file.writeAll(" };\n"); + const count_str = try std.fmt.bufPrint(&num_buf, "#define {s}_COUNT {d}\n", .{ label, vals.len }); + try file.writeAll(count_str); + const bytes_def = try std.fmt.bufPrint(&num_buf, "#define {s}_BLOB_BYTES {s}\n", .{ label, blob_str }); + try file.writeAll(bytes_def); + }, .pointers => |ptrs| { if (ptrs.len == 0) continue; var ptr_label_buf: [64]u8 = undefined; @@ -182,7 +209,7 @@ fn emitDomainTables(globals: []const Parser.Global, file: *std.fs.File) !void { } } - if (value_idx > 0 or ptr_idx > 0) try file.writeAll("\n"); + if (value_idx > 0 or wval_idx > 0 or ptr_idx > 0) try file.writeAll("\n"); } /// Generate the complete fuzzer C file and `objcopy` redefinition file. @@ -267,6 +294,7 @@ fn emitSampler(allocator: std.mem.Allocator, globals: []const Parser.Global, fil var bytes_buf: [64]u8 = undefined; var value_idx: usize = 0; + var wval_idx: usize = 0; var ptr_idx: usize = 0; try file.writeAll("ptrdiff_t sample_invariant(const uint8_t *data, size_t size) {\n"); try file.writeAll(" size_t off = 0;\n"); @@ -304,6 +332,41 @@ fn emitSampler(allocator: std.mem.Allocator, globals: []const Parser.Global, fil const field_dims_len = f.dims.len; + if (f.domain == .whole_values) { + const vals = f.domain.whole_values; + if (vals.len == 0) continue; + + const offset_expr = try emitOffsetCalc(allocator, g.dims, &.{}, @intCast(f.offset_bits / 8)); + defer allocator.free(offset_expr); + const dst_expr = try std.fmt.allocPrint(allocator, "&{s}[{s}]", .{ mangled, offset_expr }); + defer allocator.free(dst_expr); + const blob_str = try std.fmt.bufPrint(&bytes_buf, "{d}", .{ir.wholeFieldBytes(f)}); + + const current_depth = loop_stack.depth(); + var label_buf: [64]u8 = undefined; + const label = try std.fmt.bufPrint(&label_buf, "FM_WVAL_{d}", .{wval_idx}); + wval_idx += 1; + + if (vals.len > 1) { + try writeIndent(file, current_depth); + try file.writeAll("size_t idx_"); + try file.writeAll(label); + try file.writeAll(" = data[off] % "); + const count_str = try std.fmt.bufPrint(&num_buf, "{d}", .{vals.len}); + try file.writeAll(count_str); + try file.writeAll(";\n"); + var src_buf: [256]u8 = undefined; + const src = try std.fmt.bufPrint(&src_buf, "&{s}[idx_{s} * {s}_BLOB_BYTES]", .{ label, label, label }); + try emitMemcpy(file, current_depth, dst_expr, src, blob_str); + try incrementOffset(file, current_depth, "1"); + } else { + var src_buf: [256]u8 = undefined; + const src = try std.fmt.bufPrint(&src_buf, "&{s}[0]", .{label}); + try emitMemcpy(file, current_depth, dst_expr, src, blob_str); + } + continue; + } + // Open field loops (if any) inside the global loops. for (f.dims, 0..) |d, fi| { const i = global_dims_len + fi; @@ -315,8 +378,6 @@ fn emitSampler(allocator: std.mem.Allocator, globals: []const Parser.Global, fil ); defer allocator.free(offset_expr); - // Access: &mangled[offset_expr] - // Wait, offset_expr might be long. We should allocate. const dst_expr = try std.fmt.allocPrint(allocator, "&{s}[{s}]", .{ mangled, offset_expr }); defer allocator.free(dst_expr); @@ -327,42 +388,61 @@ fn emitSampler(allocator: std.mem.Allocator, globals: []const Parser.Global, fil try incrementOffset(file, current_depth, bytes_str); }, .values => |vals| { - if (vals.len == 0) continue; + if (vals.len == 0) { + try loop_stack.closeLoops(field_dims_len); + continue; + } var label_buf: [64]u8 = undefined; const label = try std.fmt.bufPrint(&label_buf, "FM_VAL_{d}", .{value_idx}); value_idx += 1; - try writeIndent(file, current_depth); - try file.writeAll("size_t idx_"); - try file.writeAll(label); - try file.writeAll(" = data[off] % "); - const count_str = try std.fmt.bufPrint(&num_buf, "{d}", .{vals.len}); - try file.writeAll(count_str); - try file.writeAll(";\n"); - - var src_buf: [256]u8 = undefined; - const src = try std.fmt.bufPrint(&src_buf, "&{s}[idx_{s} * {s}]", .{ label, label, bytes_str }); - try emitMemcpy(file, current_depth, dst_expr, src, bytes_str); - try incrementOffset(file, current_depth, "1"); + if (vals.len > 1) { + try writeIndent(file, current_depth); + try file.writeAll("size_t idx_"); + try file.writeAll(label); + try file.writeAll(" = data[off] % "); + const count_str = try std.fmt.bufPrint(&num_buf, "{d}", .{vals.len}); + try file.writeAll(count_str); + try file.writeAll(";\n"); + + var src_buf: [256]u8 = undefined; + const src = try std.fmt.bufPrint(&src_buf, "&{s}[idx_{s} * {s}]", .{ label, label, bytes_str }); + try emitMemcpy(file, current_depth, dst_expr, src, bytes_str); + try incrementOffset(file, current_depth, "1"); + } else { + var src_buf: [256]u8 = undefined; + const src = try std.fmt.bufPrint(&src_buf, "&{s}[0]", .{label}); + try emitMemcpy(file, current_depth, dst_expr, src, bytes_str); + } }, + .whole_values => unreachable, .pointers => |ptrs| { - if (ptrs.len == 0) continue; + if (ptrs.len == 0) { + try loop_stack.closeLoops(field_dims_len); + continue; + } var ptr_label_buf: [64]u8 = undefined; const ptr_label = try std.fmt.bufPrint(&ptr_label_buf, "FM_PTR_{d}", .{ptr_idx}); ptr_idx += 1; - try writeIndent(file, current_depth); - try file.writeAll("size_t idx_"); - try file.writeAll(ptr_label); - try file.writeAll(" = data[off] % "); - const count_str = try std.fmt.bufPrint(&num_buf, "{d}", .{ptrs.len}); - try file.writeAll(count_str); - try file.writeAll(";\n"); - - var src_buf: [128]u8 = undefined; - const src = try std.fmt.bufPrint(&src_buf, "&{s}[idx_{s}]", .{ ptr_label, ptr_label }); - try emitMemcpy(file, current_depth, dst_expr, src, bytes_str); - try incrementOffset(file, current_depth, "1"); + if (ptrs.len > 1) { + try writeIndent(file, current_depth); + try file.writeAll("size_t idx_"); + try file.writeAll(ptr_label); + try file.writeAll(" = data[off] % "); + const count_str = try std.fmt.bufPrint(&num_buf, "{d}", .{ptrs.len}); + try file.writeAll(count_str); + try file.writeAll(";\n"); + + var src_buf: [128]u8 = undefined; + const src = try std.fmt.bufPrint(&src_buf, "&{s}[idx_{s}]", .{ ptr_label, ptr_label }); + try emitMemcpy(file, current_depth, dst_expr, src, bytes_str); + try incrementOffset(file, current_depth, "1"); + } else { + var src_buf: [128]u8 = undefined; + const src = try std.fmt.bufPrint(&src_buf, "&{s}[0]", .{ptr_label}); + try emitMemcpy(file, current_depth, dst_expr, src, bytes_str); + } }, } @@ -382,6 +462,7 @@ fn emitSampler(allocator: std.mem.Allocator, globals: []const Parser.Global, fil fn emitChecker(allocator: std.mem.Allocator, globals: []const Parser.Global, file: *std.fs.File) !void { var bytes_buf: [64]u8 = undefined; var value_idx: usize = 0; + var wval_idx: usize = 0; var ptr_idx: usize = 0; try file.writeAll("int check_invariant(void) {\n"); @@ -406,25 +487,18 @@ fn emitChecker(allocator: std.mem.Allocator, globals: []const Parser.Global, fil const bytes_str = try std.fmt.bufPrint(&bytes_buf, "{d}", .{bytes}); const field_dims_len = f.dims.len; - // Open field loops (if any) inside the global loops. - for (f.dims, 0..) |d, fi| { - const i = global_dims_len + fi; - try loop_stack.openLoop(d, i); - } - - const current_depth = loop_stack.depth(); - // Skip byte-unaligned regions (bitfields can produce these). - if (f.offset_bits % 8 != 0) { - try loop_stack.closeLoops(field_dims_len); - continue; - } - - const offset_expr = try emitOffsetCalc(allocator, g.dims, f.dims, @intCast(f.offset_bits / 8)); - defer allocator.free(offset_expr); + if (f.offset_bits % 8 != 0) continue; if (f.is_padding) { - // Padding must stay zeroed. + for (f.dims, 0..) |d, fi| { + const i = global_dims_len + fi; + try loop_stack.openLoop(d, i); + } + const current_depth = loop_stack.depth(); + const offset_expr = try emitOffsetCalc(allocator, g.dims, f.dims, @intCast(f.offset_bits / 8)); + defer allocator.free(offset_expr); + try writeIndent(file, current_depth); try file.writeAll("for (size_t i = 0; i < "); try file.writeAll(bytes_str); @@ -437,8 +511,65 @@ fn emitChecker(allocator: std.mem.Allocator, globals: []const Parser.Global, fil try file.writeAll(" + i] != 0) return -1;\n"); try writeIndent(file, current_depth); try file.writeAll("}\n"); - } else switch (f.domain) { + + try loop_stack.closeLoops(field_dims_len); + continue; + } + + if (f.domain == .whole_values) { + const vals = f.domain.whole_values; + if (vals.len == 0) continue; + + const offset_expr = try emitOffsetCalc(allocator, g.dims, &.{}, @intCast(f.offset_bits / 8)); + defer allocator.free(offset_expr); + const current_depth = loop_stack.depth(); + + var label_buf: [64]u8 = undefined; + const label = try std.fmt.bufPrint(&label_buf, "FM_WVAL_{d}", .{wval_idx}); + wval_idx += 1; + + try writeIndent(file, current_depth); + try file.writeAll("{\n"); + try writeIndent(file, current_depth + 1); + try file.writeAll("int found = 0;\n"); + try writeIndent(file, current_depth + 1); + try file.writeAll("for (size_t vi = 0; vi < "); + try file.writeAll(label); + try file.writeAll("_COUNT; vi++) {\n"); + try writeIndent(file, current_depth + 2); + try file.writeAll("if (memcmp(&"); + try file.writeAll(mangled); + try file.writeAll("["); + try file.writeAll(offset_expr); + try file.writeAll("], &"); + try file.writeAll(label); + try file.writeAll("[vi * "); + try file.writeAll(label); + try file.writeAll("_BLOB_BYTES], "); + try file.writeAll(label); + try file.writeAll("_BLOB_BYTES) == 0) { found = 1; break; }\n"); + try writeIndent(file, current_depth + 1); + try file.writeAll("}\n"); + try writeIndent(file, current_depth + 1); + try file.writeAll("if (!found) return -1;\n"); + try writeIndent(file, current_depth); + try file.writeAll("}\n"); + continue; + } + + // Open field loops (if any) inside the global loops. + for (f.dims, 0..) |d, fi| { + const i = global_dims_len + fi; + try loop_stack.openLoop(d, i); + } + + const current_depth = loop_stack.depth(); + const offset_expr = try emitOffsetCalc(allocator, g.dims, f.dims, @intCast(f.offset_bits / 8)); + defer allocator.free(offset_expr); + + switch (f.domain) { .top => {}, + .whole_values => unreachable, .values => |vals| { if (vals.len == 0) { try loop_stack.closeLoops(field_dims_len); @@ -827,7 +958,8 @@ test "emitSampler with .pointers domain" { try emitSampler(alloc, globals, &file); var buf: [4096]u8 = undefined; const out = try readTmpFile(&tmp, "t.c", &buf); - try std.testing.expect(std.mem.indexOf(u8, out, "idx_FM_PTR_0 = data[off] % 1") != null); + try std.testing.expect(std.mem.indexOf(u8, out, "&FM_PTR_0[0]") != null); + try std.testing.expect(std.mem.indexOf(u8, out, "idx_FM_PTR_0") == null); } test "emitSampler skips padding fields" { diff --git a/src/cgen/ir.zig b/src/cgen/ir.zig index 3095b7e..854af5c 100644 --- a/src/cgen/ir.zig +++ b/src/cgen/ir.zig @@ -6,15 +6,87 @@ pub const Dimension = struct { }; /// Domain of a field value: -/// - `top` : unconstrained bytes pulled from the input stream. -/// - `values` : fixed set of literal values to choose from. -/// - `pointers` : allowed pointer targets (by symbol name). +/// - `top` : unconstrained bytes pulled from the input stream. +/// - `values` : fixed set of literal values to choose from (per element). +/// - `whole_values` : fixed set of full-field byte blobs (one blob covers the +/// entire field instance, including all `Field.dims`). +/// - `pointers` : allowed pointer targets (by symbol name). pub const Domain = union(enum) { top, values: []const []const u8, + whole_values: []const []const u8, pointers: []const []const u8, }; +pub const DomainError = error{ + TooManyCandidates, + EmptyWholeValuesDomain, + WholeValuesBlobMismatch, +}; + +/// Product of dimension lengths (1 when `dims` is empty). +pub fn dimsProduct(dims: []const Dimension) usize { + var prod: usize = 1; + for (dims) |d| prod *= d.len; + return prod; +} + +/// Byte width of one scalar element of a field. +pub fn elementBytes(f: Field) usize { + return (f.bit_width + 7) / 8; +} + +/// Total byte span of one field instance (all field array dims), used by `whole_values`. +pub fn wholeFieldBytes(f: Field) usize { + return elementBytes(f) * dimsProduct(f.dims); +} + +/// For constrained domains: 0 bytes when there is at most one candidate, else 1 selector byte. +pub fn constrainedSelectorBytes(domain: Domain) usize { + return switch (domain) { + .top => 0, + .values => |vals| if (vals.len <= 1) 0 else 1, + .whole_values => |vals| if (vals.len <= 1) 0 else 1, + .pointers => |ptrs| if (ptrs.len <= 1) 0 else 1, + }; +} + +/// Rejects candidate lists that cannot be indexed with one byte (>256 choices). +pub fn validateConstrainedDomain(domain: Domain) DomainError!void { + switch (domain) { + .top => {}, + .values => |vals| if (vals.len > 256) return error.TooManyCandidates, + .whole_values => |vals| { + if (vals.len == 0) return error.EmptyWholeValuesDomain; + if (vals.len > 256) return error.TooManyCandidates; + }, + .pointers => |ptrs| if (ptrs.len > 256) return error.TooManyCandidates, + } +} + +/// Validates domain-specific constraints; for `whole_values`, checks candidate blob lengths. +pub fn validateFieldDomain(f: Field) DomainError!void { + try validateConstrainedDomain(f.domain); + switch (f.domain) { + .top, .values, .pointers => {}, + .whole_values => |vals| { + const expected = wholeFieldBytes(f); + for (vals) |blob| { + if (blob.len != expected) return error.WholeValuesBlobMismatch; + } + }, + } +} + +pub fn validateGlobalsDomains(globals: []const Global) DomainError!void { + for (globals) |g| { + for (g.fields) |f| { + if (f.is_padding) continue; + try validateFieldDomain(f); + } + } +} + /// A flattened field inside a global. pub const Field = struct { /// Field name with dot-path semantics, e.g. ".a" or ".a_pad". @@ -46,6 +118,13 @@ pub const Field = struct { } break :blk .{ .values = dup_vals }; }, + .whole_values => |vals| blk: { + const dup_vals = try arena.alloc([]const u8, vals.len); + for (vals, 0..) |v, i| { + dup_vals[i] = try arena.dupe(u8, v); + } + break :blk .{ .whole_values = dup_vals }; + }, .pointers => |ptrs| blk: { const dup_ptrs = try arena.alloc([]const u8, ptrs.len); for (ptrs, 0..) |p, i| { @@ -130,6 +209,23 @@ test "Field.updateDomain with .pointers deep-copies" { try std.testing.expectEqualStrings("handler_a", f.domain.pointers[0]); } +test "Field.updateDomain with .whole_values deep-copies" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + var f = Field{ .name = "", .bit_width = 16 }; + + var src_buf = "AB".*; + const src_slice: []const u8 = &src_buf; + try f.updateDomain(arena.allocator(), .{ .whole_values = &.{src_slice} }); + + try std.testing.expect(f.domain == .whole_values); + try std.testing.expectEqual(@as(usize, 1), f.domain.whole_values.len); + try std.testing.expectEqualStrings("AB", f.domain.whole_values[0]); + + src_buf[0] = 'Z'; + try std.testing.expectEqualStrings("AB", f.domain.whole_values[0]); +} + test "Field.deinit frees owned memory" { const alloc = std.testing.allocator; const dims = try alloc.alloc(Dimension, 1); @@ -142,6 +238,38 @@ test "Field.deinit frees owned memory" { f.deinit(alloc); } +test "validateConstrainedDomain rejects more than 256 candidates" { + var vals: [257][]const u8 = undefined; + for (&vals) |*v| v.* = "x"; + try std.testing.expectError(error.TooManyCandidates, validateConstrainedDomain(.{ .values = &vals })); +} + +test "validateFieldDomain whole_values checks blob length" { + const f_ok: Field = .{ + .name = ".b", + .bit_width = 16, + .dims = &.{.{ .len = 1, .stride_bytes = 2 }}, + .domain = .{ .whole_values = &.{&[_]u8{ 1, 2 }} }, + }; + try validateFieldDomain(f_ok); + + const f_bad: Field = .{ + .name = ".b", + .bit_width = 16, + .dims = &.{.{ .len = 1, .stride_bytes = 2 }}, + .domain = .{ .whole_values = &.{&[_]u8{1}} }, + }; + try std.testing.expectError(error.WholeValuesBlobMismatch, validateFieldDomain(f_bad)); +} + +test "constrainedSelectorBytes" { + try std.testing.expectEqual(@as(usize, 0), constrainedSelectorBytes(.top)); + try std.testing.expectEqual(@as(usize, 0), constrainedSelectorBytes(.{ .values = &.{"a"} })); + try std.testing.expectEqual(@as(usize, 1), constrainedSelectorBytes(.{ .values = &.{ "a", "b" } })); + try std.testing.expectEqual(@as(usize, 0), constrainedSelectorBytes(.{ .whole_values = &.{&[_]u8{1}} })); + try std.testing.expectEqual(@as(usize, 1), constrainedSelectorBytes(.{ .whole_values = &.{ &[_]u8{1}, &[_]u8{2} } })); +} + test "Global.deinit frees fields, name, source_file, and dims" { const alloc = std.testing.allocator; const fields = try alloc.alloc(Field, 1); diff --git a/src/main.zig b/src/main.zig index 352b45f..55a5098 100644 --- a/src/main.zig +++ b/src/main.zig @@ -9,6 +9,7 @@ const clap = @import("clap"); const absolution = @import("absolution"); const Invariant = absolution.Invariant; const Global = absolution.Parser.Global; +const ir = absolution.ir; const seed = absolution.seed; const emit = absolution.emit; @@ -51,6 +52,8 @@ pub fn main() !void { func_symbols = res.func_symbols; } + try ir.validateGlobalsDomains(globals.items); + // Compute needed bytes const needed_bytes = seed.neededBytesFromGlobals(globals.items); diff --git a/src/root.zig b/src/root.zig index d6226f5..d8c4ff6 100644 --- a/src/root.zig +++ b/src/root.zig @@ -9,6 +9,7 @@ pub const Parser = @import("Parser.zig"); pub const seed = @import("seed.zig"); pub const Invariant = @import("Invariant.zig"); pub const emit = @import("cgen/emit.zig"); +pub const ir = @import("cgen/ir.zig"); test { _ = @import("Invariant.zig"); diff --git a/src/seed.zig b/src/seed.zig index ccfbbb6..2130470 100644 --- a/src/seed.zig +++ b/src/seed.zig @@ -7,26 +7,21 @@ const ir = @import("cgen/ir.zig"); pub fn neededBytesFromGlobals(globals: []const ir.Global) usize { var total: usize = 0; for (globals) |g| { - const global_mult = dimsProduct(g.dims); + const global_mult = ir.dimsProduct(g.dims); for (g.fields) |f| { if (f.is_padding) continue; - const field_mult = dimsProduct(f.dims); + const field_mult = ir.dimsProduct(f.dims); const bytes: usize = switch (f.domain) { - .top => (f.bit_width + 7) / 8, - .values, .pointers => 1, + .top => ir.elementBytes(f) * global_mult * field_mult, + .values, .pointers => ir.constrainedSelectorBytes(f.domain) * global_mult * field_mult, + .whole_values => global_mult * (ir.constrainedSelectorBytes(f.domain) + ir.wholeFieldBytes(f)), }; - total += bytes * global_mult * field_mult; + total += bytes; } } return total; } -fn dimsProduct(dims: []const ir.Dimension) usize { - var prod: usize = 1; - for (dims) |d| prod *= d.len; - return prod; -} - // Write a file the size given, no garantee on the content is given pub fn writeSeed(path: []const u8, size: usize) !void { var file = try std.fs.cwd().createFile(path, .{ .truncate = true }); @@ -84,7 +79,33 @@ test "neededBytesFromGlobals counts .top bytes by width" { try std.testing.expectEqual(@as(usize, 4), neededBytesFromGlobals(globals)); } -test "neededBytesFromGlobals counts .values/.pointers as 1 byte" { +test "neededBytesFromGlobals counts multi-candidate .values/.pointers as 1 byte each" { + const fields: []const ir.Field = &.{ + .{ + .name = ".a", + .bit_width = 32, + .is_padding = false, + .domain = .{ .values = &.{ "0xAA", "0xBB" } }, + }, + .{ + .name = ".b", + .bit_width = 64, + .is_padding = false, + .domain = .{ .pointers = &.{ "func", "other" } }, + }, + }; + const globals: []const ir.Global = &.{.{ + .name = "g", + .source_file = "", + .size_bytes = 12, + .is_static = false, + .dims = &.{}, + .fields = @constCast(fields), + }}; + try std.testing.expectEqual(@as(usize, 2), neededBytesFromGlobals(globals)); +} + +test "neededBytesFromGlobals singleton constrained domains use 0 selector bytes" { const fields: []const ir.Field = &.{ .{ .name = ".a", @@ -107,7 +128,29 @@ test "neededBytesFromGlobals counts .values/.pointers as 1 byte" { .dims = &.{}, .fields = @constCast(fields), }}; - try std.testing.expectEqual(@as(usize, 2), neededBytesFromGlobals(globals)); + try std.testing.expectEqual(@as(usize, 0), neededBytesFromGlobals(globals)); +} + +test "neededBytesFromGlobals whole_values counts selector and blob per global instance" { + const fields: []const ir.Field = &.{ + .{ + .name = ".buf", + .bit_width = 8, + .is_padding = false, + .dims = &.{.{ .len = 2, .stride_bytes = 1 }}, + .domain = .{ .whole_values = &.{ &[_]u8{ 1, 2 }, &[_]u8{ 3, 4 } } }, + }, + }; + const globals: []const ir.Global = &.{.{ + .name = "g", + .source_file = "", + .size_bytes = 6, + .is_static = false, + .dims = &.{.{ .len = 3, .stride_bytes = 2 }}, + .fields = @constCast(fields), + }}; + // global_mult=3, per instance: 1 selector + 2 blob bytes => 3 * 3 = 9 + try std.testing.expectEqual(@as(usize, 9), neededBytesFromGlobals(globals)); } test "neededBytesFromGlobals multiplies by global and field dims" { From 823d8d72f4f6c548a2bd6d0db0360070580aab04 Mon Sep 17 00:00:00 2001 From: Francisco Freitas Date: Mon, 30 Mar 2026 14:28:03 +0000 Subject: [PATCH 2/5] fix: align whole_values fuzzer byte count with sampler --- src/cgen/emit.zig | 118 ++++++++++++++++++ src/cgen/ir.zig | 10 ++ src/seed.zig | 30 ++++- .../whole_field_byte_array_invariant/target.c | 11 ++ .../target.c.in | 18 +++ .../target.c.zon | 15 +++ 6 files changed, 198 insertions(+), 4 deletions(-) create mode 100644 tests/whole_field_byte_array_invariant/target.c create mode 100644 tests/whole_field_byte_array_invariant/target.c.in create mode 100644 tests/whole_field_byte_array_invariant/target.c.zon diff --git a/src/cgen/emit.zig b/src/cgen/emit.zig index 9668514..8273f36 100644 --- a/src/cgen/emit.zig +++ b/src/cgen/emit.zig @@ -826,6 +826,34 @@ test "emitDomainTables with .values field" { try std.testing.expect(std.mem.indexOf(u8, out, "FM_VAL_0_BYTES 1") != null); } +test "emitDomainTables with .whole_values field" { + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + var file = try createTmpFile(&tmp, "t.c"); + defer file.close(); + const fields: []Parser.Field = @constCast(&[_]Parser.Field{.{ + .name = ".b", + .bit_width = 8, + .is_padding = false, + .dims = &.{.{ .len = 4, .stride_bytes = 1 }}, + .domain = .{ .whole_values = &.{ &[_]u8{ 1, 2, 3, 4 }, &[_]u8{ 5, 6, 7, 8 } } }, + }}); + const globals: []const Parser.Global = &.{.{ + .name = "pkt", + .source_file = "", + .size_bytes = 4, + .is_static = false, + .dims = &.{}, + .fields = fields, + }}; + try emitDomainTables(globals, &file); + var buf: [4096]u8 = undefined; + const out = try readTmpFile(&tmp, "t.c", &buf); + try std.testing.expect(std.mem.indexOf(u8, out, "FM_WVAL_0") != null); + try std.testing.expect(std.mem.indexOf(u8, out, "FM_WVAL_0_COUNT 2") != null); + try std.testing.expect(std.mem.indexOf(u8, out, "FM_WVAL_0_BLOB_BYTES 4") != null); +} + test "emitDomainTables with .pointers field" { var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); @@ -935,6 +963,67 @@ test "emitSampler with .values domain" { try std.testing.expect(std.mem.indexOf(u8, out, "off += 1") != null); } +test "emitSampler with .whole_values multi-candidate" { + const alloc = std.testing.allocator; + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + var file = try createTmpFile(&tmp, "t.c"); + defer file.close(); + const fields: []Parser.Field = @constCast(&[_]Parser.Field{.{ + .name = ".b", + .bit_width = 8, + .is_padding = false, + .dims = &.{.{ .len = 4, .stride_bytes = 1 }}, + .domain = .{ .whole_values = &.{ &[_]u8{ 1, 2, 3, 4 }, &[_]u8{ 5, 6, 7, 8 } } }, + }}); + const globals: []const Parser.Global = &.{.{ + .name = "pkt", + .source_file = "", + .size_bytes = 4, + .is_static = false, + .dims = &.{}, + .fields = fields, + }}; + try emitSampler(alloc, globals, &file); + var buf: [8192]u8 = undefined; + const out = try readTmpFile(&tmp, "t.c", &buf); + try std.testing.expect(std.mem.indexOf(u8, out, "idx_FM_WVAL_0 = data[off] % 2") != null); + try std.testing.expect(std.mem.indexOf(u8, out, "FM_WVAL_0_BLOB_BYTES") != null); + try std.testing.expect(std.mem.indexOf(u8, out, "memcpy(&pkt[0], &FM_WVAL_0[idx_FM_WVAL_0 * FM_WVAL_0_BLOB_BYTES], 4)") != null); + try std.testing.expect(std.mem.indexOf(u8, out, "off += 1") != null); + // Whole-field path: no per-element field loops for this domain + try std.testing.expect(std.mem.indexOf(u8, out, "for (size_t i1 = 0;") == null); +} + +test "emitSampler with .whole_values singleton uses no selector byte" { + const alloc = std.testing.allocator; + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + var file = try createTmpFile(&tmp, "t.c"); + defer file.close(); + const fields: []Parser.Field = @constCast(&[_]Parser.Field{.{ + .name = ".b", + .bit_width = 8, + .is_padding = false, + .dims = &.{.{ .len = 2, .stride_bytes = 1 }}, + .domain = .{ .whole_values = &.{&[_]u8{ 0xAA, 0xBB }} }, + }}); + const globals: []const Parser.Global = &.{.{ + .name = "pkt", + .source_file = "", + .size_bytes = 2, + .is_static = false, + .dims = &.{}, + .fields = fields, + }}; + try emitSampler(alloc, globals, &file); + var buf: [8192]u8 = undefined; + const out = try readTmpFile(&tmp, "t.c", &buf); + try std.testing.expect(std.mem.indexOf(u8, out, "&FM_WVAL_0[0]") != null); + try std.testing.expect(std.mem.indexOf(u8, out, "idx_FM_WVAL_0") == null); + try std.testing.expect(std.mem.indexOf(u8, out, "off += 1") == null); +} + test "emitSampler with .pointers domain" { const alloc = std.testing.allocator; var tmp = std.testing.tmpDir(.{}); @@ -1043,6 +1132,35 @@ test "emitChecker generates padding zero-check" { try std.testing.expect(std.mem.indexOf(u8, out, "return 0;") != null); } +test "emitChecker generates .whole_values validation" { + const alloc = std.testing.allocator; + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + var file = try createTmpFile(&tmp, "t.c"); + defer file.close(); + const fields: []Parser.Field = @constCast(&[_]Parser.Field{.{ + .name = ".b", + .bit_width = 8, + .is_padding = false, + .dims = &.{.{ .len = 4, .stride_bytes = 1 }}, + .domain = .{ .whole_values = &.{ &[_]u8{ 1, 2, 3, 4 }, &[_]u8{ 5, 6, 7, 8 } } }, + }}); + const globals: []const Parser.Global = &.{.{ + .name = "pkt", + .source_file = "", + .size_bytes = 4, + .is_static = false, + .dims = &.{}, + .fields = fields, + }}; + try emitChecker(alloc, globals, &file); + var buf: [8192]u8 = undefined; + const out = try readTmpFile(&tmp, "t.c", &buf); + try std.testing.expect(std.mem.indexOf(u8, out, "FM_WVAL_0_COUNT") != null); + try std.testing.expect(std.mem.indexOf(u8, out, "FM_WVAL_0_BLOB_BYTES") != null); + try std.testing.expect(std.mem.indexOf(u8, out, "memcmp(&pkt[0], &FM_WVAL_0[vi * FM_WVAL_0_BLOB_BYTES], FM_WVAL_0_BLOB_BYTES)") != null); +} + test "emitChecker generates .values validation" { const alloc = std.testing.allocator; var tmp = std.testing.tmpDir(.{}); diff --git a/src/cgen/ir.zig b/src/cgen/ir.zig index 854af5c..185ee3b 100644 --- a/src/cgen/ir.zig +++ b/src/cgen/ir.zig @@ -244,6 +244,16 @@ test "validateConstrainedDomain rejects more than 256 candidates" { try std.testing.expectError(error.TooManyCandidates, validateConstrainedDomain(.{ .values = &vals })); } +test "validateConstrainedDomain rejects empty whole_values" { + try std.testing.expectError(error.EmptyWholeValuesDomain, validateConstrainedDomain(.{ .whole_values = &.{} })); +} + +test "validateConstrainedDomain rejects more than 256 whole_values candidates" { + var blobs: [257][]const u8 = undefined; + for (&blobs) |*b| b.* = &[_]u8{0}; + try std.testing.expectError(error.TooManyCandidates, validateConstrainedDomain(.{ .whole_values = &blobs })); +} + test "validateFieldDomain whole_values checks blob length" { const f_ok: Field = .{ .name = ".b", diff --git a/src/seed.zig b/src/seed.zig index 2130470..bfd8d38 100644 --- a/src/seed.zig +++ b/src/seed.zig @@ -14,7 +14,8 @@ pub fn neededBytesFromGlobals(globals: []const ir.Global) usize { const bytes: usize = switch (f.domain) { .top => ir.elementBytes(f) * global_mult * field_mult, .values, .pointers => ir.constrainedSelectorBytes(f.domain) * global_mult * field_mult, - .whole_values => global_mult * (ir.constrainedSelectorBytes(f.domain) + ir.wholeFieldBytes(f)), + // Blob bytes come from emitted domain tables, not from the fuzzer stream. + .whole_values => global_mult * ir.constrainedSelectorBytes(f.domain), }; total += bytes; } @@ -131,7 +132,7 @@ test "neededBytesFromGlobals singleton constrained domains use 0 selector bytes" try std.testing.expectEqual(@as(usize, 0), neededBytesFromGlobals(globals)); } -test "neededBytesFromGlobals whole_values counts selector and blob per global instance" { +test "neededBytesFromGlobals whole_values counts selector bytes per global instance only" { const fields: []const ir.Field = &.{ .{ .name = ".buf", @@ -149,8 +150,29 @@ test "neededBytesFromGlobals whole_values counts selector and blob per global in .dims = &.{.{ .len = 3, .stride_bytes = 2 }}, .fields = @constCast(fields), }}; - // global_mult=3, per instance: 1 selector + 2 blob bytes => 3 * 3 = 9 - try std.testing.expectEqual(@as(usize, 9), neededBytesFromGlobals(globals)); + // global_mult=3, multi-candidate => 1 selector byte per instance (blobs are static in C) + try std.testing.expectEqual(@as(usize, 3), neededBytesFromGlobals(globals)); +} + +test "neededBytesFromGlobals whole_values singleton uses zero fuzzer bytes per instance" { + const fields: []const ir.Field = &.{ + .{ + .name = ".b", + .bit_width = 8, + .is_padding = false, + .dims = &.{.{ .len = 4, .stride_bytes = 1 }}, + .domain = .{ .whole_values = &.{&[_]u8{ 1, 2, 3, 4 }} }, + }, + }; + const globals: []const ir.Global = &.{.{ + .name = "pkt", + .source_file = "", + .size_bytes = 4, + .is_static = false, + .dims = &.{.{ .len = 2, .stride_bytes = 4 }}, + .fields = @constCast(fields), + }}; + try std.testing.expectEqual(@as(usize, 0), neededBytesFromGlobals(globals)); } test "neededBytesFromGlobals multiplies by global and field dims" { diff --git a/tests/whole_field_byte_array_invariant/target.c b/tests/whole_field_byte_array_invariant/target.c new file mode 100644 index 0000000..f3b5cef --- /dev/null +++ b/tests/whole_field_byte_array_invariant/target.c @@ -0,0 +1,11 @@ +// Integration test: whole-field `.whole_values` on an inner byte array (field dims). + +typedef struct { + unsigned char b[8]; +} packet_t; + +packet_t pkt; + +void AbsolutionTestRegression(void) { + (void)pkt; +} diff --git a/tests/whole_field_byte_array_invariant/target.c.in b/tests/whole_field_byte_array_invariant/target.c.in new file mode 100644 index 0000000..d7e5703 --- /dev/null +++ b/tests/whole_field_byte_array_invariant/target.c.in @@ -0,0 +1,18 @@ +.{.{ + .name = "pkt", + .source_file = "tests/whole_field_byte_array_invariant/target.c", + .size_bytes = 8, + .is_static = false, + .dims = .{}, + .fields = .{.{ + .name = ".b", + .offset_bits = 0, + .bit_width = 8, + .dims = .{.{ .len = 8, .stride_bytes = 1 }}, + .is_padding = false, + .domain = .{ .whole_values = .{ + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", + } }, + }}, +}} diff --git a/tests/whole_field_byte_array_invariant/target.c.zon b/tests/whole_field_byte_array_invariant/target.c.zon new file mode 100644 index 0000000..43e7cfb --- /dev/null +++ b/tests/whole_field_byte_array_invariant/target.c.zon @@ -0,0 +1,15 @@ +.{.{ + .name = "pkt", + .source_file = "tests/whole_field_byte_array_invariant/target.c", + .size_bytes = 8, + .is_static = false, + .dims = .{}, + .fields = .{.{ + .name = ".b", + .offset_bits = 0, + .bit_width = 8, + .dims = .{.{ .len = 8, .stride_bytes = 1 }}, + .is_padding = false, + .domain = .{ .whole_values = .{ "\x00\x01\x02\x03\x04\x05\x06\x07", "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" } }, + }}, +}} From 0339efe59c467d20d15c26f09f355a0c9388e703 Mon Sep 17 00:00:00 2001 From: Francisco Freitas Date: Mon, 30 Mar 2026 14:31:30 +0000 Subject: [PATCH 3/5] docs: document whole_values domain and selector rules --- CONTRIBUTING.md | 6 ++--- README.md | 2 +- USAGE.md | 37 ++++++++++++++++++++++++++++--- example/protocol_parser/README.md | 5 +++++ 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e112061..8e42689 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -71,7 +71,7 @@ Handles `.zon` invariant files: ### `cgen/ir.zig` Core data structures: -- `Domain`: `.top`, `.values`, `.pointers` +- `Domain`: `.top`, `.values`, `.whole_values`, `.pointers` - `Field`: Flattened field with name, width, dimensions, domain - `Global`: Named global with dimensions and fields @@ -156,8 +156,8 @@ Padding is detected from layout gaps and emitted as `._padN` fields. The emitter produces C code with: - Nested loops for array dimensions (global and field) -- Static arrays for `.values` and `.pointers` domains -- Index-based selection for constrained domains +- Static tables for `.values`, `.whole_values`, and `.pointers` domains +- Index-based selection for constrained domains; `.whole_values` copies one blob per field instance ## Code Style diff --git a/README.md b/README.md index a526920..54dd69d 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Absolution lets you specify an invariant for a program’s global state and fuzz 1. Parse globals from your C translation unit(s) using [aro](https://github.com/Vexu/aro). 2. Build flattened globals containing fields, padding, and domains. -3. Optionally apply a `.zon` invariant to constrain field values. +3. Optionally apply a `.zon` invariant to constrain field values (per-element `.values` / `.pointers`, or whole-field blobs with `.whole_values` on array-shaped fields; see [USAGE.md](USAGE.md)). 4. Emit `fuzzer.c` with sampling, invariant checking, and libFuzzer entrypoint. 5. Emit a symbol redefinition file for `objcopy` (handles `static` globals across translation units). 6. Write an optional seed file sized to the required random bytes. diff --git a/USAGE.md b/USAGE.md index 3f6d3f5..30083c6 100644 --- a/USAGE.md +++ b/USAGE.md @@ -173,9 +173,17 @@ fields: | Domain | Description | Fuzzer bytes used | |--------|-------------|-------------------| -| `.top` | Unconstrained bytes from fuzzer input | `bit_width / 8` | -| `.values` | Fixed literal values (hex strings) | 1 (index selection) | -| `.pointers` | Addresses of listed symbols | 1 (index selection) | +| `.top` | Unconstrained bytes from fuzzer input | `element_bytes ×` global instances `×` field instances (see dimensions below) | +| `.values` | Fixed literal values (hex strings), **per scalar element** | `0` if there is at most one candidate, else `1` selector byte **per element** (each index in the field’s `.dims`, times global array instances) | +| `.whole_values` | Fixed set of **full field-instance** byte blobs (covers the entire field span, including all of the field’s `.dims`) | `0` if there is at most one candidate, else `1` selector byte **per field instance** (global array instances only; not once per inner array element) | +| `.pointers` | Addresses of listed symbols (per element, same indexing as `.values`) | Same selector rule as `.values` | + +Constrained domains (`.values`, `.whole_values`, `.pointers`) allow at most **256** candidates; each multi-candidate domain uses a single selector byte to pick an index. + +**When to use `.values` vs `.whole_values` for array-shaped fields** + +- Use **`.values`** when each array element should be chosen independently from the same small set (or when the field is scalar). The sampler loops over dimensions and spends up to one selector byte per element. +- Use **`.whole_values`** when the entire array (or blob) must be one of a few fixed byte patterns end-to-end. Each candidate blob’s length must equal the field’s total byte span: `(bit_width / 8) × ∏` field dimension lengths. Do not rely on candidate string length alone to imply whole-field semantics; encode intent explicitly with `.whole_values`. ### Example @@ -224,6 +232,29 @@ fields: } } ``` +Whole-field value example (`uint8_t b[8]` must be exactly one of two 8-byte patterns; one selector byte for the field, not eight): + +```zig +.{.{ + .name = "pkt", + .source_file = "my_module.c", + .size_bytes = 8, + .is_static = false, + .dims = .{}, + .fields = .{.{ + .name = ".b", + .offset_bits = 0, + .bit_width = 8, + .dims = .{.{ .len = 8, .stride_bytes = 1 }}, + .is_padding = false, + .domain = .{ .whole_values = .{ + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", + } }, + }}, +}} +``` + ### Field naming conventions - **Scalar fields**: `.field_name` diff --git a/example/protocol_parser/README.md b/example/protocol_parser/README.md index 95bc463..6f243e0 100644 --- a/example/protocol_parser/README.md +++ b/example/protocol_parser/README.md @@ -31,6 +31,11 @@ decode → validate → process pipeline. The generated `LLVMFuzzerTestOneInput` calls `sample_invariant()` to fill both structs from fuzzer input, then passes the remaining bytes to `FuzzDecode()`. +To lock globals to specific value sets, export a `.zon` with `--zon`, edit +`.domain` (e.g. per-element `.values` or whole-array `.whole_values`), and pass +that file as the invariant when regenerating; see [USAGE.md](../../USAGE.md) for +domain encoding and fuzzer-byte accounting. + ### Compile definitions `decoder.c` uses a compile-time `PROTO_MAX_VERSIONS` define (guarded by From 738c8098d0d22833001ecbe56bcef6b1d597845e Mon Sep 17 00:00:00 2001 From: Aymeric Robert Date: Thu, 9 Apr 2026 13:57:44 +0200 Subject: [PATCH 4/5] test: document strided whole_values repro --- .../README.md | 50 +++++++++++++++++++ .../target.c | 14 ++++++ .../target.c.in | 18 +++++++ 3 files changed, 82 insertions(+) create mode 100644 tests/whole_field_strided_struct_member/README.md create mode 100644 tests/whole_field_strided_struct_member/target.c create mode 100644 tests/whole_field_strided_struct_member/target.c.in diff --git a/tests/whole_field_strided_struct_member/README.md b/tests/whole_field_strided_struct_member/README.md new file mode 100644 index 0000000..b14a14d --- /dev/null +++ b/tests/whole_field_strided_struct_member/README.md @@ -0,0 +1,50 @@ +# Test: whole_field_strided_struct_member + +## Purpose + +Demonstrate the limitation of whole field on a strided struct member. + +## Category + +edge-case + +## Issue + +When applying a multi-value whole field domain on an array of struct where the member field is strided, the generated sampler/checker considers the field as contiguous in memory, effectively corrupting the intended layout of the domain. + +## Symptom + +The parser understands the field correctly as strided: + +```zig +.name = ".items.b", +.dims = .{.{ .len = 4, .stride_bytes = 4 }}, +``` + +The prefix accounting also looks correct: + +```c +#define ABSOLUTION_GLOBALS_SIZE 1 +``` + +The suspicious part is the generated `whole_values` code: + +```c +memcpy(&pkt[0], &FM_WVAL_0[idx_FM_WVAL_0 * FM_WVAL_0_BLOB_BYTES], 4); +if (memcmp(&pkt[0], &FM_WVAL_0[vi * FM_WVAL_0_BLOB_BYTES], FM_WVAL_0_BLOB_BYTES) == 0) { found = 1; break; } +``` + +## Root Cause + +`src/cgen/emit.zig` handles `.values` per element through the field-dimension loop, but `.whole_values` skips that path and copies/checks a single dense blob starting from the base field offset. + +For `.items.b` with `.len = 4` and `.stride_bytes = 4`, the logical bytes live at offsets `0, 4, 8, 12`. The generated `whole_values` code instead copies/checks bytes `0..3` contiguously from `&pkt[0]`. + +So the issue is not parsing or selector-byte count, it is the final application/check treating a strided field like a dense blob. + +## Notes + +- Dense array fields such as `uint8_t b[8]` are not affected by this issue. +- This issue appears when the field stride is larger than the element size, for example a field inside an array of structs with other members or padding between consecutive elements. +- Until this is fixed, prefer `.values` over `.whole_values` for strided fields. + diff --git a/tests/whole_field_strided_struct_member/target.c b/tests/whole_field_strided_struct_member/target.c new file mode 100644 index 0000000..9ec1ada --- /dev/null +++ b/tests/whole_field_strided_struct_member/target.c @@ -0,0 +1,14 @@ +// Strided whole-field case: logical field `.items.b` lives at offsets 0, 4, 8, 12. +// The extra bytes are compiler padding created by the explicit alignment. + +typedef struct __attribute__((aligned(4))) { + unsigned char b; +} slot_t; + +typedef struct { + slot_t items[4]; +} packet_t; + +typedef char slot_t_must_be_4_bytes[(sizeof(slot_t) == 4) ? 1 : -1]; + +packet_t pkt; diff --git a/tests/whole_field_strided_struct_member/target.c.in b/tests/whole_field_strided_struct_member/target.c.in new file mode 100644 index 0000000..dc963fe --- /dev/null +++ b/tests/whole_field_strided_struct_member/target.c.in @@ -0,0 +1,18 @@ +.{.{ + .name = "pkt", + .source_file = "tests/whole_field_strided_struct_member/target.c", + .size_bytes = 16, + .is_static = false, + .dims = .{}, + .fields = .{.{ + .name = ".items.b", + .offset_bits = 0, + .bit_width = 8, + .dims = .{.{ .len = 4, .stride_bytes = 4 }}, + .is_padding = false, + .domain = .{ .whole_values = .{ + "\x10\x20\x30\x40", + "\xa0\xb0\xc0\xd0", + } }, + }}, +}} From ac930b90552830f6fb5264babd45f2c3ffcc4ed6 Mon Sep 17 00:00:00 2001 From: Francisco Freitas Date: Thu, 9 Apr 2026 12:30:17 +0000 Subject: [PATCH 5/5] fix(emit): correct whole_values emission for strided struct members Signed-off-by: Francisco Freitas --- src/cgen/emit.zig | 292 +++++++++++++++--- src/cgen/ir.zig | 62 ++++ .../README.md | 45 +-- 3 files changed, 319 insertions(+), 80 deletions(-) diff --git a/src/cgen/emit.zig b/src/cgen/emit.zig index 8273f36..6f13dac 100644 --- a/src/cgen/emit.zig +++ b/src/cgen/emit.zig @@ -90,6 +90,32 @@ fn mangleName(allocator: std.mem.Allocator, path: []const u8, symbol: []const u8 return std.fmt.allocPrint(allocator, "{s}_{s}", .{ sanitized, symbol }); } +/// Build a C expression that indexes into a dense whole_values blob using the +/// loop variables from the field dimensions. The blob stores elements +/// sequentially (no stride gaps), so the offset for element (i_k, i_k+1, ...) +/// is: src_base + (i_k * inner_product_k+1 + i_k+1 * inner_product_k+2 + ...) * elem_bytes +fn emitBlobOffsetExpr( + buf: []u8, + src_base: []const u8, + field_dims: []const ir.Dimension, + global_dims_len: usize, + elem_bytes: usize, +) ![]const u8 { + var stream = std.io.fixedBufferStream(buf); + const w = stream.writer(); + try w.writeAll(src_base); + + var inner_product: usize = elem_bytes; + var i: usize = field_dims.len; + while (i > 0) { + i -= 1; + const idx = global_dims_len + i; + try w.print(" + i{d} * {d}", .{ idx, inner_product }); + inner_product *= field_dims[i].len; + } + return stream.getWritten(); +} + /// Generate offset calculation string: "base + i0*s0 + i1*s1 + ..." fn emitOffsetCalc( allocator: std.mem.Allocator, @@ -336,12 +362,6 @@ fn emitSampler(allocator: std.mem.Allocator, globals: []const Parser.Global, fil const vals = f.domain.whole_values; if (vals.len == 0) continue; - const offset_expr = try emitOffsetCalc(allocator, g.dims, &.{}, @intCast(f.offset_bits / 8)); - defer allocator.free(offset_expr); - const dst_expr = try std.fmt.allocPrint(allocator, "&{s}[{s}]", .{ mangled, offset_expr }); - defer allocator.free(dst_expr); - const blob_str = try std.fmt.bufPrint(&bytes_buf, "{d}", .{ir.wholeFieldBytes(f)}); - const current_depth = loop_stack.depth(); var label_buf: [64]u8 = undefined; const label = try std.fmt.bufPrint(&label_buf, "FM_WVAL_{d}", .{wval_idx}); @@ -355,14 +375,56 @@ fn emitSampler(allocator: std.mem.Allocator, globals: []const Parser.Global, fil const count_str = try std.fmt.bufPrint(&num_buf, "{d}", .{vals.len}); try file.writeAll(count_str); try file.writeAll(";\n"); - var src_buf: [256]u8 = undefined; - const src = try std.fmt.bufPrint(&src_buf, "&{s}[idx_{s} * {s}_BLOB_BYTES]", .{ label, label, label }); - try emitMemcpy(file, current_depth, dst_expr, src, blob_str); - try incrementOffset(file, current_depth, "1"); + } + + if (ir.isWholeFieldDense(f)) { + const offset_expr = try emitOffsetCalc(allocator, g.dims, &.{}, @intCast(f.offset_bits / 8)); + defer allocator.free(offset_expr); + const dst_expr = try std.fmt.allocPrint(allocator, "&{s}[{s}]", .{ mangled, offset_expr }); + defer allocator.free(dst_expr); + const blob_str = try std.fmt.bufPrint(&bytes_buf, "{d}", .{ir.wholeFieldBytes(f)}); + + if (vals.len > 1) { + var src_buf: [256]u8 = undefined; + const src = try std.fmt.bufPrint(&src_buf, "&{s}[idx_{s} * {s}_BLOB_BYTES]", .{ label, label, label }); + try emitMemcpy(file, current_depth, dst_expr, src, blob_str); + } else { + var src_buf: [256]u8 = undefined; + const src = try std.fmt.bufPrint(&src_buf, "&{s}[0]", .{label}); + try emitMemcpy(file, current_depth, dst_expr, src, blob_str); + } } else { + const src_base = if (vals.len > 1) + try std.fmt.allocPrint(allocator, "idx_{s} * {s}_BLOB_BYTES", .{ label, label }) + else + try std.fmt.allocPrint(allocator, "0", .{}); + defer allocator.free(src_base); + + for (f.dims, 0..) |d, fi| { + const i = global_dims_len + fi; + try loop_stack.openLoop(d, i); + } + const inner_depth = loop_stack.depth(); + + const offset_expr = try emitOffsetCalc(allocator, g.dims, f.dims, @intCast(f.offset_bits / 8)); + defer allocator.free(offset_expr); + const dst_expr = try std.fmt.allocPrint(allocator, "&{s}[{s}]", .{ mangled, offset_expr }); + defer allocator.free(dst_expr); + + var blob_off_buf: [256]u8 = undefined; + const elem_bytes = ir.elementBytes(f); + const blob_off_expr = try emitBlobOffsetExpr(&blob_off_buf, src_base, f.dims, global_dims_len, elem_bytes); var src_buf: [256]u8 = undefined; - const src = try std.fmt.bufPrint(&src_buf, "&{s}[0]", .{label}); - try emitMemcpy(file, current_depth, dst_expr, src, blob_str); + const src = try std.fmt.bufPrint(&src_buf, "&{s}[{s}]", .{ label, blob_off_expr }); + + const eb_str = try std.fmt.bufPrint(&bytes_buf, "{d}", .{elem_bytes}); + try emitMemcpy(file, inner_depth, dst_expr, src, eb_str); + + try loop_stack.closeLoops(field_dims_len); + } + + if (vals.len > 1) { + try incrementOffset(file, current_depth, "1"); } continue; } @@ -520,40 +582,99 @@ fn emitChecker(allocator: std.mem.Allocator, globals: []const Parser.Global, fil const vals = f.domain.whole_values; if (vals.len == 0) continue; - const offset_expr = try emitOffsetCalc(allocator, g.dims, &.{}, @intCast(f.offset_bits / 8)); - defer allocator.free(offset_expr); const current_depth = loop_stack.depth(); var label_buf: [64]u8 = undefined; const label = try std.fmt.bufPrint(&label_buf, "FM_WVAL_{d}", .{wval_idx}); wval_idx += 1; - try writeIndent(file, current_depth); - try file.writeAll("{\n"); - try writeIndent(file, current_depth + 1); - try file.writeAll("int found = 0;\n"); - try writeIndent(file, current_depth + 1); - try file.writeAll("for (size_t vi = 0; vi < "); - try file.writeAll(label); - try file.writeAll("_COUNT; vi++) {\n"); - try writeIndent(file, current_depth + 2); - try file.writeAll("if (memcmp(&"); - try file.writeAll(mangled); - try file.writeAll("["); - try file.writeAll(offset_expr); - try file.writeAll("], &"); - try file.writeAll(label); - try file.writeAll("[vi * "); - try file.writeAll(label); - try file.writeAll("_BLOB_BYTES], "); - try file.writeAll(label); - try file.writeAll("_BLOB_BYTES) == 0) { found = 1; break; }\n"); - try writeIndent(file, current_depth + 1); - try file.writeAll("}\n"); - try writeIndent(file, current_depth + 1); - try file.writeAll("if (!found) return -1;\n"); - try writeIndent(file, current_depth); - try file.writeAll("}\n"); + if (ir.isWholeFieldDense(f)) { + const offset_expr = try emitOffsetCalc(allocator, g.dims, &.{}, @intCast(f.offset_bits / 8)); + defer allocator.free(offset_expr); + + try writeIndent(file, current_depth); + try file.writeAll("{\n"); + try writeIndent(file, current_depth + 1); + try file.writeAll("int found = 0;\n"); + try writeIndent(file, current_depth + 1); + try file.writeAll("for (size_t vi = 0; vi < "); + try file.writeAll(label); + try file.writeAll("_COUNT; vi++) {\n"); + try writeIndent(file, current_depth + 2); + try file.writeAll("if (memcmp(&"); + try file.writeAll(mangled); + try file.writeAll("["); + try file.writeAll(offset_expr); + try file.writeAll("], &"); + try file.writeAll(label); + try file.writeAll("[vi * "); + try file.writeAll(label); + try file.writeAll("_BLOB_BYTES], "); + try file.writeAll(label); + try file.writeAll("_BLOB_BYTES) == 0) { found = 1; break; }\n"); + try writeIndent(file, current_depth + 1); + try file.writeAll("}\n"); + try writeIndent(file, current_depth + 1); + try file.writeAll("if (!found) return -1;\n"); + try writeIndent(file, current_depth); + try file.writeAll("}\n"); + } else { + const blob_bytes = ir.wholeFieldBytes(f); + const blob_str = try std.fmt.bufPrint(&bytes_buf, "{d}", .{blob_bytes}); + const elem_bytes = ir.elementBytes(f); + + try writeIndent(file, current_depth); + try file.writeAll("{\n"); + + // Declare a local buffer and gather strided elements into it. + try writeIndent(file, current_depth + 1); + try file.writeAll("uint8_t wvbuf["); + try file.writeAll(blob_str); + try file.writeAll("];\n"); + + for (f.dims, 0..) |d, fi| { + const i = global_dims_len + fi; + try loop_stack.openLoop(d, i); + } + const gather_depth = loop_stack.depth(); + + const offset_expr = try emitOffsetCalc(allocator, g.dims, f.dims, @intCast(f.offset_bits / 8)); + defer allocator.free(offset_expr); + const src_expr = try std.fmt.allocPrint(allocator, "&{s}[{s}]", .{ mangled, offset_expr }); + defer allocator.free(src_expr); + + var blob_off_buf: [256]u8 = undefined; + const blob_off_expr = try emitBlobOffsetExpr(&blob_off_buf, "0", f.dims, global_dims_len, elem_bytes); + var dst_buf: [256]u8 = undefined; + const dst_expr = try std.fmt.bufPrint(&dst_buf, "&wvbuf[{s}]", .{blob_off_expr}); + + var eb_buf: [64]u8 = undefined; + const eb_str_2 = try std.fmt.bufPrint(&eb_buf, "{d}", .{elem_bytes}); + try emitMemcpy(file, gather_depth, dst_expr, src_expr, eb_str_2); + + try loop_stack.closeLoops(field_dims_len); + + try writeIndent(file, current_depth + 1); + try file.writeAll("int found = 0;\n"); + try writeIndent(file, current_depth + 1); + try file.writeAll("for (size_t vi = 0; vi < "); + try file.writeAll(label); + try file.writeAll("_COUNT; vi++) {\n"); + try writeIndent(file, current_depth + 2); + try file.writeAll("if (memcmp(wvbuf, &"); + try file.writeAll(label); + try file.writeAll("[vi * "); + try file.writeAll(label); + try file.writeAll("_BLOB_BYTES], "); + try file.writeAll(label); + try file.writeAll("_BLOB_BYTES) == 0) { found = 1; break; }\n"); + try writeIndent(file, current_depth + 1); + try file.writeAll("}\n"); + try writeIndent(file, current_depth + 1); + try file.writeAll("if (!found) return -1;\n"); + try writeIndent(file, current_depth); + try file.writeAll("}\n"); + } continue; } @@ -1024,6 +1145,66 @@ test "emitSampler with .whole_values singleton uses no selector byte" { try std.testing.expect(std.mem.indexOf(u8, out, "off += 1") == null); } +test "emitSampler with strided .whole_values scatters elements" { + const alloc = std.testing.allocator; + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + var file = try createTmpFile(&tmp, "t.c"); + defer file.close(); + const fields: []Parser.Field = @constCast(&[_]Parser.Field{.{ + .name = ".items.b", + .bit_width = 8, + .is_padding = false, + .dims = &.{.{ .len = 4, .stride_bytes = 4 }}, + .domain = .{ .whole_values = &.{ &[_]u8{ 0x10, 0x20, 0x30, 0x40 }, &[_]u8{ 0xa0, 0xb0, 0xc0, 0xd0 } } }, + }}); + const globals: []const Parser.Global = &.{.{ + .name = "pkt", + .source_file = "", + .size_bytes = 16, + .is_static = false, + .dims = &.{}, + .fields = fields, + }}; + try emitSampler(alloc, globals, &file); + var buf: [8192]u8 = undefined; + const out = try readTmpFile(&tmp, "t.c", &buf); + try std.testing.expect(std.mem.indexOf(u8, out, "idx_FM_WVAL_0 = data[off] % 2") != null); + try std.testing.expect(std.mem.indexOf(u8, out, "for (size_t i0 = 0; i0 < 4; i0++)") != null); + try std.testing.expect(std.mem.indexOf(u8, out, "memcpy(&pkt[0 + i0 * 4], &FM_WVAL_0[idx_FM_WVAL_0 * FM_WVAL_0_BLOB_BYTES + i0 * 1], 1)") != null); + try std.testing.expect(std.mem.indexOf(u8, out, "off += 1") != null); +} + +test "emitSampler with strided .whole_values singleton scatters without selector" { + const alloc = std.testing.allocator; + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + var file = try createTmpFile(&tmp, "t.c"); + defer file.close(); + const fields: []Parser.Field = @constCast(&[_]Parser.Field{.{ + .name = ".items.b", + .bit_width = 8, + .is_padding = false, + .dims = &.{.{ .len = 4, .stride_bytes = 4 }}, + .domain = .{ .whole_values = &.{&[_]u8{ 0x10, 0x20, 0x30, 0x40 }} }, + }}); + const globals: []const Parser.Global = &.{.{ + .name = "pkt", + .source_file = "", + .size_bytes = 16, + .is_static = false, + .dims = &.{}, + .fields = fields, + }}; + try emitSampler(alloc, globals, &file); + var buf: [8192]u8 = undefined; + const out = try readTmpFile(&tmp, "t.c", &buf); + try std.testing.expect(std.mem.indexOf(u8, out, "for (size_t i0 = 0; i0 < 4; i0++)") != null); + try std.testing.expect(std.mem.indexOf(u8, out, "memcpy(&pkt[0 + i0 * 4], &FM_WVAL_0[0 + i0 * 1], 1)") != null); + try std.testing.expect(std.mem.indexOf(u8, out, "idx_FM_WVAL_0") == null); + try std.testing.expect(std.mem.indexOf(u8, out, "off += 1") == null); +} + test "emitSampler with .pointers domain" { const alloc = std.testing.allocator; var tmp = std.testing.tmpDir(.{}); @@ -1161,6 +1342,37 @@ test "emitChecker generates .whole_values validation" { try std.testing.expect(std.mem.indexOf(u8, out, "memcmp(&pkt[0], &FM_WVAL_0[vi * FM_WVAL_0_BLOB_BYTES], FM_WVAL_0_BLOB_BYTES)") != null); } +test "emitChecker generates strided .whole_values gather-then-compare" { + const alloc = std.testing.allocator; + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + var file = try createTmpFile(&tmp, "t.c"); + defer file.close(); + const fields: []Parser.Field = @constCast(&[_]Parser.Field{.{ + .name = ".items.b", + .bit_width = 8, + .is_padding = false, + .dims = &.{.{ .len = 4, .stride_bytes = 4 }}, + .domain = .{ .whole_values = &.{ &[_]u8{ 0x10, 0x20, 0x30, 0x40 }, &[_]u8{ 0xa0, 0xb0, 0xc0, 0xd0 } } }, + }}); + const globals: []const Parser.Global = &.{.{ + .name = "pkt", + .source_file = "", + .size_bytes = 16, + .is_static = false, + .dims = &.{}, + .fields = fields, + }}; + try emitChecker(alloc, globals, &file); + var buf: [8192]u8 = undefined; + const out = try readTmpFile(&tmp, "t.c", &buf); + try std.testing.expect(std.mem.indexOf(u8, out, "uint8_t wvbuf[4]") != null); + try std.testing.expect(std.mem.indexOf(u8, out, "for (size_t i0 = 0; i0 < 4; i0++)") != null); + try std.testing.expect(std.mem.indexOf(u8, out, "memcpy(&wvbuf[0 + i0 * 1], &pkt[0 + i0 * 4], 1)") != null); + try std.testing.expect(std.mem.indexOf(u8, out, "memcmp(wvbuf, &FM_WVAL_0[vi * FM_WVAL_0_BLOB_BYTES], FM_WVAL_0_BLOB_BYTES)") != null); + try std.testing.expect(std.mem.indexOf(u8, out, "if (!found) return -1") != null); +} + test "emitChecker generates .values validation" { const alloc = std.testing.allocator; var tmp = std.testing.tmpDir(.{}); diff --git a/src/cgen/ir.zig b/src/cgen/ir.zig index 185ee3b..0e6cd1a 100644 --- a/src/cgen/ir.zig +++ b/src/cgen/ir.zig @@ -41,6 +41,21 @@ pub fn wholeFieldBytes(f: Field) usize { return elementBytes(f) * dimsProduct(f.dims); } +/// True when the field's array elements are packed without gaps, so a single +/// contiguous memcpy/memcmp over `wholeFieldBytes` is correct. False when +/// stride > element size (e.g. a member inside an array-of-structs with padding). +pub fn isWholeFieldDense(f: Field) bool { + if (f.dims.len == 0) return true; + var expected_stride: usize = elementBytes(f); + var i: usize = f.dims.len; + while (i > 0) { + i -= 1; + if (f.dims[i].stride_bytes != expected_stride) return false; + expected_stride *= f.dims[i].len; + } + return true; +} + /// For constrained domains: 0 bytes when there is at most one candidate, else 1 selector byte. pub fn constrainedSelectorBytes(domain: Domain) usize { return switch (domain) { @@ -272,6 +287,53 @@ test "validateFieldDomain whole_values checks blob length" { try std.testing.expectError(error.WholeValuesBlobMismatch, validateFieldDomain(f_bad)); } +test "isWholeFieldDense scalar" { + const f: Field = .{ .name = ".x", .bit_width = 32 }; + try std.testing.expect(isWholeFieldDense(f)); +} + +test "isWholeFieldDense dense 1D" { + const f: Field = .{ + .name = ".b", + .bit_width = 8, + .dims = &.{.{ .len = 4, .stride_bytes = 1 }}, + }; + try std.testing.expect(isWholeFieldDense(f)); +} + +test "isWholeFieldDense strided 1D" { + const f: Field = .{ + .name = ".b", + .bit_width = 8, + .dims = &.{.{ .len = 4, .stride_bytes = 4 }}, + }; + try std.testing.expect(!isWholeFieldDense(f)); +} + +test "isWholeFieldDense dense 2D" { + const f: Field = .{ + .name = ".b", + .bit_width = 8, + .dims = &.{ + .{ .len = 3, .stride_bytes = 4 }, + .{ .len = 4, .stride_bytes = 1 }, + }, + }; + try std.testing.expect(isWholeFieldDense(f)); +} + +test "isWholeFieldDense strided 2D" { + const f: Field = .{ + .name = ".b", + .bit_width = 8, + .dims = &.{ + .{ .len = 3, .stride_bytes = 8 }, + .{ .len = 4, .stride_bytes = 1 }, + }, + }; + try std.testing.expect(!isWholeFieldDense(f)); +} + test "constrainedSelectorBytes" { try std.testing.expectEqual(@as(usize, 0), constrainedSelectorBytes(.top)); try std.testing.expectEqual(@as(usize, 0), constrainedSelectorBytes(.{ .values = &.{"a"} })); diff --git a/tests/whole_field_strided_struct_member/README.md b/tests/whole_field_strided_struct_member/README.md index b14a14d..4b1ada0 100644 --- a/tests/whole_field_strided_struct_member/README.md +++ b/tests/whole_field_strided_struct_member/README.md @@ -2,49 +2,14 @@ ## Purpose -Demonstrate the limitation of whole field on a strided struct member. +Verify that `.whole_values` correctly scatters/gathers elements for strided struct members (stride > element size). ## Category -edge-case +edge-case (fixed) -## Issue +## Background -When applying a multi-value whole field domain on an array of struct where the member field is strided, the generated sampler/checker considers the field as contiguous in memory, effectively corrupting the intended layout of the domain. - -## Symptom - -The parser understands the field correctly as strided: - -```zig -.name = ".items.b", -.dims = .{.{ .len = 4, .stride_bytes = 4 }}, -``` - -The prefix accounting also looks correct: - -```c -#define ABSOLUTION_GLOBALS_SIZE 1 -``` - -The suspicious part is the generated `whole_values` code: - -```c -memcpy(&pkt[0], &FM_WVAL_0[idx_FM_WVAL_0 * FM_WVAL_0_BLOB_BYTES], 4); -if (memcmp(&pkt[0], &FM_WVAL_0[vi * FM_WVAL_0_BLOB_BYTES], FM_WVAL_0_BLOB_BYTES) == 0) { found = 1; break; } -``` - -## Root Cause - -`src/cgen/emit.zig` handles `.values` per element through the field-dimension loop, but `.whole_values` skips that path and copies/checks a single dense blob starting from the base field offset. - -For `.items.b` with `.len = 4` and `.stride_bytes = 4`, the logical bytes live at offsets `0, 4, 8, 12`. The generated `whole_values` code instead copies/checks bytes `0..3` contiguously from `&pkt[0]`. - -So the issue is not parsing or selector-byte count, it is the final application/check treating a strided field like a dense blob. - -## Notes - -- Dense array fields such as `uint8_t b[8]` are not affected by this issue. -- This issue appears when the field stride is larger than the element size, for example a field inside an array of structs with other members or padding between consecutive elements. -- Until this is fixed, prefer `.values` over `.whole_values` for strided fields. +For `.items.b` with `.len = 4` and `.stride_bytes = 4`, the logical bytes live at memory offsets `0, 4, 8, 12`. The emitter must scatter each blob byte to its strided position in the sampler and gather them back before comparison in the checker — not treat the blob as a single contiguous run at the base offset. +Dense array fields (`stride == element size`) are unaffected; those use a single `memcpy`/`memcmp`.