Skip to content

Commit 62d6aff

Browse files
authored
feat: Cache-aligned bytecode storage (#343)
1 parent d12c163 commit 62d6aff

14 files changed

Lines changed: 350 additions & 39 deletions

File tree

crates/plotnik-cli/src/commands/dump.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ pub fn run(args: DumpArgs) {
8282
query.emit().expect("bytecode emission failed")
8383
};
8484

85-
let module = Module::from_bytes(bytecode).expect("module loading failed");
85+
let module = Module::load(&bytecode).expect("module loading failed");
8686
let colors = Colors::new(args.color);
8787
print!("{}", dump(&module, colors));
8888
}

crates/plotnik-cli/src/commands/infer.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ pub fn run(args: InferArgs) {
9696
}
9797
query.emit().expect("bytecode emission failed")
9898
};
99-
let module = Module::from_bytes(bytecode).expect("module loading failed");
99+
let module = Module::load(&bytecode).expect("module loading failed");
100100

101101
// Emit TypeScript types
102102
let void_type = match args.void_type.as_deref() {

crates/plotnik-cli/src/commands/run_common.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ pub fn prepare_query(input: QueryInput) -> PreparedQuery {
194194
}
195195

196196
let bytecode = emit_linked(&query).expect("emit failed");
197-
let module = Module::from_bytes(bytecode).expect("module load failed");
197+
let module = Module::load(&bytecode).expect("module load failed");
198198

199199
let entrypoint = resolve_entrypoint(&module, input.entry);
200200
let tree = lang.parse(&source_code);
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
//! 64-byte aligned storage for bytecode.
2+
//!
3+
//! Bytecode sections are 64-byte aligned internally. For this alignment to be
4+
//! meaningful at runtime, the buffer itself must start at a 64-byte boundary.
5+
//! A standard `Vec<u8>` only guarantees the 1-byte alignment of `u8`, so it cannot provide this.
6+
7+
use std::ops::Deref;
8+
9+
/// Alignment for bytecode buffers (matches `SECTION_ALIGN`).
pub const ALIGN: usize = 64;

/// One `ALIGN`-byte, `ALIGN`-aligned unit of bytecode storage.
///
/// `repr(C)` with a single byte-array field whose length equals the alignment
/// leaves no padding, so a run of blocks can be viewed as contiguous bytes.
// `align(..)` only accepts an integer literal and therefore cannot name `ALIGN`;
// the constant assertion below keeps the literal and the constant in sync.
#[repr(C, align(64))]
#[derive(Clone, Copy)]
struct Block([u8; ALIGN]);

// Compile-time guard: fail the build if `ALIGN` and the `align(..)` literal above
// ever drift apart, or if the block picks up padding.
const _: () = assert!(
    std::mem::size_of::<Block>() == ALIGN && std::mem::align_of::<Block>() == ALIGN,
    "Block must be exactly ALIGN bytes in size and ALIGN-aligned"
);

/// Immutable 64-byte aligned byte storage.
///
/// Uses `Vec<Block>` internally — `Vec` guarantees element alignment,
/// so the data starts at a 64-byte boundary. No custom allocator needed.
pub struct AlignedVec {
    /// Backing storage, always a whole number of blocks.
    blocks: Vec<Block>,
    /// Number of meaningful bytes at the start of `blocks`.
    len: usize,
}
25+
26+
impl AlignedVec {
27+
/// Copy bytes into aligned storage.
28+
pub fn copy_from_slice(bytes: &[u8]) -> Self {
29+
if bytes.is_empty() {
30+
return Self {
31+
blocks: Vec::new(),
32+
len: 0,
33+
};
34+
}
35+
36+
let num_blocks = bytes.len().div_ceil(64);
37+
let mut blocks = vec![Block([0; 64]); num_blocks];
38+
39+
// Copy block by block to stay safe
40+
for (i, chunk) in bytes.chunks(64).enumerate() {
41+
blocks[i].0[..chunk.len()].copy_from_slice(chunk);
42+
}
43+
44+
Self {
45+
blocks,
46+
len: bytes.len(),
47+
}
48+
}
49+
50+
/// Read a file into aligned storage.
51+
pub fn from_file(path: impl AsRef<std::path::Path>) -> std::io::Result<Self> {
52+
let bytes = std::fs::read(path)?;
53+
Ok(Self::copy_from_slice(&bytes))
54+
}
55+
56+
/// Number of bytes stored.
57+
pub fn len(&self) -> usize {
58+
self.len
59+
}
60+
61+
/// Check if empty.
62+
pub fn is_empty(&self) -> bool {
63+
self.len == 0
64+
}
65+
66+
/// View as byte slice.
67+
pub fn as_slice(&self) -> &[u8] {
68+
if self.blocks.is_empty() {
69+
return &[];
70+
}
71+
if self.len > self.blocks.len() * 64 {
72+
panic!(
73+
"AlignedVec invariant violated: len {} exceeds capacity {}",
74+
self.len,
75+
self.blocks.len() * 64
76+
);
77+
}
78+
// SAFETY: Block is repr(C) with only [u8; 64], so pointer cast is valid.
79+
// We only expose `len` bytes, which were initialized in copy_from_slice.
80+
unsafe { std::slice::from_raw_parts(self.blocks.as_ptr() as *const u8, self.len) }
81+
}
82+
}
83+
84+
/// Lets an `AlignedVec` be used wherever a `&[u8]` is expected.
impl Deref for AlignedVec {
85+
type Target = [u8];
86+
87+
// Delegates to `as_slice`, so only the first `len` bytes are exposed.
fn deref(&self) -> &[u8] {
88+
self.as_slice()
89+
}
90+
}
91+
92+
/// Deep copy: clones the backing block vector and copies the byte length.
// NOTE(review): this is a plain field-wise clone; `#[derive(Clone)]` on the
// struct would presumably generate the same impl — confirm before replacing.
impl Clone for AlignedVec {
93+
fn clone(&self) -> Self {
94+
Self {
95+
blocks: self.blocks.clone(),
96+
len: self.len,
97+
}
98+
}
99+
}
100+
101+
/// Compact debug output: prints the byte length and whether the backing pointer
/// is a multiple of `ALIGN`, rather than the stored bytes.
impl std::fmt::Debug for AlignedVec {
102+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
103+
f.debug_struct("AlignedVec")
104+
.field("len", &self.len)
105+
// Computed at format time from the current backing pointer.
.field("aligned", &(self.blocks.as_ptr() as usize).is_multiple_of(ALIGN))
106+
.finish()
107+
}
108+
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
use super::aligned_vec::{AlignedVec, ALIGN};
2+
3+
// Test helper: true when the pointer's address is a multiple of `ALIGN`.
fn is_aligned(ptr: *const u8) -> bool {
4+
(ptr as usize).is_multiple_of(ALIGN)
5+
}
6+
7+
// 100 bytes span two 64-byte blocks; the data pointer (reached through `Deref`
// to the slice) must sit on an `ALIGN` boundary.
#[test]
8+
fn alignment_guarantee() {
9+
let data: Vec<u8> = (0..100).collect();
10+
let vec = AlignedVec::copy_from_slice(&data);
11+
assert!(is_aligned(vec.as_ptr()));
12+
}
13+
14+
// A small input (5 bytes, less than one block) is stored aligned and reads back
// byte-for-byte.
#[test]
15+
fn copy_from_slice() {
16+
let data = [1u8, 2, 3, 4, 5];
17+
let vec = AlignedVec::copy_from_slice(&data);
18+
19+
assert!(is_aligned(vec.as_ptr()));
20+
assert_eq!(&*vec, &data);
21+
}
22+
23+
// Empty input produces an empty vector: `is_empty`, `len` and `as_slice` all agree.
#[test]
24+
fn empty_slice() {
25+
let vec = AlignedVec::copy_from_slice(&[]);
26+
assert!(vec.is_empty());
27+
assert_eq!(vec.len(), 0);
28+
assert_eq!(vec.as_slice(), &[] as &[u8]);
29+
}
30+
31+
// A clone gets its own backing storage; it must be aligned too and hold the
// same bytes as the source.
#[test]
32+
fn clone_preserves_alignment() {
33+
let data: Vec<u8> = (0..100).collect();
34+
let vec = AlignedVec::copy_from_slice(&data);
35+
let cloned = vec.clone();
36+
37+
assert!(is_aligned(cloned.as_ptr()));
38+
assert_eq!(&*cloned, &*vec);
39+
}
40+
41+
// `Deref<Target = [u8]>` allows both coercion to `&[u8]` and direct indexing.
#[test]
42+
fn deref_to_slice() {
43+
let vec = AlignedVec::copy_from_slice(&[10, 20, 30]);
44+
45+
let slice: &[u8] = &vec;
46+
assert_eq!(slice, &[10, 20, 30]);
47+
assert_eq!(vec[0], 10);
48+
assert_eq!(vec[2], 30);
49+
}
50+
51+
// Multi-block input: 10_000 bytes is 156 full 64-byte blocks plus a 16-byte tail.
#[test]
52+
fn large_data() {
53+
let data: Vec<u8> = (0..10_000).map(|i| (i % 256) as u8).collect();
54+
let vec = AlignedVec::copy_from_slice(&data);
55+
56+
assert!(is_aligned(vec.as_ptr()));
57+
assert_eq!(&*vec, &data[..]);
58+
}
59+
60+
// 37 bytes fill only part of one 64-byte block; the reported length and the
// exposed bytes must cover exactly the input, not the whole block.
#[test]
61+
fn partial_block() {
62+
let data: Vec<u8> = (0..37).collect();
63+
let vec = AlignedVec::copy_from_slice(&data);
64+
65+
assert_eq!(vec.len(), 37);
66+
assert_eq!(&*vec, &data[..]);
67+
}
68+
69+
// 128 bytes is exactly two 64-byte blocks, i.e. there is no partial tail block.
#[test]
70+
fn exact_block_boundary() {
71+
let data: Vec<u8> = (0..128).map(|i| i as u8).collect();
72+
let vec = AlignedVec::copy_from_slice(&data);
73+
74+
assert_eq!(vec.len(), 128);
75+
assert_eq!(&*vec, &data[..]);
76+
}

crates/plotnik-lib/src/bytecode/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
//!
33
//! Implements the binary format specified in `docs/binary-format/`.
44
5+
mod aligned_vec;
56
mod constants;
67
mod dump;
78
mod effects;
@@ -16,6 +17,8 @@ mod nav;
1617
mod sections;
1718
mod type_meta;
1819

20+
pub use aligned_vec::AlignedVec;
21+
1922
pub use constants::{
2023
MAGIC, MAX_MATCH_PAYLOAD_SLOTS, MAX_PRE_EFFECTS, SECTION_ALIGN, STEP_SIZE, VERSION,
2124
};
@@ -56,6 +59,8 @@ pub use ir::{
5659
TrampolineIR,
5760
};
5861

62+
#[cfg(test)]
63+
mod aligned_vec_tests;
5964
#[cfg(test)]
6065
mod effects_tests;
6166
#[cfg(test)]

crates/plotnik-lib/src/bytecode/module.rs

Lines changed: 88 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use std::io;
77
use std::ops::Deref;
88
use std::path::Path;
99

10+
use super::aligned_vec::AlignedVec;
1011
use super::header::{Header, SectionOffsets};
1112
use super::ids::{StringId, TypeId};
1213
use super::instructions::{Call, Match, Opcode, Return, Trampoline};
@@ -31,28 +32,69 @@ fn read_u32_le(bytes: &[u8], offset: usize) -> u32 {
3132
])
3233
}
3334

34-
/// Storage for bytecode bytes.
35-
#[derive(Debug)]
36-
pub struct ByteStorage(Vec<u8>);
35+
/// Storage for bytecode bytes with guaranteed 64-byte alignment.
36+
///
37+
/// All bytecode must be 64-byte aligned for DFA deserialization and cache
38+
/// efficiency. This enum ensures alignment through two paths:
39+
/// - `Static`: Pre-aligned via `include_query_aligned!` macro
40+
/// - `Aligned`: Allocated with 64-byte alignment via `AlignedVec`
///
/// NOTE(review): the variants of a `pub enum` are public, so a caller can write
/// `ByteStorage::Static(bytes)` directly and skip the alignment assertion that
/// `ByteStorage::from_static` performs. The guarantee above therefore holds only
/// for values built through the constructors — confirm whether that is intended.
41+
pub enum ByteStorage {
42+
/// Static bytes from `include_query_aligned!` (zero-copy, pre-aligned).
43+
Static(&'static [u8]),
44+
/// Owned bytes with guaranteed 64-byte alignment.
45+
Aligned(AlignedVec),
46+
}
3747

3848
impl Deref for ByteStorage {
3949
type Target = [u8];
4050

4151
fn deref(&self) -> &Self::Target {
42-
&self.0
52+
match self {
53+
ByteStorage::Static(s) => s,
54+
ByteStorage::Aligned(v) => v,
55+
}
56+
}
57+
}
58+
59+
/// Debug output shows the variant name and the byte length only, not the bytes.
impl std::fmt::Debug for ByteStorage {
60+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61+
match self {
62+
ByteStorage::Static(s) => f.debug_tuple("Static").field(&s.len()).finish(),
63+
ByteStorage::Aligned(v) => f.debug_tuple("Aligned").field(&v.len()).finish(),
64+
}
4365
}
4466
}
4567

4668
impl ByteStorage {
47-
/// Create from owned bytes.
48-
pub fn from_vec(bytes: Vec<u8>) -> Self {
49-
Self(bytes)
69+
/// Create from static bytes (zero-copy).
70+
///
71+
/// The bytes must be 64-byte aligned. Use `include_query_aligned!` macro.
72+
///
73+
/// # Panics
74+
/// Panics if bytes are not 64-byte aligned.
75+
pub fn from_static(bytes: &'static [u8]) -> Self {
76+
assert!(
77+
(bytes.as_ptr() as usize).is_multiple_of(64),
78+
"static bytes must be 64-byte aligned; use include_query_aligned! macro"
79+
);
80+
Self::Static(bytes)
81+
}
82+
83+
/// Create from an aligned vector (from compiler or file read).
84+
pub fn from_aligned(vec: AlignedVec) -> Self {
85+
Self::Aligned(vec)
86+
}
87+
88+
/// Create by copying bytes into aligned storage.
89+
///
90+
/// Use this when receiving bytes from unknown sources (e.g., network).
91+
pub fn copy_from_slice(bytes: &[u8]) -> Self {
92+
Self::Aligned(AlignedVec::copy_from_slice(bytes))
5093
}
5194

52-
/// Read a file into memory.
95+
/// Read a file into aligned storage.
5396
pub fn from_file(path: impl AsRef<Path>) -> io::Result<Self> {
54-
let bytes = std::fs::read(path)?;
55-
Ok(Self(bytes))
97+
Ok(Self::Aligned(AlignedVec::from_file(path)?))
5698
}
5799
}
58100

@@ -118,15 +160,46 @@ pub struct Module {
118160
}
119161

120162
impl Module {
121-
/// Load a module from owned bytes.
122-
pub fn from_bytes(bytes: Vec<u8>) -> Result<Self, ModuleError> {
123-
Self::from_storage(ByteStorage::from_vec(bytes))
163+
/// Load a module from an aligned vector (compiler output).
164+
///
165+
/// This is the primary constructor for bytecode produced by the compiler.
166+
pub fn from_aligned(vec: AlignedVec) -> Result<Self, ModuleError> {
167+
Self::from_storage(ByteStorage::from_aligned(vec))
168+
}
169+
170+
/// Load a module from static bytes (zero-copy).
171+
///
172+
/// Use with `include_query_aligned!` to embed aligned bytecode:
173+
/// ```ignore
174+
/// use plotnik_lib::include_query_aligned;
175+
///
176+
/// let module = Module::from_static(include_query_aligned!("query.ptk.bin"))?;
177+
/// ```
178+
///
179+
/// # Panics
180+
/// Panics if bytes are not 64-byte aligned.
181+
pub fn from_static(bytes: &'static [u8]) -> Result<Self, ModuleError> {
182+
Self::from_storage(ByteStorage::from_static(bytes))
124183
}
125184

126185
/// Load a module from a file path.
186+
///
187+
/// Reads the file into 64-byte aligned storage.
127188
pub fn from_path(path: impl AsRef<Path>) -> Result<Self, ModuleError> {
128-
let storage = ByteStorage::from_file(&path)?;
129-
Self::from_storage(storage)
189+
Self::from_storage(ByteStorage::from_file(&path)?)
190+
}
191+
192+
/// Load a module from arbitrary bytes (copies into aligned storage).
193+
///
194+
/// Use this for bytes from unknown sources (network, etc.). Always copies.
195+
pub fn load(bytes: &[u8]) -> Result<Self, ModuleError> {
196+
Self::from_storage(ByteStorage::copy_from_slice(bytes))
197+
}
198+
199+
/// Load a module from owned bytes (copies into aligned storage).
200+
#[deprecated(since = "0.1.0", note = "use `Module::from_aligned` for AlignedVec or `Module::load` for copying")]
201+
pub fn from_bytes(bytes: Vec<u8>) -> Result<Self, ModuleError> {
202+
Self::load(&bytes)
130203
}
131204

132205
/// Load a module from storage.

0 commit comments

Comments
 (0)