Skip to content

Commit 4f6ddce

Browse files
committed
feat: add bytecode IDs, constants, and header
Add foundational bytecode types: - constants: magic bytes, version, section alignment - ids: StepId, StringId, QTypeId newtypes - header: 64-byte file header struct
1 parent fd9b4b4 commit 4f6ddce

9 files changed

Lines changed: 467 additions & 56 deletions

File tree

crates/plotnik-lib/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,4 @@ default = ["plotnik-langs"]
3030
[dev-dependencies]
3131
insta = { version = "=1.45.1", features = ["yaml"] }
3232
indoc = "=2.0.7"
33-
serde_json = "=1.0.148"
33+
serde_json = "=1.0.148"
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
//! Bytecode format constants.
2+
3+
// Re-export primitive type constants from the shared type system
4+
pub use crate::type_system::{TYPE_CUSTOM_START, TYPE_NODE, TYPE_STRING, TYPE_VOID};
5+
6+
/// Magic bytes identifying a Plotnik bytecode file.
///
/// Stored in the `magic` field of the file header, i.e. bytes `0..4` of the file.
pub const MAGIC: [u8; 4] = *b"PTKQ";

/// Current bytecode format version.
///
/// Stored in the `version` field of the file header.
pub const VERSION: u32 = 1;

/// Terminal step - accept state.
///
/// This is the raw value behind `StepId::ACCEPT`.
pub const STEP_ACCEPT: u16 = 0;

/// Section alignment in bytes.
pub const SECTION_ALIGN: usize = 64;

/// Step size in bytes (all instructions are 8-byte aligned).
///
/// `StepId::byte_offset` multiplies a step index by this value.
pub const STEP_SIZE: usize = 8;
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
//! Bytecode file header (64 bytes).
2+
3+
use super::{MAGIC, VERSION};
4+
5+
/// File header - first 64 bytes of the bytecode file.
6+
///
7+
/// Note: TypeMeta sub-section counts are stored in the TypeMetaHeader,
8+
/// not in the main header. See type_meta.rs for details.
9+
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
10+
#[repr(C, align(64))]
11+
pub struct Header {
12+
/// Magic bytes: b"PTKQ"
13+
pub magic: [u8; 4],
14+
/// Format version (currently 1)
15+
pub version: u32,
16+
/// CRC32 checksum of everything after the header
17+
pub checksum: u32,
18+
/// Total file size in bytes
19+
pub total_size: u32,
20+
21+
// Section offsets (absolute byte offsets)
22+
pub str_blob_offset: u32,
23+
pub str_table_offset: u32,
24+
pub node_types_offset: u32,
25+
pub node_fields_offset: u32,
26+
pub trivia_offset: u32,
27+
pub type_meta_offset: u32,
28+
pub entrypoints_offset: u32,
29+
pub transitions_offset: u32,
30+
31+
// Element counts (type counts are in TypeMetaHeader at type_meta_offset)
32+
pub str_table_count: u16,
33+
pub node_types_count: u16,
34+
pub node_fields_count: u16,
35+
pub trivia_count: u16,
36+
pub entrypoints_count: u16,
37+
pub transitions_count: u16,
38+
/// Padding to maintain 64-byte size.
39+
pub(crate) _pad: u32,
40+
}
41+
42+
const _: () = assert!(std::mem::size_of::<Header>() == 64);
43+
44+
impl Default for Header {
45+
fn default() -> Self {
46+
Self {
47+
magic: MAGIC,
48+
version: VERSION,
49+
checksum: 0,
50+
total_size: 0,
51+
str_blob_offset: 0,
52+
str_table_offset: 0,
53+
node_types_offset: 0,
54+
node_fields_offset: 0,
55+
trivia_offset: 0,
56+
type_meta_offset: 0,
57+
entrypoints_offset: 0,
58+
transitions_offset: 0,
59+
str_table_count: 0,
60+
node_types_count: 0,
61+
node_fields_count: 0,
62+
trivia_count: 0,
63+
entrypoints_count: 0,
64+
transitions_count: 0,
65+
_pad: 0,
66+
}
67+
}
68+
}
69+
70+
impl Header {
71+
/// Decode header from 64 bytes.
72+
pub fn from_bytes(bytes: &[u8]) -> Self {
73+
assert!(bytes.len() >= 64, "header too short");
74+
75+
Self {
76+
magic: [bytes[0], bytes[1], bytes[2], bytes[3]],
77+
version: u32::from_le_bytes([bytes[4], bytes[5], bytes[6], bytes[7]]),
78+
checksum: u32::from_le_bytes([bytes[8], bytes[9], bytes[10], bytes[11]]),
79+
total_size: u32::from_le_bytes([bytes[12], bytes[13], bytes[14], bytes[15]]),
80+
str_blob_offset: u32::from_le_bytes([bytes[16], bytes[17], bytes[18], bytes[19]]),
81+
str_table_offset: u32::from_le_bytes([bytes[20], bytes[21], bytes[22], bytes[23]]),
82+
node_types_offset: u32::from_le_bytes([bytes[24], bytes[25], bytes[26], bytes[27]]),
83+
node_fields_offset: u32::from_le_bytes([bytes[28], bytes[29], bytes[30], bytes[31]]),
84+
trivia_offset: u32::from_le_bytes([bytes[32], bytes[33], bytes[34], bytes[35]]),
85+
type_meta_offset: u32::from_le_bytes([bytes[36], bytes[37], bytes[38], bytes[39]]),
86+
entrypoints_offset: u32::from_le_bytes([bytes[40], bytes[41], bytes[42], bytes[43]]),
87+
transitions_offset: u32::from_le_bytes([bytes[44], bytes[45], bytes[46], bytes[47]]),
88+
str_table_count: u16::from_le_bytes([bytes[48], bytes[49]]),
89+
node_types_count: u16::from_le_bytes([bytes[50], bytes[51]]),
90+
node_fields_count: u16::from_le_bytes([bytes[52], bytes[53]]),
91+
trivia_count: u16::from_le_bytes([bytes[54], bytes[55]]),
92+
entrypoints_count: u16::from_le_bytes([bytes[56], bytes[57]]),
93+
transitions_count: u16::from_le_bytes([bytes[58], bytes[59]]),
94+
_pad: u32::from_le_bytes([bytes[60], bytes[61], bytes[62], bytes[63]]),
95+
}
96+
}
97+
98+
/// Encode header to 64 bytes.
99+
pub fn to_bytes(&self) -> [u8; 64] {
100+
let mut bytes = [0u8; 64];
101+
bytes[0..4].copy_from_slice(&self.magic);
102+
bytes[4..8].copy_from_slice(&self.version.to_le_bytes());
103+
bytes[8..12].copy_from_slice(&self.checksum.to_le_bytes());
104+
bytes[12..16].copy_from_slice(&self.total_size.to_le_bytes());
105+
bytes[16..20].copy_from_slice(&self.str_blob_offset.to_le_bytes());
106+
bytes[20..24].copy_from_slice(&self.str_table_offset.to_le_bytes());
107+
bytes[24..28].copy_from_slice(&self.node_types_offset.to_le_bytes());
108+
bytes[28..32].copy_from_slice(&self.node_fields_offset.to_le_bytes());
109+
bytes[32..36].copy_from_slice(&self.trivia_offset.to_le_bytes());
110+
bytes[36..40].copy_from_slice(&self.type_meta_offset.to_le_bytes());
111+
bytes[40..44].copy_from_slice(&self.entrypoints_offset.to_le_bytes());
112+
bytes[44..48].copy_from_slice(&self.transitions_offset.to_le_bytes());
113+
bytes[48..50].copy_from_slice(&self.str_table_count.to_le_bytes());
114+
bytes[50..52].copy_from_slice(&self.node_types_count.to_le_bytes());
115+
bytes[52..54].copy_from_slice(&self.node_fields_count.to_le_bytes());
116+
bytes[54..56].copy_from_slice(&self.trivia_count.to_le_bytes());
117+
bytes[56..58].copy_from_slice(&self.entrypoints_count.to_le_bytes());
118+
bytes[58..60].copy_from_slice(&self.transitions_count.to_le_bytes());
119+
bytes[60..64].copy_from_slice(&self._pad.to_le_bytes());
120+
bytes
121+
}
122+
123+
pub fn validate_magic(&self) -> bool {
124+
self.magic == MAGIC
125+
}
126+
127+
pub fn validate_version(&self) -> bool {
128+
self.version == VERSION
129+
}
130+
}
131+
132+
#[cfg(test)]
mod tests {
    use super::*;

    /// The in-memory struct must stay exactly one encoded header wide.
    #[test]
    fn header_size() {
        assert_eq!(std::mem::size_of::<Header>(), 64);
    }

    /// A default header identifies itself as a current-version file and is
    /// otherwise empty.
    #[test]
    fn header_default() {
        let header = Header::default();
        assert!(header.validate_magic());
        assert!(header.validate_version());
        assert_eq!(header.total_size, 0);
    }

    /// Encoding then decoding must reproduce every field.
    #[test]
    fn header_roundtrip() {
        let original = Header {
            magic: MAGIC,
            version: VERSION,
            checksum: 0x12345678,
            total_size: 1024,
            str_blob_offset: 64,
            str_table_offset: 128,
            node_types_offset: 192,
            node_fields_offset: 256,
            trivia_offset: 320,
            type_meta_offset: 384,
            entrypoints_offset: 448,
            transitions_offset: 512,
            str_table_count: 10,
            node_types_count: 20,
            node_fields_count: 5,
            trivia_count: 2,
            entrypoints_count: 1,
            transitions_count: 15,
            ..Default::default()
        };

        let encoded = original.to_bytes();
        assert_eq!(encoded.len(), 64);

        let decoded = Header::from_bytes(&encoded);
        assert_eq!(decoded, original);
    }

    /// Integers are written little-endian at their fixed offsets.
    #[test]
    fn header_encoding_is_little_endian() {
        let header = Header {
            checksum: 0x12345678,
            ..Default::default()
        };

        let encoded = header.to_bytes();
        assert_eq!(&encoded[0..4], &MAGIC[..]);
        assert_eq!(&encoded[8..12], &0x12345678u32.to_le_bytes()[..]);
    }

    /// Decoding fewer than 64 bytes is a caller bug and must panic loudly.
    #[test]
    #[should_panic(expected = "header too short")]
    fn header_from_short_slice_panics() {
        let _ = Header::from_bytes(&[0u8; 63]);
    }
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
//! Bytecode index newtypes.
2+
3+
use super::constants::{
    STEP_ACCEPT, STEP_SIZE, TYPE_CUSTOM_START, TYPE_NODE, TYPE_STRING, TYPE_VOID,
};
4+
5+
/// Index into the Transitions section (8-byte steps).
6+
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Default)]
7+
#[repr(transparent)]
8+
pub struct StepId(pub u16);
9+
10+
impl StepId {
11+
pub const ACCEPT: Self = Self(STEP_ACCEPT);
12+
13+
#[inline]
14+
pub fn is_accept(self) -> bool {
15+
self.0 == STEP_ACCEPT
16+
}
17+
18+
#[inline]
19+
pub fn byte_offset(self) -> usize {
20+
self.0 as usize * STEP_SIZE
21+
}
22+
}
23+
24+
/// Index into the String Table.
///
/// A transparent wrapper around the raw `u16` index; the derived `Default`
/// is `StringId(0)`.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Default)]
#[repr(transparent)]
pub struct StringId(pub u16);
28+
29+
/// Index into the Type Definition table.
30+
/// Values 0-2 are builtins; 3+ index into TypeDefs.
31+
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Default)]
32+
#[repr(transparent)]
33+
pub struct QTypeId(pub u16);
34+
35+
impl QTypeId {
36+
pub const VOID: Self = Self(super::constants::TYPE_VOID);
37+
pub const NODE: Self = Self(super::constants::TYPE_NODE);
38+
pub const STRING: Self = Self(TYPE_STRING);
39+
40+
#[inline]
41+
pub fn is_builtin(self) -> bool {
42+
self.0 <= TYPE_STRING
43+
}
44+
45+
/// Index into TypeDefs array (only valid for non-builtins).
46+
#[inline]
47+
pub fn custom_index(self) -> Option<usize> {
48+
if self.0 >= TYPE_CUSTOM_START {
49+
Some((self.0 - TYPE_CUSTOM_START) as usize)
50+
} else {
51+
None
52+
}
53+
}
54+
55+
#[inline]
56+
pub fn from_custom_index(idx: usize) -> Self {
57+
Self(TYPE_CUSTOM_START + idx as u16)
58+
}
59+
}
60+
61+
#[cfg(test)]
mod tests {
    use super::*;

    /// A step's byte offset is its index times the 8-byte step size.
    #[test]
    fn step_id_byte_offset() {
        assert_eq!(StepId(0).byte_offset(), 0);
        assert_eq!(StepId(1).byte_offset(), 8);
        assert_eq!(StepId(10).byte_offset(), 80);
    }

    /// Only the accept step reports itself as accepting.
    #[test]
    fn step_id_accept() {
        assert!(StepId::ACCEPT.is_accept());
        assert!(!StepId(1).is_accept());
    }

    /// The three named ids are builtins; the first id after them is not.
    #[test]
    fn qtype_id_builtins() {
        assert!(QTypeId::VOID.is_builtin());
        assert!(QTypeId::NODE.is_builtin());
        assert!(QTypeId::STRING.is_builtin());
        assert!(!QTypeId(3).is_builtin());
    }

    /// Custom ids map to zero-based TypeDefs indices and back.
    #[test]
    fn qtype_id_custom_index() {
        assert_eq!(QTypeId::VOID.custom_index(), None);
        assert_eq!(QTypeId(3).custom_index(), Some(0));
        assert_eq!(QTypeId(5).custom_index(), Some(2));
        assert_eq!(QTypeId::from_custom_index(0), QTypeId(3));
    }
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
//! Bytecode module for compiled Plotnik queries.
2+
//!
3+
//! Implements the binary format specified in `docs/binary-format/`.
4+
5+
mod constants;
6+
mod header;
7+
mod ids;
8+
9+
pub use constants::{
10+
MAGIC, SECTION_ALIGN, STEP_ACCEPT, STEP_SIZE, TYPE_CUSTOM_START, TYPE_NODE, TYPE_STRING,
11+
TYPE_VOID, VERSION,
12+
};
13+
14+
pub use ids::{QTypeId, StepId, StringId};
15+
16+
pub use header::Header;

crates/plotnik-lib/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
1717
#![cfg_attr(coverage_nightly, feature(coverage_attribute))]
1818

19+
pub mod bytecode;
1920
pub mod diagnostics;
2021
pub mod parser;
2122
pub mod query;

docs/binary-format/01-overview.md

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,16 @@ Section offsets defined in Header for robust parsing.
3131
| [NodeTypes] | NodeTypeId → StringId | 4 |
3232
| [NodeFields] | NodeFieldId → StringId | 4 |
3333
| [Trivia] | List of NodeTypeId | 2 |
34-
| [TypeMeta] | Types | Var |
34+
| [TypeMeta] | Types (3 sub-sections) | 4 |
3535
| [Entrypoints] | Definitions | 8 |
3636
| [Transitions] | Tree walking graph | 8 |
3737

38+
**TypeMeta sub-sections** (contiguous, offsets computed from counts):
39+
40+
- **TypeDefs**: Structural topology
41+
- **TypeMembers**: Fields and variants
42+
- **TypeNames**: Name → TypeId mapping
43+
3844
[StringBlob]: 02-strings.md
3945
[StringTable]: 02-strings.md
4046
[NodeTypes]: 03-symbols.md
@@ -62,7 +68,7 @@ struct Header {
6268
node_types_offset: u32,
6369
node_fields_offset: u32,
6470
trivia_offset: u32,
65-
type_meta_offset: u32,
71+
type_meta_offset: u32, // Points to TypeMeta header (see 04-types.md)
6672
entrypoints_offset: u32,
6773
transitions_offset: u32,
6874

@@ -71,9 +77,12 @@ struct Header {
7177
node_types_count: u16,
7278
node_fields_count: u16,
7379
trivia_count: u16,
74-
type_defs_count: u16,
75-
type_members_count: u16, // Number of TypeMembers
7680
entrypoints_count: u16,
7781
transitions_count: u16,
82+
_pad: u32,
7883
}
84+
// Size: 16 + 32 + 16 = 64 bytes
85+
//
86+
// Note: TypeMeta sub-section counts are stored in the TypeMeta header,
87+
// not in the main header. See 04-types.md for details.
7988
```

docs/binary-format/02-strings.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@ Strings are stored in a centralized pool to eliminate redundancy and alignment p
66

77
**StringId (u16)**: Zero-based index into the String Table.
88

9-
- `0xFFFF` is reserved as a sentinel for "None" or "Anonymous".
10-
119
## 1. String Blob
1210

1311
Contains the raw UTF-8 bytes for all strings concatenated together.

0 commit comments

Comments
 (0)