Skip to content

Commit e242e52

Browse files
committed
refactor: Add explicit Nav::Epsilon and improve dump/trace format
- Add Nav::Epsilon variant for epsilon transitions
- Add NodeTypeIR enum to distinguish Any/Named/Anonymous node constraints
- Epsilon transitions no longer show `_` pattern (no node interaction)
- Add trace_nav_failure to show failed navigation attempts
- Update bytecode header to 2+2+4 bit layout: `segment(2) | node_kind(2) | opcode(4)`
1 parent 48c0a80 commit e242e52

58 files changed

Lines changed: 593 additions & 336 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

crates/plotnik-lib/src/bytecode/constants.rs

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,6 @@ pub const SECTION_ALIGN: usize = 64;
1212
/// Step size in bytes (all instructions are 8-byte aligned).
1313
pub const STEP_SIZE: usize = 8;
1414

15-
/// Sentinel value for "any named node" wildcard `(_)`.
16-
///
17-
/// When `node_type` equals this value, the VM checks `node.is_named()`
18-
/// instead of comparing type IDs. This distinguishes `(_)` (any named)
19-
/// from `_` (any node including anonymous).
20-
pub const NAMED_WILDCARD: u16 = 0xFFFF;
21-
2215
/// Maximum payload slots for Match instructions.
2316
///
2417
/// Match64 (the largest variant) supports up to 28 u16 slots for

crates/plotnik-lib/src/bytecode/dump.rs

Lines changed: 41 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ use std::fmt::Write as _;
77

88
use crate::colors::Colors;
99

10-
use super::NAMED_WILDCARD;
11-
use super::format::{LineBuilder, Symbol, format_effect, nav_symbol_epsilon, width_for_count};
10+
use super::format::{LineBuilder, Symbol, format_effect, nav_symbol, width_for_count};
1211
use super::ids::TypeId;
1312
use super::instructions::StepId;
13+
use super::ir::NodeTypeIR;
1414
use super::module::{Instruction, Module};
1515
use super::type_meta::{TypeData, TypeKind};
1616
use super::{Call, Match, Return, Trampoline};
@@ -446,7 +446,7 @@ fn format_match(
446446
step_width: usize,
447447
) -> String {
448448
let builder = LineBuilder::new(step_width);
449-
let symbol = nav_symbol_epsilon(m.nav, m.is_epsilon());
449+
let symbol = nav_symbol(m.nav);
450450
let prefix = format!(" {:0sw$} {} ", step, symbol.format(), sw = step_width);
451451

452452
let content = format_match_content(m, ctx);
@@ -464,17 +464,20 @@ fn format_match_content(m: &Match, ctx: &DumpContext) -> String {
464464
parts.push(format!("[{}]", pre.join(" ")));
465465
}
466466

467-
for field_id in m.neg_fields() {
468-
let name = ctx
469-
.node_field_name(field_id)
470-
.map(String::from)
471-
.unwrap_or_else(|| format!("field#{field_id}"));
472-
parts.push(format!("-{name}"));
473-
}
467+
// Skip neg_fields and node pattern for epsilon (no node interaction)
468+
if !m.is_epsilon() {
469+
for field_id in m.neg_fields() {
470+
let name = ctx
471+
.node_field_name(field_id)
472+
.map(String::from)
473+
.unwrap_or_else(|| format!("field#{field_id}"));
474+
parts.push(format!("-{name}"));
475+
}
474476

475-
let node_part = format_node_pattern(m, ctx);
476-
if !node_part.is_empty() {
477-
parts.push(node_part);
477+
let node_part = format_node_pattern(m, ctx);
478+
if !node_part.is_empty() {
479+
parts.push(node_part);
480+
}
478481
}
479482

480483
let post: Vec<_> = m.post_effects().map(|e| format_effect(&e)).collect();
@@ -485,7 +488,7 @@ fn format_match_content(m: &Match, ctx: &DumpContext) -> String {
485488
parts.join(" ")
486489
}
487490

488-
/// Format node pattern: `field: (type)` or `(type)` or `field: _` or `(_)`
491+
/// Format node pattern: `field: (type)` or `(type)` or `field: _` or `(_)` or `"text"`
489492
fn format_node_pattern(m: &Match, ctx: &DumpContext) -> String {
490493
let mut result = String::new();
491494

@@ -498,11 +501,17 @@ fn format_node_pattern(m: &Match, ctx: &DumpContext) -> String {
498501
result.push_str(": ");
499502
}
500503

501-
if let Some(type_id) = m.node_type {
502-
if type_id.get() == NAMED_WILDCARD {
504+
match m.node_type {
505+
NodeTypeIR::Any => {
506+
// Any node wildcard: `_`
507+
result.push('_');
508+
}
509+
NodeTypeIR::Named(None) => {
503510
// Named wildcard: any named node
504511
result.push_str("(_)");
505-
} else {
512+
}
513+
NodeTypeIR::Named(Some(type_id)) => {
514+
// Specific named node type
506515
let name = ctx
507516
.node_type_name(type_id.get())
508517
.map(String::from)
@@ -511,8 +520,20 @@ fn format_node_pattern(m: &Match, ctx: &DumpContext) -> String {
511520
result.push_str(&name);
512521
result.push(')');
513522
}
514-
} else if m.node_field.is_some() {
515-
result.push('_');
523+
NodeTypeIR::Anonymous(None) => {
524+
// Anonymous wildcard: any anonymous node (future syntax)
525+
result.push_str("\"_\"");
526+
}
527+
NodeTypeIR::Anonymous(Some(type_id)) => {
528+
// Specific anonymous node (literal token)
529+
let name = ctx
530+
.node_type_name(type_id.get())
531+
.map(String::from)
532+
.unwrap_or_else(|| format!("anon#{}", type_id.get()));
533+
result.push('"');
534+
result.push_str(&name);
535+
result.push('"');
536+
}
516537
}
517538

518539
result
@@ -538,7 +559,7 @@ fn format_call(
538559
) -> String {
539560
let c = &ctx.colors;
540561
let builder = LineBuilder::new(step_width);
541-
let symbol = nav_symbol_epsilon(call.nav, false);
562+
let symbol = nav_symbol(call.nav());
542563
let prefix = format!(" {:0sw$} {} ", step, symbol.format(), sw = step_width);
543564

544565
// Format field constraint if present

crates/plotnik-lib/src/bytecode/format.rs

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,8 @@ impl Symbol {
7272
///
7373
/// | Nav | Symbol | Notes |
7474
/// | --------------- | ------- | ----------------------------------- |
75+
/// | Epsilon | ε | Pure control flow, no cursor check |
7576
/// | Stay | (blank) | No movement, 5 spaces |
76-
/// | Stay (epsilon) | ε | Only when no type/field constraints |
7777
/// | StayExact | ! | Stay at position, exact match only |
7878
/// | Down | ▽ | First child, skip any |
7979
/// | DownSkip | !▽ | First child, skip trivia |
@@ -86,6 +86,7 @@ impl Symbol {
8686
/// | UpExact(n) | !!△ⁿ | Ascend n, must be last child |
8787
pub fn nav_symbol(nav: Nav) -> Symbol {
8888
match nav {
89+
Nav::Epsilon => Symbol::EPSILON,
8990
Nav::Stay => Symbol::EMPTY,
9091
Nav::StayExact => Symbol::new(" ", "!", " "),
9192
Nav::Down => Symbol::new(" ", "▽", " "),
@@ -100,20 +101,6 @@ pub fn nav_symbol(nav: Nav) -> Symbol {
100101
}
101102
}
102103

103-
/// Format navigation for epsilon transitions (when is_epsilon is true).
104-
///
105-
/// True epsilon transitions require all three conditions:
106-
/// - `nav == Stay` (no cursor movement)
107-
/// - `node_type == None` (no type constraint)
108-
/// - `node_field == None` (no field constraint)
109-
pub fn nav_symbol_epsilon(nav: Nav, is_epsilon: bool) -> Symbol {
110-
if is_epsilon {
111-
Symbol::EPSILON
112-
} else {
113-
nav_symbol(nav)
114-
}
115-
}
116-
117104
/// Trace sub-line symbols.
118105
pub mod trace {
119106
use super::Symbol;

crates/plotnik-lib/src/bytecode/instructions.rs

Lines changed: 44 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use std::num::NonZeroU16;
77

88
use super::constants::{SECTION_ALIGN, STEP_SIZE};
99
use super::effects::EffectOp;
10+
use super::ir::NodeTypeIR;
1011
use super::nav::Nav;
1112

1213
/// Step address in bytecode (raw u16).
@@ -129,12 +130,12 @@ impl Opcode {
129130
#[derive(Clone, Copy, Debug)]
130131
pub struct Match<'a> {
131132
bytes: &'a [u8],
132-
/// Segment index (0-15, currently only 0 is used).
133+
/// Segment index (0-3, currently only 0 is used).
133134
pub segment: u8,
134-
/// Navigation command.
135+
/// Navigation command. `Epsilon` means no cursor movement or node check.
135136
pub nav: Nav,
136-
/// Node type constraint (None = wildcard).
137-
pub node_type: Option<NonZeroU16>,
137+
/// Node type constraint (Any = wildcard, Named/Anonymous for specific checks).
138+
pub node_type: NodeTypeIR,
138139
/// Field constraint (None = wildcard).
139140
pub node_field: Option<NonZeroU16>,
140141
/// Whether this is Match8 (no payload) or extended.
@@ -153,18 +154,23 @@ impl<'a> Match<'a> {
153154
///
154155
/// The slice must start at the instruction and contain at least
155156
/// the full instruction size (determined by opcode).
157+
///
158+
/// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
156159
#[inline]
157160
pub fn from_bytes(bytes: &'a [u8]) -> Self {
158161
debug_assert!(bytes.len() >= 8, "Match instruction too short");
159162

160163
let type_id_byte = bytes[0];
161-
let segment = type_id_byte >> 4;
162-
debug_assert!(segment == 0, "non-zero segment not yet supported");
164+
// Header byte: segment(2) | node_kind(2) | opcode(4)
165+
let segment = (type_id_byte >> 6) & 0x3;
166+
let node_kind = (type_id_byte >> 4) & 0x3;
163167
let opcode = Opcode::from_u8(type_id_byte & 0xF);
168+
debug_assert!(segment == 0, "non-zero segment not yet supported");
164169
debug_assert!(opcode.is_match(), "expected Match opcode");
165170

166171
let nav = Nav::from_byte(bytes[1]);
167-
let node_type = NonZeroU16::new(u16::from_le_bytes([bytes[2], bytes[3]]));
172+
let node_type_val = u16::from_le_bytes([bytes[2], bytes[3]]);
173+
let node_type = NodeTypeIR::from_bytes(node_kind, node_type_val);
168174
let node_field = NonZeroU16::new(u16::from_le_bytes([bytes[4], bytes[5]]));
169175

170176
let (is_match8, match8_next, pre_count, neg_count, post_count, succ_count) =
@@ -207,7 +213,7 @@ impl<'a> Match<'a> {
207213
/// Check if this is an epsilon transition (no node interaction).
208214
#[inline]
209215
pub fn is_epsilon(&self) -> bool {
210-
self.nav == Nav::Stay && self.node_type.is_none() && self.node_field.is_none()
216+
self.nav == Nav::Epsilon
211217
}
212218

213219
/// Number of successors.
@@ -282,7 +288,7 @@ impl<'a> Match<'a> {
282288
/// Call instruction for invoking definitions (recursion).
283289
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
284290
pub struct Call {
285-
/// Segment index (0-15).
291+
/// Segment index (0-3).
286292
pub(crate) segment: u8,
287293
/// Navigation to apply before jumping to target.
288294
pub(crate) nav: Nav,
@@ -307,14 +313,17 @@ impl Call {
307313
}
308314

309315
/// Decode from 8-byte bytecode.
316+
///
317+
/// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
318+
/// For Call, node_kind bits are ignored (always 0).
310319
pub(crate) fn from_bytes(bytes: [u8; 8]) -> Self {
311320
let type_id_byte = bytes[0];
312-
let segment = type_id_byte >> 4;
321+
let segment = (type_id_byte >> 6) & 0x3;
322+
let opcode = Opcode::from_u8(type_id_byte & 0xF);
313323
assert!(
314324
segment == 0,
315325
"non-zero segment not yet supported: {segment}"
316326
);
317-
let opcode = Opcode::from_u8(type_id_byte & 0xF);
318327
assert_eq!(opcode, Opcode::Call, "expected Call opcode");
319328

320329
Self {
@@ -327,9 +336,12 @@ impl Call {
327336
}
328337

329338
/// Encode to 8-byte bytecode.
339+
///
340+
/// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
330341
pub fn to_bytes(&self) -> [u8; 8] {
331342
let mut bytes = [0u8; 8];
332-
bytes[0] = (self.segment << 4) | (Opcode::Call as u8);
343+
// node_kind = 0 for Call
344+
bytes[0] = (self.segment << 6) | (Opcode::Call as u8);
333345
bytes[1] = self.nav.to_byte();
334346
bytes[2..4].copy_from_slice(&self.node_field.map_or(0, |v| v.get()).to_le_bytes());
335347
bytes[4..6].copy_from_slice(&self.next.get().to_le_bytes());
@@ -354,7 +366,7 @@ impl Call {
354366
/// Return instruction for returning from definitions.
355367
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
356368
pub struct Return {
357-
/// Segment index (0-15).
369+
/// Segment index (0-3).
358370
pub(crate) segment: u8,
359371
}
360372

@@ -365,23 +377,29 @@ impl Return {
365377
}
366378

367379
/// Decode from 8-byte bytecode.
380+
///
381+
/// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
382+
/// For Return, node_kind bits are ignored (always 0).
368383
pub(crate) fn from_bytes(bytes: [u8; 8]) -> Self {
369384
let type_id_byte = bytes[0];
370-
let segment = type_id_byte >> 4;
385+
let segment = (type_id_byte >> 6) & 0x3;
386+
let opcode = Opcode::from_u8(type_id_byte & 0xF);
371387
assert!(
372388
segment == 0,
373389
"non-zero segment not yet supported: {segment}"
374390
);
375-
let opcode = Opcode::from_u8(type_id_byte & 0xF);
376391
assert_eq!(opcode, Opcode::Return, "expected Return opcode");
377392

378393
Self { segment }
379394
}
380395

381396
/// Encode to 8-byte bytecode.
397+
///
398+
/// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
382399
pub fn to_bytes(&self) -> [u8; 8] {
383400
let mut bytes = [0u8; 8];
384-
bytes[0] = (self.segment << 4) | (Opcode::Return as u8);
401+
// node_kind = 0 for Return
402+
bytes[0] = (self.segment << 6) | (Opcode::Return as u8);
385403
// bytes[1..8] are reserved/padding
386404
bytes
387405
}
@@ -400,7 +418,7 @@ impl Default for Return {
400418
/// the entry preamble: `Obj → Trampoline → EndObj → Accept`.
401419
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
402420
pub struct Trampoline {
403-
/// Segment index (0-15).
421+
/// Segment index (0-3).
404422
pub(crate) segment: u8,
405423
/// Return address (where to continue after entrypoint returns).
406424
pub(crate) next: StepId,
@@ -413,14 +431,17 @@ impl Trampoline {
413431
}
414432

415433
/// Decode from 8-byte bytecode.
434+
///
435+
/// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
436+
/// For Trampoline, node_kind bits are ignored (always 0).
416437
pub(crate) fn from_bytes(bytes: [u8; 8]) -> Self {
417438
let type_id_byte = bytes[0];
418-
let segment = type_id_byte >> 4;
439+
let segment = (type_id_byte >> 6) & 0x3;
440+
let opcode = Opcode::from_u8(type_id_byte & 0xF);
419441
assert!(
420442
segment == 0,
421443
"non-zero segment not yet supported: {segment}"
422444
);
423-
let opcode = Opcode::from_u8(type_id_byte & 0xF);
424445
assert_eq!(opcode, Opcode::Trampoline, "expected Trampoline opcode");
425446

426447
Self {
@@ -430,9 +451,12 @@ impl Trampoline {
430451
}
431452

432453
/// Encode to 8-byte bytecode.
454+
///
455+
/// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
433456
pub fn to_bytes(&self) -> [u8; 8] {
434457
let mut bytes = [0u8; 8];
435-
bytes[0] = (self.segment << 4) | (Opcode::Trampoline as u8);
458+
// node_kind = 0 for Trampoline
459+
bytes[0] = (self.segment << 6) | (Opcode::Trampoline as u8);
436460
// bytes[1] is padding
437461
bytes[2..4].copy_from_slice(&self.next.get().to_le_bytes());
438462
// bytes[4..8] are reserved/padding

0 commit comments

Comments
 (0)