Skip to content

Commit 5d581db

Browse files
authored
fix: Cascade overflow pre-effects to prevent bytecode encoding panic (#306)
1 parent 3283d57 commit 5d581db

10 files changed

Lines changed: 273 additions & 23 deletions

crates/plotnik-lib/src/bytecode/constants.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,9 @@ pub const STEP_SIZE: usize = 8;
1818
/// effects, neg_fields, and successors combined. When an epsilon
1919
/// transition needs more successors, it must be split into a cascade.
2020
pub const MAX_MATCH_PAYLOAD_SLOTS: usize = 28;
21+
22+
/// Maximum pre-effects per Match instruction.
23+
///
24+
/// Pre-effect count is stored in 3 bits (max 7). When exceeded,
25+
/// overflow effects must be emitted in leading epsilon transitions.
26+
pub const MAX_PRE_EFFECTS: usize = 7;

crates/plotnik-lib/src/bytecode/ir.rs

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -526,12 +526,24 @@ impl MatchIR {
526526
.unwrap_or(0);
527527
bytes[6..8].copy_from_slice(&next.to_le_bytes());
528528
} else {
529-
let pre_count = self.pre_effects.len() as u16;
530-
let neg_count = self.neg_fields.len() as u16;
531-
let post_count = self.post_effects.len() as u16;
532-
let succ_count = self.successors.len() as u16;
533-
let counts =
534-
(pre_count << 13) | (neg_count << 10) | (post_count << 7) | (succ_count << 1);
529+
let pre_count = self.pre_effects.len();
530+
let neg_count = self.neg_fields.len();
531+
let post_count = self.post_effects.len();
532+
let succ_count = self.successors.len();
533+
534+
// Validate bit-packed field limits (3 bits for counts, 6 bits for successors)
535+
assert!(
536+
pre_count <= 7,
537+
"pre_effects overflow: {pre_count} > 7 (use emit_match_with_cascade)"
538+
);
539+
assert!(neg_count <= 7, "neg_fields overflow: {neg_count} > 7");
540+
assert!(post_count <= 7, "post_effects overflow: {post_count} > 7");
541+
assert!(succ_count <= 63, "successors overflow: {succ_count} > 63");
542+
543+
let counts = ((pre_count as u16) << 13)
544+
| ((neg_count as u16) << 10)
545+
| ((post_count as u16) << 7)
546+
| ((succ_count as u16) << 1);
535547
bytes[6..8].copy_from_slice(&counts.to_le_bytes());
536548

537549
let mut offset = 8;

crates/plotnik-lib/src/bytecode/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ mod nav;
1616
mod sections;
1717
mod type_meta;
1818

19-
pub use constants::{MAGIC, MAX_MATCH_PAYLOAD_SLOTS, SECTION_ALIGN, STEP_SIZE, VERSION};
19+
pub use constants::{
20+
MAGIC, MAX_MATCH_PAYLOAD_SLOTS, MAX_PRE_EFFECTS, SECTION_ALIGN, STEP_SIZE, VERSION,
21+
};
2022

2123
pub use ids::{StringId, TypeId};
2224

crates/plotnik-lib/src/bytecode/nav.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,15 +76,18 @@ impl Nav {
7676
Self::DownSkip => 7,
7777
Self::DownExact => 8,
7878
Self::Up(n) => {
79-
debug_assert!((1..=63).contains(&n));
79+
assert!((1..=63).contains(&n), "Up level overflow: {n} > 63");
8080
0b01_000000 | n
8181
}
8282
Self::UpSkipTrivia(n) => {
83-
debug_assert!((1..=63).contains(&n));
83+
assert!(
84+
(1..=63).contains(&n),
85+
"UpSkipTrivia level overflow: {n} > 63"
86+
);
8487
0b10_000000 | n
8588
}
8689
Self::UpExact(n) => {
87-
debug_assert!((1..=63).contains(&n));
90+
assert!((1..=63).contains(&n), "UpExact level overflow: {n} > 63");
8891
0b11_000000 | n
8992
}
9093
}

crates/plotnik-lib/src/compile/compile_tests.rs

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,3 +161,68 @@ fn compile_large_tagged_alternation() {
161161

162162
assert!(!result.instructions.is_empty());
163163
}
164+
165+
#[test]
166+
fn compile_unlabeled_alternation_5_branches_with_captures() {
167+
// Regression test: unlabeled alternation with 5+ branches where each has
168+
// a unique capture requires 8+ pre-effects (4 nulls + 4 sets per branch).
169+
// This exceeds the 3-bit limit (max 7) and must cascade via epsilon chain.
170+
let query = QueryBuilder::one_liner(
171+
"Q = [(identifier) @a (number) @b (string) @c (binary_expression) @d (call_expression) @e]",
172+
)
173+
.parse()
174+
.unwrap()
175+
.analyze();
176+
177+
let mut strings = StringTableBuilder::new();
178+
let result = Compiler::compile(
179+
query.interner(),
180+
query.type_context(),
181+
&query.symbol_table,
182+
&mut strings,
183+
None,
184+
None,
185+
)
186+
.unwrap();
187+
188+
assert!(!result.instructions.is_empty());
189+
190+
// Verify that effects cascade created extra epsilon instructions.
191+
// With 5 branches, each branch needs 8 pre-effects (4 missing captures × 2 effects).
192+
// This requires at least one cascade step per branch.
193+
let epsilon_count = result
194+
.instructions
195+
.iter()
196+
.filter(|i| matches!(i, crate::bytecode::InstructionIR::Match(m) if m.is_epsilon()))
197+
.count();
198+
199+
// Should have more epsilon transitions than without cascade
200+
// (5 branches + cascade steps for overflow effects)
201+
assert!(epsilon_count >= 5, "expected cascade epsilon steps");
202+
}
203+
204+
#[test]
205+
fn compile_unlabeled_alternation_8_branches_with_captures() {
206+
// Even more extreme: 8 branches means 14 pre-effects per branch (7 nulls + 7 sets).
207+
// This requires 2 cascade steps per branch.
208+
let query = QueryBuilder::one_liner(
209+
"Q = [(identifier) @a (number) @b (string) @c (binary_expression) @d \
210+
(call_expression) @e (member_expression) @f (array) @g (object) @h]",
211+
)
212+
.parse()
213+
.unwrap()
214+
.analyze();
215+
216+
let mut strings = StringTableBuilder::new();
217+
let result = Compiler::compile(
218+
query.interner(),
219+
query.type_context(),
220+
&query.symbol_table,
221+
&mut strings,
222+
None,
223+
None,
224+
)
225+
.unwrap();
226+
227+
assert!(!result.instructions.is_empty());
228+
}

crates/plotnik-lib/src/compile/expressions.rs

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -40,16 +40,14 @@ impl Compiler<'_> {
4040

4141
// If no items, just match and exit (capture effects go here)
4242
if items.is_empty() {
43-
self.instructions.push(
43+
return self.emit_match_with_cascade(
4444
MatchIR::epsilon(entry, exit)
4545
.nav(nav)
4646
.node_type(node_type)
4747
.neg_fields(neg_fields)
4848
.pre_effects(capture.pre)
49-
.post_effects(capture.post)
50-
.into(),
49+
.post_effects(capture.post),
5150
);
52-
return entry;
5351
}
5452

5553
// Determine Up navigation based on trailing anchor
@@ -102,13 +100,12 @@ impl Compiler<'_> {
102100
.push(MatchIR::epsilon(up_label, final_exit).nav(up_nav).into());
103101

104102
// Emit entry instruction into the node (only pre_effects here)
105-
self.instructions.push(
103+
self.emit_match_with_cascade(
106104
MatchIR::epsilon(entry, items_entry)
107105
.nav(nav)
108106
.node_type(node_type)
109107
.neg_fields(neg_fields)
110-
.pre_effects(capture.pre)
111-
.into(),
108+
.pre_effects(capture.pre),
112109
);
113110

114111
entry
@@ -178,13 +175,12 @@ impl Compiler<'_> {
178175
self.emit_wildcard_nav(down_wildcard, Nav::Down, try_label);
179176

180177
// entry: match parent node → down_wildcard (only pre_effects here)
181-
self.instructions.push(
178+
self.emit_match_with_cascade(
182179
MatchIR::epsilon(entry, down_wildcard)
183180
.nav(nav)
184181
.node_type(node_type)
185182
.neg_fields(neg_fields)
186-
.pre_effects(capture.pre)
187-
.into(),
183+
.pre_effects(capture.pre),
188184
);
189185

190186
entry
@@ -220,13 +216,12 @@ impl Compiler<'_> {
220216
None => NodeTypeIR::Any, // `_` wildcard matches any node
221217
};
222218

223-
self.instructions.push(
219+
self.emit_match_with_cascade(
224220
MatchIR::epsilon(entry, exit)
225221
.nav(nav)
226222
.node_type(node_type)
227223
.pre_effects(capture.pre)
228-
.post_effects(capture.post)
229-
.into(),
224+
.post_effects(capture.post),
230225
);
231226

232227
entry

crates/plotnik-lib/src/compile/scope.rs

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,69 @@ impl Compiler<'_> {
420420
.push(MatchIR::at(label).next_many(successors).into());
421421
}
422422

423+
/// Emit a Match instruction, cascading pre-effects if they exceed the bytecode limit.
424+
///
425+
/// When `pre_effects.len() > MAX_PRE_EFFECTS` (7), splits overflow into leading
426+
/// epsilon transitions to avoid bytecode encoding overflow. The original label
427+
/// is preserved as the entry point of the cascade.
428+
///
429+
/// Returns the entry label (same as `instr.label`).
430+
pub(super) fn emit_match_with_cascade(&mut self, mut instr: MatchIR) -> Label {
431+
use crate::bytecode::MAX_PRE_EFFECTS;
432+
433+
let entry = instr.label;
434+
435+
if instr.pre_effects.len() <= MAX_PRE_EFFECTS {
436+
self.instructions.push(instr.into());
437+
return entry;
438+
}
439+
440+
// Move all pre-effects to epsilon chain
441+
let all_effects = std::mem::take(&mut instr.pre_effects);
442+
443+
// Create new label for the actual match instruction
444+
let match_label = self.fresh_label();
445+
instr.label = match_label;
446+
self.instructions.push(instr.into());
447+
448+
// Emit cascade from entry → ... → match_label
449+
self.emit_effects_chain(entry, match_label, all_effects);
450+
451+
entry
452+
}
453+
454+
/// Emit a chain of epsilon transitions to execute effects in order.
455+
///
456+
/// Splits effects into batches of `MAX_PRE_EFFECTS` (7), emitting epsilon
457+
/// transitions: `entry → intermediate1 → ... → exit`.
458+
fn emit_effects_chain(&mut self, entry: Label, exit: Label, mut effects: Vec<EffectIR>) {
459+
use crate::bytecode::MAX_PRE_EFFECTS;
460+
461+
if effects.is_empty() {
462+
// Just link entry to exit
463+
self.instructions.push(MatchIR::epsilon(entry, exit).into());
464+
return;
465+
}
466+
467+
if effects.len() <= MAX_PRE_EFFECTS {
468+
self.instructions
469+
.push(MatchIR::epsilon(entry, exit).pre_effects(effects).into());
470+
return;
471+
}
472+
473+
// Take first batch, recurse for rest
474+
let first_batch: Vec<_> = effects.drain(..MAX_PRE_EFFECTS).collect();
475+
let intermediate = self.fresh_label();
476+
477+
self.instructions.push(
478+
MatchIR::epsilon(entry, intermediate)
479+
.pre_effects(first_batch)
480+
.into(),
481+
);
482+
483+
self.emit_effects_chain(intermediate, exit, effects);
484+
}
485+
423486
/// Emit a wildcard navigation step that accepts any node.
424487
///
425488
/// Used for skip-retry logic in quantifiers: navigates to the next position

crates/plotnik-lib/src/engine/engine_tests.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -529,3 +529,47 @@ fn wildcard_named_skips_anonymous() {
529529
fn wildcard_bare_matches_anonymous() {
530530
snap!("Q = (program (return_statement _ @x))", "return 42");
531531
}
532+
533+
/// BUG: Unlabeled alternation with 5+ branches and unique captures per branch.
534+
///
535+
/// Each branch in an unlabeled alternation needs to inject null for captures
536+
/// it doesn't have. With 5 branches (@a, @b, @c, @d, @e), each branch needs
537+
/// 8 pre-effects (4 nulls + 4 sets). This exceeds the 3-bit limit (max 7)
538+
/// and caused bytecode encoding overflow, resulting in garbage addresses.
539+
///
540+
/// Fix: Cascade overflow pre-effects into leading epsilon transitions.
541+
#[test]
542+
fn regression_unlabeled_alternation_5_branches_captures() {
543+
snap!(
544+
indoc! {r#"
545+
Q = (program (expression_statement [
546+
(identifier) @a
547+
(number) @b
548+
(string) @c
549+
(binary_expression) @d
550+
(call_expression) @e
551+
]))
552+
"#},
553+
"foo"
554+
);
555+
}
556+
557+
/// Same bug with 8 branches - even more pre-effects requiring multiple cascade steps.
558+
#[test]
559+
fn regression_unlabeled_alternation_8_branches_captures() {
560+
snap!(
561+
indoc! {r#"
562+
Q = (program (expression_statement [
563+
(identifier) @a
564+
(number) @b
565+
(string) @c
566+
(binary_expression) @d
567+
(call_expression) @e
568+
(member_expression) @f
569+
(array) @g
570+
(object) @h
571+
]))
572+
"#},
573+
"42"
574+
);
575+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
---
2+
source: crates/plotnik-lib/src/engine/engine_tests.rs
3+
---
4+
Q = (program (expression_statement [
5+
(identifier) @a
6+
(number) @b
7+
(string) @c
8+
(binary_expression) @d
9+
(call_expression) @e
10+
]))
11+
---
12+
foo
13+
---
14+
{
15+
"b": null,
16+
"c": null,
17+
"d": null,
18+
"e": null,
19+
"a": {
20+
"kind": "identifier",
21+
"text": "foo",
22+
"span": [
23+
0,
24+
3
25+
]
26+
}
27+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
---
2+
source: crates/plotnik-lib/src/engine/engine_tests.rs
3+
---
4+
Q = (program (expression_statement [
5+
(identifier) @a
6+
(number) @b
7+
(string) @c
8+
(binary_expression) @d
9+
(call_expression) @e
10+
(member_expression) @f
11+
(array) @g
12+
(object) @h
13+
]))
14+
---
15+
42
16+
---
17+
{
18+
"a": null,
19+
"c": null,
20+
"d": null,
21+
"e": null,
22+
"f": null,
23+
"g": null,
24+
"h": null,
25+
"b": {
26+
"kind": "number",
27+
"text": "42",
28+
"span": [
29+
0,
30+
2
31+
]
32+
}
33+
}

0 commit comments

Comments
 (0)