11//! Cache-aligned instruction layout.
22//!
33//! Extracts linear chains from the control flow graph and places them
4- //! contiguously. Pads instructions to prevent cache line straddling.
4+ //! contiguously. Packs successor instructions into free space of predecessor
5+ //! blocks for improved d-cache locality.
56
67use std:: collections:: { BTreeMap , HashSet } ;
78
@@ -10,6 +11,170 @@ use crate::bytecode::{InstructionIR, Label, LayoutResult};
1011const CACHE_LINE : usize = 64 ;
1112const STEP_SIZE : usize = 8 ;
1213
14+ /// Intermediate representation for layout optimization.
15+ struct LayoutIR {
16+ blocks : Vec < Block > ,
17+ label_to_block : BTreeMap < Label , usize > ,
18+ label_to_offset : BTreeMap < Label , u8 > ,
19+ }
20+
21+ /// A 64-byte cache-line block.
22+ struct Block {
23+ placements : Vec < Placement > ,
24+ used : u8 ,
25+ }
26+
27+ /// An instruction placed within a block.
28+ struct Placement {
29+ label : Label ,
30+ offset : u8 ,
31+ size : u8 ,
32+ }
33+
34+ impl Block {
35+ fn new ( ) -> Self {
36+ Self {
37+ placements : Vec :: new ( ) ,
38+ used : 0 ,
39+ }
40+ }
41+
42+ fn free ( & self ) -> u8 {
43+ CACHE_LINE as u8 - self . used
44+ }
45+
46+ fn can_fit ( & self , size : u8 ) -> bool {
47+ self . free ( ) >= size
48+ }
49+
50+ fn place ( & mut self , label : Label , size : u8 ) -> u8 {
51+ let offset = self . used ;
52+ self . placements . push ( Placement {
53+ label,
54+ offset,
55+ size,
56+ } ) ;
57+ self . used += size;
58+ offset
59+ }
60+ }
61+
62+ impl LayoutIR {
63+ fn new ( ) -> Self {
64+ Self {
65+ blocks : Vec :: new ( ) ,
66+ label_to_block : BTreeMap :: new ( ) ,
67+ label_to_offset : BTreeMap :: new ( ) ,
68+ }
69+ }
70+
71+ fn place ( & mut self , label : Label , block_idx : usize , size : u8 ) {
72+ let offset = self . blocks [ block_idx] . place ( label, size) ;
73+ self . label_to_block . insert ( label, block_idx) ;
74+ self . label_to_offset . insert ( label, offset) ;
75+ }
76+
77+ /// Move an instruction from its current block to a new block.
78+ fn move_to ( & mut self , label : Label , new_block_idx : usize , size : u8 ) {
79+ // Remove from old block
80+ if let Some ( & old_block_idx) = self . label_to_block . get ( & label)
81+ && let block = & mut self . blocks [ old_block_idx]
82+ && let Some ( pos) = block. placements . iter ( ) . position ( |p| p. label == label)
83+ {
84+ let old_placement = block. placements . remove ( pos) ;
85+ block. used -= old_placement. size ;
86+
87+ // Compact remaining placements
88+ let mut offset = 0u8 ;
89+ for p in & mut block. placements {
90+ p. offset = offset;
91+ offset += p. size ;
92+ }
93+ }
94+
95+ // Add to new block
96+ let offset = self . blocks [ new_block_idx] . place ( label, size) ;
97+ self . label_to_block . insert ( label, new_block_idx) ;
98+ self . label_to_offset . insert ( label, offset) ;
99+ }
100+
101+ fn finalize ( self ) -> LayoutResult {
102+ let mut mapping = BTreeMap :: new ( ) ;
103+ let mut max_step_end = 0u16 ;
104+
105+ for ( block_idx, block) in self . blocks . iter ( ) . enumerate ( ) {
106+ let block_base_step = ( block_idx * CACHE_LINE / STEP_SIZE ) as u16 ;
107+ for placement in & block. placements {
108+ let step = block_base_step + ( placement. offset / STEP_SIZE as u8 ) as u16 ;
109+ mapping. insert ( placement. label , step) ;
110+ let step_end = step + ( placement. size / STEP_SIZE as u8 ) as u16 ;
111+ max_step_end = max_step_end. max ( step_end) ;
112+ }
113+ }
114+
115+ LayoutResult :: new ( mapping, max_step_end)
116+ }
117+ }
118+
/// Reference counts between blocks, used when scoring packing candidates.
struct BlockRefs {
    /// Number of references recorded for each `(from_block, to_block)` pair.
    direct: BTreeMap<(usize, usize), usize>,
    /// For each block, the distinct blocks that reference it, in the order
    /// they were first recorded.
    predecessors: BTreeMap<usize, Vec<usize>>,
}
126+
127+ impl BlockRefs {
128+ fn new ( ) -> Self {
129+ Self {
130+ direct : BTreeMap :: new ( ) ,
131+ predecessors : BTreeMap :: new ( ) ,
132+ }
133+ }
134+
135+ fn add_ref ( & mut self , from_block : usize , to_block : usize ) {
136+ * self . direct . entry ( ( from_block, to_block) ) . or_default ( ) += 1 ;
137+ let preds = self . predecessors . entry ( to_block) . or_default ( ) ;
138+ if !preds. contains ( & from_block) {
139+ preds. push ( from_block) ;
140+ }
141+ }
142+
143+ fn count ( & self , from_block : usize , to_block : usize ) -> usize {
144+ self . direct . get ( & ( from_block, to_block) ) . copied ( ) . unwrap_or ( 0 )
145+ }
146+
147+ fn predecessors ( & self , block : usize ) -> & [ usize ] {
148+ self . predecessors
149+ . get ( & block)
150+ . map ( |v| v. as_slice ( ) )
151+ . unwrap_or ( & [ ] )
152+ }
153+ }
154+
155+ /// Score a candidate block for packing based on reference distance.
156+ /// Direct refs count 1.0, 1-hop = 0.5, 2-hop = 0.25, capped at 3 hops.
157+ fn block_score ( target_block : usize , candidate_block : usize , refs : & BlockRefs ) -> f32 {
158+ let mut score = 0.0f32 ;
159+ let mut frontier = vec ! [ ( candidate_block, 0u8 ) ] ;
160+ let mut visited = HashSet :: new ( ) ;
161+
162+ while let Some ( ( block, dist) ) = frontier. pop ( ) {
163+ if !visited. insert ( block) || dist > 3 {
164+ continue ;
165+ }
166+
167+ let direct_refs = refs. count ( block, target_block) ;
168+ score += direct_refs as f32 / ( 1u32 << dist) as f32 ;
169+
170+ for & pred in refs. predecessors ( block) {
171+ frontier. push ( ( pred, dist + 1 ) ) ;
172+ }
173+ }
174+
175+ score
176+ }
177+
13178/// Successor graph for layout analysis.
14179struct Graph {
15180 /// label -> list of successor labels
@@ -70,7 +235,121 @@ impl CacheAligned {
70235 let chains = extract_chains ( & graph, instructions, entries) ;
71236 let ordered = order_chains ( chains, entries) ;
72237
73- assign_step_ids ( ordered, & label_to_instr)
238+ let mut ir = build_layout_ir ( & ordered, & label_to_instr) ;
239+ let refs = build_block_refs ( & ir, & label_to_instr) ;
240+ pack_successors ( & mut ir, & refs, & label_to_instr) ;
241+
242+ ir. finalize ( )
243+ }
244+ }
245+
246+ /// Build initial LayoutIR from ordered chains.
247+ fn build_layout_ir (
248+ chains : & [ Vec < Label > ] ,
249+ label_to_instr : & BTreeMap < Label , & InstructionIR > ,
250+ ) -> LayoutIR {
251+ let mut ir = LayoutIR :: new ( ) ;
252+
253+ for chain in chains {
254+ for & label in chain {
255+ let Some ( instr) = label_to_instr. get ( & label) else {
256+ continue ;
257+ } ;
258+ let size = instr. size ( ) as u8 ;
259+
260+ // Ensure current block can fit, or create new one
261+ if ir. blocks . is_empty ( ) || !ir. blocks . last ( ) . unwrap ( ) . can_fit ( size) {
262+ ir. blocks . push ( Block :: new ( ) ) ;
263+ }
264+ let block_idx = ir. blocks . len ( ) - 1 ;
265+
266+ ir. place ( label, block_idx, size) ;
267+ }
268+ }
269+
270+ ir
271+ }
272+
273+ /// Build block reference counts from current layout.
274+ fn build_block_refs (
275+ ir : & LayoutIR ,
276+ label_to_instr : & BTreeMap < Label , & InstructionIR > ,
277+ ) -> BlockRefs {
278+ let mut refs = BlockRefs :: new ( ) ;
279+
280+ for ( & label, & block_idx) in & ir. label_to_block {
281+ let Some ( instr) = label_to_instr. get ( & label) else {
282+ continue ;
283+ } ;
284+ for succ in instr. successors ( ) {
285+ if let Some ( & succ_block) = ir. label_to_block . get ( & succ)
286+ && succ_block != block_idx
287+ {
288+ refs. add_ref ( block_idx, succ_block) ;
289+ }
290+ }
291+ }
292+
293+ refs
294+ }
295+
296+ /// Pack successor instructions into free space of predecessor blocks.
297+ ///
298+ /// When X → Y and X is in block B, try to move Y to an earlier block
299+ /// that has free space and high reference score to B.
300+ fn pack_successors (
301+ ir : & mut LayoutIR ,
302+ refs : & BlockRefs ,
303+ label_to_instr : & BTreeMap < Label , & InstructionIR > ,
304+ ) {
305+ // Collect candidates: (successor_label, successor_block, predecessor_block)
306+ // We want to move successors to earlier blocks with free space
307+ let mut candidates: Vec < ( Label , usize , usize ) > = Vec :: new ( ) ;
308+
309+ for ( & label, & block_idx) in & ir. label_to_block {
310+ let Some ( instr) = label_to_instr. get ( & label) else {
311+ continue ;
312+ } ;
313+
314+ // For each successor of this instruction
315+ for succ in instr. successors ( ) {
316+ if let Some ( & succ_block) = ir. label_to_block . get ( & succ) {
317+ // Only consider moving if successor is in a later block
318+ if succ_block > block_idx {
319+ candidates. push ( ( succ, succ_block, block_idx) ) ;
320+ }
321+ }
322+ }
323+ }
324+
325+ // Sort by successor block descending (process later blocks first)
326+ candidates. sort_by_key ( |( _, succ_block, _) | std:: cmp:: Reverse ( * succ_block) ) ;
327+
328+ // Try to move each successor to an earlier block
329+ for ( succ_label, _succ_block, pred_block) in candidates {
330+ // Re-check current block (might have changed)
331+ let Some ( & current_block) = ir. label_to_block . get ( & succ_label) else {
332+ continue ;
333+ } ;
334+
335+ let Some ( instr) = label_to_instr. get ( & succ_label) else {
336+ continue ;
337+ } ;
338+ let size = instr. size ( ) as u8 ;
339+
340+ // Find the best earlier block with free space
341+ // Prefer blocks that reference the predecessor block (cache locality)
342+ let best = ( 0 ..current_block)
343+ . filter ( |& c| ir. blocks [ c] . can_fit ( size) )
344+ . max_by ( |& a, & b| {
345+ let score_a = block_score ( pred_block, a, refs) ;
346+ let score_b = block_score ( pred_block, b, refs) ;
347+ score_a. partial_cmp ( & score_b) . unwrap_or ( std:: cmp:: Ordering :: Equal )
348+ } ) ;
349+
350+ if let Some ( candidate) = best {
351+ ir. move_to ( succ_label, candidate, size) ;
352+ }
74353 }
75354}
76355
@@ -144,46 +423,3 @@ fn order_chains(mut chains: Vec<Vec<Label>>, entries: &[Label]) -> Vec<Vec<Label
144423 entry_chains
145424}
146425
147- /// Assign step IDs with cache line awareness.
148- fn assign_step_ids (
149- chains : Vec < Vec < Label > > ,
150- label_to_instr : & BTreeMap < Label , & InstructionIR > ,
151- ) -> LayoutResult {
152- let mut mapping = BTreeMap :: new ( ) ;
153-
154- let mut current_step = 0u16 ;
155- let mut current_offset = 0usize ; // Byte offset for cache alignment
156-
157- for chain in chains {
158- for label in chain {
159- let Some ( instr) = label_to_instr. get ( & label) else {
160- continue ;
161- } ;
162- let size = instr. size ( ) ;
163-
164- // Pad if instruction would straddle cache line boundary
165- let line_offset = current_offset % CACHE_LINE ;
166- if line_offset + size > CACHE_LINE {
167- let padding_bytes = CACHE_LINE - line_offset;
168- let padding_steps = ( padding_bytes / STEP_SIZE ) as u16 ;
169- current_step += padding_steps;
170- current_offset += padding_bytes;
171- }
172-
173- // Invariant: instruction must not straddle cache line
174- assert ! (
175- current_offset % CACHE_LINE + size <= CACHE_LINE ,
176- "instruction at offset {} with size {} straddles 64-byte cache line" ,
177- current_offset,
178- size
179- ) ;
180-
181- mapping. insert ( label, current_step) ;
182- let step_count = ( size / STEP_SIZE ) as u16 ;
183- current_step += step_count;
184- current_offset += size;
185- }
186- }
187-
188- LayoutResult :: new ( mapping, current_step)
189- }
0 commit comments