diff --git a/cc/arch/aarch64/codegen.rs b/cc/arch/aarch64/codegen.rs index 744f2454..e359fdbd 100644 --- a/cc/arch/aarch64/codegen.rs +++ b/cc/arch/aarch64/codegen.rs @@ -1745,6 +1745,16 @@ impl Aarch64CodeGen { self.emit_fence(insn); } + // Int128 decomposition ops (from mapping pass expansion) + Opcode::Lo64 => self.emit_lo64(insn), + Opcode::Hi64 => self.emit_hi64(insn), + Opcode::Pair64 => self.emit_pair64(insn), + Opcode::AddC => self.emit_addc(insn, false), + Opcode::AdcC => self.emit_addc(insn, true), + Opcode::SubC => self.emit_subc(insn, false), + Opcode::SbcC => self.emit_subc(insn, true), + Opcode::UMulHi => self.emit_umulhi(insn), + // Skip no-ops and unimplemented _ => {} } diff --git a/cc/arch/aarch64/expression.rs b/cc/arch/aarch64/expression.rs index a39dbfb1..216554a2 100644 --- a/cc/arch/aarch64/expression.rs +++ b/cc/arch/aarch64/expression.rs @@ -24,8 +24,11 @@ impl Aarch64CodeGen { .map(|t| types.size_bits(t).max(32)) .unwrap_or(insn.size.max(32)); + // 128-bit shifts are still handled by the backend (mapping pass doesn't expand them) if size == 128 { - self.emit_int128_binop(insn); + if matches!(insn.op, Opcode::Shl | Opcode::Lsr | Opcode::Asr) { + self.emit_int128_binop(insn); + } return; } @@ -122,11 +125,6 @@ impl Aarch64CodeGen { .map(|t| types.size_bits(t).max(32)) .unwrap_or(insn.size.max(32)); - if size == 128 { - self.emit_int128_unary(insn, op); - return; - } - let op_size = OperandSize::from_bits(size); let src = match insn.src.first() { Some(&s) => s, @@ -167,11 +165,6 @@ impl Aarch64CodeGen { .map(|t| types.size_bits(t).max(32)) .unwrap_or(insn.size.max(32)); - if size == 128 { - self.emit_int128_binop(insn); - return; - } - let op_size = OperandSize::from_bits(size); let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { (Some(&s1), Some(&s2)) => (s1, s2), @@ -208,11 +201,6 @@ impl Aarch64CodeGen { .map(|t| types.size_bits(t).max(32)) .unwrap_or(insn.size.max(32)); - if size == 128 { - self.emit_int128_div(insn); - return; 
- } - let op_size = OperandSize::from_bits(size); let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { (Some(&s1), Some(&s2)) => (s1, s2), @@ -273,11 +261,6 @@ impl Aarch64CodeGen { .map(|t| types.size_bits(t).max(32)) .unwrap_or(insn.size.max(32)); - if size == 128 { - self.emit_int128_compare(insn); - return; - } - let op_size = OperandSize::from_bits(size); let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { (Some(&s1), Some(&s2)) => (s1, s2), @@ -347,9 +330,9 @@ impl Aarch64CodeGen { None => return, }; - // Handle 128-bit extensions and truncations - if insn.size == 128 || insn.src_size == 128 { - self.emit_int128_extend(insn); + // Handle truncation FROM 128-bit (Zext/Sext TO 128 handled by mapping pass) + if insn.src_size == 128 && insn.op == Opcode::Trunc { + self.emit_int128_trunc(insn); return; } @@ -504,7 +487,7 @@ impl Aarch64CodeGen { } } - /// Emit 128-bit binary operation (add, sub, and, or, xor, shl, lsr, asr, mul) + /// Emit 128-bit shift operation (shl, lsr, asr) fn emit_int128_binop(&mut self, insn: &Instruction) { let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { (Some(&s1), Some(&s2)) => (s1, s2), @@ -518,144 +501,13 @@ impl Aarch64CodeGen { // Load src1 as 128-bit: X9=lo1, X10=hi1 self.load_int128(src1, Reg::X9, Reg::X10); - // For shift ops, src2 is the shift amount (a regular small integer, not int128). - // Load it as a 64-bit value into X11 only. For all other ops, load as 128-bit. - let is_shift = matches!(insn.op, Opcode::Shl | Opcode::Lsr | Opcode::Asr); - if is_shift { - self.emit_move(src2, Reg::X11, 64); - } else { - self.load_int128(src2, Reg::X11, Reg::X16); - } + // Shift amount is a regular small integer, not int128. 
+ self.emit_move(src2, Reg::X11, 64); match insn.op { - Opcode::Add => { - // adds x9, x9, x11 (lo + lo, set carry) - // adc x10, x10, x16 (hi + hi + carry) - self.push_lir(Aarch64Inst::Adds { - size: OperandSize::B64, - src1: Reg::X9, - src2: GpOperand::Reg(Reg::X11), - dst: Reg::X9, - }); - self.push_lir(Aarch64Inst::Adc { - size: OperandSize::B64, - src1: Reg::X10, - src2: Reg::X16, - dst: Reg::X10, - }); - } - Opcode::Sub => { - // subs x9, x9, x11 (lo - lo, set borrow) - // sbc x10, x10, x16 (hi - hi - borrow) - self.push_lir(Aarch64Inst::Subs { - size: OperandSize::B64, - src1: Reg::X9, - src2: GpOperand::Reg(Reg::X11), - dst: Reg::X9, - }); - self.push_lir(Aarch64Inst::Sbc { - size: OperandSize::B64, - src1: Reg::X10, - src2: Reg::X16, - dst: Reg::X10, - }); - } - Opcode::And => { - self.push_lir(Aarch64Inst::And { - size: OperandSize::B64, - src1: Reg::X9, - src2: GpOperand::Reg(Reg::X11), - dst: Reg::X9, - }); - self.push_lir(Aarch64Inst::And { - size: OperandSize::B64, - src1: Reg::X10, - src2: GpOperand::Reg(Reg::X16), - dst: Reg::X10, - }); - } - Opcode::Or => { - self.push_lir(Aarch64Inst::Orr { - size: OperandSize::B64, - src1: Reg::X9, - src2: GpOperand::Reg(Reg::X11), - dst: Reg::X9, - }); - self.push_lir(Aarch64Inst::Orr { - size: OperandSize::B64, - src1: Reg::X10, - src2: GpOperand::Reg(Reg::X16), - dst: Reg::X10, - }); - } - Opcode::Xor => { - self.push_lir(Aarch64Inst::Eor { - size: OperandSize::B64, - src1: Reg::X9, - src2: GpOperand::Reg(Reg::X11), - dst: Reg::X9, - }); - self.push_lir(Aarch64Inst::Eor { - size: OperandSize::B64, - src1: Reg::X10, - src2: GpOperand::Reg(Reg::X16), - dst: Reg::X10, - }); - } - Opcode::Mul => { - // 128-bit multiply: (lo1, hi1) * (lo2, hi2) - // result_lo = lo1 * lo2 (lower 64 bits) - // result_hi = umulh(lo1, lo2) + hi1*lo2 + lo1*hi2 - // - // X9=lo1, X10=hi1, X11=lo2, X16=hi2 - // X17 = umulh(lo1, lo2) - self.push_lir(Aarch64Inst::Umulh { - src1: Reg::X9, - src2: Reg::X11, - dst: Reg::X17, - }); - // X17 = 
X17 + hi1*lo2 = madd(X10, X11, X17) - self.push_lir(Aarch64Inst::MAdd { - size: OperandSize::B64, - src1: Reg::X10, - src2: Reg::X11, - acc: Reg::X17, - dst: Reg::X17, - }); - // X17 = X17 + lo1*hi2 = madd(X9, X16, X17) - self.push_lir(Aarch64Inst::MAdd { - size: OperandSize::B64, - src1: Reg::X9, - src2: Reg::X16, - acc: Reg::X17, - dst: Reg::X17, - }); - // X9 = lo1 * lo2 (lower 64 bits) - self.push_lir(Aarch64Inst::Mul { - size: OperandSize::B64, - src1: Reg::X9, - src2: Reg::X11, - dst: Reg::X9, - }); - // hi result in X10 - self.push_lir(Aarch64Inst::Mov { - size: OperandSize::B64, - src: GpOperand::Reg(Reg::X17), - dst: Reg::X10, - }); - } - Opcode::Shl => { - // 128-bit left shift: shift amount in X11 (lo half of src2) - self.emit_int128_shl(); - } - Opcode::Lsr => { - // 128-bit logical right shift - self.emit_int128_lsr(); - } - Opcode::Asr => { - // 128-bit arithmetic right shift - self.emit_int128_asr(); - } + Opcode::Shl => self.emit_int128_shl(), + Opcode::Lsr => self.emit_int128_lsr(), + Opcode::Asr => self.emit_int128_asr(), _ => return, } @@ -976,384 +828,214 @@ impl Aarch64CodeGen { self.push_lir(Aarch64Inst::Directive(Directive::BlockLabel(label_done))); } - /// Emit 128-bit division (calls __udivti3/__divti3/__umodti3/__modti3 runtime helpers) - fn emit_int128_div(&mut self, insn: &Instruction) { - let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { - (Some(&s1), Some(&s2)) => (s1, s2), - _ => return, - }; - let target = match insn.target { - Some(t) => t, + /// Emit truncation from 128-bit to a smaller type + fn emit_int128_trunc(&mut self, insn: &Instruction) { + let src = match insn.src.first() { + Some(&s) => s, None => return, }; - - // AAPCS64: 128-bit args passed in X0:X1 (first) and X2:X3 (second) - // Return value in X0:X1 - self.load_int128(src1, Reg::X0, Reg::X1); - self.load_int128(src2, Reg::X2, Reg::X3); - - let func_name = match insn.op { - Opcode::DivS => "__divti3", - Opcode::DivU => "__udivti3", - Opcode::ModS => 
"__modti3", - Opcode::ModU => "__umodti3", - _ => return, - }; - - use crate::arch::lir::CallTarget; - self.push_lir(Aarch64Inst::Bl { - target: CallTarget::Direct(crate::arch::lir::Symbol::extern_sym(func_name)), - }); - - // Result in X0:X1 -> store to target - // Move to X9:X10 first to avoid clobbering if target overlaps arg regs - self.push_lir(Aarch64Inst::Mov { - size: OperandSize::B64, - src: GpOperand::Reg(Reg::X0), - dst: Reg::X9, - }); - self.push_lir(Aarch64Inst::Mov { - size: OperandSize::B64, - src: GpOperand::Reg(Reg::X1), - dst: Reg::X10, - }); - self.store_int128(Reg::X9, Reg::X10, target); - } - - /// Emit 128-bit comparison - fn emit_int128_compare(&mut self, insn: &Instruction) { - let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { - (Some(&s1), Some(&s2)) => (s1, s2), - _ => return, - }; let target = match insn.target { Some(t) => t, None => return, }; - self.load_int128(src1, Reg::X9, Reg::X10); - self.load_int128(src2, Reg::X11, Reg::X16); - + // Truncate from 128: just take lo half (or part of it) + self.load_int128(src, Reg::X9, Reg::X10); + // X9 has lo half, which is what we want + let target_size = insn.size; let dst_loc = self.get_location(target); let dst_reg = match &dst_loc { Loc::Reg(r) => *r, - _ => Reg::X17, + _ => Reg::X9, // already in X9 }; - - match insn.op { - Opcode::SetEq | Opcode::SetNe => { - // Eq/Ne: eor both halves, orr results, compare with zero - // X9 = lo1 ^ lo2 - self.push_lir(Aarch64Inst::Eor { - size: OperandSize::B64, - src1: Reg::X9, - src2: GpOperand::Reg(Reg::X11), - dst: Reg::X9, - }); - // X10 = hi1 ^ hi2 - self.push_lir(Aarch64Inst::Eor { - size: OperandSize::B64, - src1: Reg::X10, - src2: GpOperand::Reg(Reg::X16), - dst: Reg::X10, - }); - // X9 = X9 | X10 - self.push_lir(Aarch64Inst::Orr { - size: OperandSize::B64, - src1: Reg::X9, - src2: GpOperand::Reg(Reg::X10), - dst: Reg::X9, - }); - // cmp X9, #0 - self.push_lir(Aarch64Inst::Cmp { - size: OperandSize::B64, - src1: Reg::X9, - src2: 
GpOperand::Imm(0), - }); - let cond = if insn.op == Opcode::SetEq { - CondCode::Eq - } else { - CondCode::Ne - }; - self.push_lir(Aarch64Inst::Cset { cond, dst: dst_reg }); - } - _ => { - // Ordered comparisons: branch-based approach to avoid ccmp nzcv=0 bug. - // Compare hi halves first; if not equal, hi comparison determines result. - // If hi halves are equal, compare lo halves (always unsigned tiebreaker). - let label_hi_gt = self.next_unique_label("i128cmp"); - let label_hi_lt = self.next_unique_label("i128cmp"); - let label_done = self.next_unique_label("i128cmp"); - - // Determine signedness and what result to produce in each case. - // For signed: compare hi with signed conditions. - // For unsigned: compare hi with unsigned conditions. - let (hi_gt_cond, hi_lt_cond, lo_cond, hi_gt_val, hi_lt_val) = match insn.op { - // SetLt: result=1 when src1 < src2 - Opcode::SetLt => (CondCode::Sgt, CondCode::Slt, CondCode::Ult, 0i64, 1i64), - // SetLe: result=1 when src1 <= src2 - Opcode::SetLe => (CondCode::Sgt, CondCode::Slt, CondCode::Ule, 0, 1), - // SetGt: result=1 when src1 > src2 - Opcode::SetGt => (CondCode::Sgt, CondCode::Slt, CondCode::Ugt, 1, 0), - // SetGe: result=1 when src1 >= src2 - Opcode::SetGe => (CondCode::Sgt, CondCode::Slt, CondCode::Uge, 1, 0), - // SetB (unsigned <): result=1 when src1 < src2 - Opcode::SetB => (CondCode::Ugt, CondCode::Ult, CondCode::Ult, 0, 1), - // SetBe (unsigned <=) - Opcode::SetBe => (CondCode::Ugt, CondCode::Ult, CondCode::Ule, 0, 1), - // SetA (unsigned >) - Opcode::SetA => (CondCode::Ugt, CondCode::Ult, CondCode::Ugt, 1, 0), - // SetAe (unsigned >=) - Opcode::SetAe => (CondCode::Ugt, CondCode::Ult, CondCode::Uge, 1, 0), - _ => return, - }; - - // Compare hi halves (X10=hi1, X16=hi2) - self.push_lir(Aarch64Inst::Cmp { - size: OperandSize::B64, - src1: Reg::X10, - src2: GpOperand::Reg(Reg::X16), - }); - self.push_lir(Aarch64Inst::BCond { - cond: hi_gt_cond, - target: label_hi_gt.clone(), - }); - 
self.push_lir(Aarch64Inst::BCond { - cond: hi_lt_cond, - target: label_hi_lt.clone(), - }); - - // Hi halves equal: compare lo halves (unsigned tiebreaker) - self.push_lir(Aarch64Inst::Cmp { - size: OperandSize::B64, - src1: Reg::X9, - src2: GpOperand::Reg(Reg::X11), - }); - self.push_lir(Aarch64Inst::Cset { - cond: lo_cond, - dst: dst_reg, - }); - self.push_lir(Aarch64Inst::B { - target: label_done.clone(), - }); - - // Hi1 > Hi2 (signed or unsigned depending on comparison) - self.push_lir(Aarch64Inst::Directive(Directive::BlockLabel(label_hi_gt))); - self.push_lir(Aarch64Inst::Mov { + if dst_reg != Reg::X9 { + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::X9), + dst: dst_reg, + }); + } + // Mask to target size if needed + match target_size { + 8 => { + self.push_lir(Aarch64Inst::And { size: OperandSize::B32, - src: GpOperand::Imm(hi_gt_val), + src1: dst_reg, + src2: GpOperand::Imm(0xff), dst: dst_reg, }); - self.push_lir(Aarch64Inst::B { - target: label_done.clone(), - }); - - // Hi1 < Hi2 (signed or unsigned depending on comparison) - self.push_lir(Aarch64Inst::Directive(Directive::BlockLabel(label_hi_lt))); - self.push_lir(Aarch64Inst::Mov { + } + 16 => { + self.push_lir(Aarch64Inst::And { size: OperandSize::B32, - src: GpOperand::Imm(hi_lt_val), + src1: dst_reg, + src2: GpOperand::Imm(0xffff), dst: dst_reg, }); - - // Done - self.push_lir(Aarch64Inst::Directive(Directive::BlockLabel(label_done))); } + 32 | 64 => { + // Already correct width + } + _ => {} } - - // Store as 64-bit so CBR's 64-bit load doesn't read stack garbage if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { - self.emit_move_to_loc(dst_reg, &dst_loc, 64); + self.emit_move_to_loc(dst_reg, &dst_loc, target_size); } } - /// Emit 128-bit unary operation (neg, not) - fn emit_int128_unary(&mut self, insn: &Instruction, op: UnaryOp) { - let src = match insn.src.first() { - Some(&s) => s, - None => return, - }; - let target = match insn.target { - Some(t) 
=> t, - None => return, - }; + // ======================================================================== + // Int128 decomposition ops (Lo64, Hi64, Pair64) + // ======================================================================== + /// Lo64: extract low 64 bits from 128-bit pseudo. + pub(super) fn emit_lo64(&mut self, insn: &Instruction) { + let src = insn.src[0]; + let target = insn.target.expect("Lo64 must have target"); + // Load both halves, use lo self.load_int128(src, Reg::X9, Reg::X10); - - match op { - UnaryOp::Neg => { - // negs x9, x9 (negate lo, set flags) - // ngc x10, x10 (negate hi with borrow) - self.push_lir(Aarch64Inst::Negs { - size: OperandSize::B64, - src: Reg::X9, - dst: Reg::X9, - }); - self.push_lir(Aarch64Inst::Ngc { - size: OperandSize::B64, - src: Reg::X10, - dst: Reg::X10, - }); + let dst_loc = self.get_location(target); + match dst_loc { + Loc::Reg(r) => { + if r != Reg::X9 { + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::X9), + dst: r, + }); + } } - UnaryOp::Not => { - // mvn x9, x9 - // mvn x10, x10 - self.push_lir(Aarch64Inst::Mvn { - size: OperandSize::B64, - src: Reg::X9, - dst: Reg::X9, - }); - self.push_lir(Aarch64Inst::Mvn { - size: OperandSize::B64, - src: Reg::X10, - dst: Reg::X10, - }); + _ => self.emit_move_to_loc(Reg::X9, &dst_loc, 64), + } + } + + /// Hi64: extract high 64 bits from 128-bit pseudo. + pub(super) fn emit_hi64(&mut self, insn: &Instruction) { + let src = insn.src[0]; + let target = insn.target.expect("Hi64 must have target"); + // Load both halves, use hi + self.load_int128(src, Reg::X9, Reg::X10); + let dst_loc = self.get_location(target); + match dst_loc { + Loc::Reg(r) => { + if r != Reg::X10 { + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::X10), + dst: r, + }); + } } + _ => self.emit_move_to_loc(Reg::X10, &dst_loc, 64), } + } + + /// Pair64: combine two 64-bit pseudos into 128-bit. 
+ pub(super) fn emit_pair64(&mut self, insn: &Instruction) { + let src_lo = insn.src[0]; + let src_hi = insn.src[1]; + let target = insn.target.expect("Pair64 must have target"); + self.emit_move(src_lo, Reg::X9, 64); + self.emit_move(src_hi, Reg::X10, 64); self.store_int128(Reg::X9, Reg::X10, target); } - /// Emit 128-bit extend/truncate operations - fn emit_int128_extend(&mut self, insn: &Instruction) { - let src = match insn.src.first() { - Some(&s) => s, - None => return, + /// AddC/AdcC: 64-bit add with carry. + /// AddC (with_carry=false): adds (sets flags) + /// AdcC (with_carry=true): adc (add with carry in) + pub(super) fn emit_addc(&mut self, insn: &Instruction, with_carry: bool) { + let target = insn.target.expect("AddC/AdcC must have target"); + let src1 = insn.src[0]; + let src2 = insn.src[1]; + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::X16, }; - let target = match insn.target { - Some(t) => t, - None => return, + + self.emit_move(src1, dst_reg, 64); + self.emit_move(src2, Reg::X10, 64); + + if with_carry { + self.push_lir(Aarch64Inst::Adc { + size: OperandSize::B64, + src1: dst_reg, + src2: Reg::X10, + dst: dst_reg, + }); + } else { + self.push_lir(Aarch64Inst::Adds { + size: OperandSize::B64, + src1: dst_reg, + src2: GpOperand::Reg(Reg::X10), + dst: dst_reg, + }); + } + + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, 64); + } + } + + /// SubC/SbcC: 64-bit sub with borrow. 
+ /// SubC (with_borrow=false): subs (sets flags) + /// SbcC (with_borrow=true): sbc (sub with borrow in) + pub(super) fn emit_subc(&mut self, insn: &Instruction, with_borrow: bool) { + let target = insn.target.expect("SubC/SbcC must have target"); + let src1 = insn.src[0]; + let src2 = insn.src[1]; + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::X16, }; - match insn.op { - Opcode::Zext => { - // Zero extend to 128: lo = src, hi = 0 - let dst_loc = self.get_location(target); - if let Loc::Stack(dst_offset) = dst_loc { - self.emit_move(src, Reg::X9, 64); - // Zero-extend from smaller source if needed - match insn.src_size { - 8 => { - self.push_lir(Aarch64Inst::Uxtb { - src: Reg::X9, - dst: Reg::X9, - }); - } - 16 => { - self.push_lir(Aarch64Inst::Uxth { - src: Reg::X9, - dst: Reg::X9, - }); - } - 32 => { - // Writing to w9 zeroes upper 32 bits - self.push_lir(Aarch64Inst::Mov { - size: OperandSize::B32, - src: GpOperand::Reg(Reg::X9), - dst: Reg::X9, - }); - } - _ => {} // 64-bit: nothing extra needed - } - let mem = self.stack_mem(dst_offset); - self.push_lir(Aarch64Inst::Stp { - size: OperandSize::B64, - src1: Reg::X9, - src2: Reg::Xzr, - addr: mem, - }); - } - } - Opcode::Sext => { - // Sign extend to 128: lo = src, hi = src >> 63 (sign bit) - let dst_loc = self.get_location(target); - if let Loc::Stack(dst_offset) = dst_loc { - self.emit_move(src, Reg::X9, 64); - // Sign-extend from smaller source if needed - match insn.src_size { - 8 => { - self.push_lir(Aarch64Inst::Sxtb { - dst_size: OperandSize::B64, - src: Reg::X9, - dst: Reg::X9, - }); - } - 16 => { - self.push_lir(Aarch64Inst::Sxth { - dst_size: OperandSize::B64, - src: Reg::X9, - dst: Reg::X9, - }); - } - 32 => { - self.push_lir(Aarch64Inst::Sxtw { - src: Reg::X9, - dst: Reg::X9, - }); - } - _ => {} // 64-bit: nothing extra needed - } - // hi = lo >> 63 (arithmetic) - self.push_lir(Aarch64Inst::Asr { - size: OperandSize::B64, - src: Reg::X9, - 
amount: GpOperand::Imm(63), - dst: Reg::X10, - }); - let mem = self.stack_mem(dst_offset); - self.push_lir(Aarch64Inst::Stp { - size: OperandSize::B64, - src1: Reg::X9, - src2: Reg::X10, - addr: mem, - }); - } - } - Opcode::Trunc => { - // Truncate from 128: just take lo half (or part of it) - self.load_int128(src, Reg::X9, Reg::X10); - // X9 has lo half, which is what we want - let target_size = insn.size; - let dst_loc = self.get_location(target); - let dst_reg = match &dst_loc { - Loc::Reg(r) => *r, - _ => Reg::X9, // already in X9 - }; - if dst_reg != Reg::X9 { - self.push_lir(Aarch64Inst::Mov { - size: OperandSize::B64, - src: GpOperand::Reg(Reg::X9), - dst: dst_reg, - }); - } - // Mask to target size if needed - match target_size { - 8 => { - self.push_lir(Aarch64Inst::And { - size: OperandSize::B32, - src1: dst_reg, - src2: GpOperand::Imm(0xff), - dst: dst_reg, - }); - } - 16 => { - self.push_lir(Aarch64Inst::And { - size: OperandSize::B32, - src1: dst_reg, - src2: GpOperand::Imm(0xffff), - dst: dst_reg, - }); - } - 32 | 64 => { - // Already correct width - } - _ => {} - } - if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { - self.emit_move_to_loc(dst_reg, &dst_loc, target_size); - } - } - _ => {} + self.emit_move(src1, dst_reg, 64); + self.emit_move(src2, Reg::X10, 64); + + if with_borrow { + self.push_lir(Aarch64Inst::Sbc { + size: OperandSize::B64, + src1: dst_reg, + src2: Reg::X10, + dst: dst_reg, + }); + } else { + self.push_lir(Aarch64Inst::Subs { + size: OperandSize::B64, + src1: dst_reg, + src2: GpOperand::Reg(Reg::X10), + dst: dst_reg, + }); + } + + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, 64); + } + } + + /// UMulHi: upper 64 bits of 64×64 unsigned multiply. 
+ pub(super) fn emit_umulhi(&mut self, insn: &Instruction) { + let target = insn.target.expect("UMulHi must have target"); + let src1 = insn.src[0]; + let src2 = insn.src[1]; + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::X16, + }; + + self.emit_move(src1, Reg::X9, 64); + self.emit_move(src2, Reg::X10, 64); + + self.push_lir(Aarch64Inst::Umulh { + src1: Reg::X9, + src2: Reg::X10, + dst: dst_reg, + }); + + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, 64); } } } diff --git a/cc/arch/aarch64/float.rs b/cc/arch/aarch64/float.rs index a220884d..05e255be 100644 --- a/cc/arch/aarch64/float.rs +++ b/cc/arch/aarch64/float.rs @@ -587,6 +587,14 @@ impl Aarch64CodeGen { let src_kind = insn.src_typ.map(|t| types.kind(t)); let dst_kind = insn.typ.map(|t| types.kind(t)); let needs_convert = match (src_kind, dst_kind) { + ( + Some(TypeKind::Float16), + Some(TypeKind::Float | TypeKind::Double | TypeKind::LongDouble), + ) => true, + ( + Some(TypeKind::Float | TypeKind::Double | TypeKind::LongDouble), + Some(TypeKind::Float16), + ) => true, (Some(TypeKind::Float), Some(TypeKind::Double | TypeKind::LongDouble)) => true, (Some(TypeKind::Double | TypeKind::LongDouble), Some(TypeKind::Float)) => true, // On aarch64, Double and LongDouble are the same, no conversion needed diff --git a/cc/arch/aarch64/lir.rs b/cc/arch/aarch64/lir.rs index 26566e69..68a4d4e2 100644 --- a/cc/arch/aarch64/lir.rs +++ b/cc/arch/aarch64/lir.rs @@ -642,29 +642,6 @@ pub enum Aarch64Inst { /// UMULH - Unsigned multiply high (upper 64 bits of 64x64->128 multiply) Umulh { src1: Reg, src2: Reg, dst: Reg }, - /// MADD - Multiply-add: dst = acc + (src1 * src2) - MAdd { - size: OperandSize, - src1: Reg, - src2: Reg, - acc: Reg, - dst: Reg, - }, - - /// NEGS - Negate and set flags (used for 128-bit negate: lo half) - Negs { - size: OperandSize, - src: Reg, - dst: Reg, - }, - - /// NGC - Negate with carry 
(used for 128-bit negate: hi half) - Ngc { - size: OperandSize, - src: Reg, - dst: Reg, - }, - // ======================================================================== // Directives (Architecture-Independent) // ======================================================================== @@ -1789,44 +1766,6 @@ impl EmitAsm for Aarch64Inst { ); } - Aarch64Inst::MAdd { - size, - src1, - src2, - acc, - dst, - } => { - let sz = size.bits().max(32); - let _ = writeln!( - out, - " madd {}, {}, {}, {}", - dst.name_for_size(sz), - src1.name_for_size(sz), - src2.name_for_size(sz), - acc.name_for_size(sz) - ); - } - - Aarch64Inst::Negs { size, src, dst } => { - let sz = size.bits().max(32); - let _ = writeln!( - out, - " negs {}, {}", - dst.name_for_size(sz), - src.name_for_size(sz) - ); - } - - Aarch64Inst::Ngc { size, src, dst } => { - let sz = size.bits().max(32); - let _ = writeln!( - out, - " ngc {}, {}", - dst.name_for_size(sz), - src.name_for_size(sz) - ); - } - // Directives - delegate to shared implementation Aarch64Inst::Directive(dir) => { dir.emit(target, out); diff --git a/cc/arch/aarch64/mapping.rs b/cc/arch/aarch64/mapping.rs new file mode 100644 index 00000000..94346ad2 --- /dev/null +++ b/cc/arch/aarch64/mapping.rs @@ -0,0 +1,482 @@ +// +// Copyright (c) 2025-2026 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// AArch64 instruction mapping +// + +use crate::abi::CallingConv; +use crate::arch::mapping::{ + build_binop_rtlib_call, build_convert_rtlib_call, int_suffix_for_longdouble, + longdouble_needs_rtlib, map_int128_divmod, map_int128_expand, map_int128_float_convert, + ArchMapper, MappedInsn, MappingCtx, +}; +use crate::ir::{Instruction, Opcode}; +use crate::types::TypeKind; + +/// AArch64 instruction mapper. 
+pub struct Aarch64Mapper; + +impl ArchMapper for Aarch64Mapper { + fn map_insn(&self, insn: &Instruction, ctx: &mut MappingCtx<'_>) -> MappedInsn { + // Shared: int128 div/mod → rtlib + if let Some(r) = map_int128_divmod(insn, ctx) { + return r; + } + // Shared: int128 expand (add/sub/mul/bitwise/neg/not/cmp/zext/sext) + if let Some(r) = map_int128_expand(insn, ctx) { + return r; + } + // Shared: int128↔float → rtlib + if let Some(r) = map_int128_float_convert(insn, ctx) { + return r; + } + // aarch64 only: long double → rtlib (Linux, not macOS) + if let Some(r) = self.map_longdouble(insn, ctx) { + return r; + } + MappedInsn::Legal + } +} + +impl Aarch64Mapper { + /// Classify and expand long double operations via rtlib calls. + /// Only applies on aarch64/Linux where long double is 128-bit IEEE quad. + fn map_longdouble(&self, insn: &Instruction, ctx: &mut MappingCtx<'_>) -> Option { + if !longdouble_needs_rtlib(ctx.target) { + return None; + } + + match insn.op { + // Binary arithmetic: FAdd/FSub/FMul/FDiv → single rtlib call + Opcode::FAdd | Opcode::FSub | Opcode::FMul | Opcode::FDiv => { + let typ = insn.typ?; + if ctx.types.kind(typ) != TypeKind::LongDouble { + return None; + } + let name = match insn.op { + Opcode::FAdd => "__addtf3", + Opcode::FSub => "__subtf3", + Opcode::FMul => "__multf3", + Opcode::FDiv => "__divtf3", + _ => unreachable!(), + }; + let call = build_binop_rtlib_call(insn, name, ctx.types, ctx.target); + Some(MappedInsn::Replace(vec![call])) + } + + // Negation: FNeg → single rtlib call + Opcode::FNeg => { + let typ = insn.typ?; + if ctx.types.kind(typ) != TypeKind::LongDouble { + return None; + } + let call = build_binop_rtlib_call(insn, "__negtf2", ctx.types, ctx.target); + Some(MappedInsn::Replace(vec![call])) + } + + // Comparisons: call rtlib cmp, then compare result against 0 + Opcode::FCmpOLt + | Opcode::FCmpOLe + | Opcode::FCmpOGt + | Opcode::FCmpOGe + | Opcode::FCmpOEq + | Opcode::FCmpONe => { + if insn.size != 128 { + return 
None; + } + // Also check src_typ if available + if let Some(src_typ) = insn.src_typ { + if ctx.types.kind(src_typ) != TypeKind::LongDouble { + return None; + } + } + let (name, cmp_op) = match insn.op { + Opcode::FCmpOLt => ("__lttf2", Opcode::SetLt), + Opcode::FCmpOLe => ("__letf2", Opcode::SetLe), + Opcode::FCmpOGt => ("__gttf2", Opcode::SetGt), + Opcode::FCmpOGe => ("__getf2", Opcode::SetGe), + Opcode::FCmpOEq => ("__eqtf2", Opcode::SetEq), + Opcode::FCmpONe => ("__netf2", Opcode::SetNe), + _ => unreachable!(), + }; + + let result_pseudo = insn.target.expect("cmp must have target"); + let int_type = ctx.types.int_id; + let int_size = ctx.types.size_bits(int_type); + let ld_type = ctx.types.longdouble_id; + + // Allocate pseudo for cmp call result + let cmp_result = ctx.func.create_reg_pseudo(); + let zero = ctx.func.create_const_pseudo(0); + + // Build the rtlib call: cmp_result = __lttf2(left, right) + let arg_vals = insn.src.clone(); + let arg_types = vec![ld_type; arg_vals.len()]; + let mut call = Instruction::call_with_abi( + Some(cmp_result), + name, + arg_vals, + arg_types, + int_type, + CallingConv::C, + ctx.types, + ctx.target, + ); + call.pos = insn.pos; + + // Build the int comparison: result = cmp_op(cmp_result, 0) + let cmp = + Instruction::binop(cmp_op, result_pseudo, cmp_result, zero, int_type, int_size); + + Some(MappedInsn::Replace(vec![call, cmp])) + } + + // Float-to-float conversions involving long double + Opcode::FCvtF => { + let dst_typ = insn.typ?; + let src_typ = insn.src_typ?; + let dst_kind = ctx.types.kind(dst_typ); + let src_kind = ctx.types.kind(src_typ); + if src_kind == TypeKind::LongDouble { + // longdouble → float/double + let name = match dst_kind { + TypeKind::Float => "__trunctfsf2", + TypeKind::Double => "__trunctfdf2", + _ => return None, + }; + let call = build_convert_rtlib_call(insn, name, ctx.types, ctx.target); + Some(MappedInsn::Replace(vec![call])) + } else if dst_kind == TypeKind::LongDouble { + // float/double → 
longdouble + let name = match src_kind { + TypeKind::Float => "__extendsftf2", + TypeKind::Double => "__extenddftf2", + _ => return None, + }; + let call = build_convert_rtlib_call(insn, name, ctx.types, ctx.target); + Some(MappedInsn::Replace(vec![call])) + } else { + None + } + } + + // Int-to-float: int → longdouble + Opcode::SCvtF | Opcode::UCvtF => { + let dst_typ = insn.typ?; + if ctx.types.kind(dst_typ) != TypeKind::LongDouble { + return None; + } + let src_typ = insn.src_typ?; + // Skip int128 (handled by map_int128_float_convert) + if ctx.types.kind(src_typ) == TypeKind::Int128 { + return None; + } + let isuf = int_suffix_for_longdouble(ctx.types, src_typ); + let name: &'static str = match isuf { + "si" => "__floatsitf", + "di" => "__floatditf", + "usi" => "__floatunsitf", + "udi" => "__floatunditf", + _ => return None, + }; + let call = build_convert_rtlib_call(insn, name, ctx.types, ctx.target); + Some(MappedInsn::Replace(vec![call])) + } + + // Float-to-int: longdouble → int + Opcode::FCvtS | Opcode::FCvtU => { + let src_typ = insn.src_typ?; + if ctx.types.kind(src_typ) != TypeKind::LongDouble { + return None; + } + let dst_typ = insn.typ?; + // Skip int128 (handled by map_int128_float_convert) + if ctx.types.kind(dst_typ) == TypeKind::Int128 { + return None; + } + let isuf = int_suffix_for_longdouble(ctx.types, dst_typ); + let name: &'static str = match isuf { + "si" => "__fixtfsi", + "di" => "__fixtfdi", + "usi" => "__fixunstfsi", + "udi" => "__fixunstfdi", + _ => return None, + }; + let call = build_convert_rtlib_call(insn, name, ctx.types, ctx.target); + Some(MappedInsn::Replace(vec![call])) + } + + _ => None, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::arch::mapping::test_helpers::*; + use crate::arch::mapping::MappingCtx; + use crate::ir::{Instruction, Opcode, PseudoId}; + use crate::target::{Arch, Os, Target}; + use crate::types::TypeTable; + + #[test] + fn test_aarch64_legal_insns() { + let target = 
Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = Aarch64Mapper; + + let func_template = make_test_func(&types); + for block in &func_template.blocks { + for insn in &block.insns { + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + let result = mapper.map_insn(insn, &mut ctx); + assert_legal(&result); + } + } + } + + // ======================================================================== + // Int128 div/mod + // ======================================================================== + + #[test] + fn test_aarch64_int128_divmod() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = Aarch64Mapper; + + let insn = Instruction::binop( + Opcode::DivS, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int128_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__divti3"); + + let insn = Instruction::binop( + Opcode::ModU, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.uint128_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__umodti3"); + } + + // ======================================================================== + // Long double → rtlib (aarch64/Linux only) + // ======================================================================== + + #[test] + fn test_aarch64_longdouble_binop() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = Aarch64Mapper; + + for (op, name) in [ + (Opcode::FAdd, "__addtf3"), + (Opcode::FSub, "__subtf3"), + (Opcode::FMul, "__multf3"), + (Opcode::FDiv, "__divtf3"), + ] { + let insn = 
Instruction::binop( + op, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.longdouble_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), name); + } + } + + #[test] + fn test_aarch64_longdouble_binop_macos_legal() { + let target = Target::new(Arch::Aarch64, Os::MacOS); + let types = TypeTable::new(&target); + let mapper = Aarch64Mapper; + + // macOS aarch64: long double == double, native + let insn = Instruction::binop( + Opcode::FAdd, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.longdouble_id, + 64, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_legal(&mapper.map_insn(&insn, &mut ctx)); + } + + #[test] + fn test_aarch64_longdouble_neg() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = Aarch64Mapper; + + let insn = Instruction::unop( + Opcode::FNeg, + PseudoId(2), + PseudoId(0), + types.longdouble_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__negtf2"); + } + + #[test] + fn test_aarch64_longdouble_cmp() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = Aarch64Mapper; + + let mut insn = Instruction::binop( + Opcode::FCmpOLt, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 128, + ); + insn.src_typ = Some(types.longdouble_id); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_cmp_libcall(&mapper.map_insn(&insn, &mut ctx), "__lttf2", Opcode::SetLt); + + let mut insn = Instruction::binop( + Opcode::FCmpOEq, + PseudoId(2), + PseudoId(0), 
+ PseudoId(1), + types.int_id, + 128, + ); + insn.src_typ = Some(types.longdouble_id); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_cmp_libcall(&mapper.map_insn(&insn, &mut ctx), "__eqtf2", Opcode::SetEq); + } + + #[test] + fn test_aarch64_longdouble_convert() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = Aarch64Mapper; + + // float → longdouble + let insn = make_convert_insn(Opcode::FCvtF, types.longdouble_id, 128, types.float_id, 32); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__extendsftf2"); + + // longdouble → double + let insn = make_convert_insn(Opcode::FCvtF, types.double_id, 64, types.longdouble_id, 128); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__trunctfdf2"); + + // int32 → longdouble + let insn = make_convert_insn(Opcode::SCvtF, types.longdouble_id, 128, types.int_id, 32); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__floatsitf"); + + // longdouble → int64 + let insn = make_convert_insn(Opcode::FCvtS, types.long_id, 64, types.longdouble_id, 128); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__fixtfdi"); + } + + #[test] + fn test_aarch64_int128_longdouble() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = Aarch64Mapper; + + // aarch64 long double uses "tf" suffix + let insn = 
make_convert_insn( + Opcode::SCvtF, + types.longdouble_id, + 128, + types.int128_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__floattitf"); + } +} diff --git a/cc/arch/aarch64/mod.rs b/cc/arch/aarch64/mod.rs index 76d8effe..bcfb71fb 100644 --- a/cc/arch/aarch64/mod.rs +++ b/cc/arch/aarch64/mod.rs @@ -16,6 +16,7 @@ mod features; mod float; pub mod lir; pub mod macros; +pub(crate) mod mapping; pub mod regalloc; pub use macros::get_macros; diff --git a/cc/arch/codegen.rs b/cc/arch/codegen.rs index 355742e4..3a4fe664 100644 --- a/cc/arch/codegen.rs +++ b/cc/arch/codegen.rs @@ -284,7 +284,7 @@ impl CodeGenBase { if size > 8 { // 128-bit: emit as two quads (little-endian: lo then hi) let lo = *val as i64; - let hi = (*val >> 64) as i64; + let hi = (*val >> 64) as u64 as i64; self.push_directive(Directive::Quad(lo)); self.push_directive(Directive::Quad(hi)); } else { diff --git a/cc/arch/mapping.rs b/cc/arch/mapping.rs new file mode 100644 index 00000000..031e58c2 --- /dev/null +++ b/cc/arch/mapping.rs @@ -0,0 +1,2055 @@ +// +// Copyright (c) 2025-2026 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Architecture-independent instruction mapping interface +// +// This pass runs after SSA construction and before optimization. +// It handles target-specific lowering: expanding unsupported operations +// into sequences of simpler instructions or runtime library calls. 
+// + +use crate::abi::{get_abi_for_conv, ArgClass, CallingConv}; +use crate::ir::{CallAbiInfo, Function, Instruction, Module, Opcode, PseudoId}; +use crate::rtlib::{Float16Abi, RtlibNames}; +use crate::target::{Arch, Os, Target}; +use crate::types::{TypeId, TypeKind, TypeTable}; + +// ============================================================================ +// Trait and types +// ============================================================================ + +/// Context passed to arch mapper. Provides mutable access to the +/// function for pseudo allocation, plus type/target info. +pub struct MappingCtx<'a> { + pub func: &'a mut Function, + pub types: &'a TypeTable, + pub target: &'a Target, +} + +/// Result of mapping a single instruction. +pub enum MappedInsn { + /// Instruction is natively supported — keep unchanged. + Legal, + /// Replace with these instructions in the same basic block. + Replace(Vec<Instruction>), +} + +/// Per-architecture instruction mapper. +pub trait ArchMapper { + /// Map one instruction. The arch impl calls shared helpers + /// to build replacement IR, then returns it in MappedInsn::Replace. + fn map_insn(&self, insn: &Instruction, ctx: &mut MappingCtx<'_>) -> MappedInsn; +} + +// ============================================================================ +// Complex number rtlib name selection +// ============================================================================ + +/// Get the rtlib function name for complex multiplication. +/// Target-dependent for long double (x87 vs IEEE quad). 
+pub fn complex_mul_name(base_kind: TypeKind, target: &Target) -> &'static str { + match base_kind { + TypeKind::Float => "__mulsc3", + TypeKind::Double => "__muldc3", + TypeKind::LongDouble => { + if target.arch == Arch::Aarch64 && target.os == Os::MacOS { + "__muldc3" // macOS aarch64: long double == double + } else { + match target.arch { + Arch::X86_64 => "__mulxc3", + Arch::Aarch64 => "__multc3", + } + } + } + _ => "__muldc3", + } +} + +/// Get the rtlib function name for complex division. +/// Target-dependent for long double (x87 vs IEEE quad). +pub fn complex_div_name(base_kind: TypeKind, target: &Target) -> &'static str { + match base_kind { + TypeKind::Float => "__divsc3", + TypeKind::Double => "__divdc3", + TypeKind::LongDouble => { + if target.arch == Arch::Aarch64 && target.os == Os::MacOS { + "__divdc3" + } else { + match target.arch { + Arch::X86_64 => "__divxc3", + Arch::Aarch64 => "__divtc3", + } + } + } + _ => "__divdc3", + } +} + +// ============================================================================ +// Utility helpers +// ============================================================================ + +/// Get the rtlib suffix for a float type kind on the given target. +pub(crate) fn float_suffix(kind: TypeKind, target: &Target) -> &'static str { + match kind { + TypeKind::Float => "sf", + TypeKind::Double => "df", + TypeKind::LongDouble => { + if target.arch == Arch::X86_64 { + "xf" + } else { + "tf" + } + } + _ => "", + } +} + +/// Check if long double needs soft-float rtlib on this target. +/// Returns true only for aarch64/Linux (128-bit IEEE quad). +/// x86_64 uses native x87; macOS aarch64 long double == double. +pub(crate) fn longdouble_needs_rtlib(target: &Target) -> bool { + target.arch == Arch::Aarch64 && target.os != Os::MacOS +} + +/// Get the integer suffix for a long double↔int conversion. 
+pub(crate) fn int_suffix_for_longdouble(types: &TypeTable, int_type: TypeId) -> &'static str { + let size = types.size_bits(int_type); + let is_unsigned = types.is_unsigned(int_type); + match (is_unsigned, size <= 32) { + (true, true) => "usi", + (true, false) => "udi", + (false, true) => "si", + (false, false) => "di", + } +} + +// ============================================================================ +// Instruction helpers +// ============================================================================ + +/// Extract lo and hi 64-bit halves from a 128-bit pseudo. +fn extract_halves( + func: &mut Function, + insns: &mut Vec<Instruction>, + src: PseudoId, + long_type: TypeId, +) -> (PseudoId, PseudoId) { + let lo = func.create_reg_pseudo(); + insns.push(Instruction::unop(Opcode::Lo64, lo, src, long_type, 64)); + let hi = func.create_reg_pseudo(); + insns.push(Instruction::unop(Opcode::Hi64, hi, src, long_type, 64)); + (lo, hi) +} + +// ============================================================================ +// Rtlib call builders (convenience wrappers over Instruction::call_with_abi) +// ============================================================================ + +/// Build a rtlib call replacing a binop (both args same type as result). +pub(crate) fn build_binop_rtlib_call( + insn: &Instruction, + func_name: &str, + types: &TypeTable, + target: &Target, +) -> Instruction { + let ret_type = insn.typ.expect("binop must have type"); + let arg_types = vec![ret_type; insn.src.len()]; + let mut call = Instruction::call_with_abi( + insn.target, + func_name, + insn.src.clone(), + arg_types, + ret_type, + CallingConv::C, + types, + target, + ); + call.pos = insn.pos; + call +} + +/// Build a rtlib call replacing a conversion (single arg, different src/dst types). 
+pub(crate) fn build_convert_rtlib_call( + insn: &Instruction, + func_name: &str, + types: &TypeTable, + target: &Target, +) -> Instruction { + let ret_type = insn.typ.expect("conversion must have type"); + let src_type = insn.src_typ.expect("conversion must have src_typ"); + let mut call = Instruction::call_with_abi( + insn.target, + func_name, + insn.src.clone(), + vec![src_type], + ret_type, + CallingConv::C, + types, + target, + ); + call.pos = insn.pos; + call +} + +/// Build a call to a Float16 conversion rtlib function with correct ABI. +/// +/// Handles the ABI difference between compiler-rt (Integer ABI: Float16 +/// passed/returned as u16 in GP registers) and libgcc (SSE ABI: Float16 +/// passed/returned in XMM registers). +pub(crate) fn build_f16_convert_call( + insn: &Instruction, + func_name: &str, + src_type: TypeId, + dst_type: TypeId, + types: &TypeTable, + target: &Target, +) -> Instruction { + let target_pseudo = insn.target.expect("conversion must have target"); + let dst_size = types.size_bits(dst_type); + let src_kind = types.kind(src_type); + let dst_kind = types.kind(dst_type); + + let rtlib = RtlibNames::new(target); + let f16_abi = rtlib.float16_abi(); + + // Arg type: ushort for compiler-rt if src is Float16, otherwise use actual type + let arg_type = if f16_abi == Float16Abi::Integer && src_kind == TypeKind::Float16 { + types.ushort_id + } else { + src_type + }; + + // Arg classification + let param_class = if f16_abi == Float16Abi::Integer && src_kind == TypeKind::Float16 { + ArgClass::Extend { + signed: false, + size_bits: 16, + } + } else { + let abi = get_abi_for_conv(CallingConv::C, target); + abi.classify_param(arg_type, types) + }; + + // Return classification + let ret_class = if f16_abi == Float16Abi::Integer && dst_kind == TypeKind::Float16 { + ArgClass::Extend { + signed: false, + size_bits: 16, + } + } else { + let abi = get_abi_for_conv(CallingConv::C, target); + abi.classify_return(dst_type, types) + }; + + let call_abi_info 
= Box::new(CallAbiInfo::new(vec![param_class], ret_class)); + + let mut call_insn = Instruction::call( + Some(target_pseudo), + func_name, + insn.src.clone(), + vec![arg_type], + dst_type, + dst_size, + ); + call_insn.abi_info = Some(call_abi_info); + call_insn.pos = insn.pos; + call_insn +} + +/// Build a call to __extendhfsf2 (Float16 → float) with proper ABI. +fn build_f16_extend_call( + target_pseudo: PseudoId, + src: PseudoId, + pos: Option, + types: &TypeTable, + target: &Target, +) -> Instruction { + let rtlib = RtlibNames::new(target); + let f16_abi = rtlib.float16_abi(); + let float_type = types.float_id; + let float_size = types.size_bits(float_type); + + // Arg type: ushort for compiler-rt, Float16 for libgcc + let arg_type = if f16_abi == Float16Abi::Integer { + types.ushort_id + } else { + types.float16_id + }; + + // Arg classification + let param_class = if f16_abi == Float16Abi::Integer { + ArgClass::Extend { + signed: false, + size_bits: 16, + } + } else { + let abi = get_abi_for_conv(CallingConv::C, target); + abi.classify_param(types.float16_id, types) + }; + + // Return is always SSE float + let abi = get_abi_for_conv(CallingConv::C, target); + let ret_class = abi.classify_return(float_type, types); + + let call_abi_info = Box::new(CallAbiInfo::new(vec![param_class], ret_class)); + + let mut call_insn = Instruction::call( + Some(target_pseudo), + "__extendhfsf2", + vec![src], + vec![arg_type], + float_type, + float_size, + ); + call_insn.abi_info = Some(call_abi_info); + call_insn.pos = pos; + call_insn +} + +/// Build a call to __truncsfhf2 (float → Float16) with proper ABI. 
+fn build_f16_truncate_call( + target_pseudo: PseudoId, + src: PseudoId, + pos: Option, + types: &TypeTable, + target: &Target, +) -> Instruction { + let rtlib = RtlibNames::new(target); + let f16_abi = rtlib.float16_abi(); + let float_type = types.float_id; + let float16_type = types.float16_id; + let f16_size = types.size_bits(float16_type); + + // Arg is always SSE float + let abi = get_abi_for_conv(CallingConv::C, target); + let param_class = abi.classify_param(float_type, types); + + // Return: ushort for compiler-rt, Float16/SSE for libgcc + let ret_class = if f16_abi == Float16Abi::Integer { + ArgClass::Extend { + signed: false, + size_bits: 16, + } + } else { + abi.classify_return(float16_type, types) + }; + + let call_abi_info = Box::new(CallAbiInfo::new(vec![param_class], ret_class)); + + let mut call_insn = Instruction::call( + Some(target_pseudo), + "__truncsfhf2", + vec![src], + vec![float_type], + float16_type, + f16_size, + ); + call_insn.abi_info = Some(call_abi_info); + call_insn.pos = pos; + call_insn +} + +// ============================================================================ +// Int128 expansion helpers +// ============================================================================ + +/// Expand int128 bitwise op (And/Or/Xor) into 64-bit operations. 
+fn expand_int128_bitwise( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec<Instruction> { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let (a_lo, a_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let (b_lo, b_hi) = extract_halves(func, &mut insns, insn.src[1], long_type); + + let r_lo = func.create_reg_pseudo(); + insns.push(Instruction::binop(insn.op, r_lo, a_lo, b_lo, long_type, 64)); + let r_hi = func.create_reg_pseudo(); + insns.push(Instruction::binop(insn.op, r_hi, a_hi, b_hi, long_type, 64)); + + let int128_type = insn.typ.unwrap(); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + r_lo, + r_hi, + int128_type, + 128, + )); + insns +} + +/// Expand int128 Not into 64-bit operations. +fn expand_int128_not( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec<Instruction> { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let (s_lo, s_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + + let r_lo = func.create_reg_pseudo(); + insns.push(Instruction::unop(Opcode::Not, r_lo, s_lo, long_type, 64)); + let r_hi = func.create_reg_pseudo(); + insns.push(Instruction::unop(Opcode::Not, r_hi, s_hi, long_type, 64)); + + let int128_type = insn.typ.unwrap(); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + r_lo, + r_hi, + int128_type, + 128, + )); + insns +} + +/// Expand int128 Neg (0 - value with borrow chain). 
+fn expand_int128_neg( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let (s_lo, s_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let zero = func.create_const_pseudo(0); + + let r_lo = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::SubC, + r_lo, + zero, + s_lo, + long_type, + 64, + )); + let r_hi = func.create_reg_pseudo(); + let mut sbc = Instruction::binop(Opcode::SbcC, r_hi, zero, s_hi, long_type, 64); + sbc.src.push(r_lo); + insns.push(sbc); + + let int128_type = insn.typ.unwrap(); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + r_lo, + r_hi, + int128_type, + 128, + )); + insns +} + +/// Expand int128 Add (carry chain). +fn expand_int128_add( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let (a_lo, a_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let (b_lo, b_hi) = extract_halves(func, &mut insns, insn.src[1], long_type); + + let r_lo = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::AddC, + r_lo, + a_lo, + b_lo, + long_type, + 64, + )); + let r_hi = func.create_reg_pseudo(); + let mut adc = Instruction::binop(Opcode::AdcC, r_hi, a_hi, b_hi, long_type, 64); + adc.src.push(r_lo); + insns.push(adc); + + let int128_type = insn.typ.unwrap(); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + r_lo, + r_hi, + int128_type, + 128, + )); + insns +} + +/// Expand int128 Sub (borrow chain). 
+fn expand_int128_sub( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let (a_lo, a_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let (b_lo, b_hi) = extract_halves(func, &mut insns, insn.src[1], long_type); + + let r_lo = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::SubC, + r_lo, + a_lo, + b_lo, + long_type, + 64, + )); + let r_hi = func.create_reg_pseudo(); + let mut sbc = Instruction::binop(Opcode::SbcC, r_hi, a_hi, b_hi, long_type, 64); + sbc.src.push(r_lo); + insns.push(sbc); + + let int128_type = insn.typ.unwrap(); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + r_lo, + r_hi, + int128_type, + 128, + )); + insns +} + +/// Expand int128 Mul (cross-product decomposition). +fn expand_int128_mul( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let (a_lo, a_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let (b_lo, b_hi) = extract_halves(func, &mut insns, insn.src[1], long_type); + + let low_result = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Mul, + low_result, + a_lo, + b_lo, + long_type, + 64, + )); + + let high_part = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::UMulHi, + high_part, + a_lo, + b_lo, + long_type, + 64, + )); + + let cross1 = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Mul, + cross1, + a_lo, + b_hi, + long_type, + 64, + )); + + let cross2 = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Mul, + cross2, + a_hi, + b_lo, + long_type, + 64, + )); + + let sum1 = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Add, + sum1, + high_part, + cross1, 
+ long_type, + 64, + )); + let final_hi = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Add, + final_hi, + sum1, + cross2, + long_type, + 64, + )); + + let int128_type = insn.typ.unwrap(); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + low_result, + final_hi, + int128_type, + 128, + )); + insns +} + +/// Expand int128 equality comparison (SetEq/SetNe). +fn expand_int128_cmp_eq( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let (a_lo, a_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let (b_lo, b_hi) = extract_halves(func, &mut insns, insn.src[1], long_type); + + let xor_lo = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Xor, + xor_lo, + a_lo, + b_lo, + long_type, + 64, + )); + let xor_hi = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Xor, + xor_hi, + a_hi, + b_hi, + long_type, + 64, + )); + let or_result = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Or, + or_result, + xor_lo, + xor_hi, + long_type, + 64, + )); + + let zero = func.create_const_pseudo(0); + insns.push(Instruction::binop( + insn.op, result, or_result, zero, long_type, 64, + )); + insns +} + +/// Expand int128 ordered comparison (SetLt/SetLe/SetGt/SetGe/SetB/SetBe/SetA/SetAe). 
+fn expand_int128_cmp_ord( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let (a_lo, a_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let (b_lo, b_hi) = extract_halves(func, &mut insns, insn.src[1], long_type); + + let hi_eq = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::SetEq, + hi_eq, + a_hi, + b_hi, + long_type, + 64, + )); + + let hi_cmp = func.create_reg_pseudo(); + insns.push(Instruction::binop( + insn.op, hi_cmp, a_hi, b_hi, long_type, 64, + )); + + // Low halves always use unsigned compare + let lo_op = match insn.op { + Opcode::SetLt | Opcode::SetB => Opcode::SetB, + Opcode::SetLe | Opcode::SetBe => Opcode::SetBe, + Opcode::SetGt | Opcode::SetA => Opcode::SetA, + Opcode::SetGe | Opcode::SetAe => Opcode::SetAe, + _ => unreachable!(), + }; + let lo_cmp = func.create_reg_pseudo(); + insns.push(Instruction::binop(lo_op, lo_cmp, a_lo, b_lo, long_type, 64)); + + insns.push(Instruction::select( + result, hi_eq, lo_cmp, hi_cmp, long_type, 64, + )); + insns +} + +/// Expand int128 Zext (zero-extend to 128 bits). 
+fn expand_int128_zext( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let src = insn.src[0]; + let src_size = insn.src_size; + + // Zero-extend src to 64-bit if needed + let lo = if src_size < 64 { + let ext = func.create_reg_pseudo(); + let mut zext_insn = Instruction::unop(Opcode::Zext, ext, src, long_type, 64); + zext_insn.src_size = src_size; + insns.push(zext_insn); + ext + } else { + src + }; + + let zero = func.create_const_pseudo(0); + let int128_type = insn.typ.unwrap(); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + lo, + zero, + int128_type, + 128, + )); + insns +} + +/// Expand int128 Sext (sign-extend to 128 bits). +fn expand_int128_sext( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let src = insn.src[0]; + let src_size = insn.src_size; + + // Sign-extend src to 64-bit if needed + let lo = if src_size < 64 { + let ext = func.create_reg_pseudo(); + let mut sext_insn = Instruction::unop(Opcode::Sext, ext, src, long_type, 64); + sext_insn.src_size = src_size; + insns.push(sext_insn); + ext + } else { + src + }; + + let shift_amount = func.create_const_pseudo(63); + let hi = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Asr, + hi, + lo, + shift_amount, + long_type, + 64, + )); + let int128_type = insn.typ.unwrap(); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + lo, + hi, + int128_type, + 128, + )); + insns +} + +// ============================================================================ +// Int128 constant shift expansion helpers +// ============================================================================ + +/// Expand int128 Shl by a constant amount into 64-bit 
operations. +fn expand_int128_const_shl( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, + n: u32, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let int128_type = insn.typ.unwrap(); + let mut insns = Vec::new(); + + if n == 0 { + let (lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + lo, + hi, + int128_type, + 128, + )); + } else if n < 64 { + let (lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let shift_n = func.create_const_pseudo(n as i128); + let shift_compl = func.create_const_pseudo((64 - n) as i128); + + // new_hi = (hi << n) | (lo >> (64-n)) + let hi_shifted = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Shl, + hi_shifted, + hi, + shift_n, + long_type, + 64, + )); + let lo_shifted = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Lsr, + lo_shifted, + lo, + shift_compl, + long_type, + 64, + )); + let new_hi = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Or, + new_hi, + hi_shifted, + lo_shifted, + long_type, + 64, + )); + + // new_lo = lo << n + let new_lo = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Shl, + new_lo, + lo, + shift_n, + long_type, + 64, + )); + + insns.push(Instruction::binop( + Opcode::Pair64, + result, + new_lo, + new_hi, + int128_type, + 128, + )); + } else if n == 64 { + let (lo, _hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let zero = func.create_const_pseudo(0); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + zero, + lo, + int128_type, + 128, + )); + } else if n < 128 { + let (lo, _hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let shift_n = func.create_const_pseudo((n - 64) as i128); + let new_hi = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Shl, + new_hi, + lo, + shift_n, + 
long_type, + 64, + )); + let zero = func.create_const_pseudo(0); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + zero, + new_hi, + int128_type, + 128, + )); + } else { + let zero = func.create_const_pseudo(0); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + zero, + zero, + int128_type, + 128, + )); + } + insns +} + +/// Expand int128 Lsr (logical shift right) by a constant amount into 64-bit operations. +fn expand_int128_const_lsr( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, + n: u32, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let int128_type = insn.typ.unwrap(); + let mut insns = Vec::new(); + + if n == 0 { + let (lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + lo, + hi, + int128_type, + 128, + )); + } else if n < 64 { + let (lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let shift_n = func.create_const_pseudo(n as i128); + let shift_compl = func.create_const_pseudo((64 - n) as i128); + + // new_lo = (lo >> n) | (hi << (64-n)) + let lo_shifted = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Lsr, + lo_shifted, + lo, + shift_n, + long_type, + 64, + )); + let hi_shifted = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Shl, + hi_shifted, + hi, + shift_compl, + long_type, + 64, + )); + let new_lo = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Or, + new_lo, + lo_shifted, + hi_shifted, + long_type, + 64, + )); + + // new_hi = hi >> n (logical) + let new_hi = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Lsr, + new_hi, + hi, + shift_n, + long_type, + 64, + )); + + insns.push(Instruction::binop( + Opcode::Pair64, + result, + new_lo, + new_hi, + int128_type, + 128, + )); + } else if n == 64 { + let (_lo, hi) = extract_halves(func, &mut insns, insn.src[0], 
long_type); + let zero = func.create_const_pseudo(0); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + hi, + zero, + int128_type, + 128, + )); + } else if n < 128 { + let (_lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let shift_n = func.create_const_pseudo((n - 64) as i128); + let new_lo = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Lsr, + new_lo, + hi, + shift_n, + long_type, + 64, + )); + let zero = func.create_const_pseudo(0); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + new_lo, + zero, + int128_type, + 128, + )); + } else { + let zero = func.create_const_pseudo(0); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + zero, + zero, + int128_type, + 128, + )); + } + insns +} + +/// Expand int128 Asr (arithmetic shift right) by a constant amount into 64-bit operations. +fn expand_int128_const_asr( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, + n: u32, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let int128_type = insn.typ.unwrap(); + let mut insns = Vec::new(); + + if n == 0 { + let (lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + lo, + hi, + int128_type, + 128, + )); + } else if n < 64 { + let (lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let shift_n = func.create_const_pseudo(n as i128); + let shift_compl = func.create_const_pseudo((64 - n) as i128); + + // new_lo = (lo >> n) | (hi << (64-n)) + let lo_shifted = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Lsr, + lo_shifted, + lo, + shift_n, + long_type, + 64, + )); + let hi_shifted = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Shl, + hi_shifted, + hi, + shift_compl, + long_type, + 64, + )); + let new_lo = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Or, + 
new_lo, + lo_shifted, + hi_shifted, + long_type, + 64, + )); + + // new_hi = hi >>> n (arithmetic) + let new_hi = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Asr, + new_hi, + hi, + shift_n, + long_type, + 64, + )); + + insns.push(Instruction::binop( + Opcode::Pair64, + result, + new_lo, + new_hi, + int128_type, + 128, + )); + } else if n == 64 { + let (_lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let shift_63 = func.create_const_pseudo(63); + let sign = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Asr, + sign, + hi, + shift_63, + long_type, + 64, + )); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + hi, + sign, + int128_type, + 128, + )); + } else if n < 128 { + let (_lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let shift_n = func.create_const_pseudo((n - 64) as i128); + let new_lo = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Asr, + new_lo, + hi, + shift_n, + long_type, + 64, + )); + let shift_63 = func.create_const_pseudo(63); + let sign = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Asr, + sign, + hi, + shift_63, + long_type, + 64, + )); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + new_lo, + sign, + int128_type, + 128, + )); + } else { + let (_lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let shift_63 = func.create_const_pseudo(63); + let sign = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Asr, + sign, + hi, + shift_63, + long_type, + 64, + )); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + sign, + sign, + int128_type, + 128, + )); + } + insns +} + +// ============================================================================ +// Float16 expansion helpers +// ============================================================================ + +/// Expand Float16 binary arithmetic (promote-operate-truncate). 
+pub(crate) fn expand_float16_arith( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, + target: &Target, +) -> Vec { + let float_type = types.float_id; + let float_size = types.size_bits(float_type); + let pos = insn.pos; + let result = insn.target.expect("binop must have target"); + let left = insn.src[0]; + let right = insn.src[1]; + let mut insns = Vec::new(); + + // Extend left to float + let left_ext = func.create_reg_pseudo(); + insns.push(build_f16_extend_call(left_ext, left, pos, types, target)); + + // Extend right to float + let right_ext = func.create_reg_pseudo(); + insns.push(build_f16_extend_call(right_ext, right, pos, types, target)); + + // Native float operation + let float_result = func.create_reg_pseudo(); + insns.push(Instruction::binop( + insn.op, + float_result, + left_ext, + right_ext, + float_type, + float_size, + )); + + // Truncate result back to Float16 + insns.push(build_f16_truncate_call( + result, + float_result, + pos, + types, + target, + )); + insns +} + +/// Expand Float16 negation (promote-negate-truncate). +pub(crate) fn expand_float16_neg( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, + target: &Target, +) -> Vec { + let float_type = types.float_id; + let float_size = types.size_bits(float_type); + let pos = insn.pos; + let result = insn.target.expect("unary must have target"); + let src = insn.src[0]; + let mut insns = Vec::new(); + + let src_ext = func.create_reg_pseudo(); + insns.push(build_f16_extend_call(src_ext, src, pos, types, target)); + + let neg_result = func.create_reg_pseudo(); + insns.push(Instruction::unop( + Opcode::FNeg, + neg_result, + src_ext, + float_type, + float_size, + )); + + insns.push(build_f16_truncate_call( + result, neg_result, pos, types, target, + )); + insns +} + +/// Expand Float16 comparison (promote both, compare — no truncate). 
+pub(crate) fn expand_float16_cmp( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, + target: &Target, +) -> Vec { + let float_type = types.float_id; + let float_size = types.size_bits(float_type); + let pos = insn.pos; + let result = insn.target.expect("cmp must have target"); + let left = insn.src[0]; + let right = insn.src[1]; + let mut insns = Vec::new(); + + let left_ext = func.create_reg_pseudo(); + insns.push(build_f16_extend_call(left_ext, left, pos, types, target)); + + let right_ext = func.create_reg_pseudo(); + insns.push(build_f16_extend_call(right_ext, right, pos, types, target)); + + // Float comparison — result type is int, keep original type/size + let mut cmp = Instruction::binop( + insn.op, + result, + left_ext, + right_ext, + insn.typ.unwrap_or(types.int_id), + float_size, + ); + cmp.src_typ = Some(float_type); + insns.push(cmp); + insns +} + +// ============================================================================ +// Shared mapping decision functions +// ============================================================================ + +/// Classify and expand an int128 div/mod instruction into a rtlib call. +pub(crate) fn map_int128_divmod( + insn: &Instruction, + ctx: &mut MappingCtx<'_>, +) -> Option { + if insn.size != 128 { + return None; + } + let typ = insn.typ?; + if ctx.types.kind(typ) != TypeKind::Int128 { + return None; + } + let name = match insn.op { + Opcode::DivS => "__divti3", + Opcode::DivU => "__udivti3", + Opcode::ModS => "__modti3", + Opcode::ModU => "__umodti3", + _ => return None, + }; + let call = build_binop_rtlib_call(insn, name, ctx.types, ctx.target); + Some(MappedInsn::Replace(vec![call])) +} + +/// Classify and expand an int128 operation into 64-bit sequences. 
+pub(crate) fn map_int128_expand( + insn: &Instruction, + ctx: &mut MappingCtx<'_>, +) -> Option { + if insn.size != 128 { + return None; + } + let types = ctx.types; + + match insn.op { + // Arithmetic/bitwise/unary: result type is int128 + Opcode::And | Opcode::Or | Opcode::Xor => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_bitwise( + insn, ctx.func, types, + ))) + } + Opcode::Not => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_not( + insn, ctx.func, types, + ))) + } + Opcode::Neg => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_neg( + insn, ctx.func, types, + ))) + } + Opcode::Add => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_add( + insn, ctx.func, types, + ))) + } + Opcode::Sub => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_sub( + insn, ctx.func, types, + ))) + } + Opcode::Mul => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_mul( + insn, ctx.func, types, + ))) + } + // Constant-amount shifts: expand if shift amount is a known constant + Opcode::Shl | Opcode::Lsr | Opcode::Asr => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + // Only expand if shift amount is a compile-time constant + let shift_val = ctx.func.const_val(insn.src[1])?; + let n = shift_val as u32; + let expanded = match insn.op { + Opcode::Shl => expand_int128_const_shl(insn, ctx.func, types, n), + Opcode::Lsr => expand_int128_const_lsr(insn, ctx.func, types, n), + Opcode::Asr => expand_int128_const_asr(insn, ctx.func, types, n), + _ => unreachable!(), + }; + 
Some(MappedInsn::Replace(expanded)) + } + // Extensions to 128: result type is int128 + Opcode::Zext => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_zext( + insn, ctx.func, types, + ))) + } + Opcode::Sext => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_sext( + insn, ctx.func, types, + ))) + } + // Equality comparisons + Opcode::SetEq | Opcode::SetNe => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_cmp_eq( + insn, ctx.func, types, + ))) + } + // Ordered comparisons + Opcode::SetLt + | Opcode::SetLe + | Opcode::SetGt + | Opcode::SetGe + | Opcode::SetB + | Opcode::SetBe + | Opcode::SetA + | Opcode::SetAe => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_cmp_ord( + insn, ctx.func, types, + ))) + } + _ => None, + } +} + +/// Classify and expand an int128↔float conversion into a rtlib call. 
+pub(crate) fn map_int128_float_convert( + insn: &Instruction, + ctx: &mut MappingCtx<'_>, +) -> Option { + let types = ctx.types; + let target = ctx.target; + match insn.op { + // int128 → float + Opcode::SCvtF | Opcode::UCvtF => { + if insn.src_size != 128 { + return None; + } + let src_typ = insn.src_typ?; + if types.kind(src_typ) != TypeKind::Int128 { + return None; + } + let dst_typ = insn.typ?; + let dst_kind = types.kind(dst_typ); + let fsuf = float_suffix(dst_kind, target); + if fsuf.is_empty() { + return None; + } + let is_unsigned = insn.op == Opcode::UCvtF; + let func_name: &'static str = match (is_unsigned, fsuf) { + (false, "sf") => "__floattisf", + (false, "df") => "__floattidf", + (false, "xf") => "__floattixf", + (false, "tf") => "__floattitf", + (true, "sf") => "__floatuntisf", + (true, "df") => "__floatuntidf", + (true, "xf") => "__floatuntixf", + (true, "tf") => "__floatuntitf", + _ => return None, + }; + let call = build_convert_rtlib_call(insn, func_name, types, target); + Some(MappedInsn::Replace(vec![call])) + } + // float → int128 + Opcode::FCvtS | Opcode::FCvtU => { + if insn.size != 128 { + return None; + } + let dst_typ = insn.typ?; + if types.kind(dst_typ) != TypeKind::Int128 { + return None; + } + let src_typ = insn.src_typ?; + let src_kind = types.kind(src_typ); + let fsuf = float_suffix(src_kind, target); + if fsuf.is_empty() { + return None; + } + let is_unsigned = insn.op == Opcode::FCvtU; + let func_name: &'static str = match (is_unsigned, fsuf) { + (false, "sf") => "__fixsfti", + (false, "df") => "__fixdfti", + (false, "xf") => "__fixxfti", + (false, "tf") => "__fixtfti", + (true, "sf") => "__fixunssfti", + (true, "df") => "__fixunsdfti", + (true, "xf") => "__fixunsxfti", + (true, "tf") => "__fixunstfti", + _ => return None, + }; + let call = build_convert_rtlib_call(insn, func_name, types, target); + Some(MappedInsn::Replace(vec![call])) + } + _ => None, + } +} + +// 
============================================================================ +// Pass infrastructure +// ============================================================================ + +/// Create the appropriate ArchMapper for the given target. +fn create_mapper(target: &Target) -> Box { + match target.arch { + Arch::X86_64 => Box::new(crate::arch::x86_64::mapping::X86_64Mapper), + Arch::Aarch64 => Box::new(crate::arch::aarch64::mapping::Aarch64Mapper), + } +} + +/// Run the instruction mapping pass on a single function. +fn map_function(func: &mut Function, types: &TypeTable, target: &Target, mapper: &dyn ArchMapper) { + for block_idx in 0..func.blocks.len() { + // Take the insns out of the block to avoid borrow conflicts + let old_insns = std::mem::take(&mut func.blocks[block_idx].insns); + let mut new_insns = Vec::with_capacity(old_insns.len()); + let mut changed = false; + + for insn in &old_insns { + let mut ctx = MappingCtx { + func: &mut *func, + types, + target, + }; + match mapper.map_insn(insn, &mut ctx) { + MappedInsn::Legal => new_insns.push(insn.clone()), + MappedInsn::Replace(replacements) => { + new_insns.extend(replacements); + changed = true; + } + } + } + + if changed { + func.blocks[block_idx].insns = new_insns; + } else { + func.blocks[block_idx].insns = old_insns; + } + } +} + +/// Run the instruction mapping pass on an entire module. 
pub fn run_mapping(module: &mut Module, types: &TypeTable, target: &Target) {
    // One mapper per module: it is chosen once from the target architecture
    // and reused (via as_ref) for every function.
    let mapper = create_mapper(target);
    for func in &mut module.functions {
        map_function(func, types, target, mapper.as_ref());
    }
}

// ============================================================================
// Shared test helpers
// ============================================================================

#[cfg(test)]
pub(crate) mod test_helpers {
    use super::*;
    use crate::ir::{BasicBlock, BasicBlockId, Instruction, Opcode, Pseudo, PseudoId};
    use crate::types::{TypeId, TypeTable};

    /// Create a function with 3 pseudos for classification tests.
    pub fn make_minimal_func(types: &TypeTable) -> Function {
        let mut func = Function::new("test", types.int_id);
        func.add_pseudo(Pseudo::reg(PseudoId(0), 0));
        func.add_pseudo(Pseudo::reg(PseudoId(1), 1));
        func.add_pseudo(Pseudo::reg(PseudoId(2), 2));
        // Keep next_pseudo past the manually added ids so any
        // create_reg_pseudo calls in code under test do not collide.
        func.next_pseudo = 3;
        func
    }

    /// Create a function with various legal instructions for pass runner tests.
+ pub fn make_test_func(types: &TypeTable) -> Function { + let mut func = Function::new("test_mapping", types.int_id); + + func.add_pseudo(Pseudo::reg(PseudoId(0), 0)); + func.add_pseudo(Pseudo::reg(PseudoId(1), 1)); + func.add_pseudo(Pseudo::reg(PseudoId(2), 2)); + + let mut bb = BasicBlock::new(BasicBlockId(0)); + bb.add_insn(Instruction::new(Opcode::Entry)); + + // Integer arithmetic + bb.add_insn(Instruction::binop( + Opcode::Add, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + bb.add_insn(Instruction::binop( + Opcode::Sub, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + bb.add_insn(Instruction::binop( + Opcode::Mul, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + + // Bitwise + bb.add_insn(Instruction::binop( + Opcode::And, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + bb.add_insn(Instruction::binop( + Opcode::Or, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + bb.add_insn(Instruction::binop( + Opcode::Xor, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + + // Comparisons + bb.add_insn(Instruction::binop( + Opcode::SetEq, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + bb.add_insn(Instruction::binop( + Opcode::SetLt, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + + // Unary + bb.add_insn(Instruction::unop( + Opcode::Neg, + PseudoId(2), + PseudoId(0), + types.int_id, + 32, + )); + bb.add_insn(Instruction::unop( + Opcode::Not, + PseudoId(2), + PseudoId(0), + types.int_id, + 32, + )); + + // Float ops + bb.add_insn(Instruction::binop( + Opcode::FAdd, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.double_id, + 64, + )); + bb.add_insn(Instruction::unop( + Opcode::FNeg, + PseudoId(2), + PseudoId(0), + types.double_id, + 64, + )); + + // Conversions + let mut sext = Instruction::unop(Opcode::Sext, PseudoId(2), PseudoId(0), types.long_id, 64); + 
sext.src_size = 32; + bb.add_insn(sext); + let mut zext = + Instruction::unop(Opcode::Zext, PseudoId(2), PseudoId(0), types.ulong_id, 64); + zext.src_size = 32; + bb.add_insn(zext); + + // Memory + bb.add_insn(Instruction::load( + PseudoId(2), + PseudoId(0), + 0, + types.int_id, + 32, + )); + bb.add_insn(Instruction::store( + PseudoId(1), + PseudoId(0), + 0, + types.int_id, + 32, + )); + + // Terminator + bb.add_insn(Instruction::ret(Some(PseudoId(2)))); + + func.add_block(bb); + func.entry = BasicBlockId(0); + func + } + + /// Create a conversion instruction for testing. + pub fn make_convert_insn( + op: Opcode, + dst_type: TypeId, + dst_size: u32, + src_type: TypeId, + src_size: u32, + ) -> Instruction { + let mut insn = Instruction::new(op) + .with_target(PseudoId(2)) + .with_src(PseudoId(0)) + .with_type_and_size(dst_type, dst_size); + insn.src_size = src_size; + insn.src_typ = Some(src_type); + insn + } + + /// Assert the mapping result is Legal. + pub fn assert_legal(result: &MappedInsn) { + assert!(matches!(result, MappedInsn::Legal)); + } + + /// Assert the mapping result is a single LibCall replacement. + pub fn assert_libcall(result: &MappedInsn, expected_name: &str) { + match result { + MappedInsn::Replace(insns) => { + assert_eq!(insns.len(), 1, "expected single Call replacement"); + assert_eq!(insns[0].op, Opcode::Call); + assert_eq!(insns[0].func_name.as_deref(), Some(expected_name)); + } + MappedInsn::Legal => { + panic!("expected Replace with LibCall to {expected_name}, got Legal") + } + } + } + + /// Assert the mapping result is a multi-instruction expansion. + pub fn assert_expand(result: &MappedInsn) { + match result { + MappedInsn::Replace(insns) => { + assert!(!insns.is_empty(), "expected non-empty expansion"); + } + MappedInsn::Legal => panic!("expected Replace with expansion, got Legal"), + } + } + + /// Assert the mapping result is a CmpLibCall (call + int compare). 
+ pub fn assert_cmp_libcall(result: &MappedInsn, expected_name: &str, expected_cmp_op: Opcode) { + match result { + MappedInsn::Replace(insns) => { + assert_eq!(insns.len(), 2, "expected Call + compare"); + assert_eq!(insns[0].op, Opcode::Call); + assert_eq!(insns[0].func_name.as_deref(), Some(expected_name)); + assert_eq!(insns[1].op, expected_cmp_op); + } + MappedInsn::Legal => { + panic!("expected Replace with CmpLibCall to {expected_name}, got Legal") + } + } + } +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::test_helpers::*; + use super::*; + use crate::ir::{BasicBlock, BasicBlockId, Instruction, Opcode, Pseudo, PseudoId}; + use crate::target::{Arch, Os, Target}; + use crate::types::TypeTable; + + // ======================================================================== + // Pass runner tests + // ======================================================================== + + #[test] + fn test_run_mapping_empty() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mut module = Module::new(); + + run_mapping(&mut module, &types, &target); + } + + #[test] + fn test_run_mapping_with_functions() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + + let mut module = Module::new(); + module.add_function(make_test_func(&types)); + module.add_function(make_test_func(&types)); + + run_mapping(&mut module, &types, &target); + } + + #[test] + fn test_mapping_idempotent() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + + let mut module = Module::new(); + module.add_function(make_test_func(&types)); + + run_mapping(&mut module, &types, &target); + run_mapping(&mut module, &types, &target); + } + + #[test] + fn test_mapping_all_targets() { + let targets = vec![ + 
Target::new(Arch::X86_64, Os::Linux), + Target::new(Arch::X86_64, Os::MacOS), + Target::new(Arch::X86_64, Os::FreeBSD), + Target::new(Arch::Aarch64, Os::Linux), + Target::new(Arch::Aarch64, Os::MacOS), + ]; + + for target in &targets { + let types = TypeTable::new(target); + let mut module = Module::new(); + module.add_function(make_test_func(&types)); + run_mapping(&mut module, &types, target); + } + } + + #[test] + fn test_mapping_all_legal_x86_64() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + + let mut module = Module::new(); + module.add_function(make_test_func(&types)); + let orig_insn_count = module.functions[0].blocks[0].insns.len(); + + run_mapping(&mut module, &types, &target); + + // All instructions should be unchanged (all legal) + assert_eq!(module.functions[0].blocks[0].insns.len(), orig_insn_count); + } + + #[test] + fn test_mapping_all_legal_aarch64() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + + let mut module = Module::new(); + module.add_function(make_test_func(&types)); + let orig_insn_count = module.functions[0].blocks[0].insns.len(); + + run_mapping(&mut module, &types, &target); + + assert_eq!(module.functions[0].blocks[0].insns.len(), orig_insn_count); + } + + // ======================================================================== + // Integration: int128 div/mod transformation + // ======================================================================== + + #[test] + fn test_mapping_transforms_int128_divmod() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + + let mut func = Function::new("test_divmod", types.int128_id); + func.add_pseudo(Pseudo::reg(PseudoId(0), 0)); + func.add_pseudo(Pseudo::reg(PseudoId(1), 1)); + func.add_pseudo(Pseudo::reg(PseudoId(2), 2)); + func.next_pseudo = 3; + + let mut bb = BasicBlock::new(BasicBlockId(0)); + bb.add_insn(Instruction::new(Opcode::Entry)); + 
bb.add_insn(Instruction::binop( + Opcode::DivS, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int128_id, + 128, + )); + bb.add_insn(Instruction::ret(Some(PseudoId(2)))); + func.add_block(bb); + func.entry = BasicBlockId(0); + + let mut module = Module::new(); + module.add_function(func); + run_mapping(&mut module, &types, &target); + + // After mapping, the DivS should be replaced with a Call to __divti3 + let block = &module.functions[0].blocks[0]; + assert_eq!(block.insns.len(), 3); // Entry, Call, Ret + assert_eq!(block.insns[1].op, Opcode::Call); + assert_eq!(block.insns[1].func_name.as_deref(), Some("__divti3")); + assert_eq!(block.insns[1].target, Some(PseudoId(2))); + assert_eq!(block.insns[1].src, vec![PseudoId(0), PseudoId(1)]); + assert!(block.insns[1].abi_info.is_some()); + } + + // ======================================================================== + // Integration: int128↔float conversion transformation + // ======================================================================== + + #[test] + fn test_mapping_transforms_int128_conversion() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + + let mut func = Function::new("test_convert", types.double_id); + func.add_pseudo(Pseudo::reg(PseudoId(0), 0)); + func.add_pseudo(Pseudo::reg(PseudoId(1), 1)); + func.next_pseudo = 2; + + let mut bb = BasicBlock::new(BasicBlockId(0)); + bb.add_insn(Instruction::new(Opcode::Entry)); + bb.add_insn(make_convert_insn( + Opcode::SCvtF, + types.double_id, + 64, + types.int128_id, + 128, + )); + bb.add_insn(Instruction::ret(Some(PseudoId(1)))); + func.add_block(bb); + func.entry = BasicBlockId(0); + + let mut module = Module::new(); + module.add_function(func); + run_mapping(&mut module, &types, &target); + + let block = &module.functions[0].blocks[0]; + assert_eq!(block.insns.len(), 3); + assert_eq!(block.insns[1].op, Opcode::Call); + assert_eq!(block.insns[1].func_name.as_deref(), Some("__floattidf")); + 
assert!(block.insns[1].abi_info.is_some()); + } + + // ======================================================================== + // Complex mul/div rtlib name tests + // ======================================================================== + + #[test] + fn test_complex_mul_name_float() { + let target = Target::new(Arch::X86_64, Os::Linux); + assert_eq!(complex_mul_name(TypeKind::Float, &target), "__mulsc3"); + } + + #[test] + fn test_complex_mul_name_double() { + let target = Target::new(Arch::X86_64, Os::Linux); + assert_eq!(complex_mul_name(TypeKind::Double, &target), "__muldc3"); + } + + #[test] + fn test_complex_mul_name_longdouble() { + let x86 = Target::new(Arch::X86_64, Os::Linux); + assert_eq!(complex_mul_name(TypeKind::LongDouble, &x86), "__mulxc3"); + + let arm_linux = Target::new(Arch::Aarch64, Os::Linux); + assert_eq!( + complex_mul_name(TypeKind::LongDouble, &arm_linux), + "__multc3" + ); + + let arm_macos = Target::new(Arch::Aarch64, Os::MacOS); + assert_eq!( + complex_mul_name(TypeKind::LongDouble, &arm_macos), + "__muldc3" + ); + } + + #[test] + fn test_complex_div_name_float() { + let target = Target::new(Arch::X86_64, Os::Linux); + assert_eq!(complex_div_name(TypeKind::Float, &target), "__divsc3"); + } + + #[test] + fn test_complex_div_name_longdouble() { + let x86 = Target::new(Arch::X86_64, Os::Linux); + assert_eq!(complex_div_name(TypeKind::LongDouble, &x86), "__divxc3"); + + let arm_linux = Target::new(Arch::Aarch64, Os::Linux); + assert_eq!( + complex_div_name(TypeKind::LongDouble, &arm_linux), + "__divtc3" + ); + + let arm_macos = Target::new(Arch::Aarch64, Os::MacOS); + assert_eq!( + complex_div_name(TypeKind::LongDouble, &arm_macos), + "__divdc3" + ); + } +} diff --git a/cc/arch/mod.rs b/cc/arch/mod.rs index cf94972c..a6cb923a 100644 --- a/cc/arch/mod.rs +++ b/cc/arch/mod.rs @@ -15,6 +15,7 @@ pub mod aarch64; pub mod codegen; pub mod dwarf; pub mod lir; +pub mod mapping; pub mod regalloc; pub mod x86_64; diff --git 
a/cc/arch/x86_64/codegen.rs b/cc/arch/x86_64/codegen.rs index 349b1bc7..87a28e9e 100644 --- a/cc/arch/x86_64/codegen.rs +++ b/cc/arch/x86_64/codegen.rs @@ -1717,6 +1717,16 @@ impl X86_64CodeGen { self.emit_fence(insn); } + // Int128 decomposition ops (from mapping pass expansion) + Opcode::Lo64 => self.emit_lo64(insn), + Opcode::Hi64 => self.emit_hi64(insn), + Opcode::Pair64 => self.emit_pair64(insn), + Opcode::AddC => self.emit_addc(insn, false), + Opcode::AdcC => self.emit_addc(insn, true), + Opcode::SubC => self.emit_subc(insn, false), + Opcode::SbcC => self.emit_subc(insn, true), + Opcode::UMulHi => self.emit_umulhi(insn), + // Skip no-ops and unimplemented _ => {} } @@ -2836,7 +2846,7 @@ impl X86_64CodeGen { match &src_loc { Loc::Imm(v) => { let lo = *v as i64; - let hi = (*v >> 64) as i64; + let hi = (*v >> 64) as u64 as i64; // Store lo half if lo > i32::MAX as i64 || lo < i32::MIN as i64 { self.push_lir(X86Inst::MovAbs { diff --git a/cc/arch/x86_64/expression.rs b/cc/arch/x86_64/expression.rs index b0621c62..c3403e94 100644 --- a/cc/arch/x86_64/expression.rs +++ b/cc/arch/x86_64/expression.rs @@ -172,10 +172,6 @@ impl X86_64CodeGen { .typ .map(|t| types.size_bits(t).max(32)) .unwrap_or(insn.size.max(32)); - if insn.typ.is_some_and(|t| types.kind(t) == TypeKind::Int128) { - self.emit_int128_unary(insn, op); - return; - } let op_size = OperandSize::from_bits(size); let src = match insn.src.first() { Some(&s) => s, @@ -211,10 +207,6 @@ impl X86_64CodeGen { .typ .map(|t| types.size_bits(t).max(32)) .unwrap_or(insn.size.max(32)); - if insn.typ.is_some_and(|t| types.kind(t) == TypeKind::Int128) { - self.emit_int128_mul(insn); - return; - } let op_size = OperandSize::from_bits(size); let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { (Some(&s1), Some(&s2)) => (s1, s2), @@ -259,10 +251,6 @@ impl X86_64CodeGen { .typ .map(|t| types.size_bits(t).max(32)) .unwrap_or(insn.size.max(32)); - if insn.typ.is_some_and(|t| types.kind(t) == TypeKind::Int128) { - 
self.emit_int128_div(insn); - return; - } let op_size = OperandSize::from_bits(size); let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { (Some(&s1), Some(&s2)) => (s1, s2), @@ -339,10 +327,6 @@ impl X86_64CodeGen { .typ .map(|t| types.size_bits(t).max(32)) .unwrap_or(insn.size.max(32)); - if insn.typ.is_some_and(|t| types.kind(t) == TypeKind::Int128) { - self.emit_int128_compare(insn); - return; - } let op_size = OperandSize::from_bits(size); let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { (Some(&s1), Some(&s2)) => (s1, s2), @@ -606,6 +590,17 @@ impl X86_64CodeGen { dst: GpOperand::Reg(dst), }); } + Loc::Reg(r) => { + // After optimization, a 64-bit value feeding Pair64 may be + // register-allocated. Lo64 of such a value is the register itself. + if *r != dst { + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(*r), + dst: GpOperand::Reg(dst), + }); + } + } _ => panic!("int128_load_lo: unexpected loc {:?}", loc), } } @@ -615,7 +610,7 @@ impl X86_64CodeGen { let loc = self.get_location(pseudo); match &loc { Loc::Imm(v) => { - let hi = (*v >> 64) as i64; + let hi = (*v >> 64) as u64 as i64; if hi > i32::MAX as i64 || hi < i32::MIN as i64 { self.push_lir(X86Inst::MovAbs { imm: hi, dst }); } else { @@ -634,6 +629,15 @@ impl X86_64CodeGen { dst: GpOperand::Reg(dst), }); } + Loc::Reg(_) => { + // After optimization, a 64-bit value feeding Pair64 may be + // register-allocated. Hi64 of such a value is always 0. + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Imm(0), + dst: GpOperand::Reg(dst), + }); + } _ => panic!("int128_load_hi: unexpected loc {:?}", loc), } } @@ -703,50 +707,9 @@ impl X86_64CodeGen { Label::new(prefix, suffix) } - /// Get the GpOperand for the lo half of src2 (for use in add/sub/etc). - /// If the operand is an immediate, returns GpOperand::Imm or loads into R11. 
- fn int128_src2_lo_operand(&mut self, src2: PseudoId) -> GpOperand { - let loc = self.get_location(src2); - match &loc { - Loc::Imm(v) => { - let lo = *v as i64; - if lo > i32::MAX as i64 || lo < i32::MIN as i64 { - self.push_lir(X86Inst::MovAbs { - imm: lo, - dst: Reg::R11, - }); - GpOperand::Reg(Reg::R11) - } else { - GpOperand::Imm(lo) - } - } - Loc::Stack(_) | Loc::IncomingArg(_) => GpOperand::Mem(self.int128_lo_mem(&loc)), - _ => panic!("int128_src2_lo_operand: unexpected loc {:?}", loc), - } - } - - /// Get the GpOperand for the hi half of src2. - fn int128_src2_hi_operand(&mut self, src2: PseudoId) -> GpOperand { - let loc = self.get_location(src2); - match &loc { - Loc::Imm(v) => { - let hi = (*v >> 64) as i64; - if hi > i32::MAX as i64 || hi < i32::MIN as i64 { - self.push_lir(X86Inst::MovAbs { - imm: hi, - dst: Reg::R11, - }); - GpOperand::Reg(Reg::R11) - } else { - GpOperand::Imm(hi) - } - } - Loc::Stack(_) | Loc::IncomingArg(_) => GpOperand::Mem(self.int128_hi_mem(&loc)), - _ => panic!("int128_src2_hi_operand: unexpected loc {:?}", loc), - } - } - - /// Emit 128-bit binary operation (Add, Sub, And, Or, Xor, Shl, Lsr, Asr). + /// Emit 128-bit shift operations (Shl, Lsr, Asr). + /// Other int128 ops (Add, Sub, And, Or, Xor, Mul, Neg, Not, comparisons) + /// are expanded by the mapping pass into 64-bit sequences. 
fn emit_int128_binop(&mut self, insn: &Instruction) { let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { (Some(&s1), Some(&s2)) => (s1, s2), @@ -759,91 +722,6 @@ impl X86_64CodeGen { let dst_loc = self.get_location(target); match insn.op { - Opcode::Add => { - // lo: addq src2_lo, src1_lo → dst_lo (sets CF) - // hi: adcq src2_hi, src1_hi → dst_hi (uses CF) - self.int128_load_lo(src1, Reg::R10); - let src2_lo = self.int128_src2_lo_operand(src2); - self.push_lir(X86Inst::Add { - size: OperandSize::B64, - src: src2_lo, - dst: Reg::R10, - }); - self.int128_store_lo(Reg::R10, &dst_loc); - self.int128_load_hi(src1, Reg::R10); - let src2_hi = self.int128_src2_hi_operand(src2); - self.push_lir(X86Inst::Adc { - size: OperandSize::B64, - src: src2_hi, - dst: Reg::R10, - }); - self.int128_store_hi(Reg::R10, &dst_loc); - } - Opcode::Sub => { - // lo: subq src2_lo, src1_lo → dst_lo (sets CF) - // hi: sbbq src2_hi, src1_hi → dst_hi (uses CF) - self.int128_load_lo(src1, Reg::R10); - let src2_lo = self.int128_src2_lo_operand(src2); - self.push_lir(X86Inst::Sub { - size: OperandSize::B64, - src: src2_lo, - dst: Reg::R10, - }); - self.int128_store_lo(Reg::R10, &dst_loc); - self.int128_load_hi(src1, Reg::R10); - let src2_hi = self.int128_src2_hi_operand(src2); - self.push_lir(X86Inst::Sbb { - size: OperandSize::B64, - src: src2_hi, - dst: Reg::R10, - }); - self.int128_store_hi(Reg::R10, &dst_loc); - } - Opcode::And | Opcode::Or | Opcode::Xor => { - // Independent 64-bit ops on lo and hi halves (no carry) - self.int128_load_lo(src1, Reg::R10); - let src2_lo = self.int128_src2_lo_operand(src2); - match insn.op { - Opcode::And => self.push_lir(X86Inst::And { - size: OperandSize::B64, - src: src2_lo, - dst: Reg::R10, - }), - Opcode::Or => self.push_lir(X86Inst::Or { - size: OperandSize::B64, - src: src2_lo, - dst: Reg::R10, - }), - Opcode::Xor => self.push_lir(X86Inst::Xor { - size: OperandSize::B64, - src: src2_lo, - dst: Reg::R10, - }), - _ => unreachable!(), - } - 
self.int128_store_lo(Reg::R10, &dst_loc); - self.int128_load_hi(src1, Reg::R10); - let src2_hi = self.int128_src2_hi_operand(src2); - match insn.op { - Opcode::And => self.push_lir(X86Inst::And { - size: OperandSize::B64, - src: src2_hi, - dst: Reg::R10, - }), - Opcode::Or => self.push_lir(X86Inst::Or { - size: OperandSize::B64, - src: src2_hi, - dst: Reg::R10, - }), - Opcode::Xor => self.push_lir(X86Inst::Xor { - size: OperandSize::B64, - src: src2_hi, - dst: Reg::R10, - }), - _ => unreachable!(), - } - self.int128_store_hi(Reg::R10, &dst_loc); - } Opcode::Shl => { self.emit_int128_shl(src1, src2, &dst_loc); } @@ -853,7 +731,10 @@ impl X86_64CodeGen { Opcode::Asr => { self.emit_int128_asr(src1, src2, &dst_loc); } - _ => {} + _ => panic!( + "emit_int128_binop: unexpected opcode {:?} (mapping pass should have expanded it)", + insn.op + ), } } @@ -1193,501 +1074,227 @@ impl X86_64CodeGen { self.push_lir(X86Inst::Directive(Directive::BlockLabel(done_label))); } - /// Emit 128-bit multiply. - /// result_lo = lo(src1_lo * src2_lo) - /// result_hi = hi(src1_lo * src2_lo) + src1_lo * src2_hi + src1_hi * src2_lo - /// - /// Uses RAX, RDX (for mul), R10, R11 as scratch. RAX/RDX are allocatable - /// but the regalloc ensures they are not live across this instruction. - fn emit_int128_mul(&mut self, insn: &Instruction) { - let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { - (Some(&s1), Some(&s2)) => (s1, s2), - _ => return, + /// Emit 128-bit extend/truncate operations. 
+ fn emit_int128_extend(&mut self, insn: &Instruction) { + let src = match insn.src.first() { + Some(&s) => s, + None => return, }; let target = match insn.target { Some(t) => t, None => return, }; - let dst_loc = self.get_location(target); - // Step 1: RAX = src1_lo, mulq src2_lo → RDX:RAX = src1_lo * src2_lo - self.int128_load_lo(src1, Reg::Rax); - let src2_lo_loc = self.get_location(src2); - let src2_lo_op = match &src2_lo_loc { - Loc::Stack(_) | Loc::IncomingArg(_) => GpOperand::Mem(self.int128_lo_mem(&src2_lo_loc)), - Loc::Imm(v) => { - let lo = *v as i64; - if lo > i32::MAX as i64 || lo < i32::MIN as i64 { - self.push_lir(X86Inst::MovAbs { - imm: lo, - dst: Reg::R10, - }); - } else { - self.push_lir(X86Inst::Mov { - size: OperandSize::B64, - src: GpOperand::Imm(lo), - dst: GpOperand::Reg(Reg::R10), - }); - } - GpOperand::Reg(Reg::R10) - } - _ => panic!("int128_mul: unexpected src2 loc {:?}", src2_lo_loc), + // Truncating FROM 128-bit (insn.src_size == 128) + // Just load the lo half and truncate + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::R10, }; - self.push_lir(X86Inst::Mul1 { - size: OperandSize::B64, - src: src2_lo_op, - }); - // RAX = result_lo, RDX = partial_hi - self.int128_store_lo(Reg::Rax, &dst_loc); - // Save partial_hi in R11 - self.push_lir(X86Inst::Mov { - size: OperandSize::B64, - src: GpOperand::Reg(Reg::Rdx), - dst: GpOperand::Reg(Reg::R11), - }); - - // Step 2: R10 = src1_hi * src2_lo (only lo 64 bits matter) - self.int128_load_hi(src1, Reg::R10); - // We need src2_lo in a register for imulq - let src2_lo_loc2 = self.get_location(src2); - let src2_lo_gp = match &src2_lo_loc2 { - Loc::Stack(_) | Loc::IncomingArg(_) => { - GpOperand::Mem(self.int128_lo_mem(&src2_lo_loc2)) + self.int128_load_lo(src, dst_reg); + // Truncate to target size + match insn.size { + 8 => { + self.push_lir(X86Inst::Movzx { + src_size: OperandSize::B8, + dst_size: OperandSize::B32, + src: 
GpOperand::Reg(dst_reg), + dst: dst_reg, + }); } - Loc::Imm(v) => { - let lo = *v as i64; - if lo > i32::MAX as i64 || lo < i32::MIN as i64 { - self.push_lir(X86Inst::MovAbs { - imm: lo, - dst: Reg::Rax, - }); - GpOperand::Reg(Reg::Rax) - } else { - GpOperand::Imm(lo) - } + 16 => { + self.push_lir(X86Inst::Movzx { + src_size: OperandSize::B16, + dst_size: OperandSize::B32, + src: GpOperand::Reg(dst_reg), + dst: dst_reg, + }); } - _ => panic!("int128_mul: unexpected src2 loc {:?}", src2_lo_loc2), - }; - self.push_lir(X86Inst::IMul2 { - size: OperandSize::B64, - src: src2_lo_gp, - dst: Reg::R10, - }); - // R11 += R10 - self.push_lir(X86Inst::Add { - size: OperandSize::B64, - src: GpOperand::Reg(Reg::R10), - dst: Reg::R11, - }); - - // Step 3: R10 = src1_lo * src2_hi (only lo 64 bits matter) - self.int128_load_lo(src1, Reg::R10); - let src2_hi_loc = self.get_location(src2); - let src2_hi_gp = match &src2_hi_loc { - Loc::Stack(_) | Loc::IncomingArg(_) => GpOperand::Mem(self.int128_hi_mem(&src2_hi_loc)), - Loc::Imm(v) => { - let hi = (*v >> 64) as i64; - if hi > i32::MAX as i64 || hi < i32::MIN as i64 { - self.push_lir(X86Inst::MovAbs { - imm: hi, - dst: Reg::Rax, - }); - GpOperand::Reg(Reg::Rax) - } else { - GpOperand::Imm(hi) - } + 32 => { + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(dst_reg), + dst: GpOperand::Reg(dst_reg), + }); } - _ => panic!("int128_mul: unexpected src2 loc {:?}", src2_hi_loc), - }; - self.push_lir(X86Inst::IMul2 { - size: OperandSize::B64, - src: src2_hi_gp, - dst: Reg::R10, - }); - // R11 += R10 - self.push_lir(X86Inst::Add { - size: OperandSize::B64, - src: GpOperand::Reg(Reg::R10), - dst: Reg::R11, - }); - - // Store result_hi - self.int128_store_hi(Reg::R11, &dst_loc); + _ => {} // 64-bit: lo half is the result + } + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, insn.size); + } } - /// Emit 128-bit division. 
- /// For __int128 division, we call the compiler runtime functions - /// __divti3 (signed) or __udivti3 (unsigned). - /// Args: (lo1, hi1, lo2, hi2) in RDI, RSI, RDX, RCX - /// Returns: (lo, hi) in RAX, RDX - fn emit_int128_div(&mut self, insn: &Instruction) { - let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { - (Some(&s1), Some(&s2)) => (s1, s2), - _ => return, - }; - let target = match insn.target { - Some(t) => t, - None => return, - }; + // ======================================================================== + // Int128 decomposition ops (Lo64, Hi64, Pair64) + // ======================================================================== + + /// Lo64: extract low 64 bits from 128-bit pseudo. + /// target(64) = lo64(src(128)) + pub(super) fn emit_lo64(&mut self, insn: &Instruction) { + let src = insn.src[0]; + let target = insn.target.expect("Lo64 must have target"); let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::R10, + }; + self.int128_load_lo(src, dst_reg); + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, 64); + } + } - let func_name = match insn.op { - Opcode::DivS => "__divti3", - Opcode::DivU => "__udivti3", - Opcode::ModS => "__modti3", - Opcode::ModU => "__umodti3", - _ => return, + /// Hi64: extract high 64 bits from 128-bit pseudo. 
+ /// target(64) = hi64(src(128)) + pub(super) fn emit_hi64(&mut self, insn: &Instruction) { + let src = insn.src[0]; + let target = insn.target.expect("Hi64 must have target"); + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::R10, }; + self.int128_load_hi(src, dst_reg); + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, 64); + } + } - // SysV ABI: __int128 args passed as (lo, hi) pairs in GP registers - // arg1 = (RDI=lo1, RSI=hi1), arg2 = (RDX=lo2, RCX=hi2) - // BUT: we must be careful about order because loading src2_lo into RDX - // could clobber a register we need. Load src2 first into RCX/R10, - // then src1, then move src2_lo to RDX. - - // Load src2_hi into RCX first - self.int128_load_hi(src2, Reg::Rcx); - // Load src2_lo into R10 (temporary, will move to RDX later) - self.int128_load_lo(src2, Reg::R10); - // Load src1 - self.int128_load_lo(src1, Reg::Rdi); - self.int128_load_hi(src1, Reg::Rsi); - // Now move src2_lo to RDX - self.push_lir(X86Inst::Mov { - size: OperandSize::B64, - src: GpOperand::Reg(Reg::R10), - dst: GpOperand::Reg(Reg::Rdx), - }); + /// Pair64: combine two 64-bit pseudos into 128-bit. 
+ /// target(128) = pair64(lo(64), hi(64)) + pub(super) fn emit_pair64(&mut self, insn: &Instruction) { + let src_lo = insn.src[0]; + let src_hi = insn.src[1]; + let target = insn.target.expect("Pair64 must have target"); + let dst_loc = self.get_location(target); - // Call the runtime function - let sym = crate::arch::lir::Symbol::global(func_name.to_string()); - self.push_lir(X86Inst::Call { - target: crate::arch::lir::CallTarget::Direct(sym), - }); + // Store lo half + self.emit_move(src_lo, Reg::R10, 64); + self.int128_store_lo(Reg::R10, &dst_loc); - // Result in RAX (lo), RDX (hi) - self.int128_store_lo(Reg::Rax, &dst_loc); - self.int128_store_hi(Reg::Rdx, &dst_loc); + // Store hi half + self.emit_move(src_hi, Reg::R10, 64); + self.int128_store_hi(Reg::R10, &dst_loc); } - /// Emit 128-bit comparison. - fn emit_int128_compare(&mut self, insn: &Instruction) { - let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { - (Some(&s1), Some(&s2)) => (s1, s2), - _ => return, - }; - let target = match insn.target { - Some(t) => t, - None => return, - }; + /// AddC/AdcC: 64-bit add with carry. + /// AddC (with_carry=false): add, sets CF + /// AdcC (with_carry=true): adc (add with carry in), sets CF + pub(super) fn emit_addc(&mut self, insn: &Instruction, with_carry: bool) { + let target = insn.target.expect("AddC/AdcC must have target"); + let src1 = insn.src[0]; + let src2 = insn.src[1]; let dst_loc = self.get_location(target); let work_reg = match &dst_loc { Loc::Reg(r) => *r, _ => Reg::R10, }; - match insn.op { - Opcode::SetEq | Opcode::SetNe => { - // XOR both halves and OR them. Result is zero iff equal. 
- self.int128_load_lo(src1, Reg::R10); - let src2_lo = self.int128_src2_lo_operand(src2); - self.push_lir(X86Inst::Xor { - size: OperandSize::B64, - src: src2_lo, - dst: Reg::R10, - }); - self.int128_load_hi(src1, Reg::R11); - let src2_hi = self.int128_src2_hi_operand(src2); - self.push_lir(X86Inst::Xor { - size: OperandSize::B64, - src: src2_hi, - dst: Reg::R11, - }); - self.push_lir(X86Inst::Or { - size: OperandSize::B64, - src: GpOperand::Reg(Reg::R11), - dst: Reg::R10, - }); - let cc = if insn.op == Opcode::SetEq { - CondCode::Eq - } else { - CondCode::Ne - }; - self.push_lir(X86Inst::SetCC { cc, dst: work_reg }); - self.push_lir(X86Inst::Movzx { - src_size: OperandSize::B8, - dst_size: OperandSize::B32, - src: GpOperand::Reg(work_reg), - dst: work_reg, - }); + self.emit_move(src1, work_reg, 64); + let src2_loc = self.get_location(src2); + let src2_op = match &src2_loc { + Loc::Reg(r) => GpOperand::Reg(*r), + Loc::Imm(v) if *v >= i32::MIN as i128 && *v <= i32::MAX as i128 => { + GpOperand::Imm(*v as i64) } _ => { - // Ordered comparisons: compare hi first, then lo if hi equal. - // For signed: hi compared signed, lo compared unsigned. - // For unsigned: both compared unsigned. 
- let is_signed = matches!( - insn.op, - Opcode::SetLt | Opcode::SetLe | Opcode::SetGt | Opcode::SetGe - ); - - let hi_decides_label = self.int128_label("i128cmp_hi"); - let done_label = self.int128_label("i128cmp_done"); - - // Compare hi halves - self.int128_load_hi(src1, Reg::R10); - let src2_hi = self.int128_src2_hi_operand(src2); - self.push_lir(X86Inst::Cmp { - size: OperandSize::B64, - src: src2_hi, - dst: GpOperand::Reg(Reg::R10), - }); - // If hi halves are not equal, the hi comparison decides - self.push_lir(X86Inst::Jcc { - cc: CondCode::Ne, - target: hi_decides_label.clone(), - }); - - // Hi halves are equal: compare lo halves (always unsigned) - self.int128_load_lo(src1, Reg::R10); - let src2_lo = self.int128_src2_lo_operand(src2); - self.push_lir(X86Inst::Cmp { - size: OperandSize::B64, - src: src2_lo, - dst: GpOperand::Reg(Reg::R10), - }); - // Use unsigned comparison for lo half - let lo_cc = match insn.op { - Opcode::SetLt | Opcode::SetB => CondCode::Ult, - Opcode::SetLe | Opcode::SetBe => CondCode::Ule, - Opcode::SetGt | Opcode::SetA => CondCode::Ugt, - Opcode::SetGe | Opcode::SetAe => CondCode::Uge, - _ => CondCode::Ult, - }; - self.push_lir(X86Inst::SetCC { - cc: lo_cc, - dst: work_reg, - }); - self.push_lir(X86Inst::Movzx { - src_size: OperandSize::B8, - dst_size: OperandSize::B32, - src: GpOperand::Reg(work_reg), - dst: work_reg, - }); - self.push_lir(X86Inst::Jmp { - target: done_label.clone(), - }); - - // Hi decides the comparison - self.push_lir(X86Inst::Directive(Directive::BlockLabel(hi_decides_label))); - let hi_cc = if is_signed { - match insn.op { - Opcode::SetLt => CondCode::Slt, - Opcode::SetLe => CondCode::Sle, - Opcode::SetGt => CondCode::Sgt, - Opcode::SetGe => CondCode::Sge, - _ => CondCode::Slt, - } - } else { - match insn.op { - Opcode::SetB => CondCode::Ult, - Opcode::SetBe => CondCode::Ule, - Opcode::SetA => CondCode::Ugt, - Opcode::SetAe => CondCode::Uge, - _ => CondCode::Ult, - } - }; - self.push_lir(X86Inst::SetCC { - cc: 
hi_cc, - dst: work_reg, - }); - self.push_lir(X86Inst::Movzx { - src_size: OperandSize::B8, - dst_size: OperandSize::B32, - src: GpOperand::Reg(work_reg), - dst: work_reg, - }); - - self.push_lir(X86Inst::Directive(Directive::BlockLabel(done_label))); + self.emit_move(src2, Reg::R11, 64); + GpOperand::Reg(Reg::R11) } + }; + + if with_carry { + self.push_lir(X86Inst::Adc { + size: OperandSize::B64, + src: src2_op, + dst: work_reg, + }); + } else { + self.push_lir(X86Inst::Add { + size: OperandSize::B64, + src: src2_op, + dst: work_reg, + }); } if !matches!(&dst_loc, Loc::Reg(r) if *r == work_reg) { - self.emit_move_to_loc(work_reg, &dst_loc, u32::BITS); + self.emit_move_to_loc(work_reg, &dst_loc, 64); } } - /// Emit 128-bit unary operation (Neg, Not). - fn emit_int128_unary(&mut self, insn: &Instruction, op: UnaryOp) { - let src = match insn.src.first() { - Some(&s) => s, - None => return, - }; - let target = match insn.target { - Some(t) => t, - None => return, - }; + /// SubC/SbcC: 64-bit sub with borrow. 
+ /// SubC (with_borrow=false): sub, sets CF + /// SbcC (with_borrow=true): sbb (sub with borrow in), sets CF + pub(super) fn emit_subc(&mut self, insn: &Instruction, with_borrow: bool) { + let target = insn.target.expect("SubC/SbcC must have target"); + let src1 = insn.src[0]; + let src2 = insn.src[1]; let dst_loc = self.get_location(target); + let work_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::R10, + }; - match op { - UnaryOp::Not => { - // Bitwise NOT: not lo; not hi - self.int128_load_lo(src, Reg::R10); - self.push_lir(X86Inst::Not { - size: OperandSize::B64, - dst: Reg::R10, - }); - self.int128_store_lo(Reg::R10, &dst_loc); - self.int128_load_hi(src, Reg::R10); - self.push_lir(X86Inst::Not { - size: OperandSize::B64, - dst: Reg::R10, - }); - self.int128_store_hi(Reg::R10, &dst_loc); + self.emit_move(src1, work_reg, 64); + let src2_loc = self.get_location(src2); + let src2_op = match &src2_loc { + Loc::Reg(r) => GpOperand::Reg(*r), + Loc::Imm(v) if *v >= i32::MIN as i128 && *v <= i32::MAX as i128 => { + GpOperand::Imm(*v as i64) } - UnaryOp::Neg => { - // Two's complement negate: not lo; not hi; add $1, lo; adc $0, hi - self.int128_load_lo(src, Reg::R10); - self.push_lir(X86Inst::Not { - size: OperandSize::B64, - dst: Reg::R10, - }); - self.push_lir(X86Inst::Add { - size: OperandSize::B64, - src: GpOperand::Imm(1), - dst: Reg::R10, - }); - self.int128_store_lo(Reg::R10, &dst_loc); - self.int128_load_hi(src, Reg::R10); - self.push_lir(X86Inst::Not { - size: OperandSize::B64, - dst: Reg::R10, - }); - self.push_lir(X86Inst::Adc { - size: OperandSize::B64, - src: GpOperand::Imm(0), - dst: Reg::R10, - }); - self.int128_store_hi(Reg::R10, &dst_loc); + _ => { + self.emit_move(src2, Reg::R11, 64); + GpOperand::Reg(Reg::R11) } + }; + + if with_borrow { + self.push_lir(X86Inst::Sbb { + size: OperandSize::B64, + src: src2_op, + dst: work_reg, + }); + } else { + self.push_lir(X86Inst::Sub { + size: OperandSize::B64, + src: src2_op, + dst: work_reg, + }); + } 
+ + if !matches!(&dst_loc, Loc::Reg(r) if *r == work_reg) { + self.emit_move_to_loc(work_reg, &dst_loc, 64); } } - /// Emit 128-bit extend/truncate operations. - fn emit_int128_extend(&mut self, insn: &Instruction) { - let src = match insn.src.first() { - Some(&s) => s, - None => return, - }; - let target = match insn.target { - Some(t) => t, - None => return, - }; + /// UMulHi: upper 64 bits of 64×64 unsigned multiply. + /// Uses mul instruction which puts result in RDX:RAX. + pub(super) fn emit_umulhi(&mut self, insn: &Instruction) { + let target = insn.target.expect("UMulHi must have target"); + let src1 = insn.src[0]; + let src2 = insn.src[1]; + let dst_loc = self.get_location(target); - if insn.size == 128 { - // Extending TO 128-bit - let dst_loc = self.get_location(target); - match insn.op { - Opcode::Zext => { - // Zero-extend: lo = src, hi = 0 - self.emit_move(src, Reg::R10, insn.src_size.max(32)); - // If src_size < 64, ensure upper bits are zeroed - if insn.src_size < 32 { - let mask = (1i64 << insn.src_size) - 1; - self.push_lir(X86Inst::And { - size: OperandSize::B32, - src: GpOperand::Imm(mask), - dst: Reg::R10, - }); - } - self.int128_store_lo(Reg::R10, &dst_loc); - self.int128_store_hi_imm(0, &dst_loc); - } - Opcode::Sext => { - // Sign-extend: lo = src, hi = src >> 63 (sign extension) - self.emit_move(src, Reg::R10, insn.src_size.max(32)); - // Sign-extend src to 64 bits first if needed - match insn.src_size { - 8 => { - self.push_lir(X86Inst::Movsx { - src_size: OperandSize::B8, - dst_size: OperandSize::B64, - src: GpOperand::Reg(Reg::R10), - dst: Reg::R10, - }); - } - 16 => { - self.push_lir(X86Inst::Movsx { - src_size: OperandSize::B16, - dst_size: OperandSize::B64, - src: GpOperand::Reg(Reg::R10), - dst: Reg::R10, - }); - } - 32 => { - self.push_lir(X86Inst::Movsx { - src_size: OperandSize::B32, - dst_size: OperandSize::B64, - src: GpOperand::Reg(Reg::R10), - dst: Reg::R10, - }); - } - _ => {} // 64-bit: already correct - } - 
self.int128_store_lo(Reg::R10, &dst_loc); - // hi = sign extension of lo - self.push_lir(X86Inst::Mov { - size: OperandSize::B64, - src: GpOperand::Reg(Reg::R10), - dst: GpOperand::Reg(Reg::R11), - }); - self.push_lir(X86Inst::Sar { - size: OperandSize::B64, - count: ShiftCount::Imm(63), - dst: Reg::R11, - }); - self.int128_store_hi(Reg::R11, &dst_loc); - } - _ => {} - } - } else { - // Truncating FROM 128-bit (insn.src_size == 128) - // Just load the lo half and truncate - let dst_loc = self.get_location(target); - let dst_reg = match &dst_loc { - Loc::Reg(r) => *r, - _ => Reg::R10, - }; - self.int128_load_lo(src, dst_reg); - // Truncate to target size - match insn.size { - 8 => { - self.push_lir(X86Inst::Movzx { - src_size: OperandSize::B8, - dst_size: OperandSize::B32, - src: GpOperand::Reg(dst_reg), - dst: dst_reg, - }); - } - 16 => { - self.push_lir(X86Inst::Movzx { - src_size: OperandSize::B16, - dst_size: OperandSize::B32, - src: GpOperand::Reg(dst_reg), - dst: dst_reg, - }); - } - 32 => { - self.push_lir(X86Inst::Mov { - size: OperandSize::B32, - src: GpOperand::Reg(dst_reg), - dst: GpOperand::Reg(dst_reg), - }); - } - _ => {} // 64-bit: lo half is the result - } - if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { - self.emit_move_to_loc(dst_reg, &dst_loc, insn.size); + // mul uses RAX as implicit first operand, result in RDX:RAX + self.emit_move(src1, Reg::Rax, 64); + let src2_loc = self.get_location(src2); + let src2_op = match &src2_loc { + Loc::Reg(r) => GpOperand::Reg(*r), + _ => { + self.emit_move(src2, Reg::R11, 64); + GpOperand::Reg(Reg::R11) } + }; + + self.push_lir(X86Inst::Mul1 { + size: OperandSize::B64, + src: src2_op, + }); + + // High result is in RDX + if !matches!(&dst_loc, Loc::Reg(r) if *r == Reg::Rdx) { + self.emit_move_to_loc(Reg::Rdx, &dst_loc, 64); } } } diff --git a/cc/arch/x86_64/mapping.rs b/cc/arch/x86_64/mapping.rs new file mode 100644 index 00000000..f0982e0c --- /dev/null +++ b/cc/arch/x86_64/mapping.rs @@ -0,0 +1,769 @@ 
+// +// Copyright (c) 2025-2026 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// x86-64 instruction mapping +// + +use crate::arch::mapping::{ + build_f16_convert_call, expand_float16_arith, expand_float16_cmp, expand_float16_neg, + float_suffix, map_int128_divmod, map_int128_expand, map_int128_float_convert, ArchMapper, + MappedInsn, MappingCtx, +}; +use crate::ir::{Instruction, Opcode}; +use crate::rtlib::RtlibNames; +use crate::types::TypeKind; + +/// x86-64 instruction mapper. +pub struct X86_64Mapper; + +impl ArchMapper for X86_64Mapper { + fn map_insn(&self, insn: &Instruction, ctx: &mut MappingCtx<'_>) -> MappedInsn { + // Shared: int128 div/mod → rtlib + if let Some(r) = map_int128_divmod(insn, ctx) { + return r; + } + // Shared: int128 expand (add/sub/mul/bitwise/neg/not/cmp/zext/sext) + if let Some(r) = map_int128_expand(insn, ctx) { + return r; + } + // Shared: int128↔float → rtlib + if let Some(r) = map_int128_float_convert(insn, ctx) { + return r; + } + // x86-64 only: Float16 soft-float → expand + if let Some(r) = self.map_float16(insn, ctx) { + return r; + } + MappedInsn::Legal + } +} + +impl X86_64Mapper { + /// Classify and expand Float16 operations via promote-operate-truncate. 
+ fn map_float16(&self, insn: &Instruction, ctx: &mut MappingCtx<'_>) -> Option { + let types = ctx.types; + match insn.op { + // Arithmetic: promote-operate-truncate + Opcode::FAdd | Opcode::FSub | Opcode::FMul | Opcode::FDiv => { + let typ = insn.typ?; + if types.kind(typ) == TypeKind::Float16 { + Some(MappedInsn::Replace(expand_float16_arith( + insn, ctx.func, types, ctx.target, + ))) + } else { + None + } + } + // Negation: promote-negate-truncate + Opcode::FNeg => { + let typ = insn.typ?; + if types.kind(typ) == TypeKind::Float16 { + Some(MappedInsn::Replace(expand_float16_neg( + insn, ctx.func, types, ctx.target, + ))) + } else { + None + } + } + // Comparisons: promote both, compare (no truncate) + Opcode::FCmpOEq + | Opcode::FCmpONe + | Opcode::FCmpOLt + | Opcode::FCmpOLe + | Opcode::FCmpOGt + | Opcode::FCmpOGe => { + if let Some(src_typ) = insn.src_typ { + if types.kind(src_typ) == TypeKind::Float16 { + return Some(MappedInsn::Replace(expand_float16_cmp( + insn, ctx.func, types, ctx.target, + ))); + } + } + // Fallback: check operand size (Float16 = 16 bits) + if insn.size == 16 { + return Some(MappedInsn::Replace(expand_float16_cmp( + insn, ctx.func, types, ctx.target, + ))); + } + None + } + // Float16↔float/double/longdouble conversions + Opcode::FCvtF => { + let src_typ = insn.src_typ?; + let dst_typ = insn.typ?; + let src_kind = types.kind(src_typ); + let dst_kind = types.kind(dst_typ); + if src_kind == TypeKind::Float16 { + let to_suffix = float_suffix(dst_kind, ctx.target); + let rtlib = RtlibNames::new(ctx.target); + let func_name = rtlib.float16_convert("hf", to_suffix)?; + let call = build_f16_convert_call( + insn, func_name, src_typ, dst_typ, types, ctx.target, + ); + Some(MappedInsn::Replace(vec![call])) + } else if dst_kind == TypeKind::Float16 { + let from_suffix = float_suffix(src_kind, ctx.target); + let rtlib = RtlibNames::new(ctx.target); + let func_name = rtlib.float16_convert(from_suffix, "hf")?; + let call = build_f16_convert_call( + 
insn, func_name, src_typ, dst_typ, types, ctx.target, + ); + Some(MappedInsn::Replace(vec![call])) + } else { + None + } + } + // Float16↔integer conversions + Opcode::FCvtS | Opcode::FCvtU => { + // Float16 → int + let src_typ = insn.src_typ?; + if types.kind(src_typ) != TypeKind::Float16 { + return None; + } + let dst_typ = insn.typ?; + let dst_size = types.size_bits(dst_typ); + let is_unsigned = insn.op == Opcode::FCvtU; + let to_suffix = if is_unsigned { + if dst_size <= 32 { + "usi" + } else { + "udi" + } + } else if dst_size <= 32 { + "si" + } else { + "di" + }; + let rtlib = RtlibNames::new(ctx.target); + let func_name = rtlib.float16_convert("hf", to_suffix)?; + let call = + build_f16_convert_call(insn, func_name, src_typ, dst_typ, types, ctx.target); + Some(MappedInsn::Replace(vec![call])) + } + Opcode::SCvtF | Opcode::UCvtF => { + // int → Float16 + let dst_typ = insn.typ?; + if types.kind(dst_typ) != TypeKind::Float16 { + return None; + } + let src_typ = insn.src_typ?; + let src_size = types.size_bits(src_typ); + let is_unsigned = insn.op == Opcode::UCvtF; + let from_suffix = if is_unsigned { + if src_size <= 32 { + "usi" + } else { + "udi" + } + } else if src_size <= 32 { + "si" + } else { + "di" + }; + let rtlib = RtlibNames::new(ctx.target); + let func_name = rtlib.float16_convert(from_suffix, "hf")?; + let call = + build_f16_convert_call(insn, func_name, src_typ, dst_typ, types, ctx.target); + Some(MappedInsn::Replace(vec![call])) + } + _ => None, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::arch::mapping::test_helpers::*; + use crate::arch::mapping::MappingCtx; + use crate::ir::{Instruction, Opcode, PseudoId}; + use crate::target::{Arch, Os, Target}; + use crate::types::TypeTable; + + #[test] + fn test_x86_64_legal_insns() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + let func_template = make_test_func(&types); + for block in 
&func_template.blocks { + for insn in &block.insns { + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + let result = mapper.map_insn(insn, &mut ctx); + assert_legal(&result); + } + } + } + + // ======================================================================== + // Int128 div/mod + // ======================================================================== + + #[test] + fn test_x86_64_int128_divs() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + let insn = Instruction::binop( + Opcode::DivS, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int128_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__divti3"); + } + + #[test] + fn test_x86_64_int128_divu() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + let insn = Instruction::binop( + Opcode::DivU, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.uint128_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__udivti3"); + } + + #[test] + fn test_x86_64_int128_mods() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + let insn = Instruction::binop( + Opcode::ModS, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int128_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__modti3"); + } + + #[test] + fn test_x86_64_int128_modu() { + let target = 
Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + let insn = Instruction::binop( + Opcode::ModU, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.uint128_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__umodti3"); + } + + #[test] + fn test_x86_64_int32_div_stays_legal() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + let insn = Instruction::binop( + Opcode::DivS, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_legal(&mapper.map_insn(&insn, &mut ctx)); + } + + // ======================================================================== + // Int128 expand + // ======================================================================== + + #[test] + fn test_x86_64_int128_add_expands() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + let insn = Instruction::binop( + Opcode::Add, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int128_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_expand(&mapper.map_insn(&insn, &mut ctx)); + } + + // ======================================================================== + // Int128↔float conversion + // ======================================================================== + + #[test] + fn test_x86_64_int128_to_float() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // signed int128 → float + let insn = 
make_convert_insn(Opcode::SCvtF, types.float_id, 32, types.int128_id, 128); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__floattisf"); + + // signed int128 → double + let insn = make_convert_insn(Opcode::SCvtF, types.double_id, 64, types.int128_id, 128); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__floattidf"); + + // unsigned int128 → float + let insn = make_convert_insn(Opcode::UCvtF, types.float_id, 32, types.uint128_id, 128); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__floatuntisf"); + + // unsigned int128 → double + let insn = make_convert_insn(Opcode::UCvtF, types.double_id, 64, types.uint128_id, 128); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__floatuntidf"); + } + + #[test] + fn test_x86_64_float_to_int128() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // float → signed int128 + let insn = make_convert_insn(Opcode::FCvtS, types.int128_id, 128, types.float_id, 32); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__fixsfti"); + + // double → signed int128 + let insn = make_convert_insn(Opcode::FCvtS, types.int128_id, 128, types.double_id, 64); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + 
assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__fixdfti"); + + // float → unsigned int128 + let insn = make_convert_insn(Opcode::FCvtU, types.uint128_id, 128, types.float_id, 32); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__fixunssfti"); + + // double → unsigned int128 + let insn = make_convert_insn(Opcode::FCvtU, types.uint128_id, 128, types.double_id, 64); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__fixunsdfti"); + } + + #[test] + fn test_x86_64_int128_longdouble() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // x86-64 long double uses "xf" suffix + let insn = make_convert_insn(Opcode::SCvtF, types.longdouble_id, 80, types.int128_id, 128); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__floattixf"); + + let insn = make_convert_insn(Opcode::FCvtS, types.int128_id, 128, types.longdouble_id, 80); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__fixxfti"); + } + + #[test] + fn test_x86_64_non_int128_conversion_legal() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // int32 → double should remain Legal + let insn = make_convert_insn(Opcode::SCvtF, types.double_id, 64, types.int_id, 32); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + 
assert_legal(&mapper.map_insn(&insn, &mut ctx)); + + // double → int32 should remain Legal + let insn = make_convert_insn(Opcode::FCvtS, types.int_id, 32, types.double_id, 64); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_legal(&mapper.map_insn(&insn, &mut ctx)); + } + + #[test] + fn test_x86_64_longdouble_binop_legal() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // x86_64 long double (x87) is native — should be Legal + let insn = Instruction::binop( + Opcode::FAdd, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.longdouble_id, + 80, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_legal(&mapper.map_insn(&insn, &mut ctx)); + } + + #[test] + fn test_x86_64_longdouble_convert_legal() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // x86_64 long double conversions are native + let insn = make_convert_insn(Opcode::FCvtF, types.longdouble_id, 80, types.float_id, 32); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_legal(&mapper.map_insn(&insn, &mut ctx)); + } + + // ======================================================================== + // Int128 constant shifts + // ======================================================================== + + #[test] + fn test_x86_64_int128_const_shl_expands() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // Create Shl.128 with constant shift amount + let mut func = make_minimal_func(&types); + let shift_const = func.create_const_pseudo(5); + let insn = Instruction::binop( + Opcode::Shl, + PseudoId(2), + 
PseudoId(0), + shift_const, + types.int128_id, + 128, + ); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_expand(&mapper.map_insn(&insn, &mut ctx)); + } + + #[test] + fn test_x86_64_int128_const_lsr_expands() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + let mut func = make_minimal_func(&types); + let shift_const = func.create_const_pseudo(64); + let insn = Instruction::binop( + Opcode::Lsr, + PseudoId(2), + PseudoId(0), + shift_const, + types.uint128_id, + 128, + ); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_expand(&mapper.map_insn(&insn, &mut ctx)); + } + + #[test] + fn test_x86_64_int128_const_asr_expands() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + let mut func = make_minimal_func(&types); + let shift_const = func.create_const_pseudo(127); + let insn = Instruction::binop( + Opcode::Asr, + PseudoId(2), + PseudoId(0), + shift_const, + types.int128_id, + 128, + ); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_expand(&mapper.map_insn(&insn, &mut ctx)); + } + + #[test] + fn test_x86_64_int128_variable_shift_legal() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // Variable shift (PseudoId(1) is a register, not a constant) → stays Legal + let insn = Instruction::binop( + Opcode::Shl, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int128_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_legal(&mapper.map_insn(&insn, &mut ctx)); + } + + // ======================================================================== + // Float16 conversions + // 
======================================================================== + + #[test] + fn test_x86_64_float16_to_float_conversion() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // Float16 → float should expand to __extendhfsf2 + let insn = make_convert_insn(Opcode::FCvtF, types.float_id, 32, types.float16_id, 16); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__extendhfsf2"); + } + + #[test] + fn test_x86_64_float_to_float16_conversion() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // float → Float16 should expand to __truncsfhf2 + let insn = make_convert_insn(Opcode::FCvtF, types.float16_id, 16, types.float_id, 32); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__truncsfhf2"); + } + + #[test] + fn test_x86_64_float16_to_double_conversion() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + let insn = make_convert_insn(Opcode::FCvtF, types.double_id, 64, types.float16_id, 16); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__extendhfdf2"); + } + + #[test] + fn test_x86_64_float16_to_int_conversion() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // Float16 → int (signed) should call __fixhfsi + let insn = make_convert_insn(Opcode::FCvtS, types.int_id, 32, types.float16_id, 16); + let mut func = make_minimal_func(&types); + let mut ctx = 
MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__fixhfsi"); + } + + #[test] + fn test_x86_64_int_to_float16_conversion() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // int (signed) → Float16 should call __floatsihf + let insn = make_convert_insn(Opcode::SCvtF, types.float16_id, 16, types.int_id, 32); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__floatsihf"); + } + + #[test] + fn test_x86_64_float16_to_uint_conversion() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // Float16 → unsigned int should call __fixunshfsi + let insn = make_convert_insn(Opcode::FCvtU, types.uint_id, 32, types.float16_id, 16); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__fixunshfsi"); + } + + #[test] + fn test_x86_64_uint_to_float16_conversion() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // unsigned int → Float16 should call __floatunsihf + let insn = make_convert_insn(Opcode::UCvtF, types.float16_id, 16, types.uint_id, 32); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__floatunsihf"); + } +} diff --git a/cc/arch/x86_64/mod.rs b/cc/arch/x86_64/mod.rs index 5ae9eb98..4550db9a 100644 --- a/cc/arch/x86_64/mod.rs +++ b/cc/arch/x86_64/mod.rs @@ -16,6 +16,7 @@ mod features; mod float; pub mod lir; pub mod macros; +pub(crate) mod mapping; pub mod regalloc; mod 
x87; diff --git a/cc/arch/x86_64/regalloc.rs b/cc/arch/x86_64/regalloc.rs index 966371c6..e2437758 100644 --- a/cc/arch/x86_64/regalloc.rs +++ b/cc/arch/x86_64/regalloc.rs @@ -598,28 +598,44 @@ impl RegAlloc { | Opcode::SetAe ); - // For Load: target is int128, but src[0] is the address (64-bit pointer). - // For Store: src[0] is address (64-bit), src[1] is the int128 value. - // For comparisons: target is a small integer result. - if !is_comparison && !matches!(insn.op, Opcode::Load) { - if let Some(target) = insn.target { - self.int128_pseudos.insert(target); - } - } - if matches!(insn.op, Opcode::Load) { - // Load: only target is int128, not the address src[0] - if let Some(target) = insn.target { - self.int128_pseudos.insert(target); + // Lo64/Hi64: target is 64-bit (not int128), source is int128 + // Pair64: target is int128, sources are 64-bit (not int128) + // AddC/AdcC/SubC/SbcC/UMulHi: 64-bit ops, not int128 + match insn.op { + Opcode::Lo64 | Opcode::Hi64 => { + // Source is int128, target is 64-bit + for &src in &insn.src { + self.int128_pseudos.insert(src); + } } - } else if matches!(insn.op, Opcode::Store) { - // Store: src[0] is address (skip), src[1] is the int128 value - if let Some(&val) = insn.src.get(1) { - self.int128_pseudos.insert(val); + Opcode::Pair64 => { + // Target is int128, sources are 64-bit + if let Some(target) = insn.target { + self.int128_pseudos.insert(target); + } } - } else { - // Other ops: all sources are int128 - for &src in &insn.src { - self.int128_pseudos.insert(src); + _ => { + // For Load: target is int128, but src[0] is the address (64-bit pointer). + // For Store: src[0] is address (64-bit), src[1] is the int128 value. + // For comparisons: target is a small integer result. 
+ if !is_comparison && !matches!(insn.op, Opcode::Load) { + if let Some(target) = insn.target { + self.int128_pseudos.insert(target); + } + } + if matches!(insn.op, Opcode::Load) { + if let Some(target) = insn.target { + self.int128_pseudos.insert(target); + } + } else if matches!(insn.op, Opcode::Store) { + if let Some(&val) = insn.src.get(1) { + self.int128_pseudos.insert(val); + } + } else { + for &src in &insn.src { + self.int128_pseudos.insert(src); + } + } } } } diff --git a/cc/builtins.rs b/cc/builtins.rs index 601647de..21de2862 100644 --- a/cc/builtins.rs +++ b/cc/builtins.rs @@ -113,15 +113,22 @@ pub const SUPPORTED_BUILTINS: &[&str] = &[ ]; /// Check if a name is a supported builtin function. -/// Used by __has_builtin() in the preprocessor. +/// Used by __has_builtin() in the preprocessor when only a string is available. #[inline] pub fn is_builtin(name: &str) -> bool { SUPPORTED_BUILTINS.contains(&name) } +/// Check if a StringId is a supported builtin function (O(1) via tag lookup). +#[inline] +pub fn is_builtin_id(id: crate::strings::StringId) -> bool { + crate::kw::has_tag(id, crate::kw::BUILTIN) +} + #[cfg(test)] mod tests { use super::*; + use crate::strings::StringTable; #[test] fn test_is_builtin() { @@ -131,4 +138,21 @@ mod tests { assert!(!is_builtin("__builtin_nonexistent")); assert!(!is_builtin("printf")); } + + /// Verify every SUPPORTED_BUILTINS entry has the BUILTIN tag in kw.rs, + /// ensuring the string list and tag-based lookup can never diverge. 
+ #[test] + fn test_supported_builtins_match_kw_tags() { + let table = StringTable::new(); + for &name in SUPPORTED_BUILTINS { + let id = table + .lookup(name) + .unwrap_or_else(|| panic!("builtin '{}' not pre-interned in kw.rs", name)); + assert!( + is_builtin_id(id), + "builtin '{}' is in SUPPORTED_BUILTINS but missing BUILTIN tag in kw.rs", + name + ); + } + } } diff --git a/cc/ir/linearize.rs b/cc/ir/linearize.rs index dd5d0f31..b3e813f1 100644 --- a/cc/ir/linearize.rs +++ b/cc/ir/linearize.rs @@ -15,13 +15,12 @@ use super::{ AsmConstraint, AsmData, BasicBlock, BasicBlockId, CallAbiInfo, Function, Initializer, Instruction, MemoryOrder, Module, Opcode, Pseudo, PseudoId, }; -use crate::abi::{get_abi_for_conv, ArgClass, CallingConv, RegClass}; +use crate::abi::{get_abi_for_conv, CallingConv}; use crate::diag::{error, get_all_stream_names, Position}; use crate::parse::ast::{ AsmOperand, AssignOp, BinaryOp, BlockItem, Declaration, Designator, Expr, ExprKind, ExternalDecl, ForInit, FunctionDef, InitElement, OffsetOfPath, Stmt, TranslationUnit, UnaryOp, }; -use crate::rtlib::{Float16Abi, RtlibNames}; use crate::strings::{StringId, StringTable}; use crate::symbol::{SymbolId, SymbolTable}; use crate::target::Target; @@ -4652,201 +4651,6 @@ impl<'a> Linearizer<'a> { // Emit conversion if needed let src_is_float = self.types.is_float(src_type); let dst_is_float = self.types.is_float(cast_type); - let src_kind = self.types.kind(src_type); - let dst_kind = self.types.kind(cast_type); - - // Check for long double conversions that need rtlib - let src_is_longdouble = src_kind == TypeKind::LongDouble; - let dst_is_longdouble = dst_kind == TypeKind::LongDouble; - - // Check for Float16 conversions that need rtlib - let src_is_float16 = src_kind == TypeKind::Float16; - let dst_is_float16 = dst_kind == TypeKind::Float16; - - // Get long double suffix based on target architecture - let ld_suffix = if self.target.arch == crate::target::Arch::X86_64 { - "xf" - } else { - "tf" - }; - 
- // Skip Float16 handling for Int128 operands — no direct hf↔ti rtlib exists. - // These will fall through to the Int128 handler which converts via double. - let src_is_int128 = src_kind == TypeKind::Int128; - let dst_is_int128 = dst_kind == TypeKind::Int128; - if (src_is_float16 || dst_is_float16) && !src_is_int128 && !dst_is_int128 { - let rtlib = RtlibNames::new(self.target); - - let (from_suffix, to_suffix) = if src_is_float16 && dst_is_float { - // Float16 -> float/double/long double - let to = match dst_kind { - TypeKind::Float => "sf", - TypeKind::Double => "df", - TypeKind::LongDouble => ld_suffix, - _ => "", - }; - ("hf", to) - } else if dst_is_float16 && src_is_float { - // float/double/long double -> Float16 - let from = match src_kind { - TypeKind::Float => "sf", - TypeKind::Double => "df", - TypeKind::LongDouble => ld_suffix, - _ => "", - }; - (from, "hf") - } else if src_is_float16 && !dst_is_float { - // Float16 -> integer - let dst_size = self.types.size_bits(cast_type); - let is_unsigned = self.types.is_unsigned(cast_type); - let to = if is_unsigned { - if dst_size <= 32 { - "usi" - } else { - "udi" - } - } else if dst_size <= 32 { - "si" - } else { - "di" - }; - ("hf", to) - } else if dst_is_float16 && !src_is_float { - // Integer -> Float16 - let src_size = self.types.size_bits(src_type); - let is_unsigned = self.types.is_unsigned(src_type); - let from = if is_unsigned { - if src_size <= 32 { - "usi" - } else { - "udi" - } - } else if src_size <= 32 { - "si" - } else { - "di" - }; - (from, "hf") - } else { - ("", "") - }; - - if !from_suffix.is_empty() && !to_suffix.is_empty() { - if let Some(func_name) = rtlib.float16_convert(from_suffix, to_suffix) { - // Use Float16-specific call that handles x86-64 soft-float ABI - return self.emit_float16_convert_call(func_name, src, src_type, cast_type); - } - } - } - - if src_is_longdouble || dst_is_longdouble { - let rtlib = RtlibNames::new(self.target); - - let (from_suffix, to_suffix) = if 
src_is_longdouble && dst_is_float { - // Long double -> float/double - let to = match dst_kind { - TypeKind::Float => "sf", - TypeKind::Double => "df", - _ => "", - }; - (ld_suffix, to) - } else if dst_is_longdouble && src_is_float { - // Float/double -> long double - let from = match src_kind { - TypeKind::Float => "sf", - TypeKind::Double => "df", - _ => "", - }; - (from, ld_suffix) - } else if src_is_longdouble && !dst_is_float { - // Long double -> integer - let dst_size = self.types.size_bits(cast_type); - let is_unsigned = self.types.is_unsigned(cast_type); - let to = if is_unsigned { - if dst_size <= 32 { - "usi" - } else { - "udi" - } - } else if dst_size <= 32 { - "si" - } else { - "di" - }; - (ld_suffix, to) - } else if dst_is_longdouble && !src_is_float { - // Integer -> long double - let src_size = self.types.size_bits(src_type); - let is_unsigned = self.types.is_unsigned(src_type); - let from = if is_unsigned { - if src_size <= 32 { - "usi" - } else { - "udi" - } - } else if src_size <= 32 { - "si" - } else { - "di" - }; - (from, ld_suffix) - } else { - ("", "") - }; - - if !from_suffix.is_empty() && !to_suffix.is_empty() { - if let Some(func_name) = rtlib.longdouble_convert(from_suffix, to_suffix) { - return self.emit_longdouble_convert_call(func_name, src, src_type, cast_type); - } - } - // Fall through to native FP for macOS aarch64 (long double == double) - } - - // Check for Int128 <-> float conversions that need rtlib - let src_is_int128 = src_kind == TypeKind::Int128; - let dst_is_int128 = dst_kind == TypeKind::Int128; - - if (src_is_int128 && dst_is_float) || (dst_is_int128 && src_is_float) { - let rtlib = RtlibNames::new(self.target); - - let (from_suffix, to_suffix) = if src_is_int128 && dst_is_float { - // Int128 -> float type - let from = if self.types.is_unsigned(src_type) { - "uti" - } else { - "ti" - }; - let to = match dst_kind { - TypeKind::Float => "sf", - TypeKind::Double => "df", - TypeKind::Float16 => "hf", - TypeKind::LongDouble 
=> ld_suffix, - _ => "", - }; - (from, to) - } else { - // float type -> Int128 - let from = match src_kind { - TypeKind::Float => "sf", - TypeKind::Double => "df", - TypeKind::Float16 => "hf", - TypeKind::LongDouble => ld_suffix, - _ => "", - }; - let to = if self.types.is_unsigned(cast_type) { - "uti" - } else { - "ti" - }; - (from, to) - }; - - if !from_suffix.is_empty() && !to_suffix.is_empty() { - if let Some(func_name) = rtlib.int128_convert(from_suffix, to_suffix) { - return self.emit_rtlib_call(func_name, vec![src], vec![src_type], cast_type); - } - } - } if src_is_float && !dst_is_float { // Float to integer conversion @@ -7682,24 +7486,6 @@ impl<'a> Linearizer<'a> { let is_float = self.types.is_float(typ); let size = self.types.size_bits(typ); - // Check for long double negation that needs rtlib - if op == UnaryOp::Neg && self.types.kind(typ) == TypeKind::LongDouble { - let rtlib = RtlibNames::new(self.target); - if let Some(func_name) = rtlib.longdouble_neg() { - return self.emit_longdouble_neg_call(func_name, src, typ); - } - // Fall through to native FP for macOS aarch64 (long double == double) - } - - // Check for Float16 negation that needs soft-float on x86-64 - if op == UnaryOp::Neg && self.types.kind(typ) == TypeKind::Float16 { - let rtlib = RtlibNames::new(self.target); - if rtlib.float16_needs_softfloat() { - return self.emit_float16_neg_via_float(src); - } - // Fall through to native FP16 for AArch64 - } - let result = self.alloc_pseudo(); let opcode = match op { @@ -7818,90 +7604,6 @@ impl<'a> Linearizer<'a> { let is_float = self.types.is_float(operand_typ); let is_unsigned = self.types.is_unsigned(operand_typ); - // Check if this is a Float16 operation that needs soft-float on x86-64 - if self.types.kind(operand_typ) == TypeKind::Float16 { - let rtlib = RtlibNames::new(self.target); - if rtlib.float16_needs_softfloat() { - // Arithmetic: promote-operate-truncate - if matches!( - op, - BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | 
BinaryOp::Div - ) { - return self.emit_float16_arith_via_float(op, left, right); - } - // Comparisons: promote-compare (no truncate needed) - if matches!( - op, - BinaryOp::Lt - | BinaryOp::Le - | BinaryOp::Gt - | BinaryOp::Ge - | BinaryOp::Eq - | BinaryOp::Ne - ) { - return self.emit_float16_cmp_via_float(op, left, right); - } - } - // Fall through to native FP16 for AArch64 - } - - // Check if this is a long double operation that needs rtlib - if self.types.kind(operand_typ) == TypeKind::LongDouble { - let rtlib = RtlibNames::new(self.target); - - // Handle arithmetic operations - let arith_op = match op { - BinaryOp::Add => Some("add"), - BinaryOp::Sub => Some("sub"), - BinaryOp::Mul => Some("mul"), - BinaryOp::Div => Some("div"), - _ => None, - }; - - if let Some(op_str) = arith_op { - if let Some(func_name) = rtlib.longdouble_binop(op_str) { - return self.emit_longdouble_binop_call(func_name, left, right, operand_typ); - } - } - - // Handle comparison operations - let cmp_kind = match op { - BinaryOp::Lt => Some("lt"), - BinaryOp::Le => Some("le"), - BinaryOp::Gt => Some("gt"), - BinaryOp::Ge => Some("ge"), - BinaryOp::Eq => Some("eq"), - BinaryOp::Ne => Some("ne"), - _ => None, - }; - - if let Some(kind) = cmp_kind { - if let Some(func_name) = rtlib.longdouble_cmp(kind) { - return self.emit_longdouble_cmp_call(func_name, left, right, op); - } - } - // Fall through to native FP for macOS aarch64 (long double == double) - } - - // Check if this is an __int128 div/mod operation that needs rtlib - if self.types.kind(operand_typ) == TypeKind::Int128 - && matches!(op, BinaryOp::Div | BinaryOp::Mod) - { - let rtlib = RtlibNames::new(self.target); - let op_str = if matches!(op, BinaryOp::Div) { - "div" - } else { - "mod" - }; - let func_name = rtlib.int128_divmod(op_str, is_unsigned); - return self.emit_rtlib_call( - func_name, - vec![left, right], - vec![operand_typ, operand_typ], - result_typ, - ); - } - let result = self.alloc_pseudo(); let opcode = match op { @@ 
-8129,10 +7831,8 @@ impl<'a> Linearizer<'a> { } BinaryOp::Mul => { // Complex multiply via rtlib call (__mulsc3, __muldc3, etc.) - // Uses robust implementation that handles overflow correctly - let rtlib = RtlibNames::new(self.target); let base_kind = self.types.kind(base_typ); - let func_name = rtlib.complex_mul(base_kind); + let func_name = crate::arch::mapping::complex_mul_name(base_kind, self.target); let call_result = self.emit_complex_rtlib_call( func_name, (left_real, left_imag), @@ -8155,10 +7855,8 @@ impl<'a> Linearizer<'a> { } BinaryOp::Div => { // Complex divide via rtlib call (__divsc3, __divdc3, etc.) - // Uses Smith's method for robust overflow handling - let rtlib = RtlibNames::new(self.target); let base_kind = self.types.kind(base_typ); - let func_name = rtlib.complex_div(base_kind); + let func_name = crate::arch::mapping::complex_div_name(base_kind, self.target); let call_result = self.emit_complex_rtlib_call( func_name, (left_real, left_imag), @@ -8283,495 +7981,6 @@ impl<'a> Linearizer<'a> { result_sym } - /// Emit a call to a runtime library function with ABI classification. - /// - /// Handles ABI param/return classification, instruction creation, and emission. 
- fn emit_rtlib_call( - &mut self, - func_name: &str, - arg_vals: Vec, - arg_types: Vec, - ret_type: TypeId, - ) -> PseudoId { - let result = self.alloc_pseudo(); - let ret_size = self.types.size_bits(ret_type); - - let abi = get_abi_for_conv(self.current_calling_conv, self.target); - let param_classes: Vec<_> = arg_types - .iter() - .map(|&t| abi.classify_param(t, self.types)) - .collect(); - let ret_class = abi.classify_return(ret_type, self.types); - let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); - - let mut call_insn = Instruction::call( - Some(result), - func_name, - arg_vals, - arg_types, - ret_type, - ret_size, - ); - call_insn.abi_info = Some(call_abi_info); - self.emit(call_insn); - - result - } - - /// Emit a call to a long double rtlib function (__addxf3, __multf3, etc.). - /// - /// These functions take 2 long double args and return a long double. - fn emit_longdouble_binop_call( - &mut self, - func_name: &str, - left: PseudoId, - right: PseudoId, - longdouble_typ: TypeId, - ) -> PseudoId { - self.emit_rtlib_call( - func_name, - vec![left, right], - vec![longdouble_typ, longdouble_typ], - longdouble_typ, - ) - } - - /// Emit a call to a long double comparison rtlib function (__cmpxf2, __cmptf2). - /// - /// The comparison function returns an int: - /// - < 0 if a < b - /// - 0 if a == b - /// - > 0 if a > b - /// - /// We then compare that result with 0 to produce the final boolean. 
- fn emit_longdouble_cmp_call( - &mut self, - func_name: &str, - left: PseudoId, - right: PseudoId, - op: BinaryOp, - ) -> PseudoId { - let longdouble_typ = self.types.longdouble_id; - let int_typ = self.types.int_id; - let int_size = self.types.size_bits(int_typ); - - let arg_vals = vec![left, right]; - let arg_types = vec![longdouble_typ, longdouble_typ]; - - // Compute ABI classification for the call - let abi = get_abi_for_conv(self.current_calling_conv, self.target); - let param_classes: Vec<_> = arg_types - .iter() - .map(|&t| abi.classify_param(t, self.types)) - .collect(); - let ret_class = abi.classify_return(int_typ, self.types); - let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); - - // Call the comparison function - it returns an int - let cmp_result = self.alloc_pseudo(); - let mut call_insn = Instruction::call( - Some(cmp_result), - func_name, - arg_vals, - arg_types, - int_typ, - int_size, - ); - call_insn.abi_info = Some(call_abi_info); - self.emit(call_insn); - - // Now compare the result with 0 based on the original comparison op - let zero = self.emit_const(0, int_typ); - let result = self.alloc_pseudo(); - - // Map the original FP comparison to an int comparison - // cmp_result < 0 means a < b - // cmp_result == 0 means a == b - // cmp_result > 0 means a > b - let opcode = match op { - BinaryOp::Lt => Opcode::SetLt, // cmp_result < 0 - BinaryOp::Gt => Opcode::SetGt, // cmp_result > 0 - BinaryOp::Le => Opcode::SetLe, // cmp_result <= 0 - BinaryOp::Ge => Opcode::SetGe, // cmp_result >= 0 - BinaryOp::Eq => Opcode::SetEq, // cmp_result == 0 - BinaryOp::Ne => Opcode::SetNe, // cmp_result != 0 - _ => unreachable!("emit_longdouble_cmp_call called with non-comparison op"), - }; - - self.emit(Instruction::binop( - opcode, result, cmp_result, zero, int_typ, int_size, - )); - - result - } - - // ======================================================================== - // Float16 soft-float helpers (for x86-64) - // 
======================================================================== - - /// Emit Float16 arithmetic using promote-operate-truncate pattern. - /// - /// For x86-64 without native FP16 instructions: - /// 1. Extend both operands from Float16 to float using __extendhfsf2 - /// 2. Perform native SSE arithmetic operation - /// 3. Truncate result back to Float16 using __truncsfhf2 - fn emit_float16_arith_via_float( - &mut self, - op: BinaryOp, - left: PseudoId, - right: PseudoId, - ) -> PseudoId { - let float16_typ = self.types.float16_id; - let float_typ = self.types.float_id; - let float_size = self.types.size_bits(float_typ); - - // 1. Extend left operand: Float16 -> float - let left_ext = self.emit_float16_extend_call(left, float16_typ, float_typ); - - // 2. Extend right operand: Float16 -> float - let right_ext = self.emit_float16_extend_call(right, float16_typ, float_typ); - - // 3. Perform native float operation - let result_float = self.alloc_reg_pseudo(); - let opcode = match op { - BinaryOp::Add => Opcode::FAdd, - BinaryOp::Sub => Opcode::FSub, - BinaryOp::Mul => Opcode::FMul, - BinaryOp::Div => Opcode::FDiv, - _ => unreachable!("emit_float16_arith_via_float called with non-arithmetic op"), - }; - self.emit(Instruction::binop( - opcode, - result_float, - left_ext, - right_ext, - float_typ, - float_size, - )); - - // 4. Truncate result: float -> Float16 - self.emit_float16_truncate_call(result_float, float_typ, float16_typ) - } - - /// Emit Float16 comparison using promote-compare pattern. - /// - /// For x86-64 without native FP16 instructions: - /// 1. Extend both operands from Float16 to float using __extendhfsf2 - /// 2. Perform native SSE comparison (no truncation needed - result is int) - fn emit_float16_cmp_via_float( - &mut self, - op: BinaryOp, - left: PseudoId, - right: PseudoId, - ) -> PseudoId { - let float16_typ = self.types.float16_id; - let float_typ = self.types.float_id; - let float_size = self.types.size_bits(float_typ); - - // 1. 
Extend left operand: Float16 -> float - let left_ext = self.emit_float16_extend_call(left, float16_typ, float_typ); - - // 2. Extend right operand: Float16 -> float - let right_ext = self.emit_float16_extend_call(right, float16_typ, float_typ); - - // 3. Perform native float comparison - let result = self.alloc_reg_pseudo(); - let opcode = match op { - BinaryOp::Lt => Opcode::FCmpOLt, - BinaryOp::Le => Opcode::FCmpOLe, - BinaryOp::Gt => Opcode::FCmpOGt, - BinaryOp::Ge => Opcode::FCmpOGe, - BinaryOp::Eq => Opcode::FCmpOEq, - BinaryOp::Ne => Opcode::FCmpONe, - _ => unreachable!("emit_float16_cmp_via_float called with non-comparison op"), - }; - self.emit(Instruction::binop( - opcode, result, left_ext, right_ext, float_typ, float_size, - )); - - result - } - - /// Emit Float16 negation using promote-negate-truncate pattern. - /// - /// For x86-64 without native FP16 instructions: - /// 1. Extend operand from Float16 to float using __extendhfsf2 - /// 2. Perform native SSE negation - /// 3. Truncate result back to Float16 using __truncsfhf2 - fn emit_float16_neg_via_float(&mut self, src: PseudoId) -> PseudoId { - let float16_typ = self.types.float16_id; - let float_typ = self.types.float_id; - let float_size = self.types.size_bits(float_typ); - - // 1. Extend operand: Float16 -> float - let src_ext = self.emit_float16_extend_call(src, float16_typ, float_typ); - - // 2. Perform native float negation - let result_float = self.alloc_reg_pseudo(); - self.emit(Instruction::unop( - Opcode::FNeg, - result_float, - src_ext, - float_typ, - float_size, - )); - - // 3. Truncate result: float -> Float16 - self.emit_float16_truncate_call(result_float, float_typ, float16_typ) - } - - /// Emit call to __extendhfsf2 to extend Float16 to float. 
- /// - /// The ABI for Float16 parameter depends on the runtime library: - /// - compiler-rt: Float16 passed as 16-bit integer in RDI - /// - libgcc: Float16 passed in XMM (SSE ABI) - fn emit_float16_extend_call( - &mut self, - src: PseudoId, - src_type: TypeId, - dst_type: TypeId, - ) -> PseudoId { - let result = self.alloc_pseudo(); - let dst_size = self.types.size_bits(dst_type); - - // Query rtlib for Float16 ABI - let rtlib = RtlibNames::new(self.target); - let f16_abi = rtlib.float16_abi(); - - // Set up argument type based on rtlib ABI - let arg_type_for_abi = if f16_abi == Float16Abi::Integer { - self.types.ushort_id // compiler-rt: use integer type - } else { - src_type // libgcc: use Float16 type (SSE ABI) - }; - - let arg_vals = vec![src]; - let arg_types = vec![arg_type_for_abi]; - - // Set up ABI classification based on rtlib - let param_classes = if f16_abi == Float16Abi::Integer { - // compiler-rt: argument is INTEGER (16-bit) - vec![ArgClass::Extend { - signed: false, - size_bits: 16, - }] - } else { - // libgcc: use standard SSE ABI - let abi = get_abi_for_conv(self.current_calling_conv, self.target); - vec![abi.classify_param(arg_type_for_abi, self.types)] - }; - - // Return is always SSE (float) - let ret_class = ArgClass::Direct { - classes: vec![RegClass::Sse], - size_bits: 32, - }; - let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); - - let mut call_insn = Instruction::call( - Some(result), - "__extendhfsf2", - arg_vals, - arg_types, - dst_type, - dst_size, - ); - call_insn.abi_info = Some(call_abi_info); - self.emit(call_insn); - - result - } - - /// Emit call to __truncsfhf2 to truncate float to Float16. 
- /// - /// The ABI for Float16 return depends on the runtime library: - /// - compiler-rt: Float16 returned as 16-bit integer in RAX - /// - libgcc: Float16 returned in XMM (SSE ABI) - fn emit_float16_truncate_call( - &mut self, - src: PseudoId, - src_type: TypeId, - dst_type: TypeId, - ) -> PseudoId { - let result = self.alloc_pseudo(); - let dst_size = self.types.size_bits(dst_type); - - let arg_vals = vec![src]; - let arg_types = vec![src_type]; - - // Query rtlib for Float16 ABI - let rtlib = RtlibNames::new(self.target); - let f16_abi = rtlib.float16_abi(); - - // Argument is always SSE (float) - let param_classes = vec![ArgClass::Direct { - classes: vec![RegClass::Sse], - size_bits: 32, - }]; - - // Return type depends on rtlib - let ret_class = if f16_abi == Float16Abi::Integer { - // compiler-rt: return is INTEGER (16-bit in RAX) - ArgClass::Extend { - signed: false, - size_bits: 16, - } - } else { - // libgcc: return is SSE (Float16 in XMM) - let abi = get_abi_for_conv(self.current_calling_conv, self.target); - abi.classify_return(dst_type, self.types) - }; - - let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); - - let mut call_insn = Instruction::call( - Some(result), - "__truncsfhf2", - arg_vals, - arg_types, - dst_type, // Keep Float16 type for proper subsequent handling - dst_size, - ); - call_insn.abi_info = Some(call_abi_info); - self.emit(call_insn); - - result - } - - /// Emit a call to a long double conversion rtlib function. 
- /// - /// These functions convert between long double and other types: - /// - __extendsfxf2/__extendsftf2: float -> long double - /// - __extenddfxf2/__extenddftf2: double -> long double - /// - __truncxfsf2/__trunctfsf2: long double -> float - /// - __truncxfdf2/__trunctfdf2: long double -> double - /// - __floatsixf/__floatsitf: int32 -> long double - /// - __floatdixf/__floatditf: int64 -> long double - /// - __fixxfsi/__fixtfsi: long double -> int32 - /// - __fixxfdi/__fixtfdi: long double -> int64 - fn emit_longdouble_convert_call( - &mut self, - func_name: &str, - src: PseudoId, - src_type: TypeId, - dst_type: TypeId, - ) -> PseudoId { - self.emit_rtlib_call(func_name, vec![src], vec![src_type], dst_type) - } - - /// Emit a call to a Float16 conversion rtlib function with correct ABI for x86-64. - /// - /// On x86-64 without native FP16, Float16 values are passed/returned as integers: - /// - Float16 argument: 16-bit value in RDI (zero-extended) - /// - Float16 return: 16-bit value in AX - /// - Other float types: standard SSE ABI (XMM0) - /// - Integer types: standard integer ABI - fn emit_float16_convert_call( - &mut self, - func_name: &str, - src: PseudoId, - src_type: TypeId, - dst_type: TypeId, - ) -> PseudoId { - let result = self.alloc_pseudo(); - let dst_size = self.types.size_bits(dst_type); - let src_kind = self.types.kind(src_type); - let dst_kind = self.types.kind(dst_type); - - // Query rtlib for Float16 ABI - this is an rtlib attribute - let rtlib = RtlibNames::new(self.target); - let f16_abi = rtlib.float16_abi(); - - // For argument type, use u16 if src is Float16 with integer ABI (compiler-rt) - let arg_type_for_abi = if f16_abi == Float16Abi::Integer && src_kind == TypeKind::Float16 { - self.types.ushort_id - } else { - src_type - }; - - let arg_vals = vec![src]; - let arg_types = vec![arg_type_for_abi]; - - // Compute ABI classification based on rtlib requirements - let param_classes = if f16_abi == Float16Abi::Integer && src_kind == 
TypeKind::Float16 { - // compiler-rt: Float16 passed as 16-bit integer (zero-extended) - vec![ArgClass::Extend { - signed: false, - size_bits: 16, - }] - } else { - // libgcc or non-Float16: use standard ABI classification - let abi = get_abi_for_conv(self.current_calling_conv, self.target); - arg_types - .iter() - .map(|&t| abi.classify_param(t, self.types)) - .collect() - }; - - let ret_class = if f16_abi == Float16Abi::Integer && dst_kind == TypeKind::Float16 { - // compiler-rt: Float16 returned as 16-bit integer - ArgClass::Extend { - signed: false, - size_bits: 16, - } - } else { - // libgcc or non-Float16: use standard ABI classification - let abi = get_abi_for_conv(self.current_calling_conv, self.target); - abi.classify_return(dst_type, self.types) - }; - - let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); - - let mut call_insn = Instruction::call( - Some(result), - func_name, - arg_vals, - arg_types, - dst_type, - dst_size, - ); - call_insn.abi_info = Some(call_abi_info); - self.emit(call_insn); - - result - } - - /// Emit a call to a long double negation rtlib function (__negxf2, __negtf2). 
- fn emit_longdouble_neg_call( - &mut self, - func_name: &str, - src: PseudoId, - longdouble_typ: TypeId, - ) -> PseudoId { - let result = self.alloc_pseudo(); - let size = self.types.size_bits(longdouble_typ); - - let arg_vals = vec![src]; - let arg_types = vec![longdouble_typ]; - - // Compute ABI classification for the call - let abi = get_abi_for_conv(self.current_calling_conv, self.target); - let param_classes: Vec<_> = arg_types - .iter() - .map(|&t| abi.classify_param(t, self.types)) - .collect(); - let ret_class = abi.classify_return(longdouble_typ, self.types); - let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); - - let mut call_insn = Instruction::call( - Some(result), - func_name, - arg_vals, - arg_types, - longdouble_typ, - size, - ); - call_insn.abi_info = Some(call_abi_info); - self.emit(call_insn); - - result - } - fn emit_compare_zero(&mut self, val: PseudoId, operand_typ: TypeId) -> PseudoId { let result = self.alloc_pseudo(); let zero = self.emit_const(0, operand_typ); @@ -9076,40 +8285,11 @@ impl<'a> Linearizer<'a> { target_typ }; - // Check for Float16 soft-float on x86-64 - let needs_float16_softfloat = self.types.kind(target_typ) == TypeKind::Float16 - && RtlibNames::new(self.target).float16_needs_softfloat(); - - if needs_float16_softfloat { - // Use promote-operate-truncate pattern for Float16 - match op { - AssignOp::AddAssign => { - self.emit_float16_arith_via_float(BinaryOp::Add, lhs, rhs) - } - AssignOp::SubAssign => { - self.emit_float16_arith_via_float(BinaryOp::Sub, lhs, rhs) - } - AssignOp::MulAssign => { - self.emit_float16_arith_via_float(BinaryOp::Mul, lhs, rhs) - } - AssignOp::DivAssign => { - self.emit_float16_arith_via_float(BinaryOp::Div, lhs, rhs) - } - _ => { - let arith_size = self.types.size_bits(arith_type); - self.emit(Instruction::binop( - opcode, result, lhs, rhs, arith_type, arith_size, - )); - result - } - } - } else { - let arith_size = self.types.size_bits(arith_type); - 
self.emit(Instruction::binop( - opcode, result, lhs, rhs, arith_type, arith_size, - )); - result - } + let arith_size = self.types.size_bits(arith_type); + self.emit(Instruction::binop( + opcode, result, lhs, rhs, arith_type, arith_size, + )); + result } }; diff --git a/cc/ir/lower.rs b/cc/ir/lower.rs index fefc8e1a..d9a006b7 100644 --- a/cc/ir/lower.rs +++ b/cc/ir/lower.rs @@ -202,10 +202,7 @@ fn sequentialize_copies(copies: &[CopyInfo], func: &mut Function) -> Vec> 64 } impl Opcode { @@ -383,6 +394,14 @@ impl Opcode { Opcode::AtomicFetchOr => "atomic_fetch_or", Opcode::AtomicFetchXor => "atomic_fetch_xor", Opcode::Fence => "fence", + Opcode::Lo64 => "lo64", + Opcode::Hi64 => "hi64", + Opcode::Pair64 => "pair64", + Opcode::AddC => "addc", + Opcode::AdcC => "adcc", + Opcode::SubC => "subc", + Opcode::SbcC => "sbcc", + Opcode::UMulHi => "umulhi", } } } @@ -924,6 +943,37 @@ impl Instruction { insn } + /// Create a call instruction with ABI classification. + /// + /// This is the canonical way for IR passes to synthesize call instructions + /// (e.g., runtime library calls). It classifies parameters and return value + /// using the given calling convention, attaches `CallAbiInfo`, and returns + /// a ready-to-emit instruction. 
+ #[allow(clippy::too_many_arguments)] + pub fn call_with_abi( + target: Option, + func_name: &str, + args: Vec, + arg_types: Vec, + ret_type: TypeId, + conv: CallingConv, + types: &TypeTable, + target_info: &Target, + ) -> Self { + let ret_size = types.size_bits(ret_type); + let abi = get_abi_for_conv(conv, target_info); + let param_classes: Vec<_> = arg_types + .iter() + .map(|&t| abi.classify_param(t, types)) + .collect(); + let ret_class = abi.classify_return(ret_type, types); + let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); + + let mut insn = Self::call(target, func_name, args, arg_types, ret_type, ret_size); + insn.abi_info = Some(call_abi_info); + insn + } + /// Create an indirect call instruction (call through function pointer) pub fn call_indirect( target: Option, @@ -1024,8 +1074,23 @@ impl fmt::Display for Instruction { write!(f, "{}", self.op.name())?; - // Size suffix - if self.size > 0 { + // Size suffix (for conversions, show src_size→size) + if self.src_size > 0 + && self.src_size != self.size + && matches!( + self.op, + Opcode::Sext + | Opcode::Zext + | Opcode::Trunc + | Opcode::FCvtS + | Opcode::FCvtU + | Opcode::SCvtF + | Opcode::UCvtF + | Opcode::FCvtF + ) + { + write!(f, ".{}to{}", self.src_size, self.size)?; + } else if self.size > 0 { write!(f, ".{}", self.size)?; } @@ -1440,8 +1505,16 @@ impl Function { id } - /// Create a new constant integer pseudo and return its ID - /// The pseudo is added to self.pseudos + /// Create a new register pseudo and return its ID. + /// The pseudo is added to self.pseudos. + pub fn create_reg_pseudo(&mut self) -> PseudoId { + let id = self.alloc_pseudo(); + self.add_pseudo(Pseudo::reg(id, id.0)); + id + } + + /// Create a new constant integer pseudo and return its ID. + /// The pseudo is added to self.pseudos. 
pub fn create_const_pseudo(&mut self, value: i128) -> PseudoId { let id = self.alloc_pseudo(); let pseudo = Pseudo::val(id, value); @@ -2275,4 +2348,104 @@ mod tests { ))); assert!(insn.returns_two_regs()); } + + // ======================================================================== + // Function::create_reg_pseudo + // ======================================================================== + + #[test] + fn test_create_reg_pseudo() { + let types = TypeTable::new(&Target::host()); + let mut func = Function::new("test", types.int_id); + func.next_pseudo = 10; + + let id1 = func.create_reg_pseudo(); + assert_eq!(id1, PseudoId(10)); + assert_eq!(func.next_pseudo, 11); + + // Verify the pseudo was registered + let pseudo = func.get_pseudo(id1).expect("pseudo must exist"); + assert!(matches!(pseudo.kind, PseudoKind::Reg(_))); + + let id2 = func.create_reg_pseudo(); + assert_eq!(id2, PseudoId(11)); + assert_eq!(func.next_pseudo, 12); + + // IDs must be distinct + assert_ne!(id1, id2); + } + + // ======================================================================== + // Instruction::call_with_abi + // ======================================================================== + + #[test] + fn test_call_with_abi_basic() { + let target = Target::host(); + let types = TypeTable::new(&target); + + let insn = Instruction::call_with_abi( + Some(PseudoId(2)), + "__divti3", + vec![PseudoId(0), PseudoId(1)], + vec![types.int128_id, types.int128_id], + types.int128_id, + CallingConv::C, + &types, + &target, + ); + + assert_eq!(insn.op, Opcode::Call); + assert_eq!(insn.target, Some(PseudoId(2))); + assert_eq!(insn.func_name.as_deref(), Some("__divti3")); + assert_eq!(insn.src.len(), 2); + assert_eq!(insn.arg_types.len(), 2); + assert!(insn.abi_info.is_some()); + + let abi = insn.abi_info.as_ref().unwrap(); + assert_eq!(abi.params.len(), 2); + } + + #[test] + fn test_call_with_abi_conversion() { + let target = Target::host(); + let types = TypeTable::new(&target); + + // float → 
signed int128 (__fixsfti) + let insn = Instruction::call_with_abi( + Some(PseudoId(1)), + "__fixsfti", + vec![PseudoId(0)], + vec![types.float_id], + types.int128_id, + CallingConv::C, + &types, + &target, + ); + + assert_eq!(insn.op, Opcode::Call); + assert_eq!(insn.func_name.as_deref(), Some("__fixsfti")); + assert!(insn.abi_info.is_some()); + assert_eq!(insn.abi_info.as_ref().unwrap().params.len(), 1); + } + + #[test] + fn test_call_with_abi_sets_size() { + let target = Target::host(); + let types = TypeTable::new(&target); + + let insn = Instruction::call_with_abi( + Some(PseudoId(1)), + "__addtf3", + vec![PseudoId(0)], + vec![types.double_id], + types.double_id, + CallingConv::C, + &types, + &target, + ); + + // Size should be set from ret_type + assert_eq!(insn.size, types.size_bits(types.double_id)); + } } diff --git a/cc/ir/test_linearize.rs b/cc/ir/test_linearize.rs index 710ee9e3..52ab2e6f 100644 --- a/cc/ir/test_linearize.rs +++ b/cc/ir/test_linearize.rs @@ -3789,16 +3789,19 @@ fn test_float16_to_float_conversion() { }; let module = ctx.linearize(&tu); - // Find call to __extendhfsf2 + // Float16→float now emits FCvtF with src_typ=Float16 (mapping pass lowers to rtlib) let func = &module.functions[0]; - let has_rtlib_call = func.blocks.iter().any(|bb| { + let has_fcvtf = func.blocks.iter().any(|bb| { bb.insns.iter().any(|insn| { - insn.op == Opcode::Call && insn.func_name.as_deref() == Some("__extendhfsf2") + insn.op == Opcode::FCvtF + && insn + .src_typ + .is_some_and(|t| ctx.types.kind(t) == TypeKind::Float16) }) }); assert!( - has_rtlib_call, - "Float16 to float conversion should call __extendhfsf2" + has_fcvtf, + "Float16 to float conversion should emit FCvtF with Float16 src_typ" ); } @@ -3838,16 +3841,19 @@ fn test_float_to_float16_conversion() { }; let module = ctx.linearize(&tu); - // Find call to __truncsfhf2 + // float→Float16 now emits FCvtF with Float16 dst type (mapping pass lowers to rtlib) let func = &module.functions[0]; - let 
has_rtlib_call = func.blocks.iter().any(|bb| { + let has_fcvtf = func.blocks.iter().any(|bb| { bb.insns.iter().any(|insn| { - insn.op == Opcode::Call && insn.func_name.as_deref() == Some("__truncsfhf2") + insn.op == Opcode::FCvtF + && insn + .typ + .is_some_and(|t| ctx.types.kind(t) == TypeKind::Float16) }) }); assert!( - has_rtlib_call, - "Float to Float16 conversion should call __truncsfhf2" + has_fcvtf, + "Float to Float16 conversion should emit FCvtF with Float16 dst type" ); } @@ -3887,16 +3893,19 @@ fn test_float16_to_int_conversion() { }; let module = ctx.linearize(&tu); - // Find call to __fixhfsi + // Float16→int now emits FCvtS with Float16 src_typ (mapping pass lowers to rtlib) let func = &module.functions[0]; - let has_rtlib_call = func.blocks.iter().any(|bb| { - bb.insns - .iter() - .any(|insn| insn.op == Opcode::Call && insn.func_name.as_deref() == Some("__fixhfsi")) + let has_fcvts = func.blocks.iter().any(|bb| { + bb.insns.iter().any(|insn| { + insn.op == Opcode::FCvtS + && insn + .src_typ + .is_some_and(|t| ctx.types.kind(t) == TypeKind::Float16) + }) }); assert!( - has_rtlib_call, - "Float16 to int conversion should call __fixhfsi" + has_fcvts, + "Float16 to int conversion should emit FCvtS with Float16 src_typ" ); } @@ -3936,16 +3945,19 @@ fn test_int_to_float16_conversion() { }; let module = ctx.linearize(&tu); - // Find call to __floatsihf + // int→Float16 now emits SCvtF with Float16 dst type (mapping pass lowers to rtlib) let func = &module.functions[0]; - let has_rtlib_call = func.blocks.iter().any(|bb| { - bb.insns - .iter() - .any(|insn| insn.op == Opcode::Call && insn.func_name.as_deref() == Some("__floatsihf")) + let has_scvtf = func.blocks.iter().any(|bb| { + bb.insns.iter().any(|insn| { + insn.op == Opcode::SCvtF + && insn + .typ + .is_some_and(|t| ctx.types.kind(t) == TypeKind::Float16) + }) }); assert!( - has_rtlib_call, - "Int to Float16 conversion should call __floatsihf" + has_scvtf, + "Int to Float16 conversion should emit SCvtF 
with Float16 dst type" ); } diff --git a/cc/kw.rs b/cc/kw.rs new file mode 100644 index 00000000..a656f4ff --- /dev/null +++ b/cc/kw.rs @@ -0,0 +1,659 @@ +// +// Copyright (c) 2025-2026 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Pre-interned keyword system for pcc C99 compiler +// +// All well-known strings (C keywords, builtins, attribute names, preprocessor +// directives) are pre-interned at StringTable creation time. Each gets a +// deterministic StringId and a u32 tag bitmask for O(1) set-membership queries. +// +// This eliminates string comparisons in hot paths (is_declaration_start, +// parse_type_specifiers, parse_statement, is_builtin, etc.) by replacing them +// with integer comparisons. +// + +use crate::strings::StringId; + +// ============================================================================ +// Tag bit constants (u32, 14 of 32 used) +// ============================================================================ + +pub const TYPE_SPEC: u32 = 1 << 0; +pub const STORAGE: u32 = 1 << 1; +pub const QUALIFIER: u32 = 1 << 2; +pub const INLINE_KW: u32 = 1 << 3; +pub const NORETURN_KW: u32 = 1 << 4; +pub const ATTR_KW: u32 = 1 << 5; +pub const ASM_KW: u32 = 1 << 6; +pub const ASSERT_KW: u32 = 1 << 7; +pub const NULLABILITY: u32 = 1 << 8; +pub const STMT_KW: u32 = 1 << 9; +pub const BUILTIN: u32 = 1 << 10; +pub const SUPPORTED_ATTR: u32 = 1 << 11; +pub const ALIGNAS_KW: u32 = 1 << 12; +pub const TYPE_KEYWORD: u32 = 1 << 13; + +/// Composite: all tags that start a declaration +pub const DECL_START: u32 = + TYPE_SPEC | STORAGE | QUALIFIER | INLINE_KW | NORETURN_KW | ATTR_KW | ASSERT_KW | ALIGNAS_KW; + +// ============================================================================ +// Keyword definition macros +// 
============================================================================ + +/// Helper macro: recursive counter that assigns sequential StringId values starting from 1. +/// Entries named `_` are anonymous — they get interned and tagged but no `pub const` is emitted. +macro_rules! define_ids { + // Base case: no more entries + ($counter:expr; ) => {}; + // Anonymous entry (name is `_`): skip const, just recurse + ($counter:expr; (_, $str:literal, $tags:expr) $(, ($name_rest:tt, $str_rest:literal, $tags_rest:expr))* $(,)? ) => { + define_ids!($counter + 1; $(($name_rest, $str_rest, $tags_rest)),*); + }; + // Named entry: emit pub const, then recurse + ($counter:expr; ($name:ident, $str:literal, $tags:expr) $(, ($name_rest:tt, $str_rest:literal, $tags_rest:expr))* $(,)? ) => { + pub const $name: StringId = StringId($counter); + define_ids!($counter + 1; $(($name_rest, $str_rest, $tags_rest)),*); + }; +} + +/// Main keyword definition macro. Generates: +/// - KEYWORD_COUNT: total number of keywords +/// - One `pub const NAME: StringId` per named keyword (entries with `_` are anonymous) +/// - KEYWORD_STRINGS: array of string literals (all entries) +/// - KEYWORD_TAGS: array of tag bitmasks (all entries) +macro_rules! define_keywords { + ( $( ($name:tt, $str:literal, $tags:expr) ),* $(,)? 
) => { + pub const KEYWORD_COUNT: usize = [ $( $str ),* ].len(); + define_ids!(1u32; $( ($name, $str, $tags) ),* ); + pub(crate) const KEYWORD_STRINGS: [&str; KEYWORD_COUNT] = [ $( $str ),* ]; + pub(crate) const KEYWORD_TAGS: [u32; KEYWORD_COUNT] = [ $( $tags ),* ]; + }; +} + +// ============================================================================ +// Keyword table — single source of truth +// ============================================================================ +// +// Naming convention: +// FOO — standard C keyword: const, inline, _Noreturn +// GNU_FOO — __foo__ (double-underscore-wrapped GNU spelling) +// GNU_FOO2 — __foo (leading-underscore-only GNU spelling) +// FOO_C23 — C23 spelling: static_assert +// BUILTIN_* — __builtin_* compiler builtins +// C11_ATOMIC_* — __c11_atomic_* builtins +// ATTR_* — attribute names (plain) +// GNU_ATTR_* — attribute names (__foo__ form) +// PP_* — preprocessor directives that conflict with Rust keywords + +define_keywords! { + // ---- Type specifiers (TYPE_SPEC) ---- + (VOID, "void", TYPE_SPEC | TYPE_KEYWORD), + (CHAR, "char", TYPE_SPEC | TYPE_KEYWORD), + (SHORT, "short", TYPE_SPEC | TYPE_KEYWORD), + (INT, "int", TYPE_SPEC | TYPE_KEYWORD), + (LONG, "long", TYPE_SPEC | TYPE_KEYWORD), + (FLOAT, "float", TYPE_SPEC | TYPE_KEYWORD), + (DOUBLE, "double", TYPE_SPEC | TYPE_KEYWORD), + (SIGNED, "signed", TYPE_SPEC | TYPE_KEYWORD), + (UNSIGNED, "unsigned", TYPE_SPEC | TYPE_KEYWORD), + (BOOL, "_Bool", TYPE_SPEC | TYPE_KEYWORD), + (COMPLEX, "_Complex", TYPE_SPEC | TYPE_KEYWORD), + (FLOAT16, "_Float16", TYPE_SPEC | TYPE_KEYWORD), + (FLOAT32, "_Float32", TYPE_SPEC | TYPE_KEYWORD), + (FLOAT64, "_Float64", TYPE_SPEC | TYPE_KEYWORD), + (INT128, "__int128", TYPE_SPEC | TYPE_KEYWORD), + (INT128_T, "__int128_t", TYPE_SPEC | TYPE_KEYWORD), + (UINT128_T, "__uint128_t", TYPE_SPEC | TYPE_KEYWORD), + (BUILTIN_VA_LIST, "__builtin_va_list", TYPE_SPEC | TYPE_KEYWORD | BUILTIN), + (STRUCT, "struct", TYPE_SPEC | TYPE_KEYWORD), + (UNION, 
"union", TYPE_SPEC | TYPE_KEYWORD), + (ENUM, "enum", TYPE_SPEC | TYPE_KEYWORD), + (TYPEOF, "typeof", TYPE_SPEC | TYPE_KEYWORD), + (GNU_TYPEOF, "__typeof__", TYPE_SPEC | TYPE_KEYWORD), + (GNU_TYPEOF2, "__typeof", TYPE_SPEC | TYPE_KEYWORD), + (ATOMIC, "_Atomic", TYPE_SPEC | QUALIFIER | TYPE_KEYWORD), + + // ---- Storage class (STORAGE) ---- + (STATIC, "static", STORAGE), + (EXTERN, "extern", STORAGE), + (AUTO, "auto", STORAGE), + (REGISTER, "register", STORAGE), + (TYPEDEF, "typedef", STORAGE), + (THREAD_LOCAL, "_Thread_local", STORAGE), + (GNU_THREAD, "__thread", STORAGE), + + // ---- Type qualifiers (QUALIFIER) ---- + (CONST, "const", QUALIFIER | TYPE_KEYWORD), + (VOLATILE, "volatile", QUALIFIER | TYPE_KEYWORD), + (RESTRICT, "restrict", QUALIFIER), + (GNU_CONST, "__const__", QUALIFIER), + (GNU_CONST2, "__const", QUALIFIER), + (GNU_VOLATILE, "__volatile__", QUALIFIER), + (GNU_VOLATILE2, "__volatile", QUALIFIER), + (GNU_RESTRICT, "__restrict__", QUALIFIER), + (GNU_RESTRICT2, "__restrict", QUALIFIER), + + // ---- Inline (INLINE_KW) ---- + (INLINE, "inline", INLINE_KW), + (GNU_INLINE, "__inline__", INLINE_KW), + (GNU_INLINE2, "__inline", INLINE_KW), + + // ---- Noreturn (NORETURN_KW) ---- + (NORETURN, "_Noreturn", NORETURN_KW), + (GNU_NORETURN, "__noreturn__", NORETURN_KW | SUPPORTED_ATTR), + + // ---- Attribute keyword (ATTR_KW) ---- + (GNU_ATTRIBUTE, "__attribute__", ATTR_KW), + (GNU_ATTRIBUTE2, "__attribute", ATTR_KW), + + // ---- Asm keyword (ASM_KW) ---- + (ASM, "asm", ASM_KW), + (GNU_ASM, "__asm__", ASM_KW), + (GNU_ASM2, "__asm", ASM_KW), + + // ---- Static assert (ASSERT_KW) ---- + (_, "_Static_assert", ASSERT_KW), + (_, "static_assert", ASSERT_KW), + + // ---- Alignas (ALIGNAS_KW) ---- + (ALIGNAS, "_Alignas", ALIGNAS_KW), + + // ---- Nullability qualifiers (NULLABILITY) ---- + (_, "_Nonnull", NULLABILITY), + (_, "__nonnull", NULLABILITY), + (_, "_Nullable", NULLABILITY), + (_, "__nullable", NULLABILITY), + (_, "_Null_unspecified", NULLABILITY), + (_, 
"__null_unspecified", NULLABILITY), + + // ---- Statement keywords (STMT_KW) ---- + (IF, "if", STMT_KW), + (ELSE, "else", STMT_KW), + (WHILE, "while", STMT_KW), + (DO, "do", STMT_KW), + (FOR, "for", STMT_KW), + (RETURN, "return", STMT_KW), + (BREAK, "break", STMT_KW), + (CONTINUE, "continue", STMT_KW), + (GOTO, "goto", STMT_KW), + (SWITCH, "switch", STMT_KW), + (CASE, "case", STMT_KW), + (DEFAULT, "default", STMT_KW), + + // ---- Sizeof / Alignof ---- + (SIZEOF, "sizeof", 0), + (ALIGNOF, "_Alignof", 0), + (GNU_ALIGNOF, "__alignof__", 0), + (GNU_ALIGNOF2, "__alignof", 0), + (ALIGNOF_C23, "alignof", 0), + + // ---- Wide char prefix ---- + (_, "L", 0), + + // ---- Preprocessor directives ---- + (DEFINE, "define", 0), + (UNDEF, "undef", 0), + (IFDEF, "ifdef", 0), + (IFNDEF, "ifndef", 0), + (ELIF, "elif", 0), + (ENDIF, "endif", 0), + (INCLUDE, "include", 0), + (INCLUDE_NEXT, "include_next", 0), + (PP_ERROR, "error", 0), + (WARNING, "warning", 0), + (PRAGMA, "pragma", 0), + (LINE, "line", 0), + + // ---- Preprocessor special names ---- + (_, "defined", 0), + (_, "__VA_ARGS__", 0), + (_, "once", 0), + + // ---- Predefined identifiers ---- + (FUNC, "__func__", 0), + (FUNCTION, "__FUNCTION__", 0), + (PRETTY_FUNCTION, "__PRETTY_FUNCTION__", 0), + + // ---- Builtins (BUILTIN) ---- + (BUILTIN_VA_START, "__builtin_va_start", BUILTIN), + (BUILTIN_VA_END, "__builtin_va_end", BUILTIN), + (BUILTIN_VA_ARG, "__builtin_va_arg", BUILTIN), + (BUILTIN_VA_COPY, "__builtin_va_copy", BUILTIN), + (BUILTIN_BSWAP16, "__builtin_bswap16", BUILTIN), + (BUILTIN_BSWAP32, "__builtin_bswap32", BUILTIN), + (BUILTIN_BSWAP64, "__builtin_bswap64", BUILTIN), + (BUILTIN_CTZ, "__builtin_ctz", BUILTIN), + (BUILTIN_CTZL, "__builtin_ctzl", BUILTIN), + (BUILTIN_CTZLL, "__builtin_ctzll", BUILTIN), + (BUILTIN_CLZ, "__builtin_clz", BUILTIN), + (BUILTIN_CLZL, "__builtin_clzl", BUILTIN), + (BUILTIN_CLZLL, "__builtin_clzll", BUILTIN), + (BUILTIN_POPCOUNT, "__builtin_popcount", BUILTIN), + (BUILTIN_POPCOUNTL, 
"__builtin_popcountl", BUILTIN), + (BUILTIN_POPCOUNTLL, "__builtin_popcountll", BUILTIN), + (BUILTIN_ALLOCA, "__builtin_alloca", BUILTIN), + (BUILTIN_MEMSET, "__builtin_memset", BUILTIN), + (BUILTIN_MEMCPY, "__builtin_memcpy", BUILTIN), + (BUILTIN_MEMMOVE, "__builtin_memmove", BUILTIN), + (BUILTIN_CONSTANT_P, "__builtin_constant_p", BUILTIN), + (BUILTIN_TYPES_COMPATIBLE_P, "__builtin_types_compatible_p", BUILTIN), + (BUILTIN_UNREACHABLE, "__builtin_unreachable", BUILTIN), + (BUILTIN_OFFSETOF, "__builtin_offsetof", BUILTIN), + (OFFSETOF, "offsetof", BUILTIN), + (BUILTIN_INF, "__builtin_inf", BUILTIN), + (BUILTIN_INFF, "__builtin_inff", BUILTIN), + (BUILTIN_INFL, "__builtin_infl", BUILTIN), + (BUILTIN_HUGE_VAL, "__builtin_huge_val", BUILTIN), + (BUILTIN_HUGE_VALF, "__builtin_huge_valf", BUILTIN), + (BUILTIN_HUGE_VALL, "__builtin_huge_vall", BUILTIN), + (BUILTIN_FABS, "__builtin_fabs", BUILTIN), + (BUILTIN_FABSF, "__builtin_fabsf", BUILTIN), + (BUILTIN_FABSL, "__builtin_fabsl", BUILTIN), + (BUILTIN_SIGNBIT, "__builtin_signbit", BUILTIN), + (BUILTIN_SIGNBITF, "__builtin_signbitf", BUILTIN), + (BUILTIN_SIGNBITL, "__builtin_signbitl", BUILTIN), + (BUILTIN_NAN, "__builtin_nan", BUILTIN), + (BUILTIN_NANF, "__builtin_nanf", BUILTIN), + (BUILTIN_NANL, "__builtin_nanl", BUILTIN), + (BUILTIN_NANS, "__builtin_nans", BUILTIN), + (BUILTIN_NANSF, "__builtin_nansf", BUILTIN), + (BUILTIN_NANSL, "__builtin_nansl", BUILTIN), + (BUILTIN_EXPECT, "__builtin_expect", BUILTIN), + (BUILTIN_ASSUME_ALIGNED, "__builtin_assume_aligned", BUILTIN), + (BUILTIN_PREFETCH, "__builtin_prefetch", BUILTIN), + (BUILTIN_FLT_ROUNDS, "__builtin_flt_rounds", BUILTIN), + (BUILTIN_FRAME_ADDRESS, "__builtin_frame_address", BUILTIN), + (BUILTIN_RETURN_ADDRESS, "__builtin_return_address", BUILTIN), + (BUILTIN_OBJECT_SIZE, "__builtin_object_size", BUILTIN), + (_, "__builtin___snprintf_chk", BUILTIN), + (_, "__builtin___vsnprintf_chk", BUILTIN), + (_, "__builtin___sprintf_chk", BUILTIN), + (_, 
"__builtin___fprintf_chk", BUILTIN), + (_, "__builtin___printf_chk", BUILTIN), + (_, "__builtin___memcpy_chk", BUILTIN), + (_, "__builtin___memmove_chk", BUILTIN), + (_, "__builtin___memset_chk", BUILTIN), + (_, "__builtin___stpcpy_chk", BUILTIN), + (_, "__builtin___strcat_chk", BUILTIN), + (_, "__builtin___strcpy_chk", BUILTIN), + (_, "__builtin___strncat_chk", BUILTIN), + (_, "__builtin___strncpy_chk", BUILTIN), + + // ---- C11 atomic builtins (BUILTIN) ---- + (C11_ATOMIC_INIT, "__c11_atomic_init", BUILTIN), + (C11_ATOMIC_LOAD, "__c11_atomic_load", BUILTIN), + (C11_ATOMIC_STORE, "__c11_atomic_store", BUILTIN), + (C11_ATOMIC_EXCHANGE, "__c11_atomic_exchange", BUILTIN), + (C11_ATOMIC_COMPARE_EXCHANGE_STRONG, "__c11_atomic_compare_exchange_strong", BUILTIN), + (C11_ATOMIC_COMPARE_EXCHANGE_WEAK, "__c11_atomic_compare_exchange_weak", BUILTIN), + (C11_ATOMIC_FETCH_ADD, "__c11_atomic_fetch_add", BUILTIN), + (C11_ATOMIC_FETCH_SUB, "__c11_atomic_fetch_sub", BUILTIN), + (C11_ATOMIC_FETCH_AND, "__c11_atomic_fetch_and", BUILTIN), + (C11_ATOMIC_FETCH_OR, "__c11_atomic_fetch_or", BUILTIN), + (C11_ATOMIC_FETCH_XOR, "__c11_atomic_fetch_xor", BUILTIN), + (C11_ATOMIC_THREAD_FENCE, "__c11_atomic_thread_fence", BUILTIN), + (C11_ATOMIC_SIGNAL_FENCE, "__c11_atomic_signal_fence", BUILTIN), + + // ---- setjmp/longjmp (special-cased in parser, not true builtins) ---- + (SETJMP, "setjmp", 0), + (SETJMP2, "_setjmp", 0), + (LONGJMP, "longjmp", 0), + (LONGJMP2, "_longjmp", 0), + + // ---- Supported attribute names (SUPPORTED_ATTR) ---- + // Plain forms + (_, "noreturn", SUPPORTED_ATTR), + (_, "unused", SUPPORTED_ATTR), + (_, "aligned", SUPPORTED_ATTR), + (_, "packed", SUPPORTED_ATTR), + (_, "deprecated", SUPPORTED_ATTR), + (_, "weak", SUPPORTED_ATTR), + (_, "section", SUPPORTED_ATTR), + (_, "visibility", SUPPORTED_ATTR), + (_, "constructor", SUPPORTED_ATTR), + (_, "destructor", SUPPORTED_ATTR), + (_, "used", SUPPORTED_ATTR), + (_, "noinline", SUPPORTED_ATTR), + (_, "always_inline", 
SUPPORTED_ATTR), + (_, "hot", SUPPORTED_ATTR), + (_, "cold", SUPPORTED_ATTR), + (_, "warn_unused_result", SUPPORTED_ATTR), + (_, "format", SUPPORTED_ATTR), + (_, "fallthrough", SUPPORTED_ATTR), + (_, "nonstring", SUPPORTED_ATTR), + (_, "malloc", SUPPORTED_ATTR), + (_, "pure", SUPPORTED_ATTR), + (_, "sentinel", SUPPORTED_ATTR), + (_, "no_sanitize_memory", SUPPORTED_ATTR), + (_, "no_sanitize_address", SUPPORTED_ATTR), + (_, "no_sanitize_thread", SUPPORTED_ATTR), + // GNU forms (__foo__) + // Note: __noreturn__ is already defined above with NORETURN_KW | SUPPORTED_ATTR + (_, "__unused__", SUPPORTED_ATTR), + (_, "__aligned__", SUPPORTED_ATTR), + (_, "__packed__", SUPPORTED_ATTR), + (_, "__deprecated__", SUPPORTED_ATTR), + (_, "__weak__", SUPPORTED_ATTR), + (_, "__section__", SUPPORTED_ATTR), + (_, "__visibility__", SUPPORTED_ATTR), + (_, "__constructor__", SUPPORTED_ATTR), + (_, "__destructor__", SUPPORTED_ATTR), + (_, "__used__", SUPPORTED_ATTR), + (_, "__noinline__", SUPPORTED_ATTR), + (_, "__always_inline__", SUPPORTED_ATTR), + (_, "__hot__", SUPPORTED_ATTR), + (_, "__cold__", SUPPORTED_ATTR), + (_, "__warn_unused_result__", SUPPORTED_ATTR), + (_, "__format__", SUPPORTED_ATTR), + (_, "__fallthrough__", SUPPORTED_ATTR), + (_, "__nonstring__", SUPPORTED_ATTR), + (_, "__malloc__", SUPPORTED_ATTR), + (_, "__pure__", SUPPORTED_ATTR), + (_, "__sentinel__", SUPPORTED_ATTR), +} + +// ============================================================================ +// Tag query API +// ============================================================================ + +/// Check if a StringId has any of the given tag bits set. +/// Returns false for non-keyword IDs (dynamic strings interned after keywords). 
+#[inline] +pub fn has_tag(id: StringId, mask: u32) -> bool { + let idx = id.0 as usize; + idx > 0 && idx <= KEYWORD_COUNT && KEYWORD_TAGS[idx - 1] & mask != 0 +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::strings::StringTable; + use std::collections::HashSet; + + /// Look up a pre-interned keyword by string, panicking if not found. + fn id(table: &StringTable, s: &str) -> StringId { + table + .lookup(s) + .unwrap_or_else(|| panic!("keyword '{}' not interned", s)) + } + + #[test] + fn test_keyword_ids_deterministic() { + let table = StringTable::new(); + assert_eq!(table.get(VOID), "void"); + assert_eq!(table.get(CHAR), "char"); + assert_eq!(table.get(INT), "int"); + assert_eq!(table.get(STATIC), "static"); + assert_eq!(table.get(CONST), "const"); + assert_eq!(table.get(INLINE), "inline"); + assert_eq!(table.get(IF), "if"); + assert_eq!(table.get(RETURN), "return"); + assert_eq!(table.get(BUILTIN_VA_START), "__builtin_va_start"); + assert_eq!(table.get(C11_ATOMIC_LOAD), "__c11_atomic_load"); + // Anonymous entries verified via lookup + assert!(table.lookup("noreturn").is_some()); + assert!(table.lookup("__packed__").is_some()); + } + + #[test] + fn test_no_duplicate_strings() { + let mut seen = HashSet::new(); + for (i, &s) in KEYWORD_STRINGS.iter().enumerate() { + assert!( + seen.insert(s), + "duplicate keyword string '{}' at index {}", + s, + i + ); + } + } + + #[test] + fn test_tags_type_spec() { + for &s in &[ + "void", + "char", + "short", + "int", + "long", + "float", + "double", + "signed", + "unsigned", + "_Bool", + "_Complex", + "_Float16", + "_Float32", + "_Float64", + "__int128", + "__int128_t", + "__uint128_t", + "__builtin_va_list", + "struct", + "union", + "enum", + "typeof", + "__typeof__", + "__typeof", + "_Atomic", + ] { + let table = StringTable::new(); + let 
sid = id(&table, s); + assert!(has_tag(sid, TYPE_SPEC), "'{}' should have TYPE_SPEC", s); + } + } + + #[test] + fn test_tags_qualifier() { + for &s in &[ + "const", + "volatile", + "restrict", + "_Atomic", + "__const__", + "__const", + "__volatile__", + "__volatile", + "__restrict__", + "__restrict", + ] { + let table = StringTable::new(); + let sid = id(&table, s); + assert!(has_tag(sid, QUALIFIER), "'{}' should have QUALIFIER", s); + } + } + + #[test] + fn test_tags_type_keyword() { + for &s in &[ + "void", + "_Bool", + "_Complex", + "_Atomic", + "char", + "short", + "int", + "long", + "float", + "double", + "_Float16", + "_Float32", + "_Float64", + "signed", + "unsigned", + "const", + "volatile", + "struct", + "union", + "enum", + "__int128", + "__int128_t", + "__uint128_t", + "__builtin_va_list", + "typeof", + "__typeof__", + "__typeof", + ] { + let table = StringTable::new(); + let sid = id(&table, s); + assert!( + has_tag(sid, TYPE_KEYWORD), + "'{}' should have TYPE_KEYWORD", + s + ); + } + } + + #[test] + fn test_tags_decl_start() { + for &s in &[ + "void", + "char", + "short", + "int", + "long", + "float", + "double", + "_Float16", + "_Float32", + "_Float64", + "_Complex", + "_Atomic", + "_Alignas", + "signed", + "unsigned", + "const", + "volatile", + "static", + "extern", + "auto", + "register", + "typedef", + "inline", + "__inline", + "__inline__", + "_Noreturn", + "__noreturn__", + "struct", + "union", + "enum", + "_Bool", + "__attribute__", + "__attribute", + "__int128", + "__int128_t", + "__uint128_t", + "__builtin_va_list", + "typeof", + "__typeof__", + "__typeof", + "_Thread_local", + "__thread", + "_Static_assert", + "static_assert", + ] { + let table = StringTable::new(); + let sid = id(&table, s); + assert!(has_tag(sid, DECL_START), "'{}' should have DECL_START", s); + } + } + + #[test] + fn test_tags_nullability() { + for &s in &[ + "_Nonnull", + "__nonnull", + "_Nullable", + "__nullable", + "_Null_unspecified", + "__null_unspecified", + ] { + 
let table = StringTable::new(); + let sid = id(&table, s); + assert!(has_tag(sid, NULLABILITY), "'{}' should have NULLABILITY", s); + } + } + + #[test] + fn test_tags_builtin() { + // Spot-check some builtins (named constants) + let builtins = [ + BUILTIN_VA_START, + BUILTIN_VA_END, + BUILTIN_VA_ARG, + BUILTIN_VA_COPY, + BUILTIN_BSWAP16, + BUILTIN_MEMCPY, + BUILTIN_UNREACHABLE, + BUILTIN_EXPECT, + BUILTIN_VA_LIST, + OFFSETOF, + BUILTIN_OBJECT_SIZE, + C11_ATOMIC_LOAD, + C11_ATOMIC_STORE, + C11_ATOMIC_EXCHANGE, + ]; + for &bid in &builtins { + assert!( + has_tag(bid, BUILTIN), + "'{}' should have BUILTIN", + KEYWORD_STRINGS[bid.0 as usize - 1] + ); + } + // Count total builtins + let builtin_count = KEYWORD_TAGS.iter().filter(|&&t| t & BUILTIN != 0).count(); + assert!( + builtin_count >= 68, + "expected at least 68 builtins, got {}", + builtin_count + ); + } + + #[test] + fn test_tags_supported_attr() { + for &s in &[ + "noreturn", + "__noreturn__", + "unused", + "__unused__", + "aligned", + "__aligned__", + "packed", + "__packed__", + "always_inline", + "__always_inline__", + ] { + let table = StringTable::new(); + let sid = id(&table, s); + assert!( + has_tag(sid, SUPPORTED_ATTR), + "'{}' should have SUPPORTED_ATTR", + s + ); + } + } + + #[test] + fn test_has_tag_returns_false_for_dynamic() { + assert!(!has_tag(StringId(9999), TYPE_SPEC)); + assert!(!has_tag(StringId(9999), BUILTIN)); + assert!(!has_tag(StringId(9999), DECL_START)); + } + + #[test] + fn test_has_tag_returns_false_for_empty() { + assert!(!has_tag(StringId::EMPTY, TYPE_SPEC)); + assert!(!has_tag(StringId::EMPTY, BUILTIN)); + assert!(!has_tag(StringId::EMPTY, DECL_START)); + } +} diff --git a/cc/lib.rs b/cc/lib.rs index f8b663ae..4c382964 100644 --- a/cc/lib.rs +++ b/cc/lib.rs @@ -12,12 +12,15 @@ // for use by other crates (cflow, ctags, cxref). 
// +#![recursion_limit = "512"] + pub mod abi; pub mod arch; pub mod builtin_headers; pub mod builtins; pub mod diag; pub mod ir; +pub mod kw; pub mod opt; pub mod os; pub mod parse; diff --git a/cc/main.rs b/cc/main.rs index 30dc7dcc..44635767 100644 --- a/cc/main.rs +++ b/cc/main.rs @@ -9,12 +9,15 @@ // pcc - A POSIX C99 compiler // +#![recursion_limit = "512"] + mod abi; mod arch; mod builtin_headers; mod builtins; mod diag; mod ir; +mod kw; mod opt; mod os; mod parse; @@ -91,9 +94,16 @@ struct Args { #[arg(long = "dump-ast", help = gettext("Parse and dump AST to stdout"))] dump_ast: bool, - /// Dump IR (for debugging linearizer) - #[arg(long = "dump-ir", help = gettext("Linearize and dump IR to stdout"))] - dump_ir: bool, + /// Dump IR at a named stage (for debugging) + /// Stages: post-linearize, post-mapping, post-opt, post-lower, all + /// Bare --dump-ir = post-opt (backward compat) + #[arg(long = "dump-ir", value_name = "stage", default_missing_value = "post-opt", + num_args = 0..=1, help = gettext("Dump IR at stage (post-linearize, post-mapping, post-opt, post-lower, all)"))] + dump_ir: Option<String>, + + /// Filter IR dumps to a specific function name + #[arg(long = "dump-ir-func", value_name = "name", help = gettext("Only dump IR for this function"))] + dump_ir_func: Option<String>, /// Verbose output (include position info) #[arg( @@ -212,6 +222,55 @@ struct Args { unsupported_mflags: Vec<String>, } +/// Valid stage names for --dump-ir. +const DUMP_IR_STAGES: &[&str] = &[ + "post-linearize", + "post-mapping", + "post-opt", + "post-lower", + "all", +]; + +/// Validate --dump-ir stage name. Returns error message if invalid. +fn validate_dump_ir_stage(stage: &str) -> Result<(), String> { + if DUMP_IR_STAGES.contains(&stage) { + Ok(()) + } else { + Err(format!( + "unknown --dump-ir stage '{}'. Valid stages: {}", + stage, + DUMP_IR_STAGES.join(", ") + )) + } +} + +/// Check if IR should be dumped at the given stage. 
+fn should_dump_ir(args: &Args, stage: &str) -> bool { + match args.dump_ir.as_deref() { + Some("all") => true, + Some(s) => s == stage, + None => false, + } +} + +/// Dump IR at a named pipeline stage. +fn dump_ir(args: &Args, module: &ir::Module, stage: &str) { + if !should_dump_ir(args, stage) { + return; + } + eprintln!("=== {} ===", stage); + match &args.dump_ir_func { + Some(name) => { + for func in &module.functions { + if func.name == *name { + print!("{}", func); + } + } + } + None => print!("{}", module), + } +} + /// Print compilation statistics for capacity tuning fn print_stats( path: &str, @@ -405,6 +464,12 @@ fn process_file( )); } + if let Some(stage) = &args.dump_ir { + if let Err(msg) = validate_dump_ir_stage(stage) { + return Err(io::Error::new(io::ErrorKind::InvalidInput, msg)); + } + } + if args.dump_ast { println!("{:#?}", ast); return Ok(()); @@ -433,19 +498,33 @@ fn process_file( .ok() .map(|p| p.to_string_lossy().to_string()); + dump_ir(args, &module, "post-linearize"); + + // Hardware mapping pass — centralized target-specific lowering decisions + arch::mapping::run_mapping(&mut module, &types, target); + + dump_ir(args, &module, "post-mapping"); + // Optimize IR (if enabled) if args.opt_level > 0 { opt::optimize_module(&mut module, args.opt_level); } - if args.dump_ir { - print!("{}", module); + dump_ir(args, &module, "post-opt"); + + if args.dump_ir.is_some() && !should_dump_ir(args, "post-lower") { return Ok(()); } // Lower IR (phi elimination, etc.) 
ir::lower::lower_module(&mut module); + dump_ir(args, &module, "post-lower"); + + if args.dump_ir.is_some() { + return Ok(()); + } + // Generate assembly let emit_unwind_tables = !args.no_unwind_tables; let pie_mode = pie_enabled(args, target); diff --git a/cc/parse/expression.rs b/cc/parse/expression.rs index 561305f4..fd97274b 100644 --- a/cc/parse/expression.rs +++ b/cc/parse/expression.rs @@ -687,16 +687,18 @@ impl<'a> Parser<'a> { // sizeof and _Alignof if self.peek() == TokenType::Ident { - if let Some(name) = self.get_ident_name(self.current()) { - if name == "sizeof" { + if let Some(name_id) = self.get_ident_id(self.current()) { + if name_id == crate::kw::SIZEOF { self.advance(); return self.parse_sizeof(); } - if name == "_Alignof" - || name == "__alignof__" - || name == "__alignof" - || name == "alignof" - { + if matches!( + name_id, + crate::kw::ALIGNOF + | crate::kw::GNU_ALIGNOF + | crate::kw::GNU_ALIGNOF2 + | crate::kw::ALIGNOF_C23 + ) { self.advance(); return self.parse_alignof(); } @@ -780,37 +782,8 @@ impl<'a> Parser<'a> { } /// Check if identifier is a type-starting keyword (for cast/sizeof disambiguation) - pub(crate) fn is_type_keyword(name: &str) -> bool { - matches!( - name, - "void" - | "_Bool" - | "_Complex" - | "_Atomic" - | "char" - | "short" - | "int" - | "long" - | "float" - | "double" - | "_Float16" - | "_Float32" - | "_Float64" - | "signed" - | "unsigned" - | "const" - | "volatile" - | "struct" - | "union" - | "enum" - | "__int128" - | "__int128_t" - | "__uint128_t" - | "__builtin_va_list" - | "typeof" - | "__typeof__" - | "__typeof" - ) + pub(crate) fn is_type_keyword(id: crate::strings::StringId) -> bool { + crate::kw::has_tag(id, crate::kw::TYPE_KEYWORD) } /// Consume type qualifiers (const, volatile, restrict) @@ -823,25 +796,24 @@ impl<'a> Parser<'a> { Some(id) => id, None => break, }; - let name = self.str(name_id); - match name { - "const" | "__const" | "__const__" => { + match name_id { + crate::kw::CONST | crate::kw::GNU_CONST2 
| crate::kw::GNU_CONST => { self.advance(); mods |= TypeModifiers::CONST; } - "volatile" | "__volatile" | "__volatile__" => { + crate::kw::VOLATILE | crate::kw::GNU_VOLATILE2 | crate::kw::GNU_VOLATILE => { self.advance(); mods |= TypeModifiers::VOLATILE; } - "restrict" | "__restrict" | "__restrict__" => { + crate::kw::RESTRICT | crate::kw::GNU_RESTRICT2 | crate::kw::GNU_RESTRICT => { self.advance(); mods |= TypeModifiers::RESTRICT; } - "_Atomic" => { + crate::kw::ATOMIC => { self.advance(); mods |= TypeModifiers::ATOMIC; } - n if super::is_nullability_qualifier(n) => { + _ if super::is_nullability_qualifier(name_id) => { self.advance(); } _ => break, @@ -891,8 +863,7 @@ impl<'a> Parser<'a> { // Check if this looks like a type name (keyword or typedef) let name_id = self.get_ident_id(self.current())?; - let name = self.str(name_id); - if !Self::is_type_keyword(name) && self.symbols.lookup_typedef(name_id).is_none() { + if !Self::is_type_keyword(name_id) && self.symbols.lookup_typedef(name_id).is_none() { // Not a type keyword and not a typedef return None; } @@ -914,35 +885,33 @@ impl<'a> Parser<'a> { Some(id) => id, None => break, }; - let name = self.str(name_id); - - match name { - "const" => { + match name_id { + crate::kw::CONST => { self.advance(); modifiers |= TypeModifiers::CONST; parsed_something = true; } - "volatile" => { + crate::kw::VOLATILE => { self.advance(); modifiers |= TypeModifiers::VOLATILE; parsed_something = true; } - "signed" => { + crate::kw::SIGNED => { self.advance(); modifiers |= TypeModifiers::SIGNED; parsed_something = true; } - "unsigned" => { + crate::kw::UNSIGNED => { self.advance(); modifiers |= TypeModifiers::UNSIGNED; parsed_something = true; } - "_Complex" => { + crate::kw::COMPLEX => { self.advance(); modifiers |= TypeModifiers::COMPLEX; parsed_something = true; } - "_Atomic" => { + crate::kw::ATOMIC => { self.advance(); // _Atomic can be: // 1. 
Type specifier: _Atomic(type-name) @@ -971,7 +940,7 @@ impl<'a> Parser<'a> { } parsed_something = true; } - "short" => { + crate::kw::SHORT => { self.advance(); modifiers |= TypeModifiers::SHORT; if base_kind.is_none() { @@ -979,7 +948,7 @@ impl<'a> Parser<'a> { } parsed_something = true; } - "long" => { + crate::kw::LONG => { self.advance(); if modifiers.contains(TypeModifiers::LONG) { modifiers |= TypeModifiers::LONGLONG; @@ -995,17 +964,17 @@ impl<'a> Parser<'a> { } parsed_something = true; } - "void" => { + crate::kw::VOID => { self.advance(); base_kind = Some(TypeKind::Void); parsed_something = true; } - "char" => { + crate::kw::CHAR => { self.advance(); base_kind = Some(TypeKind::Char); parsed_something = true; } - "int" => { + crate::kw::INT => { self.advance(); if base_kind.is_none() || !matches!( @@ -1017,12 +986,12 @@ impl<'a> Parser<'a> { } parsed_something = true; } - "float" => { + crate::kw::FLOAT => { self.advance(); base_kind = Some(TypeKind::Float); parsed_something = true; } - "double" => { + crate::kw::DOUBLE => { self.advance(); // Handle long double if modifiers.contains(TypeModifiers::LONG) { @@ -1032,50 +1001,50 @@ impl<'a> Parser<'a> { } parsed_something = true; } - "_Float16" => { + crate::kw::FLOAT16 => { self.advance(); base_kind = Some(TypeKind::Float16); parsed_something = true; } - "_Float32" => { + crate::kw::FLOAT32 => { // _Float32 is an alias for float (TS 18661-3 / C23) self.advance(); base_kind = Some(TypeKind::Float); parsed_something = true; } - "_Float64" => { + crate::kw::FLOAT64 => { // _Float64 is an alias for double (TS 18661-3 / C23) self.advance(); base_kind = Some(TypeKind::Double); parsed_something = true; } - "_Bool" => { + crate::kw::BOOL => { self.advance(); base_kind = Some(TypeKind::Bool); parsed_something = true; } - "__int128" => { + crate::kw::INT128 => { self.advance(); base_kind = Some(TypeKind::Int128); parsed_something = true; } - "__int128_t" => { + crate::kw::INT128_T => { self.advance(); base_kind = 
Some(TypeKind::Int128); parsed_something = true; } - "__uint128_t" => { + crate::kw::UINT128_T => { self.advance(); modifiers |= TypeModifiers::UNSIGNED; base_kind = Some(TypeKind::Int128); parsed_something = true; } - "__builtin_va_list" => { + crate::kw::BUILTIN_VA_LIST => { self.advance(); base_kind = Some(TypeKind::VaList); parsed_something = true; } - "typeof" | "__typeof__" | "__typeof" => { + crate::kw::TYPEOF | crate::kw::GNU_TYPEOF | crate::kw::GNU_TYPEOF2 => { self.advance(); // consume typeof if !self.is_special(b'(') { return None; @@ -1105,7 +1074,7 @@ impl<'a> Parser<'a> { let expr_type = expr.typ.unwrap_or(self.types.int_id); return Some(self.parse_pointer_chain(expr_type)); } - "struct" => { + crate::kw::STRUCT => { self.advance(); // consume 'struct' // For struct tag reference, look up directly in symbol table if let Some(tag_name) = self.get_ident_id(self.current()) { @@ -1168,7 +1137,7 @@ impl<'a> Parser<'a> { } return None; } - "union" => { + crate::kw::UNION => { self.advance(); // consume 'union' // For union tag reference, look up directly in symbol table if let Some(tag_name) = self.get_ident_id(self.current()) { @@ -1231,7 +1200,7 @@ impl<'a> Parser<'a> { } return None; } - "enum" => { + crate::kw::ENUM => { if let Ok(enum_type) = self.parse_enum_specifier() { let mut typ = enum_type; typ.modifiers |= modifiers | self.consume_type_qualifiers(); @@ -1779,11 +1748,10 @@ impl<'a> Parser<'a> { let token_pos = token.pos; if let TokenValue::Ident(id) = &token.value { let name_id = *id; - let name_str = self.idents.get_opt(name_id).unwrap_or(""); // Check for varargs builtins that need special parsing - match name_str { - "__builtin_va_start" => { + match name_id { + crate::kw::BUILTIN_VA_START => { // __builtin_va_start(ap, last_param) self.expect_special(b'(')?; let ap = self.parse_assignment_expr()?; @@ -1800,7 +1768,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_va_arg" => { + crate::kw::BUILTIN_VA_ARG => { // __builtin_va_arg(ap, 
type) self.expect_special(b'(')?; let ap = self.parse_assignment_expr()?; @@ -1817,7 +1785,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_va_end" => { + crate::kw::BUILTIN_VA_END => { // __builtin_va_end(ap) self.expect_special(b'(')?; let ap = self.parse_assignment_expr()?; @@ -1828,7 +1796,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_va_copy" => { + crate::kw::BUILTIN_VA_COPY => { // __builtin_va_copy(dest, src) self.expect_special(b'(')?; let dest = self.parse_assignment_expr()?; @@ -1844,7 +1812,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_bswap16" => { + crate::kw::BUILTIN_BSWAP16 => { // __builtin_bswap16(x) - returns uint16_t self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; @@ -1855,7 +1823,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_bswap32" => { + crate::kw::BUILTIN_BSWAP32 => { // __builtin_bswap32(x) - returns uint32_t self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; @@ -1866,7 +1834,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_bswap64" => { + crate::kw::BUILTIN_BSWAP64 => { // __builtin_bswap64(x) - returns uint64_t self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; @@ -1877,7 +1845,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_ctz" => { + crate::kw::BUILTIN_CTZ => { // __builtin_ctz(x) - returns int, counts trailing zeros in unsigned int // Result is undefined if x is 0 self.expect_special(b'(')?; @@ -1889,7 +1857,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_ctzl" => { + crate::kw::BUILTIN_CTZL => { // __builtin_ctzl(x) - returns int, counts trailing zeros in unsigned long // Result is undefined if x is 0 self.expect_special(b'(')?; @@ -1901,7 +1869,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_ctzll" => { + crate::kw::BUILTIN_CTZLL => { // __builtin_ctzll(x) - returns int, counts trailing zeros in unsigned long long // Result is undefined if x is 0 self.expect_special(b'(')?; @@ -1913,7 +1881,7 @@ 
impl<'a> Parser<'a> { token_pos, )); } - "__builtin_clz" => { + crate::kw::BUILTIN_CLZ => { // __builtin_clz(x) - returns int, counts leading zeros in unsigned int // Result is undefined if x is 0 self.expect_special(b'(')?; @@ -1925,7 +1893,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_clzl" => { + crate::kw::BUILTIN_CLZL => { // __builtin_clzl(x) - returns int, counts leading zeros in unsigned long // Result is undefined if x is 0 self.expect_special(b'(')?; @@ -1937,7 +1905,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_clzll" => { + crate::kw::BUILTIN_CLZLL => { // __builtin_clzll(x) - returns int, counts leading zeros in unsigned long long // Result is undefined if x is 0 self.expect_special(b'(')?; @@ -1949,7 +1917,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_popcount" => { + crate::kw::BUILTIN_POPCOUNT => { // __builtin_popcount(x) - returns int, counts set bits in unsigned int self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; @@ -1960,7 +1928,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_popcountl" => { + crate::kw::BUILTIN_POPCOUNTL => { // __builtin_popcountl(x) - returns int, counts set bits in unsigned long self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; @@ -1971,7 +1939,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_popcountll" => { + crate::kw::BUILTIN_POPCOUNTLL => { // __builtin_popcountll(x) - returns int, counts set bits in unsigned long long self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; @@ -1982,7 +1950,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_alloca" => { + crate::kw::BUILTIN_ALLOCA => { // __builtin_alloca(size) - returns void* self.expect_special(b'(')?; let size = self.parse_assignment_expr()?; @@ -1996,7 +1964,7 @@ impl<'a> Parser<'a> { )); } // Memory builtins - generate calls to C library functions - "__builtin_memset" => { + crate::kw::BUILTIN_MEMSET => { // __builtin_memset(dest, c, n) - returns void* 
self.expect_special(b'(')?; let dest = self.parse_assignment_expr()?; @@ -2015,7 +1983,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_memcpy" => { + crate::kw::BUILTIN_MEMCPY => { // __builtin_memcpy(dest, src, n) - returns void* self.expect_special(b'(')?; let dest = self.parse_assignment_expr()?; @@ -2034,7 +2002,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_memmove" => { + crate::kw::BUILTIN_MEMMOVE => { // __builtin_memmove(dest, src, n) - returns void* self.expect_special(b'(')?; let dest = self.parse_assignment_expr()?; @@ -2054,7 +2022,7 @@ impl<'a> Parser<'a> { )); } // Infinity builtins - return float constants - "__builtin_inf" | "__builtin_huge_val" => { + crate::kw::BUILTIN_INF | crate::kw::BUILTIN_HUGE_VAL => { self.expect_special(b'(')?; self.expect_special(b')')?; return Ok(Self::typed_expr( @@ -2063,7 +2031,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_inff" | "__builtin_huge_valf" => { + crate::kw::BUILTIN_INFF | crate::kw::BUILTIN_HUGE_VALF => { self.expect_special(b'(')?; self.expect_special(b')')?; return Ok(Self::typed_expr( @@ -2072,7 +2040,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_infl" | "__builtin_huge_vall" => { + crate::kw::BUILTIN_INFL | crate::kw::BUILTIN_HUGE_VALL => { self.expect_special(b'(')?; self.expect_special(b')')?; return Ok(Self::typed_expr( @@ -2083,7 +2051,7 @@ impl<'a> Parser<'a> { } // NaN builtins - returns quiet NaN // The string argument is typically empty "" for quiet NaN - "__builtin_nan" | "__builtin_nans" => { + crate::kw::BUILTIN_NAN | crate::kw::BUILTIN_NANS => { self.expect_special(b'(')?; let _arg = self.parse_assignment_expr()?; // string argument (ignored) self.expect_special(b')')?; @@ -2093,7 +2061,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_nanf" | "__builtin_nansf" => { + crate::kw::BUILTIN_NANF | crate::kw::BUILTIN_NANSF => { self.expect_special(b'(')?; let _arg = self.parse_assignment_expr()?; // string argument (ignored) 
self.expect_special(b')')?; @@ -2103,7 +2071,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_nanl" | "__builtin_nansl" => { + crate::kw::BUILTIN_NANL | crate::kw::BUILTIN_NANSL => { self.expect_special(b'(')?; let _arg = self.parse_assignment_expr()?; // string argument (ignored) self.expect_special(b')')?; @@ -2114,7 +2082,7 @@ impl<'a> Parser<'a> { )); } // FLT_ROUNDS - returns current rounding mode (1 = to nearest) - "__builtin_flt_rounds" => { + crate::kw::BUILTIN_FLT_ROUNDS => { self.expect_special(b'(')?; self.expect_special(b')')?; return Ok(Self::typed_expr( @@ -2124,7 +2092,7 @@ impl<'a> Parser<'a> { )); } // Fabs builtins - absolute value for floats - "__builtin_fabs" => { + crate::kw::BUILTIN_FABS => { self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; self.expect_special(b')')?; @@ -2134,7 +2102,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_fabsf" => { + crate::kw::BUILTIN_FABSF => { self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; self.expect_special(b')')?; @@ -2144,7 +2112,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_fabsl" => { + crate::kw::BUILTIN_FABSL => { self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; self.expect_special(b')')?; @@ -2155,7 +2123,7 @@ impl<'a> Parser<'a> { )); } // Signbit builtins - test sign bit of floats - "__builtin_signbit" => { + crate::kw::BUILTIN_SIGNBIT => { self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; self.expect_special(b')')?; @@ -2165,7 +2133,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_signbitf" => { + crate::kw::BUILTIN_SIGNBITF => { self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; self.expect_special(b')')?; @@ -2175,7 +2143,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_signbitl" => { + crate::kw::BUILTIN_SIGNBITL => { self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; self.expect_special(b')')?; @@ -2185,7 +2153,7 @@ impl<'a> Parser<'a> { 
token_pos, )); } - "__builtin_unreachable" => { + crate::kw::BUILTIN_UNREACHABLE => { // __builtin_unreachable() - marks code as unreachable // Takes no arguments, returns void // Behavior is undefined if actually reached at runtime @@ -2197,7 +2165,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_constant_p" => { + crate::kw::BUILTIN_CONSTANT_P => { // __builtin_constant_p(expr) - returns 1 if expr is a constant, 0 otherwise // This is evaluated at compile time, not runtime self.expect_special(b'(')?; @@ -2211,7 +2179,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_expect" => { + crate::kw::BUILTIN_EXPECT => { // __builtin_expect(expr, c) - branch prediction hint // Returns expr, the second argument is the expected value (for optimization hints) // We just return expr since we don't do branch prediction optimization @@ -2222,7 +2190,7 @@ impl<'a> Parser<'a> { self.expect_special(b')')?; return Ok(expr); } - "__builtin_assume_aligned" => { + crate::kw::BUILTIN_ASSUME_ALIGNED => { // __builtin_assume_aligned(ptr, align) or // __builtin_assume_aligned(ptr, align, offset) // Returns ptr, hints that ptr is aligned to align bytes @@ -2239,7 +2207,7 @@ impl<'a> Parser<'a> { self.expect_special(b')')?; return Ok(ptr); } - "__builtin_prefetch" => { + crate::kw::BUILTIN_PREFETCH => { // __builtin_prefetch(addr) or // __builtin_prefetch(addr, rw) or // __builtin_prefetch(addr, rw, locality) @@ -2264,7 +2232,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_types_compatible_p" => { + crate::kw::BUILTIN_TYPES_COMPATIBLE_P => { // __builtin_types_compatible_p(type1, type2) - returns 1 if types are compatible // This is evaluated at compile time, ignoring top-level qualifiers self.expect_special(b'(')?; @@ -2280,7 +2248,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_frame_address" => { + crate::kw::BUILTIN_FRAME_ADDRESS => { // __builtin_frame_address(level) - returns void*, address of frame at level // Level 0 is the current frame, 1 is the 
caller's frame, etc. // Returns NULL for invalid levels (beyond stack bounds) @@ -2295,7 +2263,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_return_address" => { + crate::kw::BUILTIN_RETURN_ADDRESS => { // __builtin_return_address(level) - returns void*, return address at level // Level 0 is the current function's return address // Returns NULL for invalid levels (beyond stack bounds) @@ -2310,7 +2278,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "setjmp" | "_setjmp" => { + crate::kw::SETJMP | crate::kw::SETJMP2 => { // setjmp(env) - saves execution context, returns int // Returns 0 on direct call, non-zero when returning via longjmp self.expect_special(b'(')?; @@ -2322,7 +2290,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "longjmp" | "_longjmp" => { + crate::kw::LONGJMP | crate::kw::LONGJMP2 => { // longjmp(env, val) - restores execution context (never returns) // Causes corresponding setjmp to return val (or 1 if val == 0) self.expect_special(b'(')?; @@ -2339,7 +2307,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_offsetof" | "offsetof" => { + crate::kw::BUILTIN_OFFSETOF | crate::kw::OFFSETOF => { // __builtin_offsetof(type, member-designator) // Returns the byte offset of a member within a struct/union // member-designator can be .field or [index] chains @@ -2395,7 +2363,7 @@ impl<'a> Parser<'a> { // ================================================================ // Atomic builtins (Clang __c11_atomic_* for C11 stdatomic.h) // ================================================================ - "__c11_atomic_init" => { + crate::kw::C11_ATOMIC_INIT => { // __c11_atomic_init(ptr, val) - initialize atomic (no ordering) self.expect_special(b'(')?; let ptr = self.parse_assignment_expr()?; @@ -2411,7 +2379,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_load" => { + crate::kw::C11_ATOMIC_LOAD => { // __c11_atomic_load(ptr, order) - returns *ptr atomically self.expect_special(b'(')?; let ptr = self.parse_assignment_expr()?; @@ -2431,7 
+2399,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_store" => { + crate::kw::C11_ATOMIC_STORE => { // __c11_atomic_store(ptr, val, order) - *ptr = val atomically self.expect_special(b'(')?; let ptr = self.parse_assignment_expr()?; @@ -2450,7 +2418,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_exchange" => { + crate::kw::C11_ATOMIC_EXCHANGE => { // __c11_atomic_exchange(ptr, val, order) - swap and return old self.expect_special(b'(')?; let ptr = self.parse_assignment_expr()?; @@ -2473,7 +2441,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_compare_exchange_strong" => { + crate::kw::C11_ATOMIC_COMPARE_EXCHANGE_STRONG => { // __c11_atomic_compare_exchange_strong(ptr, expected, desired, succ, fail) // Note: fail_order is parsed but ignored (we use succ_order for both) self.expect_special(b'(')?; @@ -2499,7 +2467,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_compare_exchange_weak" => { + crate::kw::C11_ATOMIC_COMPARE_EXCHANGE_WEAK => { // __c11_atomic_compare_exchange_weak(ptr, expected, desired, succ, fail) // Note: Implemented as strong (no spurious failures) self.expect_special(b'(')?; @@ -2525,7 +2493,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_fetch_add" => { + crate::kw::C11_ATOMIC_FETCH_ADD => { // __c11_atomic_fetch_add(ptr, val, order) - add and return old self.expect_special(b'(')?; let ptr = self.parse_assignment_expr()?; @@ -2548,7 +2516,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_fetch_sub" => { + crate::kw::C11_ATOMIC_FETCH_SUB => { // __c11_atomic_fetch_sub(ptr, val, order) - subtract and return old self.expect_special(b'(')?; let ptr = self.parse_assignment_expr()?; @@ -2571,7 +2539,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_fetch_and" => { + crate::kw::C11_ATOMIC_FETCH_AND => { // __c11_atomic_fetch_and(ptr, val, order) - AND and return old self.expect_special(b'(')?; let ptr = self.parse_assignment_expr()?; @@ -2594,7 +2562,7 @@ impl<'a> Parser<'a> { 
token_pos, )); } - "__c11_atomic_fetch_or" => { + crate::kw::C11_ATOMIC_FETCH_OR => { // __c11_atomic_fetch_or(ptr, val, order) - OR and return old self.expect_special(b'(')?; let ptr = self.parse_assignment_expr()?; @@ -2617,7 +2585,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_fetch_xor" => { + crate::kw::C11_ATOMIC_FETCH_XOR => { // __c11_atomic_fetch_xor(ptr, val, order) - XOR and return old self.expect_special(b'(')?; let ptr = self.parse_assignment_expr()?; @@ -2640,7 +2608,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_thread_fence" => { + crate::kw::C11_ATOMIC_THREAD_FENCE => { // __c11_atomic_thread_fence(order) - memory fence self.expect_special(b'(')?; let order = self.parse_assignment_expr()?; @@ -2653,7 +2621,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_signal_fence" => { + crate::kw::C11_ATOMIC_SIGNAL_FENCE => { // __c11_atomic_signal_fence(order) - compiler barrier self.expect_special(b'(')?; let order = self.parse_assignment_expr()?; @@ -2666,7 +2634,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_object_size" => { + crate::kw::BUILTIN_OBJECT_SIZE => { // __builtin_object_size(ptr, type) - returns (size_t)-1 // at compile time without optimization (conservative "don't know") self.expect_special(b'(')?; @@ -2680,63 +2648,70 @@ impl<'a> Parser<'a> { token_pos, )); } - name if name.starts_with("__builtin___") => { - // Fortified builtins: __builtin___snprintf_chk etc. - // Strip __builtin_ prefix → __snprintf_chk, which is a - // real libc function (declared by macOS/glibc headers). 
- let real_name = &name["__builtin_".len()..]; - // Parse arguments first (must consume tokens regardless) - self.expect_special(b'(')?; - let mut args = Vec::new(); - if !self.is_special(b')') { - args.push(self.parse_assignment_expr()?); - while self.is_special(b',') { - self.advance(); + _ => { + let name_str = self.idents.get_opt(name_id).unwrap_or(""); + if name_str.starts_with("__builtin___") { + // Fortified builtins: __builtin___snprintf_chk etc. + // Strip __builtin_ prefix → __snprintf_chk, which is a + // real libc function (declared by macOS/glibc headers). + let real_name = &name_str["__builtin_".len()..]; + // Parse arguments first (must consume tokens regardless) + self.expect_special(b'(')?; + let mut args = Vec::new(); + if !self.is_special(b')') { args.push(self.parse_assignment_expr()?); + while self.is_special(b',') { + self.advance(); + args.push(self.parse_assignment_expr()?); + } } - } - self.expect_special(b')')?; - // Look up the real function by its de-prefixed name - let real_name_id = self.idents.lookup(real_name); - let symbol_id = real_name_id.and_then(|id| { - self.symbols - .lookup_id(id, crate::symbol::Namespace::Ordinary) - }); - if let Some(symbol_id) = symbol_id { - let func_type = self.symbols.get(symbol_id).typ; - let ret_type = - self.types.base_type(func_type).unwrap_or(self.types.int_id); - let func_expr = Self::typed_expr( - ExprKind::Ident(symbol_id), - func_type, + self.expect_special(b')')?; + // Look up the real function by its de-prefixed name + let real_name_id = self.idents.lookup(real_name); + let symbol_id = real_name_id.and_then(|id| { + self.symbols + .lookup_id(id, crate::symbol::Namespace::Ordinary) + }); + if let Some(symbol_id) = symbol_id { + let func_type = self.symbols.get(symbol_id).typ; + let ret_type = self + .types + .base_type(func_type) + .unwrap_or(self.types.int_id); + let func_expr = Self::typed_expr( + ExprKind::Ident(symbol_id), + func_type, + token_pos, + ); + return Ok(Self::typed_expr( + 
ExprKind::Call { + func: Box::new(func_expr), + args, + }, + ret_type, + token_pos, + )); + } + // Not declared — return 0 as fallback + diag::error( token_pos, + &format!("undeclared function '{}'", real_name), ); return Ok(Self::typed_expr( - ExprKind::Call { - func: Box::new(func_expr), - args, - }, - ret_type, + ExprKind::IntLit(0), + self.types.int_id, token_pos, )); } - // Not declared — return 0 as fallback - diag::error(token_pos, &format!("undeclared function '{}'", real_name)); - return Ok(Self::typed_expr( - ExprKind::IntLit(0), - self.types.int_id, - token_pos, - )); } - _ => {} } // Look up symbol to get type (during parsing, symbol is in scope) // C99 6.4.2.2: __func__ is a predefined identifier with type const char[] // GCC extensions: __FUNCTION__ and __PRETTY_FUNCTION__ behave similarly - if name_str == "__func__" - || name_str == "__FUNCTION__" - || name_str == "__PRETTY_FUNCTION__" + if name_id == crate::kw::FUNC + || name_id == crate::kw::FUNCTION + || name_id == crate::kw::PRETTY_FUNCTION { // These behave like a string literal (const char[]) // Linearization handles mapping to __func__ behavior @@ -2766,6 +2741,7 @@ impl<'a> Parser<'a> { } else { // C99 6.5.1: Undeclared identifier is an error // (implicit int was removed in C99) + let name_str = self.idents.get_opt(name_id).unwrap_or(""); diag::error(token_pos, &format!("undeclared identifier '{}'", name_str)); // Return a dummy expression to continue parsing Ok(Self::typed_expr( diff --git a/cc/parse/mod.rs b/cc/parse/mod.rs index 86049761..8cc41111 100644 --- a/cc/parse/mod.rs +++ b/cc/parse/mod.rs @@ -19,16 +19,8 @@ mod test_parser; // Re-export parser used by main.rs pub use parser::Parser; -/// Check if a name is a C11 nullability qualifier. +/// Check if a StringId is a C11 nullability qualifier. /// Single source of truth — used by all qualifier-parsing paths. 
-pub(crate) fn is_nullability_qualifier(name: &str) -> bool { - matches!( - name, - "_Nonnull" - | "__nonnull" - | "_Nullable" - | "__nullable" - | "_Null_unspecified" - | "__null_unspecified" - ) +pub(crate) fn is_nullability_qualifier(id: crate::strings::StringId) -> bool { + crate::kw::has_tag(id, crate::kw::NULLABILITY) } diff --git a/cc/parse/parser.rs b/cc/parse/parser.rs index aed2475a..39621595 100644 --- a/cc/parse/parser.rs +++ b/cc/parse/parser.rs @@ -391,7 +391,7 @@ impl<'a> Parser<'a> { if self.peek() == TokenType::Ident { if let Some(name_id) = self.get_ident_id(self.current()) { let is_type = self.symbols.lookup_typedef(name_id).is_some() - || Self::is_type_keyword(self.str(name_id)); + || crate::kw::has_tag(name_id, crate::kw::TYPE_KEYWORD); // If not a type, this is a grouped declarator return !is_type; } @@ -491,8 +491,8 @@ impl<'a> Parser<'a> { if self.peek() != TokenType::Ident { return false; } - if let Some(name) = self.get_ident_name(self.current()) { - name == "__attribute__" || name == "__attribute" + if let Some(id) = self.get_ident_id(self.current()) { + crate::kw::has_tag(id, crate::kw::ATTR_KW) } else { false } @@ -645,8 +645,8 @@ impl<'a> Parser<'a> { fn is_nullability_qualifier(&self) -> bool { self.peek() == TokenType::Ident && self - .get_ident_name(self.current()) - .is_some_and(|n| super::is_nullability_qualifier(n.as_str())) + .get_ident_id(self.current()) + .is_some_and(super::is_nullability_qualifier) } /// Check if current token is __asm or __asm__ @@ -654,8 +654,8 @@ impl<'a> Parser<'a> { if self.peek() != TokenType::Ident { return false; } - if let Some(name) = self.get_ident_name(self.current()) { - name == "__asm__" || name == "__asm" || name == "asm" + if let Some(id) = self.get_ident_id(self.current()) { + crate::kw::has_tag(id, crate::kw::ASM_KW) } else { false } @@ -695,18 +695,16 @@ impl<'a> Parser<'a> { let mut is_volatile = false; let mut _is_goto = false; while self.peek() == TokenType::Ident { - if let Some(name) 
= self.get_ident_name(self.current()) { - match name.as_str() { - "volatile" | "__volatile__" => { + if let Some(name_id) = self.get_ident_id(self.current()) { + match name_id { + crate::kw::VOLATILE | crate::kw::GNU_VOLATILE => { is_volatile = true; self.advance(); } - "inline" | "__inline__" => { - // inline qualifier - just consume it (affects inlining decisions) + crate::kw::INLINE | crate::kw::GNU_INLINE => { self.advance(); } - "goto" => { - // goto qualifier - indicates asm can jump to C labels + crate::kw::GOTO => { _is_goto = true; self.advance(); } @@ -948,34 +946,34 @@ impl Parser<'_> { pub fn parse_statement(&mut self) -> ParseResult { // Check for keywords if self.peek() == TokenType::Ident { - if let Some(name) = self.get_ident_name(self.current()) { - match name.as_str() { - "if" => return self.parse_if_stmt(), - "while" => return self.parse_while_stmt(), - "do" => return self.parse_do_while_stmt(), - "for" => return self.parse_for_stmt(), - "return" => return self.parse_return_stmt(), - "break" => { + if let Some(name_id) = self.get_ident_id(self.current()) { + match name_id { + crate::kw::IF => return self.parse_if_stmt(), + crate::kw::WHILE => return self.parse_while_stmt(), + crate::kw::DO => return self.parse_do_while_stmt(), + crate::kw::FOR => return self.parse_for_stmt(), + crate::kw::RETURN => return self.parse_return_stmt(), + crate::kw::BREAK => { self.advance(); self.expect_special(b';')?; return Ok(Stmt::Break); } - "continue" => { + crate::kw::CONTINUE => { self.advance(); self.expect_special(b';')?; return Ok(Stmt::Continue); } - "goto" => { + crate::kw::GOTO => { self.advance(); let label = self.expect_identifier()?; self.expect_special(b';')?; return Ok(Stmt::Goto(label)); } - "switch" => return self.parse_switch_stmt(), - "case" => return self.parse_case_label(), - "default" => return self.parse_default_label(), + crate::kw::SWITCH => return self.parse_switch_stmt(), + crate::kw::CASE => return self.parse_case_label(), + 
crate::kw::DEFAULT => return self.parse_default_label(), // GCC extended inline assembly - "__asm__" | "__asm" | "asm" => { + crate::kw::ASM | crate::kw::GNU_ASM | crate::kw::GNU_ASM2 => { return self.parse_asm_statement(); } _ => {} @@ -1026,8 +1024,8 @@ impl Parser<'_> { let then_stmt = self.parse_statement()?; let else_stmt = if self.peek() == TokenType::Ident { - if let Some(name) = self.get_ident_name(self.current()) { - if name == "else" { + if let Some(name_id) = self.get_ident_id(self.current()) { + if name_id == crate::kw::ELSE { self.advance(); Some(Box::new(self.parse_statement()?)) } else { @@ -1303,55 +1301,7 @@ impl Parser<'_> { } if let Some(name_id) = self.get_ident_id(self.current()) { - let name = self.str(name_id); - // Check for type keywords first - if matches!( - name, - "void" - | "char" - | "short" - | "int" - | "long" - | "float" - | "double" - | "_Float16" - | "_Float32" - | "_Float64" - | "_Complex" - | "_Atomic" - | "_Alignas" - | "signed" - | "unsigned" - | "const" - | "volatile" - | "static" - | "extern" - | "auto" - | "register" - | "typedef" - | "inline" - | "__inline" - | "__inline__" - | "_Noreturn" - | "__noreturn__" - | "struct" - | "union" - | "enum" - | "_Bool" - | "__attribute__" - | "__attribute" - | "__int128" - | "__int128_t" - | "__uint128_t" - | "__builtin_va_list" - | "typeof" - | "__typeof__" - | "__typeof" - | "_Thread_local" - | "__thread" - | "_Static_assert" - | "static_assert" - ) { + if crate::kw::has_tag(name_id, crate::kw::DECL_START) { return true; } // Also check for typedef names @@ -1716,67 +1666,65 @@ impl Parser<'_> { Some(id) => id, None => break, }; - let name = self.str(name_id); - - match name { + match name_id { // Skip __attribute__ in the type specifier loop - "__attribute__" | "__attribute" => { + crate::kw::GNU_ATTRIBUTE | crate::kw::GNU_ATTRIBUTE2 => { self.skip_extensions(); continue; } - "const" => { + crate::kw::CONST => { self.advance(); modifiers |= TypeModifiers::CONST; } - "volatile" => { 
+ crate::kw::VOLATILE => { self.advance(); modifiers |= TypeModifiers::VOLATILE; } - "static" => { + crate::kw::STATIC => { self.advance(); modifiers |= TypeModifiers::STATIC; } - "extern" => { + crate::kw::EXTERN => { self.advance(); modifiers |= TypeModifiers::EXTERN; } - "register" => { + crate::kw::REGISTER => { self.advance(); modifiers |= TypeModifiers::REGISTER; } - "auto" => { + crate::kw::AUTO => { self.advance(); modifiers |= TypeModifiers::AUTO; } - "typedef" => { + crate::kw::TYPEDEF => { self.advance(); modifiers |= TypeModifiers::TYPEDEF; } - "_Thread_local" | "__thread" => { + crate::kw::THREAD_LOCAL | crate::kw::GNU_THREAD => { self.advance(); modifiers |= TypeModifiers::THREAD_LOCAL; } - "inline" | "__inline" | "__inline__" => { + crate::kw::INLINE | crate::kw::GNU_INLINE2 | crate::kw::GNU_INLINE => { self.advance(); modifiers |= TypeModifiers::INLINE; } - "_Noreturn" | "__noreturn__" => { + crate::kw::NORETURN | crate::kw::GNU_NORETURN => { self.advance(); modifiers |= TypeModifiers::NORETURN; } - "signed" => { + crate::kw::SIGNED => { self.advance(); modifiers |= TypeModifiers::SIGNED; } - "unsigned" => { + crate::kw::UNSIGNED => { self.advance(); modifiers |= TypeModifiers::UNSIGNED; } - "_Complex" => { + crate::kw::COMPLEX => { self.advance(); modifiers |= TypeModifiers::COMPLEX; } - "_Atomic" => { + crate::kw::ATOMIC => { self.advance(); // _Atomic can be: // 1. 
Type specifier: _Atomic(type-name) @@ -1803,7 +1751,7 @@ impl Parser<'_> { modifiers |= TypeModifiers::ATOMIC; } } - "_Alignas" => { + crate::kw::ALIGNAS => { // C11 alignment specifier: _Alignas(type-name) or _Alignas(constant-expression) let alignas_pos = self.current_pos(); self.advance(); @@ -1837,14 +1785,14 @@ impl Parser<'_> { } } } - "short" => { + crate::kw::SHORT => { self.advance(); modifiers |= TypeModifiers::SHORT; if base_kind.is_none() { base_kind = Some(TypeKind::Short); } } - "long" => { + crate::kw::LONG => { self.advance(); if modifiers.contains(TypeModifiers::LONG) { modifiers |= TypeModifiers::LONGLONG; @@ -1859,15 +1807,15 @@ impl Parser<'_> { } } } - "void" => { + crate::kw::VOID => { self.advance(); base_kind = Some(TypeKind::Void); } - "char" => { + crate::kw::CHAR => { self.advance(); base_kind = Some(TypeKind::Char); } - "int" => { + crate::kw::INT => { self.advance(); if base_kind.is_none() || !matches!( @@ -1878,11 +1826,11 @@ impl Parser<'_> { base_kind = Some(TypeKind::Int); } } - "float" => { + crate::kw::FLOAT => { self.advance(); base_kind = Some(TypeKind::Float); } - "double" => { + crate::kw::DOUBLE => { self.advance(); // Handle long double if modifiers.contains(TypeModifiers::LONG) { @@ -1891,42 +1839,42 @@ impl Parser<'_> { base_kind = Some(TypeKind::Double); } } - "_Float16" => { + crate::kw::FLOAT16 => { self.advance(); base_kind = Some(TypeKind::Float16); } - "_Float32" => { + crate::kw::FLOAT32 => { // _Float32 is an alias for float (TS 18661-3 / C23) self.advance(); base_kind = Some(TypeKind::Float); } - "_Float64" => { + crate::kw::FLOAT64 => { // _Float64 is an alias for double (TS 18661-3 / C23) self.advance(); base_kind = Some(TypeKind::Double); } - "_Bool" => { + crate::kw::BOOL => { self.advance(); base_kind = Some(TypeKind::Bool); } - "__int128" => { + crate::kw::INT128 => { self.advance(); base_kind = Some(TypeKind::Int128); } - "__int128_t" => { + crate::kw::INT128_T => { self.advance(); base_kind = 
Some(TypeKind::Int128); } - "__uint128_t" => { + crate::kw::UINT128_T => { self.advance(); modifiers |= TypeModifiers::UNSIGNED; base_kind = Some(TypeKind::Int128); } - "__builtin_va_list" => { + crate::kw::BUILTIN_VA_LIST => { self.advance(); base_kind = Some(TypeKind::VaList); } - "typeof" | "__typeof__" | "__typeof" => { + crate::kw::TYPEOF | crate::kw::GNU_TYPEOF | crate::kw::GNU_TYPEOF2 => { self.advance(); // consume typeof self.expect_special(b'(')?; @@ -1954,7 +1902,7 @@ impl Parser<'_> { ..result_type }); } - "enum" => { + crate::kw::ENUM => { let mut enum_type = self.parse_enum_specifier()?; // Consume trailing qualifiers (e.g., "enum foo const") let trailing_mods = self.consume_type_qualifiers(); @@ -1962,14 +1910,14 @@ impl Parser<'_> { enum_type.modifiers |= modifiers | trailing_mods; return Ok(enum_type); } - "struct" => { + crate::kw::STRUCT => { let mut struct_type = self.parse_struct_or_union_specifier(false)?; // Consume trailing qualifiers (e.g., "struct foo const") let trailing_mods = self.consume_type_qualifiers(); struct_type.modifiers |= modifiers | trailing_mods; return Ok(struct_type); } - "union" => { + crate::kw::UNION => { let mut union_type = self.parse_struct_or_union_specifier(true)?; // Consume trailing qualifiers (e.g., "union foo const") let trailing_mods = self.consume_type_qualifiers(); @@ -2418,25 +2366,25 @@ impl Parser<'_> { // Parse pointer qualifiers (const, volatile, restrict, _Atomic, nullability) while self.peek() == TokenType::Ident { - if let Some(name) = self.get_ident_name(self.current()) { - match name.as_str() { - "const" => { + if let Some(name_id) = self.get_ident_id(self.current()) { + match name_id { + crate::kw::CONST => { self.advance(); ptr_modifiers |= TypeModifiers::CONST; } - "volatile" => { + crate::kw::VOLATILE => { self.advance(); ptr_modifiers |= TypeModifiers::VOLATILE; } - "restrict" => { + crate::kw::RESTRICT => { self.advance(); ptr_modifiers |= TypeModifiers::RESTRICT; } - "_Atomic" => { + 
crate::kw::ATOMIC => { self.advance(); ptr_modifiers |= TypeModifiers::ATOMIC; } - n if super::is_nullability_qualifier(n) => { + _ if super::is_nullability_qualifier(name_id) => { self.advance(); } _ => break, @@ -2509,9 +2457,12 @@ impl Parser<'_> { // Parse optional qualifiers and static (C99 6.7.5.3) // These are valid in function parameter array declarators while self.peek() == TokenType::Ident { - if let Some(name) = self.get_ident_name(self.current()) { - match name.as_str() { - "static" | "const" | "volatile" | "restrict" => { + if let Some(name_id) = self.get_ident_id(self.current()) { + match name_id { + crate::kw::STATIC + | crate::kw::CONST + | crate::kw::VOLATILE + | crate::kw::RESTRICT => { self.advance(); } _ => break, @@ -2770,21 +2721,21 @@ impl Parser<'_> { // Parse pointer qualifiers while self.peek() == TokenType::Ident { - if let Some(name) = self.get_ident_name(self.current()) { - match name.as_str() { - "const" => { + if let Some(name_id) = self.get_ident_id(self.current()) { + match name_id { + crate::kw::CONST => { self.advance(); ptr_modifiers |= TypeModifiers::CONST; } - "volatile" => { + crate::kw::VOLATILE => { self.advance(); ptr_modifiers |= TypeModifiers::VOLATILE; } - "restrict" => { + crate::kw::RESTRICT => { self.advance(); ptr_modifiers |= TypeModifiers::RESTRICT; } - n if super::is_nullability_qualifier(n) => { + _ if super::is_nullability_qualifier(name_id) => { self.advance(); } _ => break, @@ -2875,8 +2826,8 @@ impl Parser<'_> { // Check for (void) if self.peek() == TokenType::Ident { - if let Some(name) = self.get_ident_name(self.current()) { - if name == "void" { + if let Some(name_id) = self.get_ident_id(self.current()) { + if name_id == crate::kw::VOID { let saved_pos = self.pos; self.advance(); if self.is_special(b')') { @@ -2993,9 +2944,8 @@ impl Parser<'_> { if self.peek() != TokenType::Ident { return false; } - if let Some(name_id) = self.get_ident_id(self.current()) { - let name = self.str(name_id); - matches!(name, 
"_Static_assert" | "static_assert") + if let Some(id) = self.get_ident_id(self.current()) { + crate::kw::has_tag(id, crate::kw::ASSERT_KW) } else { false } @@ -3249,21 +3199,21 @@ impl Parser<'_> { // Parse pointer qualifiers while self.peek() == TokenType::Ident { - if let Some(name) = self.get_ident_name(self.current()) { - match name.as_str() { - "const" => { + if let Some(name_id) = self.get_ident_id(self.current()) { + match name_id { + crate::kw::CONST => { self.advance(); ptr_modifiers |= TypeModifiers::CONST; } - "volatile" => { + crate::kw::VOLATILE => { self.advance(); ptr_modifiers |= TypeModifiers::VOLATILE; } - "restrict" => { + crate::kw::RESTRICT => { self.advance(); ptr_modifiers |= TypeModifiers::RESTRICT; } - n if super::is_nullability_qualifier(n) => { + _ if super::is_nullability_qualifier(name_id) => { self.advance(); } _ => break, diff --git a/cc/rtlib.rs b/cc/rtlib.rs index 5d94e43b..a154b783 100644 --- a/cc/rtlib.rs +++ b/cc/rtlib.rs @@ -27,7 +27,6 @@ // use crate::target::{Arch, Os, Target}; -use crate::types::TypeKind; /// ABI used for Float16 parameters/returns in rtlib functions. /// @@ -65,19 +64,6 @@ impl<'a> RtlibNames<'a> { Self { target } } - /// Returns true if long double is the same as double on this platform. - /// On macOS aarch64 (Apple Silicon), long double is 64-bit (same as double). - pub fn longdouble_is_double(&self) -> bool { - self.target.arch == Arch::Aarch64 && self.target.os == Os::MacOS - } - - /// Returns true if Float16 operations need soft-float emulation. - /// On x86-64, there are no native FP16 arithmetic instructions. - /// On AArch64, native FP16 instructions exist (FADD, FSUB, etc.). - pub fn float16_needs_softfloat(&self) -> bool { - self.target.arch == Arch::X86_64 - } - /// Returns the ABI used by this rtlib for Float16 parameters/returns. 
/// /// This is an rtlib attribute - different runtime libraries have different @@ -145,430 +131,12 @@ impl<'a> RtlibNames<'a> { _ => None, } } - - // ======================================================================== - // Complex operations - // ======================================================================== - - /// Get function name for complex multiplication - /// - /// Complex multiply: result = __mulXc3(a_real, a_imag, b_real, b_imag) - pub fn complex_mul(&self, base_kind: TypeKind) -> &'static str { - match base_kind { - TypeKind::Float => "__mulsc3", - TypeKind::Double => "__muldc3", - TypeKind::LongDouble => { - if self.longdouble_is_double() { - "__muldc3" // macOS aarch64: long double == double - } else { - match self.target.arch { - Arch::X86_64 => "__mulxc3", // x87 80-bit - Arch::Aarch64 => "__multc3", // IEEE quad 128-bit - } - } - } - _ => "__muldc3", // fallback - } - } - - /// Get function name for complex division - /// - /// Complex divide: result = __divXc3(a_real, a_imag, b_real, b_imag) - /// Uses Smith's method for robust overflow handling. - pub fn complex_div(&self, base_kind: TypeKind) -> &'static str { - match base_kind { - TypeKind::Float => "__divsc3", - TypeKind::Double => "__divdc3", - TypeKind::LongDouble => { - if self.longdouble_is_double() { - "__divdc3" - } else { - match self.target.arch { - Arch::X86_64 => "__divxc3", - Arch::Aarch64 => "__divtc3", - } - } - } - _ => "__divdc3", // fallback - } - } - - // ======================================================================== - // Long double operations - // ======================================================================== - - /// Get function name for long double binary operation. - /// Returns None if native FP instructions should be used. - /// - /// Note: x86-64 uses native x87 FPU instructions for long double, - /// so rtlib is NOT used. Only AArch64/Linux needs rtlib for 128-bit IEEE quad. 
- pub fn longdouble_binop(&self, op: &str) -> Option<&'static str> { - if self.longdouble_is_double() { - return None; // macOS aarch64: long double == double - } - // x86-64 uses native x87 FPU - no soft-float rtlib available - if self.target.arch == Arch::X86_64 { - return None; - } - // AArch64/Linux: use tf (128-bit IEEE quad) functions - match op { - "add" => Some("__addtf3"), - "sub" => Some("__subtf3"), - "mul" => Some("__multf3"), - "div" => Some("__divtf3"), - _ => None, - } - } - - /// Get function name for long double negation. - /// Returns None if native FP instructions should be used. - pub fn longdouble_neg(&self) -> Option<&'static str> { - if self.longdouble_is_double() { - return None; // macOS aarch64: long double == double - } - // x86-64 uses native x87 FPU - if self.target.arch == Arch::X86_64 { - return None; - } - // AArch64/Linux: use tf function - Some("__negtf2") - } - - /// Get function name for long double comparison. - /// Returns None if native FP instructions should be used. 
- /// - /// The comparison functions return: - /// - __lttf2/__letf2: < 0 if a < b (or a <= b), >= 0 otherwise - /// - __gttf2/__getf2: > 0 if a > b (or a >= b), <= 0 otherwise - /// - __eqtf2: 0 if a == b, non-zero otherwise - /// - __netf2: 0 if a == b, non-zero otherwise (same as __eqtf2) - pub fn longdouble_cmp(&self, cmp_kind: &str) -> Option<&'static str> { - if self.longdouble_is_double() { - return None; // macOS aarch64: long double == double - } - // x86-64 uses native x87 FPU - if self.target.arch == Arch::X86_64 { - return None; - } - // AArch64/Linux: use tf comparison functions - match cmp_kind { - "lt" => Some("__lttf2"), - "le" => Some("__letf2"), - "gt" => Some("__gttf2"), - "ge" => Some("__getf2"), - "eq" => Some("__eqtf2"), - "ne" => Some("__netf2"), - _ => None, - } - } - - // ======================================================================== - // Long double conversions - // ======================================================================== - - /// Get function name for long double conversion. - /// Returns None if native FP instructions should be used. - /// - /// Note: x86-64 uses native x87 FPU instructions for long double, - /// so rtlib is NOT used. Only AArch64/Linux needs rtlib for 128-bit IEEE quad. 
- /// - /// Suffix convention: - /// - sf = single float (32-bit) - /// - df = double float (64-bit) - /// - tf = 128-bit IEEE quad (AArch64/Linux long double) - /// - si = signed 32-bit integer - /// - di = signed 64-bit integer - /// - usi = unsigned 32-bit integer - /// - udi = unsigned 64-bit integer - pub fn longdouble_convert(&self, from: &str, to: &str) -> Option<&'static str> { - if self.longdouble_is_double() { - return None; // macOS aarch64: long double == double - } - // x86-64 uses native x87 FPU - no soft-float rtlib available - if self.target.arch == Arch::X86_64 { - return None; - } - // AArch64/Linux: use tf conversion functions - match (from, to) { - // float <-> long double - ("sf", "tf") => Some("__extendsftf2"), - ("tf", "sf") => Some("__trunctfsf2"), - - // double <-> long double - ("df", "tf") => Some("__extenddftf2"), - ("tf", "df") => Some("__trunctfdf2"), - - // signed int32 <-> long double - ("si", "tf") => Some("__floatsitf"), - ("tf", "si") => Some("__fixtfsi"), - - // signed int64 <-> long double - ("di", "tf") => Some("__floatditf"), - ("tf", "di") => Some("__fixtfdi"), - - // unsigned int32 <-> long double - ("usi", "tf") => Some("__floatunsitf"), - ("tf", "usi") => Some("__fixunstfsi"), - - // unsigned int64 <-> long double - ("udi", "tf") => Some("__floatunditf"), - ("tf", "udi") => Some("__fixunstfdi"), - - _ => None, - } - } - - // ======================================================================== - // Int128 operations - // ======================================================================== - - /// Get function name for __int128 division/modulo. 
- /// - /// Suffix convention: ti = 128-bit integer - pub fn int128_divmod(&self, op: &str, unsigned: bool) -> &'static str { - match (op, unsigned) { - ("div", false) => "__divti3", - ("mod", false) => "__modti3", - ("div", true) => "__udivti3", - ("mod", true) => "__umodti3", - _ => panic!("invalid int128 divmod op: {}", op), - } - } - - /// Get function name for __int128 ↔ float/double conversions. - /// - /// Suffix convention: - /// - ti = signed int128, uti = unsigned int128 - /// - sf = float, df = double, xf = x87 extended, tf = quad, hf = half - pub fn int128_convert(&self, from: &str, to: &str) -> Option<&'static str> { - match (from, to) { - // int128 -> float types - ("ti", "sf") => Some("__floattisf"), - ("ti", "df") => Some("__floattidf"), - ("ti", "xf") => Some("__floattixf"), - ("ti", "tf") => Some("__floattitf"), - // Note: no hf (Float16) entries — __floattihf etc. don't exist in - // libgcc/compiler-rt. Float16↔Int128 goes through intermediate double. - - // uint128 -> float types - ("uti", "sf") => Some("__floatuntisf"), - ("uti", "df") => Some("__floatuntidf"), - ("uti", "xf") => Some("__floatuntixf"), - ("uti", "tf") => Some("__floatuntitf"), - - // float types -> int128 - ("sf", "ti") => Some("__fixsfti"), - ("df", "ti") => Some("__fixdfti"), - ("xf", "ti") => Some("__fixxfti"), - ("tf", "ti") => Some("__fixtfti"), - - // float types -> uint128 - ("sf", "uti") => Some("__fixunssfti"), - ("df", "uti") => Some("__fixunsdfti"), - ("xf", "uti") => Some("__fixunsxfti"), - ("tf", "uti") => Some("__fixunstfti"), - - _ => None, - } - } } #[cfg(test)] mod tests { use super::*; - #[test] - fn test_complex_mul_float() { - let target = Target::new(Arch::X86_64, Os::Linux); - let rtlib = RtlibNames::new(&target); - assert_eq!(rtlib.complex_mul(TypeKind::Float), "__mulsc3"); - } - - #[test] - fn test_complex_mul_double() { - let target = Target::new(Arch::X86_64, Os::Linux); - let rtlib = RtlibNames::new(&target); - 
assert_eq!(rtlib.complex_mul(TypeKind::Double), "__muldc3"); - } - - #[test] - fn test_complex_mul_longdouble_x86_64() { - let target = Target::new(Arch::X86_64, Os::Linux); - let rtlib = RtlibNames::new(&target); - assert_eq!(rtlib.complex_mul(TypeKind::LongDouble), "__mulxc3"); - } - - #[test] - fn test_complex_mul_longdouble_aarch64_linux() { - let target = Target::new(Arch::Aarch64, Os::Linux); - let rtlib = RtlibNames::new(&target); - assert_eq!(rtlib.complex_mul(TypeKind::LongDouble), "__multc3"); - } - - #[test] - fn test_complex_mul_longdouble_aarch64_macos() { - // On macOS aarch64, long double == double - let target = Target::new(Arch::Aarch64, Os::MacOS); - let rtlib = RtlibNames::new(&target); - assert_eq!(rtlib.complex_mul(TypeKind::LongDouble), "__muldc3"); - } - - #[test] - fn test_longdouble_is_double() { - let macos_arm = Target::new(Arch::Aarch64, Os::MacOS); - let linux_arm = Target::new(Arch::Aarch64, Os::Linux); - let linux_x86 = Target::new(Arch::X86_64, Os::Linux); - - assert!(RtlibNames::new(&macos_arm).longdouble_is_double()); - assert!(!RtlibNames::new(&linux_arm).longdouble_is_double()); - assert!(!RtlibNames::new(&linux_x86).longdouble_is_double()); - } - - #[test] - fn test_longdouble_binop_x86_64() { - // x86-64 uses native x87 FPU - no soft-float rtlib available - let target = Target::new(Arch::X86_64, Os::Linux); - let rtlib = RtlibNames::new(&target); - assert_eq!(rtlib.longdouble_binop("add"), None); - assert_eq!(rtlib.longdouble_binop("sub"), None); - assert_eq!(rtlib.longdouble_binop("mul"), None); - assert_eq!(rtlib.longdouble_binop("div"), None); - } - - #[test] - fn test_longdouble_binop_aarch64_linux() { - let target = Target::new(Arch::Aarch64, Os::Linux); - let rtlib = RtlibNames::new(&target); - assert_eq!(rtlib.longdouble_binop("add"), Some("__addtf3")); - assert_eq!(rtlib.longdouble_binop("sub"), Some("__subtf3")); - assert_eq!(rtlib.longdouble_binop("mul"), Some("__multf3")); - assert_eq!(rtlib.longdouble_binop("div"), 
Some("__divtf3")); - } - - #[test] - fn test_longdouble_binop_aarch64_macos() { - // On macOS aarch64, long double ops use native double instructions - let target = Target::new(Arch::Aarch64, Os::MacOS); - let rtlib = RtlibNames::new(&target); - assert_eq!(rtlib.longdouble_binop("add"), None); - assert_eq!(rtlib.longdouble_binop("mul"), None); - } - - #[test] - fn test_longdouble_cmp() { - let x86 = Target::new(Arch::X86_64, Os::Linux); - let arm_linux = Target::new(Arch::Aarch64, Os::Linux); - let arm_macos = Target::new(Arch::Aarch64, Os::MacOS); - - // x86-64 uses native x87 FPU - no soft-float rtlib - assert_eq!(RtlibNames::new(&x86).longdouble_cmp("lt"), None); - assert_eq!(RtlibNames::new(&x86).longdouble_cmp("eq"), None); - - // AArch64/Linux uses tf comparison functions - assert_eq!( - RtlibNames::new(&arm_linux).longdouble_cmp("lt"), - Some("__lttf2") - ); - assert_eq!( - RtlibNames::new(&arm_linux).longdouble_cmp("le"), - Some("__letf2") - ); - assert_eq!( - RtlibNames::new(&arm_linux).longdouble_cmp("gt"), - Some("__gttf2") - ); - assert_eq!( - RtlibNames::new(&arm_linux).longdouble_cmp("ge"), - Some("__getf2") - ); - assert_eq!( - RtlibNames::new(&arm_linux).longdouble_cmp("eq"), - Some("__eqtf2") - ); - assert_eq!( - RtlibNames::new(&arm_linux).longdouble_cmp("ne"), - Some("__netf2") - ); - - // macOS aarch64: long double == double, no rtlib needed - assert_eq!(RtlibNames::new(&arm_macos).longdouble_cmp("lt"), None); - } - - #[test] - fn test_longdouble_neg() { - let x86 = Target::new(Arch::X86_64, Os::Linux); - let arm_linux = Target::new(Arch::Aarch64, Os::Linux); - let arm_macos = Target::new(Arch::Aarch64, Os::MacOS); - - // x86-64 uses native x87 FPU - no soft-float rtlib - assert_eq!(RtlibNames::new(&x86).longdouble_neg(), None); - - // AArch64/Linux uses tf negation function - assert_eq!( - RtlibNames::new(&arm_linux).longdouble_neg(), - Some("__negtf2") - ); - - // macOS aarch64: long double == double, no rtlib needed - 
assert_eq!(RtlibNames::new(&arm_macos).longdouble_neg(), None); - } - - #[test] - fn test_longdouble_convert_x86_64() { - // x86-64 uses native x87 FPU - no soft-float rtlib available - let target = Target::new(Arch::X86_64, Os::Linux); - let rtlib = RtlibNames::new(&target); - - // All conversions return None - use native x87 - assert_eq!(rtlib.longdouble_convert("sf", "xf"), None); - assert_eq!(rtlib.longdouble_convert("xf", "sf"), None); - assert_eq!(rtlib.longdouble_convert("df", "xf"), None); - assert_eq!(rtlib.longdouble_convert("xf", "df"), None); - assert_eq!(rtlib.longdouble_convert("si", "xf"), None); - assert_eq!(rtlib.longdouble_convert("xf", "si"), None); - } - - #[test] - fn test_longdouble_convert_aarch64_linux() { - let target = Target::new(Arch::Aarch64, Os::Linux); - let rtlib = RtlibNames::new(&target); - - // float <-> long double - assert_eq!(rtlib.longdouble_convert("sf", "tf"), Some("__extendsftf2")); - assert_eq!(rtlib.longdouble_convert("tf", "sf"), Some("__trunctfsf2")); - - // double <-> long double - assert_eq!(rtlib.longdouble_convert("df", "tf"), Some("__extenddftf2")); - assert_eq!(rtlib.longdouble_convert("tf", "df"), Some("__trunctfdf2")); - - // signed int32 <-> long double - assert_eq!(rtlib.longdouble_convert("si", "tf"), Some("__floatsitf")); - assert_eq!(rtlib.longdouble_convert("tf", "si"), Some("__fixtfsi")); - - // signed int64 <-> long double - assert_eq!(rtlib.longdouble_convert("di", "tf"), Some("__floatditf")); - assert_eq!(rtlib.longdouble_convert("tf", "di"), Some("__fixtfdi")); - - // unsigned int32 <-> long double - assert_eq!(rtlib.longdouble_convert("usi", "tf"), Some("__floatunsitf")); - assert_eq!(rtlib.longdouble_convert("tf", "usi"), Some("__fixunstfsi")); - - // unsigned int64 <-> long double - assert_eq!(rtlib.longdouble_convert("udi", "tf"), Some("__floatunditf")); - assert_eq!(rtlib.longdouble_convert("tf", "udi"), Some("__fixunstfdi")); - } - - #[test] - fn test_longdouble_convert_aarch64_macos() { - // 
macOS aarch64: long double == double, no rtlib needed - let target = Target::new(Arch::Aarch64, Os::MacOS); - let rtlib = RtlibNames::new(&target); - - assert_eq!(rtlib.longdouble_convert("sf", "tf"), None); - assert_eq!(rtlib.longdouble_convert("df", "tf"), None); - assert_eq!(rtlib.longdouble_convert("si", "tf"), None); - } - // ======================================================================== // Float16 (_Float16) rtlib tests // ======================================================================== @@ -615,55 +183,4 @@ mod tests { } // ======================================================================== - // Int128 (__int128 / __uint128_t) rtlib tests - // ======================================================================== - - #[test] - fn test_int128_divmod() { - let target = Target::new(Arch::X86_64, Os::Linux); - let rtlib = RtlibNames::new(&target); - - assert_eq!(rtlib.int128_divmod("div", false), "__divti3"); - assert_eq!(rtlib.int128_divmod("mod", false), "__modti3"); - assert_eq!(rtlib.int128_divmod("div", true), "__udivti3"); - assert_eq!(rtlib.int128_divmod("mod", true), "__umodti3"); - } - - #[test] - fn test_int128_convert() { - let target = Target::new(Arch::X86_64, Os::Linux); - let rtlib = RtlibNames::new(&target); - - // int128 -> float types - assert_eq!(rtlib.int128_convert("ti", "sf"), Some("__floattisf")); - assert_eq!(rtlib.int128_convert("ti", "df"), Some("__floattidf")); - assert_eq!(rtlib.int128_convert("ti", "xf"), Some("__floattixf")); - assert_eq!(rtlib.int128_convert("ti", "tf"), Some("__floattitf")); - // Float16 (hf) ↔ Int128: no direct rtlib function exists - assert_eq!(rtlib.int128_convert("ti", "hf"), None); - - // uint128 -> float types - assert_eq!(rtlib.int128_convert("uti", "sf"), Some("__floatuntisf")); - assert_eq!(rtlib.int128_convert("uti", "df"), Some("__floatuntidf")); - assert_eq!(rtlib.int128_convert("uti", "xf"), Some("__floatuntixf")); - assert_eq!(rtlib.int128_convert("uti", "tf"), 
Some("__floatuntitf")); - assert_eq!(rtlib.int128_convert("uti", "hf"), None); - - // float types -> int128 - assert_eq!(rtlib.int128_convert("sf", "ti"), Some("__fixsfti")); - assert_eq!(rtlib.int128_convert("df", "ti"), Some("__fixdfti")); - assert_eq!(rtlib.int128_convert("xf", "ti"), Some("__fixxfti")); - assert_eq!(rtlib.int128_convert("tf", "ti"), Some("__fixtfti")); - assert_eq!(rtlib.int128_convert("hf", "ti"), None); - - // float types -> uint128 - assert_eq!(rtlib.int128_convert("sf", "uti"), Some("__fixunssfti")); - assert_eq!(rtlib.int128_convert("df", "uti"), Some("__fixunsdfti")); - assert_eq!(rtlib.int128_convert("xf", "uti"), Some("__fixunsxfti")); - assert_eq!(rtlib.int128_convert("tf", "uti"), Some("__fixunstfti")); - assert_eq!(rtlib.int128_convert("hf", "uti"), None); - - // Invalid conversion - assert_eq!(rtlib.int128_convert("ti", "invalid"), None); - } } diff --git a/cc/strings.rs b/cc/strings.rs index 333c0293..fdbb4416 100644 --- a/cc/strings.rs +++ b/cc/strings.rs @@ -84,7 +84,14 @@ impl StringTable { }; // Pre-intern empty string as ID 0 let empty_id = table.intern_internal(""); - debug_assert_eq!(empty_id, StringId::EMPTY); + assert_eq!(empty_id, StringId::EMPTY); + + // Pre-intern all keywords at deterministic slots 1..=KEYWORD_COUNT + for (i, &s) in crate::kw::KEYWORD_STRINGS.iter().enumerate() { + let id = table.intern_internal(s); + assert_eq!(id.0, (i + 1) as u32, "keyword '{}' got wrong ID", s); + } + table } diff --git a/cc/tests/codegen/misc.rs b/cc/tests/codegen/misc.rs index 6869b23f..4239a1b2 100644 --- a/cc/tests/codegen/misc.rs +++ b/cc/tests/codegen/misc.rs @@ -4685,3 +4685,303 @@ int main(void) { 0 ); } + +#[test] +fn codegen_float16_mega() { + let code = r#" +int main(void) { + /* Arithmetic */ + _Float16 a = 3.5f16; + _Float16 b = 2.0f16; + + _Float16 sum = a + b; + if ((float)sum < 5.49f || (float)sum > 5.51f) return 1; + + _Float16 diff = a - b; + if ((float)diff < 1.49f || (float)diff > 1.51f) return 2; + + _Float16 
prod = a * b; + if ((float)prod < 6.99f || (float)prod > 7.01f) return 3; + + _Float16 quot = a / b; + if ((float)quot < 1.74f || (float)quot > 1.76f) return 4; + + /* Negation */ + _Float16 neg = -a; + if ((float)neg > -3.49f || (float)neg < -3.51f) return 5; + + /* Comparisons */ + if (!(a == a)) return 10; + if (a != a) return 11; + if (!(a > b)) return 12; + if (!(b < a)) return 13; + if (!(a >= b)) return 14; + if (!(b <= a)) return 15; + if (a == b) return 16; + if (!(a != b)) return 17; + + /* Float16 <-> float conversions */ + float f = (float)a; + if (f < 3.49f || f > 3.51f) return 20; + + _Float16 from_float = (_Float16)f; + if ((float)from_float < 3.49f || (float)from_float > 3.51f) return 21; + + /* Float16 <-> double conversions */ + double d = (double)a; + if (d < 3.49 || d > 3.51) return 22; + + _Float16 from_double = (_Float16)d; + if ((float)from_double < 3.49f || (float)from_double > 3.51f) return 23; + + /* Float16 <-> int via float intermediary (avoids __fixhfsi) */ + float fa = (float)a; + int i = (int)fa; + if (i != 3) return 30; + + _Float16 from_int = (_Float16)(float)42; + if ((float)from_int < 41.9f || (float)from_int > 42.1f) return 31; + + /* Compound assignment */ + _Float16 ca = 10.0f16; + ca += 5.0f16; + if ((float)ca < 14.9f || (float)ca > 15.1f) return 40; + + ca -= 3.0f16; + if ((float)ca < 11.9f || (float)ca > 12.1f) return 41; + + ca *= 2.0f16; + if ((float)ca < 23.9f || (float)ca > 24.1f) return 42; + + ca /= 4.0f16; + if ((float)ca < 5.9f || (float)ca > 6.1f) return 43; + + return 0; +} +"#; + assert_eq!(compile_and_run("codegen_float16_mega", code, &[]), 0); +} + +/// Test that the AddC→AdcC carry chain survives optimization. +/// The optimizer must not insert flag-clobbering instructions between +/// the add-with-carry pair. This test exercises large int128 values +/// that require actual carry propagation. 
+#[test] +fn codegen_int128_carry_chain_optimized() { + let code = r#" +typedef __int128 int128; +typedef unsigned __int128 uint128; + +int main(void) { + /* Add with carry: build 0xFFFFFFFFFFFFFFFF via runtime to avoid + constant-folding into a negative i128 literal */ + unsigned long long max64 = ~0ULL; + uint128 a = (uint128)max64; + uint128 b = 1; + uint128 sum = a + b; + /* sum should be 0x0000000000000001_0000000000000000 */ + if ((unsigned long long)sum != 0) return 1; + if ((unsigned long long)(sum >> 64) != 1) return 2; + + /* Sub with borrow: 0x1_0000000000000000 - 1 must borrow */ + uint128 f = (uint128)1 << 64; + uint128 g = f - 1; + if ((unsigned long long)g != 0xFFFFFFFFFFFFFFFFULL) return 5; + if ((unsigned long long)(g >> 64) != 0) return 6; + + /* Negation of 1: should produce all-1s */ + int128 h = 1; + int128 neg_h = -h; + if (neg_h != -1) return 7; + + /* Multiply with carry: (2^63) * 2 = 2^64 (crosses lo/hi boundary) */ + unsigned long long half = 0x8000000000000000ULL; + uint128 i = (uint128)half; + uint128 j = 2; + uint128 prod = i * j; + if ((unsigned long long)prod != 0) return 8; + if ((unsigned long long)(prod >> 64) != 1) return 9; + + return 0; +} +"#; + assert_eq!( + compile_and_run_optimized("codegen_int128_carry_chain_optimized", code), + 0 + ); +} + +/// Test uint128 large constant sign-extension bug fix. +/// Verifies that (uint128)0xFFFFFFFFFFFFFFFFULL has hi=0, lo=max64. +#[test] +fn codegen_uint128_large_constant() { + let code = r#" +typedef unsigned __int128 uint128; + +int main(void) { + /* Build 0xFFFFFFFFFFFFFFFF via runtime to ensure it's not constant-folded + differently. 
*/ + unsigned long long max64 = ~0ULL; + uint128 val = (uint128)max64; + + /* lo half should be all 1s, hi half should be 0 */ + unsigned long long lo = (unsigned long long)val; + unsigned long long hi = (unsigned long long)(val >> 64); + if (lo != max64) return 1; + if (hi != 0) return 2; + + /* Zero */ + uint128 z = 0; + if ((unsigned long long)z != 0) return 3; + if ((unsigned long long)(z >> 64) != 0) return 4; + + /* Value 1 */ + uint128 one = 1; + if ((unsigned long long)one != 1) return 5; + if ((unsigned long long)(one >> 64) != 0) return 6; + + /* Constant that fills both halves */ + uint128 full = ((uint128)max64 << 64) | (uint128)max64; + if ((unsigned long long)full != max64) return 7; + if ((unsigned long long)(full >> 64) != max64) return 8; + + /* Value that fits in 64 bits exactly */ + uint128 mid = (uint128)0x123456789ABCDEF0ULL; + if ((unsigned long long)mid != 0x123456789ABCDEF0ULL) return 9; + if ((unsigned long long)(mid >> 64) != 0) return 10; + + return 0; +} +"#; + assert_eq!( + compile_and_run("codegen_uint128_large_constant", code, &[]), + 0 + ); +} + +/// Test int128 constant shifts (Shl/Lsr/Asr) decomposed in the mapping pass. 
+#[test] +fn codegen_int128_const_shifts() { + let code = r#" +typedef unsigned __int128 uint128; +typedef __int128 int128; + +int main(void) { + unsigned long long max64 = ~0ULL; + + /* ===== SHL tests (returns 1-19) ===== */ + { + uint128 a = 1; + + /* shift by 0: identity */ + uint128 r = a << 0; + if ((unsigned long long)r != 1) return 1; + if ((unsigned long long)(r >> 64) != 0) return 2; + + /* shift by 1 */ + r = a << 1; + if ((unsigned long long)r != 2) return 3; + + /* shift by 32 */ + r = a << 32; + if ((unsigned long long)r != (1ULL << 32)) return 4; + + /* shift by 63: crosses lo/hi boundary */ + r = a << 63; + if ((unsigned long long)r != (1ULL << 63)) return 5; + if ((unsigned long long)(r >> 64) != 0) return 6; + + /* shift by 64: lo moves to hi entirely */ + r = a << 64; + if ((unsigned long long)r != 0) return 7; + if ((unsigned long long)(r >> 64) != 1) return 8; + + /* shift by 65 */ + r = a << 65; + if ((unsigned long long)r != 0) return 9; + if ((unsigned long long)(r >> 64) != 2) return 10; + + /* shift by 127 */ + r = a << 127; + if ((unsigned long long)r != 0) return 11; + if ((unsigned long long)(r >> 64) != (1ULL << 63)) return 12; + } + + /* ===== LSR tests (returns 20-39) ===== */ + { + /* Start with hi bit set */ + uint128 a = (uint128)1 << 127; + + /* shift by 0: identity */ + uint128 r = a >> 0; + if ((unsigned long long)(r >> 64) != (1ULL << 63)) return 20; + + /* shift by 1 */ + r = a >> 1; + if ((unsigned long long)(r >> 64) != (1ULL << 62)) return 21; + + /* shift by 32 */ + r = a >> 32; + if ((unsigned long long)(r >> 64) != (1ULL << 31)) return 22; + + /* shift by 63 */ + r = a >> 63; + if ((unsigned long long)(r >> 64) != 1) return 23; + if ((unsigned long long)r != 0) return 24; + + /* shift by 64 */ + r = a >> 64; + if ((unsigned long long)(r >> 64) != 0) return 25; + if ((unsigned long long)r != (1ULL << 63)) return 26; + + /* shift by 65 */ + r = a >> 65; + if ((unsigned long long)r != (1ULL << 62)) return 27; + + /* shift 
by 127 */ + r = a >> 127; + if ((unsigned long long)r != 1) return 28; + if ((unsigned long long)(r >> 64) != 0) return 29; + } + + /* ===== ASR tests (returns 40-59) ===== */ + { + /* Negative int128 */ + int128 neg = -1; + + /* shift by 0: identity */ + int128 r = neg >> 0; + if (r != -1) return 40; + + /* shift by 1: still all 1s */ + r = neg >> 1; + if (r != -1) return 41; + + /* shift by 63 */ + r = neg >> 63; + if (r != -1) return 42; + + /* shift by 64 */ + r = neg >> 64; + if (r != -1) return 43; + + /* shift by 127 */ + r = neg >> 127; + if (r != -1) return 44; + + /* Negative with specific pattern: -2 = 0xFFF...FFFE */ + int128 neg2 = -2; + r = neg2 >> 1; + if (r != -1) return 45; + + /* Large positive shifted right arithmetically stays positive */ + int128 big = (int128)1 << 126; /* 0x40...0 */ + r = big >> 1; + if ((unsigned long long)(r >> 64) != (1ULL << 61)) return 46; + } + + return 0; +} +"#; + assert_eq!(compile_and_run("codegen_int128_const_shifts", code, &[]), 0); +} diff --git a/cc/token/lexer.rs b/cc/token/lexer.rs index 13f089f4..f387d6af 100644 --- a/cc/token/lexer.rs +++ b/cc/token/lexer.rs @@ -177,15 +177,17 @@ const HEX: u8 = 4; const EXP: u8 = 8; const DOT: u8 = 16; const VALID_SECOND: u8 = 32; // Can be second char of 2-char operator +const QUOTE: u8 = 64; // ' " +const COMMENT: u8 = 128; // / -/// Character classification table -fn char_class(c: u8) -> u8 { +/// Classify a single byte (mirrors the old match arms exactly, plus QUOTE and COMMENT). +const fn classify_char(c: u8) -> u8 { match c { b'0'..=b'9' => DIGIT | HEX, b'A'..=b'D' | b'F' => LETTER | HEX, - b'E' => LETTER | HEX | EXP, // E for exponent + b'E' => LETTER | HEX | EXP, b'G'..=b'O' => LETTER, - b'P' => LETTER | EXP, // P for hex float exponent + b'P' => LETTER | EXP, b'Q'..=b'Z' => LETTER, b'a'..=b'd' | b'f' => LETTER | HEX, b'e' => LETTER | HEX | EXP, @@ -195,10 +197,32 @@ fn char_class(c: u8) -> u8 { b'_' => LETTER, b'.' 
=> DOT | VALID_SECOND, b'=' | b'+' | b'-' | b'>' | b'<' | b'&' | b'|' | b'#' => VALID_SECOND, + b'\'' | b'"' => QUOTE, + b'/' => COMMENT, _ => 0, } } +/// Build the 256-byte lookup table at compile time. +const fn build_char_table() -> [u8; 256] { + let mut table = [0u8; 256]; + let mut i: usize = 0; + while i < 256 { + table[i] = classify_char(i as u8); + i += 1; + } + table +} + +/// Compile-time character classification table. +const CHAR_TABLE: [u8; 256] = build_char_table(); + +/// Character classification via table lookup. +#[inline(always)] +fn char_class(c: u8) -> u8 { + CHAR_TABLE[c as usize] +} + #[inline] fn is_digit(c: u8) -> bool { char_class(c) & DIGIT != 0 @@ -770,15 +794,12 @@ impl<'a, 'b> Tokenizer<'a, 'b> { } /// Get a special token (operator/punctuator) - fn get_special(&mut self, first: u8) -> Option { + fn get_special(&mut self, first: u8, class: u8) -> Option { let pos = self.pos(); // Check for string/char literals - if first == b'"' { - return Some(self.get_string_or_char(b'"', false)); - } - if first == b'\'' { - return Some(self.get_string_or_char(b'\'', false)); + if class & QUOTE != 0 { + return Some(self.get_string_or_char(first, false)); } // Check for .digit (floating point number) @@ -790,10 +811,10 @@ impl<'a, 'b> Tokenizer<'a, 'b> { } // Check for comments (mode-dependent) - match self.mode { - LexerMode::C => { - // C mode: // and /* */ comments - if first == b'/' { + if class & COMMENT != 0 { + match self.mode { + LexerMode::C => { + // C mode: // and /* */ comments let next = self.peekchar(); if next == b'/' as i32 { self.nextchar(); @@ -806,12 +827,9 @@ impl<'a, 'b> Tokenizer<'a, 'b> { return None; // No token, continue tokenizing } } - } - LexerMode::Assembly => { - // Assembly mode: do not treat ';' as a line comment delimiter. - // Different assemblers (e.g., GAS, Apple as) use ';' with different - // meanings (statement separator vs. comment). Comment handling is - // left to the assembler. 
+ LexerMode::Assembly => { + // Assembly mode: comment handling is left to the assembler. + } } } @@ -996,7 +1014,7 @@ impl<'a, 'b> Tokenizer<'a, 'b> { } } - self.get_special(c) + self.get_special(c, class) } /// Tokenize the entire input, returning all tokens @@ -2105,4 +2123,103 @@ mod tests { matches!(&tokens[1].value, TokenValue::Special(c) if *c == SpecialToken::HashHash as u32) ); } + + // ======================================================================== + // Character classification table tests + // ======================================================================== + + #[test] + fn test_char_table_digits() { + for c in b'0'..=b'9' { + let cl = char_class(c); + assert_eq!(cl & DIGIT, DIGIT, "digit {}", c as char); + assert_eq!(cl & HEX, HEX, "digit hex {}", c as char); + assert_eq!(cl & LETTER, 0, "digit not letter {}", c as char); + } + } + + #[test] + fn test_char_table_hex_letters() { + for c in [b'A', b'B', b'C', b'D', b'F', b'a', b'b', b'c', b'd', b'f'] { + let cl = char_class(c); + assert_eq!(cl & LETTER, LETTER, "hex letter {}", c as char); + assert_eq!(cl & HEX, HEX, "hex flag {}", c as char); + } + } + + #[test] + fn test_char_table_exp_letters() { + for c in [b'E', b'e', b'P', b'p'] { + let cl = char_class(c); + assert_eq!(cl & EXP, EXP, "exp {}", c as char); + assert_eq!(cl & LETTER, LETTER, "exp letter {}", c as char); + } + // E and e are also hex + assert_ne!(char_class(b'E') & HEX, 0); + assert_ne!(char_class(b'e') & HEX, 0); + // P and p are NOT hex + assert_eq!(char_class(b'P') & HEX, 0); + assert_eq!(char_class(b'p') & HEX, 0); + } + + #[test] + fn test_char_table_plain_letters() { + // Non-hex, non-exp uppercase + for c in b'G'..=b'O' { + let cl = char_class(c); + assert_eq!(cl, LETTER, "plain upper {}", c as char); + } + for c in b'Q'..=b'Z' { + let cl = char_class(c); + assert_eq!(cl, LETTER, "plain upper {}", c as char); + } + // Non-hex, non-exp lowercase + for c in b'g'..=b'o' { + let cl = char_class(c); + assert_eq!(cl, 
LETTER, "plain lower {}", c as char); + } + for c in b'q'..=b'z' { + let cl = char_class(c); + assert_eq!(cl, LETTER, "plain lower {}", c as char); + } + assert_eq!(char_class(b'_'), LETTER); + } + + #[test] + fn test_char_table_dot() { + let cl = char_class(b'.'); + assert_ne!(cl & DOT, 0); + assert_ne!(cl & VALID_SECOND, 0); + } + + #[test] + fn test_char_table_valid_second() { + for c in [b'=', b'+', b'-', b'>', b'<', b'&', b'|', b'#'] { + assert_ne!( + char_class(c) & VALID_SECOND, + 0, + "valid_second {}", + c as char + ); + } + } + + #[test] + fn test_char_table_quote() { + assert_ne!(char_class(b'\'') & QUOTE, 0); + assert_ne!(char_class(b'"') & QUOTE, 0); + } + + #[test] + fn test_char_table_comment() { + assert_ne!(char_class(b'/') & COMMENT, 0); + } + + #[test] + fn test_char_table_zero_for_others() { + // Control characters, whitespace, misc punctuation not in the table + for c in [0u8, b' ', b'\t', b'\n', b'@', b'$', b'`', b'~', 0x80, 0xFF] { + assert_eq!(char_class(c), 0, "zero for byte {:#x}", c); + } + } } diff --git a/cc/token/preprocess.rs b/cc/token/preprocess.rs index 6ec5a839..19d46836 100644 --- a/cc/token/preprocess.rs +++ b/cc/token/preprocess.rs @@ -895,11 +895,11 @@ impl<'a> Preprocessor<'a> { None => return, // Empty directive, ignore }; - // Get directive name - let directive_name = match &directive_token.typ { + // Get directive StringId + let directive_id = match &directive_token.typ { TokenType::Ident => { if let TokenValue::Ident(id) = &directive_token.value { - idents.get_opt(*id).map(|s| s.to_string()) + Some(*id) } else { None } @@ -907,8 +907,8 @@ impl<'a> Preprocessor<'a> { _ => None, }; - let directive = match directive_name { - Some(name) => name, + let directive_id = match directive_id { + Some(id) => id, None => { // Consume rest of line self.skip_to_eol(iter); @@ -916,27 +916,28 @@ impl<'a> Preprocessor<'a> { } }; - match directive.as_str() { - "define" => self.handle_define(iter, idents), - "undef" => 
self.handle_undef(iter, idents), - "ifdef" => self.handle_ifdef(iter, idents, hash_token.pos), - "ifndef" => self.handle_ifndef(iter, idents, hash_token.pos), - "if" => self.handle_if(iter, idents, hash_token.pos), - "elif" => self.handle_elif(iter, idents), - "else" => self.handle_else(iter), - "endif" => self.handle_endif(iter), - "include" => self.handle_include(iter, output, idents, hash_token, false), - "include_next" => self.handle_include(iter, output, idents, hash_token, true), - "error" => self.handle_error(iter, &hash_token.pos, idents), - "warning" => self.handle_warning(iter, &hash_token.pos, idents), - "pragma" => self.handle_pragma(iter, idents), - "line" => self.handle_line(iter, idents), + match directive_id { + crate::kw::DEFINE => self.handle_define(iter, idents), + crate::kw::UNDEF => self.handle_undef(iter, idents), + crate::kw::IFDEF => self.handle_ifdef(iter, idents, hash_token.pos), + crate::kw::IFNDEF => self.handle_ifndef(iter, idents, hash_token.pos), + crate::kw::IF => self.handle_if(iter, idents, hash_token.pos), + crate::kw::ELIF => self.handle_elif(iter, idents), + crate::kw::ELSE => self.handle_else(iter), + crate::kw::ENDIF => self.handle_endif(iter), + crate::kw::INCLUDE => self.handle_include(iter, output, idents, hash_token, false), + crate::kw::INCLUDE_NEXT => self.handle_include(iter, output, idents, hash_token, true), + crate::kw::PP_ERROR => self.handle_error(iter, &hash_token.pos, idents), + crate::kw::WARNING => self.handle_warning(iter, &hash_token.pos, idents), + crate::kw::PRAGMA => self.handle_pragma(iter, idents), + crate::kw::LINE => self.handle_line(iter, idents), _ => { // Unknown directive if !self.is_skipping() { + let name = idents.get_opt(directive_id).unwrap_or("unknown"); diag::warning( hash_token.pos, - &format!("unknown preprocessor directive #{}", directive), + &format!("unknown preprocessor directive #{}", name), ); } self.skip_to_eol(iter); @@ -3205,20 +3206,38 @@ impl<'a> Preprocessor<'a> { return false; 
} - // Get the argument name - let name = if let Some(tok) = args[0].first() { - self.token_to_string(tok, idents) + // Try to get the first token from the argument list + let first_tok = match args[0].first() { + Some(tok) => tok, + None => return false, + }; + + // Try to get StringId directly for O(1) tag-based lookup + let arg_id = if let TokenValue::Ident(id) = &first_tok.value { + Some(*id) } else { - return false; + None }; match builtin { - BuiltinMacro::HasAttribute => is_supported_attribute(&name), + BuiltinMacro::HasAttribute => { + if let Some(id) = arg_id { + crate::kw::has_tag(id, crate::kw::SUPPORTED_ATTR) + } else { + let name = self.token_to_string(first_tok, idents); + is_supported_attribute(&name) + } + } BuiltinMacro::HasBuiltin => { - // Use centralized builtin registry - crate::builtins::is_builtin(name.as_str()) + if let Some(id) = arg_id { + crate::builtins::is_builtin_id(id) + } else { + let name = self.token_to_string(first_tok, idents); + crate::builtins::is_builtin(name.as_str()) + } } BuiltinMacro::HasFeature | BuiltinMacro::HasExtension => { + let name = self.token_to_string(first_tok, idents); // Return true for features/extensions we implement matches!( name.as_str(),