From 5792b0db2f0a5e455b816a6c0bd00cec4623f279 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 22 Mar 2026 22:57:57 +0000 Subject: [PATCH 01/18] =?UTF-8?q?cc:=20add=20hwmap=20pass=20=E2=80=94=20ce?= =?UTF-8?q?ntralized=20target-specific=20IR=20lowering?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add cc/ir/hwmap.rs, a new pass running after linearize and before optimize. It inspects each IR instruction and decides whether the target handles it natively (Legal), via a runtime library call (LibCall), or via a comparison-expansion pattern (CmpLibCall). Phase 1: infrastructure — HwMapAction enum, TargetHwMap trait, X86_64HwMap/Aarch64HwMap, wired into pipeline. Phase 2a: int128 div/mod → LibCall (__divti3 etc.) on all targets. Phase 2b: int128↔float conversions → LibCall (__floattisf etc.). Phase 2c: long double ops on aarch64/Linux → LibCall/CmpLibCall (__addtf3, __negtf2, __lttf2+SetLt, __extendsftf2, etc.). x86_64 and macOS aarch64 remain Legal (native x87 / ld==double). Removes ~350 lines of rtlib decision logic from the linearizer and ~380 lines of dead RtlibNames methods + tests from rtlib.rs. 30 unit tests in hwmap.rs; all 159 integration tests pass. Co-Authored-By: Claude Opus 4.6 (1M context) --- cc/ir/hwmap.rs | 1462 ++++++++++++++++++++++++++++++++++++++++++++ cc/ir/linearize.rs | 359 ----------- cc/ir/mod.rs | 1 + cc/main.rs | 3 + cc/rtlib.rs | 375 ------------ 5 files changed, 1466 insertions(+), 734 deletions(-) create mode 100644 cc/ir/hwmap.rs diff --git a/cc/ir/hwmap.rs b/cc/ir/hwmap.rs new file mode 100644 index 00000000..6de27c5d --- /dev/null +++ b/cc/ir/hwmap.rs @@ -0,0 +1,1462 @@ +// +// Copyright (c) 2025-2026 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. 
+// SPDX-License-Identifier: MIT
+//
+// Hardware Mapping Pass for pcc C99 compiler
+//
+// This pass runs after SSA construction and before optimization.
+// It centralizes decisions about how each IR instruction maps to hardware:
+// - Legal: instruction is directly supported by the target
+// - LibCall: instruction must be lowered to a runtime library call
+// - CmpLibCall: comparison lowered to a library call plus an integer compare
+//
+
+use crate::abi::{get_abi_for_conv, CallingConv};
+use crate::ir::{CallAbiInfo, Function, Instruction, Module, Opcode, Pseudo, PseudoId};
+use crate::target::{Arch, Os, Target};
+use crate::types::{TypeId, TypeKind, TypeTable};
+
+/// Action the hwmap pass should take for a given instruction.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum HwMapAction {
+    /// Instruction is directly supported by the target hardware.
+    Legal,
+    /// Instruction must be replaced with a call to a runtime library function.
+    LibCall(&'static str),
+    /// Long double comparison: call rtlib, then compare result against 0.
+    /// Contains (rtlib_func_name, int_compare_opcode).
+    CmpLibCall(&'static str, Opcode),
+}
+
+/// Classify an int128 div/mod instruction into a LibCall action.
+fn map_int128_divmod(insn: &Instruction, types: &TypeTable) -> Option<HwMapAction> {
+    if insn.size != 128 {
+        return None;
+    }
+    let typ = insn.typ?;
+    if types.kind(typ) != TypeKind::Int128 {
+        return None;
+    }
+    match insn.op {
+        Opcode::DivS => Some(HwMapAction::LibCall("__divti3")),
+        Opcode::DivU => Some(HwMapAction::LibCall("__udivti3")),
+        Opcode::ModS => Some(HwMapAction::LibCall("__modti3")),
+        Opcode::ModU => Some(HwMapAction::LibCall("__umodti3")),
+        _ => None,
+    }
+}
+
+/// Get the rtlib suffix for a float type kind on the given target.
+fn float_suffix(kind: TypeKind, target: &Target) -> &'static str { + match kind { + TypeKind::Float => "sf", + TypeKind::Double => "df", + TypeKind::LongDouble => { + if target.arch == Arch::X86_64 { + "xf" + } else { + "tf" + } + } + _ => "", + } +} + +/// Classify an int128↔float conversion instruction into a LibCall action. +/// +/// Handles: +/// - SCvtF/UCvtF with src_size==128: int128 → float (e.g. __floattisf) +/// - FCvtS/FCvtU with size==128: float → int128 (e.g. __fixsfti) +fn map_int128_float_convert( + insn: &Instruction, + types: &TypeTable, + target: &Target, +) -> Option { + match insn.op { + // int128 → float + Opcode::SCvtF | Opcode::UCvtF => { + if insn.src_size != 128 { + return None; + } + let src_typ = insn.src_typ?; + if types.kind(src_typ) != TypeKind::Int128 { + return None; + } + let dst_typ = insn.typ?; + let dst_kind = types.kind(dst_typ); + let fsuf = float_suffix(dst_kind, target); + if fsuf.is_empty() { + return None; + } + let is_unsigned = insn.op == Opcode::UCvtF; + let func_name: &'static str = match (is_unsigned, fsuf) { + (false, "sf") => "__floattisf", + (false, "df") => "__floattidf", + (false, "xf") => "__floattixf", + (false, "tf") => "__floattitf", + (true, "sf") => "__floatuntisf", + (true, "df") => "__floatuntidf", + (true, "xf") => "__floatuntixf", + (true, "tf") => "__floatuntitf", + _ => return None, + }; + Some(HwMapAction::LibCall(func_name)) + } + // float → int128 + Opcode::FCvtS | Opcode::FCvtU => { + if insn.size != 128 { + return None; + } + let dst_typ = insn.typ?; + if types.kind(dst_typ) != TypeKind::Int128 { + return None; + } + let src_typ = insn.src_typ?; + let src_kind = types.kind(src_typ); + let fsuf = float_suffix(src_kind, target); + if fsuf.is_empty() { + return None; + } + let is_unsigned = insn.op == Opcode::FCvtU; + let func_name: &'static str = match (is_unsigned, fsuf) { + (false, "sf") => "__fixsfti", + (false, "df") => "__fixdfti", + (false, "xf") => "__fixxfti", + (false, "tf") => 
"__fixtfti", + (true, "sf") => "__fixunssfti", + (true, "df") => "__fixunsdfti", + (true, "xf") => "__fixunsxfti", + (true, "tf") => "__fixunstfti", + _ => return None, + }; + Some(HwMapAction::LibCall(func_name)) + } + _ => None, + } +} + +/// Check if long double needs soft-float rtlib on this target. +/// Returns true only for aarch64/Linux (128-bit IEEE quad). +/// x86_64 uses native x87; macOS aarch64 long double == double. +fn longdouble_needs_rtlib(target: &Target) -> bool { + target.arch == Arch::Aarch64 && target.os != Os::MacOS +} + +/// Classify a long double binary op (FAdd/FSub/FMul/FDiv) into a LibCall. +fn map_longdouble_binop( + insn: &Instruction, + types: &TypeTable, + target: &Target, +) -> Option { + if !longdouble_needs_rtlib(target) { + return None; + } + let typ = insn.typ?; + if types.kind(typ) != TypeKind::LongDouble { + return None; + } + match insn.op { + Opcode::FAdd => Some(HwMapAction::LibCall("__addtf3")), + Opcode::FSub => Some(HwMapAction::LibCall("__subtf3")), + Opcode::FMul => Some(HwMapAction::LibCall("__multf3")), + Opcode::FDiv => Some(HwMapAction::LibCall("__divtf3")), + _ => None, + } +} + +/// Classify a long double negation (FNeg) into a LibCall. +fn map_longdouble_neg( + insn: &Instruction, + types: &TypeTable, + target: &Target, +) -> Option { + if !longdouble_needs_rtlib(target) { + return None; + } + if insn.op != Opcode::FNeg { + return None; + } + let typ = insn.typ?; + if types.kind(typ) != TypeKind::LongDouble { + return None; + } + Some(HwMapAction::LibCall("__negtf2")) +} + +/// Classify a long double comparison (FCmpO*) into a CmpLibCall. +/// The rtlib cmp function returns int; caller must compare vs 0. +fn map_longdouble_cmp( + insn: &Instruction, + types: &TypeTable, + target: &Target, +) -> Option { + if !longdouble_needs_rtlib(target) { + return None; + } + // FCmpO* instructions don't store the operand type in insn.typ (that's the + // result type, which is int). Check src_typ or fall back to size check. 
+ // The comparison has size == size of the operands being compared. + // For long double on aarch64/Linux, size == 128. + if insn.size != 128 { + return None; + } + // Also check src_typ if available + if let Some(src_typ) = insn.src_typ { + if types.kind(src_typ) != TypeKind::LongDouble { + return None; + } + } + match insn.op { + Opcode::FCmpOLt => Some(HwMapAction::CmpLibCall("__lttf2", Opcode::SetLt)), + Opcode::FCmpOLe => Some(HwMapAction::CmpLibCall("__letf2", Opcode::SetLe)), + Opcode::FCmpOGt => Some(HwMapAction::CmpLibCall("__gttf2", Opcode::SetGt)), + Opcode::FCmpOGe => Some(HwMapAction::CmpLibCall("__getf2", Opcode::SetGe)), + Opcode::FCmpOEq => Some(HwMapAction::CmpLibCall("__eqtf2", Opcode::SetEq)), + Opcode::FCmpONe => Some(HwMapAction::CmpLibCall("__netf2", Opcode::SetNe)), + _ => None, + } +} + +/// Get the integer suffix for a long double↔int conversion. +fn int_suffix_for_longdouble(types: &TypeTable, int_type: TypeId) -> &'static str { + let size = types.size_bits(int_type); + let is_unsigned = types.is_unsigned(int_type); + match (is_unsigned, size <= 32) { + (true, true) => "usi", + (true, false) => "udi", + (false, true) => "si", + (false, false) => "di", + } +} + +/// Classify a long double conversion into a LibCall. 
+fn map_longdouble_convert( + insn: &Instruction, + types: &TypeTable, + target: &Target, +) -> Option { + if !longdouble_needs_rtlib(target) { + return None; + } + match insn.op { + // Float-to-float: longdouble ↔ float/double + Opcode::FCvtF => { + let dst_typ = insn.typ?; + let src_typ = insn.src_typ?; + let dst_kind = types.kind(dst_typ); + let src_kind = types.kind(src_typ); + if src_kind == TypeKind::LongDouble { + // longdouble → float/double + match dst_kind { + TypeKind::Float => Some(HwMapAction::LibCall("__trunctfsf2")), + TypeKind::Double => Some(HwMapAction::LibCall("__trunctfdf2")), + _ => None, + } + } else if dst_kind == TypeKind::LongDouble { + // float/double → longdouble + match src_kind { + TypeKind::Float => Some(HwMapAction::LibCall("__extendsftf2")), + TypeKind::Double => Some(HwMapAction::LibCall("__extenddftf2")), + _ => None, + } + } else { + None + } + } + // Int-to-float: int → longdouble + Opcode::SCvtF | Opcode::UCvtF => { + let dst_typ = insn.typ?; + if types.kind(dst_typ) != TypeKind::LongDouble { + return None; + } + let src_typ = insn.src_typ?; + // Skip int128 (handled by map_int128_float_convert) + if types.kind(src_typ) == TypeKind::Int128 { + return None; + } + let isuf = int_suffix_for_longdouble(types, src_typ); + let func_name: &'static str = match isuf { + "si" => "__floatsitf", + "di" => "__floatditf", + "usi" => "__floatunsitf", + "udi" => "__floatunditf", + _ => return None, + }; + Some(HwMapAction::LibCall(func_name)) + } + // Float-to-int: longdouble → int + Opcode::FCvtS | Opcode::FCvtU => { + let src_typ = insn.src_typ?; + if types.kind(src_typ) != TypeKind::LongDouble { + return None; + } + let dst_typ = insn.typ?; + // Skip int128 (handled by map_int128_float_convert) + if types.kind(dst_typ) == TypeKind::Int128 { + return None; + } + let isuf = int_suffix_for_longdouble(types, dst_typ); + let func_name: &'static str = match isuf { + "si" => "__fixtfsi", + "di" => "__fixtfdi", + "usi" => "__fixunstfsi", + "udi" => 
"__fixunstfdi",
+                _ => return None,
+            };
+            Some(HwMapAction::LibCall(func_name))
+        }
+        _ => None,
+    }
+}
+
+/// Common hardware mapping logic shared by all targets.
+fn map_common(insn: &Instruction, types: &TypeTable, target: &Target) -> Option<HwMapAction> {
+    if let Some(action) = map_int128_divmod(insn, types) {
+        return Some(action);
+    }
+    if let Some(action) = map_int128_float_convert(insn, types, target) {
+        return Some(action);
+    }
+    if let Some(action) = map_longdouble_binop(insn, types, target) {
+        return Some(action);
+    }
+    if let Some(action) = map_longdouble_neg(insn, types, target) {
+        return Some(action);
+    }
+    if let Some(action) = map_longdouble_cmp(insn, types, target) {
+        return Some(action);
+    }
+    map_longdouble_convert(insn, types, target)
+}
+
+/// Trait for target-specific hardware mapping decisions.
+pub trait TargetHwMap {
+    /// Determine how the target handles a given instruction.
+    fn map_op(&self, insn: &Instruction, types: &TypeTable) -> HwMapAction;
+}
+
+/// x86-64 hardware mapping.
+pub struct X86_64HwMap {
+    target: Target,
+}
+
+impl TargetHwMap for X86_64HwMap {
+    fn map_op(&self, insn: &Instruction, types: &TypeTable) -> HwMapAction {
+        if let Some(action) = map_common(insn, types, &self.target) {
+            return action;
+        }
+        HwMapAction::Legal
+    }
+}
+
+/// AArch64 hardware mapping.
+pub struct Aarch64HwMap {
+    target: Target,
+}
+
+impl TargetHwMap for Aarch64HwMap {
+    fn map_op(&self, insn: &Instruction, types: &TypeTable) -> HwMapAction {
+        if let Some(action) = map_common(insn, types, &self.target) {
+            return action;
+        }
+        HwMapAction::Legal
+    }
+}
+
+/// Get the appropriate TargetHwMap implementation for the given target.
+fn get_target_hwmap(target: &Target) -> Box<dyn TargetHwMap> {
+    match target.arch {
+        Arch::X86_64 => Box::new(X86_64HwMap {
+            target: target.clone(),
+        }),
+        Arch::Aarch64 => Box::new(Aarch64HwMap {
+            target: target.clone(),
+        }),
+    }
+}
+
+/// Build a runtime library call instruction replacing an IR instruction.
+/// +/// Creates a Call instruction with proper ABI classification using the +/// C calling convention, mirroring the linearizer's `emit_rtlib_call`. +fn build_rtlib_call( + insn: &Instruction, + func_name: &str, + arg_types: Vec, + ret_type: TypeId, + types: &TypeTable, + target: &Target, +) -> Instruction { + let target_pseudo = insn.target.expect("insn must have target"); + let ret_size = types.size_bits(ret_type); + + let arg_vals = insn.src.clone(); + + let abi = get_abi_for_conv(CallingConv::C, target); + let param_classes: Vec<_> = arg_types + .iter() + .map(|&t| abi.classify_param(t, types)) + .collect(); + let ret_class = abi.classify_return(ret_type, types); + let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); + + let mut call_insn = Instruction::call( + Some(target_pseudo), + func_name, + arg_vals, + arg_types, + ret_type, + ret_size, + ); + call_insn.abi_info = Some(call_abi_info); + call_insn.pos = insn.pos; + call_insn +} + +/// Parameters for building an explicit rtlib call. +struct RtlibCallParams<'a> { + target_pseudo: PseudoId, + arg_vals: &'a [PseudoId], + func_name: &'a str, + arg_types: Vec, + ret_type: TypeId, + pos: Option, +} + +/// Build a rtlib call with explicit parameters. +/// Used for expansion patterns where the call target differs from +/// the original instruction's target. 
+fn build_rtlib_call_explicit( + params: RtlibCallParams<'_>, + types: &TypeTable, + target: &Target, +) -> Instruction { + let ret_size = types.size_bits(params.ret_type); + + let abi = get_abi_for_conv(CallingConv::C, target); + let param_classes: Vec<_> = params + .arg_types + .iter() + .map(|&t| abi.classify_param(t, types)) + .collect(); + let ret_class = abi.classify_return(params.ret_type, types); + let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); + + let mut call_insn = Instruction::call( + Some(params.target_pseudo), + params.func_name, + params.arg_vals.to_vec(), + params.arg_types, + params.ret_type, + ret_size, + ); + call_insn.abi_info = Some(call_abi_info); + call_insn.pos = params.pos; + call_insn +} + +/// Build a rtlib call for a binop (both args same type as result). +fn build_binop_rtlib_call( + insn: &Instruction, + func_name: &str, + types: &TypeTable, + target: &Target, +) -> Instruction { + let ret_type = insn.typ.expect("binop must have type"); + let arg_types = vec![ret_type; insn.src.len()]; + build_rtlib_call(insn, func_name, arg_types, ret_type, types, target) +} + +/// Build a rtlib call for a conversion (single arg, different src/dst types). +fn build_convert_rtlib_call( + insn: &Instruction, + func_name: &str, + types: &TypeTable, + target: &Target, +) -> Instruction { + let ret_type = insn.typ.expect("conversion must have type"); + let src_type = insn.src_typ.expect("conversion must have src_typ"); + let arg_types = vec![src_type]; + build_rtlib_call(insn, func_name, arg_types, ret_type, types, target) +} + +/// Run the hardware mapping pass on a single function. 
+///
+/// Walks all instructions and transforms non-Legal ops:
+/// - LibCall: replace with a runtime library call instruction
+/// - CmpLibCall: replace with a library call followed by an integer compare
+pub fn hwmap_function(func: &mut Function, types: &TypeTable, target: &Target) {
+    let hwmap = get_target_hwmap(target);
+
+    for block_idx in 0..func.blocks.len() {
+        // Take the insns out of the block to avoid borrow conflicts
+        let old_insns = std::mem::take(&mut func.blocks[block_idx].insns);
+        let mut new_insns = Vec::with_capacity(old_insns.len());
+        let mut block_changed = false;
+
+        for insn in &old_insns {
+            match hwmap.map_op(insn, types) {
+                HwMapAction::Legal => {
+                    new_insns.push(insn.clone());
+                }
+                HwMapAction::LibCall(name) => {
+                    let call = match insn.op {
+                        Opcode::FCvtS
+                        | Opcode::FCvtU
+                        | Opcode::SCvtF
+                        | Opcode::UCvtF
+                        | Opcode::FCvtF => build_convert_rtlib_call(insn, name, types, target),
+                        Opcode::FNeg => {
+                            // Unary: single arg, same type as result
+                            build_binop_rtlib_call(insn, name, types, target)
+                        }
+                        _ => build_binop_rtlib_call(insn, name, types, target),
+                    };
+                    new_insns.push(call);
+                    block_changed = true;
+                }
+                HwMapAction::CmpLibCall(name, cmp_op) => {
+                    // Long double comparison expansion:
+                    // 1. Call rtlib cmp function (returns int)
+                    // 2. Compare result against 0
+                    let result_pseudo = insn.target.expect("cmp must have target");
+                    let int_type = types.int_id;
+                    let int_size = types.size_bits(int_type);
+                    let ld_type = types.longdouble_id;
+
+                    // Allocate pseudo for cmp call result
+                    let cmp_result = func.alloc_pseudo();
+                    func.add_pseudo(Pseudo::reg(cmp_result, cmp_result.0));
+
+                    // Allocate pseudo for zero constant
+                    let zero = func.create_const_pseudo(0);
+
+                    // Build the rtlib call: cmp_result = __lttf2(left, right)
+                    let arg_vals = insn.src.clone();
+                    let arg_types = vec![ld_type; arg_vals.len()];
+                    let call = build_rtlib_call_explicit(
+                        RtlibCallParams {
+                            target_pseudo: cmp_result,
+                            arg_vals: &arg_vals,
+                            func_name: name,
+                            arg_types,
+                            ret_type: int_type,
+                            pos: insn.pos,
+                        },
+                        types,
+                        target,
+                    );
+                    new_insns.push(call);
+
+                    // Build the int comparison: result = cmp_op(cmp_result, 0)
+                    new_insns.push(Instruction::binop(
+                        cmp_op,
+                        result_pseudo,
+                        cmp_result,
+                        zero,
+                        int_type,
+                        int_size,
+                    ));
+                    block_changed = true;
+                }
+            }
+        }
+
+        if block_changed {
+            func.blocks[block_idx].insns = new_insns;
+        } else {
+            // Put the original insns back (no change)
+            func.blocks[block_idx].insns = old_insns;
+        }
+    }
+}
+
+/// Run the hardware mapping pass on an entire module.
+pub fn hwmap_module(module: &mut Module, types: &TypeTable, target: &Target) { + for func in &mut module.functions { + hwmap_function(func, types, target); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ir::{BasicBlock, BasicBlockId, Instruction, Opcode, Pseudo, PseudoId}; + use crate::target::{Arch, Os, Target}; + use crate::types::TypeTable; + + fn make_test_func(types: &TypeTable) -> Function { + let mut func = Function::new("test_hwmap", types.int_id); + + func.add_pseudo(Pseudo::reg(PseudoId(0), 0)); + func.add_pseudo(Pseudo::reg(PseudoId(1), 1)); + func.add_pseudo(Pseudo::reg(PseudoId(2), 2)); + + let mut bb = BasicBlock::new(BasicBlockId(0)); + bb.add_insn(Instruction::new(Opcode::Entry)); + + // Integer arithmetic + bb.add_insn(Instruction::binop( + Opcode::Add, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + bb.add_insn(Instruction::binop( + Opcode::Sub, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + bb.add_insn(Instruction::binop( + Opcode::Mul, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + + // Bitwise + bb.add_insn(Instruction::binop( + Opcode::And, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + bb.add_insn(Instruction::binop( + Opcode::Or, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + bb.add_insn(Instruction::binop( + Opcode::Xor, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + + // Comparisons + bb.add_insn(Instruction::binop( + Opcode::SetEq, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + bb.add_insn(Instruction::binop( + Opcode::SetLt, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + + // Unary + bb.add_insn(Instruction::unop( + Opcode::Neg, + PseudoId(2), + PseudoId(0), + types.int_id, + 32, + )); + bb.add_insn(Instruction::unop( + Opcode::Not, + PseudoId(2), + PseudoId(0), + types.int_id, + 32, + )); + + // 
Float ops + bb.add_insn(Instruction::binop( + Opcode::FAdd, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.double_id, + 64, + )); + bb.add_insn(Instruction::unop( + Opcode::FNeg, + PseudoId(2), + PseudoId(0), + types.double_id, + 64, + )); + + // Conversions + let mut sext = Instruction::unop(Opcode::Sext, PseudoId(2), PseudoId(0), types.long_id, 64); + sext.src_size = 32; + bb.add_insn(sext); + let mut zext = + Instruction::unop(Opcode::Zext, PseudoId(2), PseudoId(0), types.ulong_id, 64); + zext.src_size = 32; + bb.add_insn(zext); + + // Memory + bb.add_insn(Instruction::load( + PseudoId(2), + PseudoId(0), + 0, + types.int_id, + 32, + )); + bb.add_insn(Instruction::store( + PseudoId(1), + PseudoId(0), + 0, + types.int_id, + 32, + )); + + // Terminator + bb.add_insn(Instruction::ret(Some(PseudoId(2)))); + + func.add_block(bb); + func.entry = BasicBlockId(0); + func + } + + #[test] + fn test_map_op_returns_legal_x86_64() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = X86_64HwMap { + target: target.clone(), + }; + + let func = make_test_func(&types); + for block in &func.blocks { + for insn in &block.insns { + assert_eq!( + hwmap.map_op(insn, &types), + HwMapAction::Legal, + "expected Legal for {} on x86_64", + insn.op + ); + } + } + } + + #[test] + fn test_map_op_returns_legal_aarch64() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = Aarch64HwMap { + target: target.clone(), + }; + + let func = make_test_func(&types); + for block in &func.blocks { + for insn in &block.insns { + assert_eq!( + hwmap.map_op(insn, &types), + HwMapAction::Legal, + "expected Legal for {} on aarch64", + insn.op + ); + } + } + } + + #[test] + fn test_hwmap_function_all_legal() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mut func = make_test_func(&types); + + hwmap_function(&mut func, &types, &target); + } + + 
#[test] + fn test_hwmap_function_aarch64() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let mut func = make_test_func(&types); + + hwmap_function(&mut func, &types, &target); + } + + #[test] + fn test_hwmap_module_empty() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mut module = Module::new(); + + hwmap_module(&mut module, &types, &target); + } + + #[test] + fn test_hwmap_module_with_functions() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + + let mut module = Module::new(); + module.add_function(make_test_func(&types)); + module.add_function(make_test_func(&types)); + + hwmap_module(&mut module, &types, &target); + } + + #[test] + fn test_hwmap_idempotent() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mut func = make_test_func(&types); + + hwmap_function(&mut func, &types, &target); + hwmap_function(&mut func, &types, &target); + } + + #[test] + fn test_hwmap_action_enum() { + assert_eq!(HwMapAction::Legal, HwMapAction::Legal); + assert_ne!(HwMapAction::Legal, HwMapAction::LibCall("__divti3")); + assert_eq!( + HwMapAction::LibCall("__divti3"), + HwMapAction::LibCall("__divti3") + ); + assert_ne!( + HwMapAction::LibCall("__divti3"), + HwMapAction::LibCall("__modti3") + ); + } + + #[test] + fn test_hwmap_all_targets() { + let targets = vec![ + Target::new(Arch::X86_64, Os::Linux), + Target::new(Arch::X86_64, Os::MacOS), + Target::new(Arch::X86_64, Os::FreeBSD), + Target::new(Arch::Aarch64, Os::Linux), + Target::new(Arch::Aarch64, Os::MacOS), + ]; + + for target in &targets { + let types = TypeTable::new(target); + let mut func = make_test_func(&types); + hwmap_function(&mut func, &types, target); + } + } + + // ======================================================================== + // Phase 2a: Int128 div/mod → LibCall tests + // 
======================================================================== + + #[test] + fn test_int128_divs_libcall_x86_64() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = X86_64HwMap { + target: target.clone(), + }; + + let insn = Instruction::binop( + Opcode::DivS, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int128_id, + 128, + ); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__divti3") + ); + } + + #[test] + fn test_int128_divu_libcall_x86_64() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = X86_64HwMap { + target: target.clone(), + }; + + let insn = Instruction::binop( + Opcode::DivU, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.uint128_id, + 128, + ); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__udivti3") + ); + } + + #[test] + fn test_int128_mods_libcall_x86_64() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = X86_64HwMap { + target: target.clone(), + }; + + let insn = Instruction::binop( + Opcode::ModS, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int128_id, + 128, + ); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__modti3") + ); + } + + #[test] + fn test_int128_modu_libcall_x86_64() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = X86_64HwMap { + target: target.clone(), + }; + + let insn = Instruction::binop( + Opcode::ModU, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.uint128_id, + 128, + ); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__umodti3") + ); + } + + #[test] + fn test_int128_divmod_libcall_aarch64() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = Aarch64HwMap { + target: target.clone(), + }; + + let insn = 
Instruction::binop( + Opcode::DivS, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int128_id, + 128, + ); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__divti3") + ); + + let insn = Instruction::binop( + Opcode::ModU, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.uint128_id, + 128, + ); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__umodti3") + ); + } + + #[test] + fn test_int32_div_stays_legal() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = X86_64HwMap { + target: target.clone(), + }; + + let insn = Instruction::binop( + Opcode::DivS, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + ); + assert_eq!(hwmap.map_op(&insn, &types), HwMapAction::Legal); + } + + #[test] + fn test_int128_add_stays_legal() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = X86_64HwMap { + target: target.clone(), + }; + + let insn = Instruction::binop( + Opcode::Add, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int128_id, + 128, + ); + assert_eq!(hwmap.map_op(&insn, &types), HwMapAction::Legal); + } + + #[test] + fn test_hwmap_transforms_int128_divmod() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + + let mut func = Function::new("test_divmod", types.int128_id); + func.add_pseudo(Pseudo::reg(PseudoId(0), 0)); + func.add_pseudo(Pseudo::reg(PseudoId(1), 1)); + func.add_pseudo(Pseudo::reg(PseudoId(2), 2)); + func.next_pseudo = 3; + + let mut bb = BasicBlock::new(BasicBlockId(0)); + bb.add_insn(Instruction::new(Opcode::Entry)); + bb.add_insn(Instruction::binop( + Opcode::DivS, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int128_id, + 128, + )); + bb.add_insn(Instruction::ret(Some(PseudoId(2)))); + func.add_block(bb); + func.entry = BasicBlockId(0); + + hwmap_function(&mut func, &types, &target); + + // After hwmap, the DivS should be 
replaced with a Call to __divti3 + let block = &func.blocks[0]; + assert_eq!(block.insns.len(), 3); // Entry, Call, Ret + assert_eq!(block.insns[1].op, Opcode::Call); + assert_eq!(block.insns[1].func_name.as_deref(), Some("__divti3")); + assert_eq!(block.insns[1].target, Some(PseudoId(2))); + assert_eq!(block.insns[1].src, vec![PseudoId(0), PseudoId(1)]); + assert!(block.insns[1].abi_info.is_some()); + } + + // ======================================================================== + // Phase 2b: Int128↔float conversion → LibCall tests + // ======================================================================== + + /// Helper to create a conversion instruction. + fn make_convert_insn( + op: Opcode, + dst_type: TypeId, + dst_size: u32, + src_type: TypeId, + src_size: u32, + ) -> Instruction { + let mut insn = Instruction::new(op) + .with_target(PseudoId(2)) + .with_src(PseudoId(0)) + .with_type_and_size(dst_type, dst_size); + insn.src_size = src_size; + insn.src_typ = Some(src_type); + insn + } + + #[test] + fn test_int128_to_float_libcall() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = X86_64HwMap { + target: target.clone(), + }; + + // signed int128 → float + let insn = make_convert_insn(Opcode::SCvtF, types.float_id, 32, types.int128_id, 128); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__floattisf") + ); + + // signed int128 → double + let insn = make_convert_insn(Opcode::SCvtF, types.double_id, 64, types.int128_id, 128); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__floattidf") + ); + + // unsigned int128 → float + let insn = make_convert_insn(Opcode::UCvtF, types.float_id, 32, types.uint128_id, 128); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__floatuntisf") + ); + + // unsigned int128 → double + let insn = make_convert_insn(Opcode::UCvtF, types.double_id, 64, types.uint128_id, 128); + assert_eq!( + hwmap.map_op(&insn, 
&types), + HwMapAction::LibCall("__floatuntidf") + ); + } + + #[test] + fn test_float_to_int128_libcall() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = X86_64HwMap { + target: target.clone(), + }; + + // float → signed int128 + let insn = make_convert_insn(Opcode::FCvtS, types.int128_id, 128, types.float_id, 32); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__fixsfti") + ); + + // double → signed int128 + let insn = make_convert_insn(Opcode::FCvtS, types.int128_id, 128, types.double_id, 64); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__fixdfti") + ); + + // float → unsigned int128 + let insn = make_convert_insn(Opcode::FCvtU, types.uint128_id, 128, types.float_id, 32); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__fixunssfti") + ); + + // double → unsigned int128 + let insn = make_convert_insn(Opcode::FCvtU, types.uint128_id, 128, types.double_id, 64); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__fixunsdfti") + ); + } + + #[test] + fn test_int128_longdouble_x86_64() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = X86_64HwMap { + target: target.clone(), + }; + + // x86-64 long double uses "xf" suffix + let insn = make_convert_insn(Opcode::SCvtF, types.longdouble_id, 80, types.int128_id, 128); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__floattixf") + ); + + let insn = make_convert_insn(Opcode::FCvtS, types.int128_id, 128, types.longdouble_id, 80); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__fixxfti") + ); + } + + #[test] + fn test_int128_longdouble_aarch64() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = Aarch64HwMap { + target: target.clone(), + }; + + // aarch64 long double uses "tf" suffix + let insn = make_convert_insn( + 
Opcode::SCvtF, + types.longdouble_id, + 128, + types.int128_id, + 128, + ); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__floattitf") + ); + } + + #[test] + fn test_non_int128_conversion_stays_legal() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = X86_64HwMap { + target: target.clone(), + }; + + // int32 → double should remain Legal + let insn = make_convert_insn(Opcode::SCvtF, types.double_id, 64, types.int_id, 32); + assert_eq!(hwmap.map_op(&insn, &types), HwMapAction::Legal); + + // double → int32 should remain Legal + let insn = make_convert_insn(Opcode::FCvtS, types.int_id, 32, types.double_id, 64); + assert_eq!(hwmap.map_op(&insn, &types), HwMapAction::Legal); + } + + #[test] + fn test_hwmap_transforms_int128_conversion() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + + let mut func = Function::new("test_convert", types.double_id); + func.add_pseudo(Pseudo::reg(PseudoId(0), 0)); + func.add_pseudo(Pseudo::reg(PseudoId(1), 1)); + func.next_pseudo = 2; + + let mut bb = BasicBlock::new(BasicBlockId(0)); + bb.add_insn(Instruction::new(Opcode::Entry)); + bb.add_insn(make_convert_insn( + Opcode::SCvtF, + types.double_id, + 64, + types.int128_id, + 128, + )); + bb.add_insn(Instruction::ret(Some(PseudoId(1)))); + func.add_block(bb); + func.entry = BasicBlockId(0); + + hwmap_function(&mut func, &types, &target); + + let block = &func.blocks[0]; + assert_eq!(block.insns.len(), 3); + assert_eq!(block.insns[1].op, Opcode::Call); + assert_eq!(block.insns[1].func_name.as_deref(), Some("__floattidf")); + assert!(block.insns[1].abi_info.is_some()); + } + + // ======================================================================== + // Phase 2c: Long double → LibCall/CmpLibCall tests (aarch64/Linux only) + // ======================================================================== + + #[test] + fn test_longdouble_binop_aarch64_linux() { + let 
target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = Aarch64HwMap { + target: target.clone(), + }; + + let insn = Instruction::binop( + Opcode::FAdd, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.longdouble_id, + 128, + ); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__addtf3") + ); + + let insn = Instruction::binop( + Opcode::FSub, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.longdouble_id, + 128, + ); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__subtf3") + ); + + let insn = Instruction::binop( + Opcode::FMul, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.longdouble_id, + 128, + ); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__multf3") + ); + + let insn = Instruction::binop( + Opcode::FDiv, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.longdouble_id, + 128, + ); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__divtf3") + ); + } + + #[test] + fn test_longdouble_binop_x86_64_legal() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = X86_64HwMap { + target: target.clone(), + }; + + // x86_64 long double (x87) is native — should be Legal + let insn = Instruction::binop( + Opcode::FAdd, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.longdouble_id, + 80, + ); + assert_eq!(hwmap.map_op(&insn, &types), HwMapAction::Legal); + } + + #[test] + fn test_longdouble_binop_macos_legal() { + let target = Target::new(Arch::Aarch64, Os::MacOS); + let types = TypeTable::new(&target); + let hwmap = Aarch64HwMap { + target: target.clone(), + }; + + // macOS aarch64: long double == double, native + let insn = Instruction::binop( + Opcode::FAdd, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.longdouble_id, + 64, + ); + assert_eq!(hwmap.map_op(&insn, &types), HwMapAction::Legal); + } + + #[test] + fn test_longdouble_neg_aarch64_linux() { + let target 
= Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = Aarch64HwMap { + target: target.clone(), + }; + + let insn = Instruction::unop( + Opcode::FNeg, + PseudoId(2), + PseudoId(0), + types.longdouble_id, + 128, + ); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__negtf2") + ); + } + + #[test] + fn test_longdouble_cmp_aarch64_linux() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = Aarch64HwMap { + target: target.clone(), + }; + + let mut insn = Instruction::binop( + Opcode::FCmpOLt, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 128, + ); + insn.src_typ = Some(types.longdouble_id); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::CmpLibCall("__lttf2", Opcode::SetLt) + ); + + let mut insn = Instruction::binop( + Opcode::FCmpOEq, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 128, + ); + insn.src_typ = Some(types.longdouble_id); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::CmpLibCall("__eqtf2", Opcode::SetEq) + ); + } + + #[test] + fn test_longdouble_convert_aarch64_linux() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = Aarch64HwMap { + target: target.clone(), + }; + + // float → longdouble + let insn = make_convert_insn(Opcode::FCvtF, types.longdouble_id, 128, types.float_id, 32); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__extendsftf2") + ); + + // longdouble → double + let insn = make_convert_insn(Opcode::FCvtF, types.double_id, 64, types.longdouble_id, 128); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__trunctfdf2") + ); + + // int32 → longdouble + let insn = make_convert_insn(Opcode::SCvtF, types.longdouble_id, 128, types.int_id, 32); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__floatsitf") + ); + + // longdouble → int64 + let insn = 
make_convert_insn(Opcode::FCvtS, types.long_id, 64, types.longdouble_id, 128); + assert_eq!( + hwmap.map_op(&insn, &types), + HwMapAction::LibCall("__fixtfdi") + ); + } + + #[test] + fn test_longdouble_convert_x86_64_legal() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let hwmap = X86_64HwMap { + target: target.clone(), + }; + + // x86_64 long double conversions are native + let insn = make_convert_insn(Opcode::FCvtF, types.longdouble_id, 80, types.float_id, 32); + assert_eq!(hwmap.map_op(&insn, &types), HwMapAction::Legal); + } +} diff --git a/cc/ir/linearize.rs b/cc/ir/linearize.rs index dd5d0f31..ab4b3206 100644 --- a/cc/ir/linearize.rs +++ b/cc/ir/linearize.rs @@ -4655,10 +4655,6 @@ impl<'a> Linearizer<'a> { let src_kind = self.types.kind(src_type); let dst_kind = self.types.kind(cast_type); - // Check for long double conversions that need rtlib - let src_is_longdouble = src_kind == TypeKind::LongDouble; - let dst_is_longdouble = dst_kind == TypeKind::LongDouble; - // Check for Float16 conversions that need rtlib let src_is_float16 = src_kind == TypeKind::Float16; let dst_is_float16 = dst_kind == TypeKind::Float16; @@ -4671,7 +4667,6 @@ impl<'a> Linearizer<'a> { }; // Skip Float16 handling for Int128 operands — no direct hf↔ti rtlib exists. - // These will fall through to the Int128 handler which converts via double. 
let src_is_int128 = src_kind == TypeKind::Int128; let dst_is_int128 = dst_kind == TypeKind::Int128; if (src_is_float16 || dst_is_float16) && !src_is_int128 && !dst_is_int128 { @@ -4739,115 +4734,6 @@ impl<'a> Linearizer<'a> { } } - if src_is_longdouble || dst_is_longdouble { - let rtlib = RtlibNames::new(self.target); - - let (from_suffix, to_suffix) = if src_is_longdouble && dst_is_float { - // Long double -> float/double - let to = match dst_kind { - TypeKind::Float => "sf", - TypeKind::Double => "df", - _ => "", - }; - (ld_suffix, to) - } else if dst_is_longdouble && src_is_float { - // Float/double -> long double - let from = match src_kind { - TypeKind::Float => "sf", - TypeKind::Double => "df", - _ => "", - }; - (from, ld_suffix) - } else if src_is_longdouble && !dst_is_float { - // Long double -> integer - let dst_size = self.types.size_bits(cast_type); - let is_unsigned = self.types.is_unsigned(cast_type); - let to = if is_unsigned { - if dst_size <= 32 { - "usi" - } else { - "udi" - } - } else if dst_size <= 32 { - "si" - } else { - "di" - }; - (ld_suffix, to) - } else if dst_is_longdouble && !src_is_float { - // Integer -> long double - let src_size = self.types.size_bits(src_type); - let is_unsigned = self.types.is_unsigned(src_type); - let from = if is_unsigned { - if src_size <= 32 { - "usi" - } else { - "udi" - } - } else if src_size <= 32 { - "si" - } else { - "di" - }; - (from, ld_suffix) - } else { - ("", "") - }; - - if !from_suffix.is_empty() && !to_suffix.is_empty() { - if let Some(func_name) = rtlib.longdouble_convert(from_suffix, to_suffix) { - return self.emit_longdouble_convert_call(func_name, src, src_type, cast_type); - } - } - // Fall through to native FP for macOS aarch64 (long double == double) - } - - // Check for Int128 <-> float conversions that need rtlib - let src_is_int128 = src_kind == TypeKind::Int128; - let dst_is_int128 = dst_kind == TypeKind::Int128; - - if (src_is_int128 && dst_is_float) || (dst_is_int128 && src_is_float) { 
- let rtlib = RtlibNames::new(self.target); - - let (from_suffix, to_suffix) = if src_is_int128 && dst_is_float { - // Int128 -> float type - let from = if self.types.is_unsigned(src_type) { - "uti" - } else { - "ti" - }; - let to = match dst_kind { - TypeKind::Float => "sf", - TypeKind::Double => "df", - TypeKind::Float16 => "hf", - TypeKind::LongDouble => ld_suffix, - _ => "", - }; - (from, to) - } else { - // float type -> Int128 - let from = match src_kind { - TypeKind::Float => "sf", - TypeKind::Double => "df", - TypeKind::Float16 => "hf", - TypeKind::LongDouble => ld_suffix, - _ => "", - }; - let to = if self.types.is_unsigned(cast_type) { - "uti" - } else { - "ti" - }; - (from, to) - }; - - if !from_suffix.is_empty() && !to_suffix.is_empty() { - if let Some(func_name) = rtlib.int128_convert(from_suffix, to_suffix) { - return self.emit_rtlib_call(func_name, vec![src], vec![src_type], cast_type); - } - } - } - if src_is_float && !dst_is_float { // Float to integer conversion let result = self.alloc_reg_pseudo(); @@ -7682,15 +7568,6 @@ impl<'a> Linearizer<'a> { let is_float = self.types.is_float(typ); let size = self.types.size_bits(typ); - // Check for long double negation that needs rtlib - if op == UnaryOp::Neg && self.types.kind(typ) == TypeKind::LongDouble { - let rtlib = RtlibNames::new(self.target); - if let Some(func_name) = rtlib.longdouble_neg() { - return self.emit_longdouble_neg_call(func_name, src, typ); - } - // Fall through to native FP for macOS aarch64 (long double == double) - } - // Check for Float16 negation that needs soft-float on x86-64 if op == UnaryOp::Neg && self.types.kind(typ) == TypeKind::Float16 { let rtlib = RtlibNames::new(self.target); @@ -7845,63 +7722,6 @@ impl<'a> Linearizer<'a> { // Fall through to native FP16 for AArch64 } - // Check if this is a long double operation that needs rtlib - if self.types.kind(operand_typ) == TypeKind::LongDouble { - let rtlib = RtlibNames::new(self.target); - - // Handle arithmetic operations - 
let arith_op = match op { - BinaryOp::Add => Some("add"), - BinaryOp::Sub => Some("sub"), - BinaryOp::Mul => Some("mul"), - BinaryOp::Div => Some("div"), - _ => None, - }; - - if let Some(op_str) = arith_op { - if let Some(func_name) = rtlib.longdouble_binop(op_str) { - return self.emit_longdouble_binop_call(func_name, left, right, operand_typ); - } - } - - // Handle comparison operations - let cmp_kind = match op { - BinaryOp::Lt => Some("lt"), - BinaryOp::Le => Some("le"), - BinaryOp::Gt => Some("gt"), - BinaryOp::Ge => Some("ge"), - BinaryOp::Eq => Some("eq"), - BinaryOp::Ne => Some("ne"), - _ => None, - }; - - if let Some(kind) = cmp_kind { - if let Some(func_name) = rtlib.longdouble_cmp(kind) { - return self.emit_longdouble_cmp_call(func_name, left, right, op); - } - } - // Fall through to native FP for macOS aarch64 (long double == double) - } - - // Check if this is an __int128 div/mod operation that needs rtlib - if self.types.kind(operand_typ) == TypeKind::Int128 - && matches!(op, BinaryOp::Div | BinaryOp::Mod) - { - let rtlib = RtlibNames::new(self.target); - let op_str = if matches!(op, BinaryOp::Div) { - "div" - } else { - "mod" - }; - let func_name = rtlib.int128_divmod(op_str, is_unsigned); - return self.emit_rtlib_call( - func_name, - vec![left, right], - vec![operand_typ, operand_typ], - result_typ, - ); - } - let result = self.alloc_pseudo(); let opcode = match op { @@ -8283,128 +8103,6 @@ impl<'a> Linearizer<'a> { result_sym } - /// Emit a call to a runtime library function with ABI classification. - /// - /// Handles ABI param/return classification, instruction creation, and emission. 
- fn emit_rtlib_call( - &mut self, - func_name: &str, - arg_vals: Vec, - arg_types: Vec, - ret_type: TypeId, - ) -> PseudoId { - let result = self.alloc_pseudo(); - let ret_size = self.types.size_bits(ret_type); - - let abi = get_abi_for_conv(self.current_calling_conv, self.target); - let param_classes: Vec<_> = arg_types - .iter() - .map(|&t| abi.classify_param(t, self.types)) - .collect(); - let ret_class = abi.classify_return(ret_type, self.types); - let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); - - let mut call_insn = Instruction::call( - Some(result), - func_name, - arg_vals, - arg_types, - ret_type, - ret_size, - ); - call_insn.abi_info = Some(call_abi_info); - self.emit(call_insn); - - result - } - - /// Emit a call to a long double rtlib function (__addxf3, __multf3, etc.). - /// - /// These functions take 2 long double args and return a long double. - fn emit_longdouble_binop_call( - &mut self, - func_name: &str, - left: PseudoId, - right: PseudoId, - longdouble_typ: TypeId, - ) -> PseudoId { - self.emit_rtlib_call( - func_name, - vec![left, right], - vec![longdouble_typ, longdouble_typ], - longdouble_typ, - ) - } - - /// Emit a call to a long double comparison rtlib function (__cmpxf2, __cmptf2). - /// - /// The comparison function returns an int: - /// - < 0 if a < b - /// - 0 if a == b - /// - > 0 if a > b - /// - /// We then compare that result with 0 to produce the final boolean. 
- fn emit_longdouble_cmp_call( - &mut self, - func_name: &str, - left: PseudoId, - right: PseudoId, - op: BinaryOp, - ) -> PseudoId { - let longdouble_typ = self.types.longdouble_id; - let int_typ = self.types.int_id; - let int_size = self.types.size_bits(int_typ); - - let arg_vals = vec![left, right]; - let arg_types = vec![longdouble_typ, longdouble_typ]; - - // Compute ABI classification for the call - let abi = get_abi_for_conv(self.current_calling_conv, self.target); - let param_classes: Vec<_> = arg_types - .iter() - .map(|&t| abi.classify_param(t, self.types)) - .collect(); - let ret_class = abi.classify_return(int_typ, self.types); - let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); - - // Call the comparison function - it returns an int - let cmp_result = self.alloc_pseudo(); - let mut call_insn = Instruction::call( - Some(cmp_result), - func_name, - arg_vals, - arg_types, - int_typ, - int_size, - ); - call_insn.abi_info = Some(call_abi_info); - self.emit(call_insn); - - // Now compare the result with 0 based on the original comparison op - let zero = self.emit_const(0, int_typ); - let result = self.alloc_pseudo(); - - // Map the original FP comparison to an int comparison - // cmp_result < 0 means a < b - // cmp_result == 0 means a == b - // cmp_result > 0 means a > b - let opcode = match op { - BinaryOp::Lt => Opcode::SetLt, // cmp_result < 0 - BinaryOp::Gt => Opcode::SetGt, // cmp_result > 0 - BinaryOp::Le => Opcode::SetLe, // cmp_result <= 0 - BinaryOp::Ge => Opcode::SetGe, // cmp_result >= 0 - BinaryOp::Eq => Opcode::SetEq, // cmp_result == 0 - BinaryOp::Ne => Opcode::SetNe, // cmp_result != 0 - _ => unreachable!("emit_longdouble_cmp_call called with non-comparison op"), - }; - - self.emit(Instruction::binop( - opcode, result, cmp_result, zero, int_typ, int_size, - )); - - result - } - // ======================================================================== // Float16 soft-float helpers (for x86-64) // 
======================================================================== @@ -8638,27 +8336,6 @@ impl<'a> Linearizer<'a> { result } - /// Emit a call to a long double conversion rtlib function. - /// - /// These functions convert between long double and other types: - /// - __extendsfxf2/__extendsftf2: float -> long double - /// - __extenddfxf2/__extenddftf2: double -> long double - /// - __truncxfsf2/__trunctfsf2: long double -> float - /// - __truncxfdf2/__trunctfdf2: long double -> double - /// - __floatsixf/__floatsitf: int32 -> long double - /// - __floatdixf/__floatditf: int64 -> long double - /// - __fixxfsi/__fixtfsi: long double -> int32 - /// - __fixxfdi/__fixtfdi: long double -> int64 - fn emit_longdouble_convert_call( - &mut self, - func_name: &str, - src: PseudoId, - src_type: TypeId, - dst_type: TypeId, - ) -> PseudoId { - self.emit_rtlib_call(func_name, vec![src], vec![src_type], dst_type) - } - /// Emit a call to a Float16 conversion rtlib function with correct ABI for x86-64. /// /// On x86-64 without native FP16, Float16 values are passed/returned as integers: @@ -8736,42 +8413,6 @@ impl<'a> Linearizer<'a> { result } - /// Emit a call to a long double negation rtlib function (__negxf2, __negtf2). 
- fn emit_longdouble_neg_call( - &mut self, - func_name: &str, - src: PseudoId, - longdouble_typ: TypeId, - ) -> PseudoId { - let result = self.alloc_pseudo(); - let size = self.types.size_bits(longdouble_typ); - - let arg_vals = vec![src]; - let arg_types = vec![longdouble_typ]; - - // Compute ABI classification for the call - let abi = get_abi_for_conv(self.current_calling_conv, self.target); - let param_classes: Vec<_> = arg_types - .iter() - .map(|&t| abi.classify_param(t, self.types)) - .collect(); - let ret_class = abi.classify_return(longdouble_typ, self.types); - let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); - - let mut call_insn = Instruction::call( - Some(result), - func_name, - arg_vals, - arg_types, - longdouble_typ, - size, - ); - call_insn.abi_info = Some(call_abi_info); - self.emit(call_insn); - - result - } - fn emit_compare_zero(&mut self, val: PseudoId, operand_typ: TypeId) -> PseudoId { let result = self.alloc_pseudo(); let zero = self.emit_const(0, operand_typ); diff --git a/cc/ir/mod.rs b/cc/ir/mod.rs index 8f6b9b27..49e75d44 100644 --- a/cc/ir/mod.rs +++ b/cc/ir/mod.rs @@ -16,6 +16,7 @@ pub mod dce; pub mod dominate; +pub mod hwmap; pub mod inline; pub mod instcombine; pub mod linearize; diff --git a/cc/main.rs b/cc/main.rs index 30dc7dcc..f0f4d975 100644 --- a/cc/main.rs +++ b/cc/main.rs @@ -433,6 +433,9 @@ fn process_file( .ok() .map(|p| p.to_string_lossy().to_string()); + // Hardware mapping pass — centralized target-specific lowering decisions + ir::hwmap::hwmap_module(&mut module, &types, target); + // Optimize IR (if enabled) if args.opt_level > 0 { opt::optimize_module(&mut module, args.opt_level); diff --git a/cc/rtlib.rs b/cc/rtlib.rs index 5d94e43b..692e40b9 100644 --- a/cc/rtlib.rs +++ b/cc/rtlib.rs @@ -192,185 +192,6 @@ impl<'a> RtlibNames<'a> { _ => "__divdc3", // fallback } } - - // ======================================================================== - // Long double operations - // 
======================================================================== - - /// Get function name for long double binary operation. - /// Returns None if native FP instructions should be used. - /// - /// Note: x86-64 uses native x87 FPU instructions for long double, - /// so rtlib is NOT used. Only AArch64/Linux needs rtlib for 128-bit IEEE quad. - pub fn longdouble_binop(&self, op: &str) -> Option<&'static str> { - if self.longdouble_is_double() { - return None; // macOS aarch64: long double == double - } - // x86-64 uses native x87 FPU - no soft-float rtlib available - if self.target.arch == Arch::X86_64 { - return None; - } - // AArch64/Linux: use tf (128-bit IEEE quad) functions - match op { - "add" => Some("__addtf3"), - "sub" => Some("__subtf3"), - "mul" => Some("__multf3"), - "div" => Some("__divtf3"), - _ => None, - } - } - - /// Get function name for long double negation. - /// Returns None if native FP instructions should be used. - pub fn longdouble_neg(&self) -> Option<&'static str> { - if self.longdouble_is_double() { - return None; // macOS aarch64: long double == double - } - // x86-64 uses native x87 FPU - if self.target.arch == Arch::X86_64 { - return None; - } - // AArch64/Linux: use tf function - Some("__negtf2") - } - - /// Get function name for long double comparison. - /// Returns None if native FP instructions should be used. 
- /// - /// The comparison functions return: - /// - __lttf2/__letf2: < 0 if a < b (or a <= b), >= 0 otherwise - /// - __gttf2/__getf2: > 0 if a > b (or a >= b), <= 0 otherwise - /// - __eqtf2: 0 if a == b, non-zero otherwise - /// - __netf2: 0 if a == b, non-zero otherwise (same as __eqtf2) - pub fn longdouble_cmp(&self, cmp_kind: &str) -> Option<&'static str> { - if self.longdouble_is_double() { - return None; // macOS aarch64: long double == double - } - // x86-64 uses native x87 FPU - if self.target.arch == Arch::X86_64 { - return None; - } - // AArch64/Linux: use tf comparison functions - match cmp_kind { - "lt" => Some("__lttf2"), - "le" => Some("__letf2"), - "gt" => Some("__gttf2"), - "ge" => Some("__getf2"), - "eq" => Some("__eqtf2"), - "ne" => Some("__netf2"), - _ => None, - } - } - - // ======================================================================== - // Long double conversions - // ======================================================================== - - /// Get function name for long double conversion. - /// Returns None if native FP instructions should be used. - /// - /// Note: x86-64 uses native x87 FPU instructions for long double, - /// so rtlib is NOT used. Only AArch64/Linux needs rtlib for 128-bit IEEE quad. 
- /// - /// Suffix convention: - /// - sf = single float (32-bit) - /// - df = double float (64-bit) - /// - tf = 128-bit IEEE quad (AArch64/Linux long double) - /// - si = signed 32-bit integer - /// - di = signed 64-bit integer - /// - usi = unsigned 32-bit integer - /// - udi = unsigned 64-bit integer - pub fn longdouble_convert(&self, from: &str, to: &str) -> Option<&'static str> { - if self.longdouble_is_double() { - return None; // macOS aarch64: long double == double - } - // x86-64 uses native x87 FPU - no soft-float rtlib available - if self.target.arch == Arch::X86_64 { - return None; - } - // AArch64/Linux: use tf conversion functions - match (from, to) { - // float <-> long double - ("sf", "tf") => Some("__extendsftf2"), - ("tf", "sf") => Some("__trunctfsf2"), - - // double <-> long double - ("df", "tf") => Some("__extenddftf2"), - ("tf", "df") => Some("__trunctfdf2"), - - // signed int32 <-> long double - ("si", "tf") => Some("__floatsitf"), - ("tf", "si") => Some("__fixtfsi"), - - // signed int64 <-> long double - ("di", "tf") => Some("__floatditf"), - ("tf", "di") => Some("__fixtfdi"), - - // unsigned int32 <-> long double - ("usi", "tf") => Some("__floatunsitf"), - ("tf", "usi") => Some("__fixunstfsi"), - - // unsigned int64 <-> long double - ("udi", "tf") => Some("__floatunditf"), - ("tf", "udi") => Some("__fixunstfdi"), - - _ => None, - } - } - - // ======================================================================== - // Int128 operations - // ======================================================================== - - /// Get function name for __int128 division/modulo. 
- /// - /// Suffix convention: ti = 128-bit integer - pub fn int128_divmod(&self, op: &str, unsigned: bool) -> &'static str { - match (op, unsigned) { - ("div", false) => "__divti3", - ("mod", false) => "__modti3", - ("div", true) => "__udivti3", - ("mod", true) => "__umodti3", - _ => panic!("invalid int128 divmod op: {}", op), - } - } - - /// Get function name for __int128 ↔ float/double conversions. - /// - /// Suffix convention: - /// - ti = signed int128, uti = unsigned int128 - /// - sf = float, df = double, xf = x87 extended, tf = quad, hf = half - pub fn int128_convert(&self, from: &str, to: &str) -> Option<&'static str> { - match (from, to) { - // int128 -> float types - ("ti", "sf") => Some("__floattisf"), - ("ti", "df") => Some("__floattidf"), - ("ti", "xf") => Some("__floattixf"), - ("ti", "tf") => Some("__floattitf"), - // Note: no hf (Float16) entries — __floattihf etc. don't exist in - // libgcc/compiler-rt. Float16↔Int128 goes through intermediate double. - - // uint128 -> float types - ("uti", "sf") => Some("__floatuntisf"), - ("uti", "df") => Some("__floatuntidf"), - ("uti", "xf") => Some("__floatuntixf"), - ("uti", "tf") => Some("__floatuntitf"), - - // float types -> int128 - ("sf", "ti") => Some("__fixsfti"), - ("df", "ti") => Some("__fixdfti"), - ("xf", "ti") => Some("__fixxfti"), - ("tf", "ti") => Some("__fixtfti"), - - // float types -> uint128 - ("sf", "uti") => Some("__fixunssfti"), - ("df", "uti") => Some("__fixunsdfti"), - ("xf", "uti") => Some("__fixunsxfti"), - ("tf", "uti") => Some("__fixunstfti"), - - _ => None, - } - } } #[cfg(test)] @@ -424,151 +245,6 @@ mod tests { assert!(!RtlibNames::new(&linux_x86).longdouble_is_double()); } - #[test] - fn test_longdouble_binop_x86_64() { - // x86-64 uses native x87 FPU - no soft-float rtlib available - let target = Target::new(Arch::X86_64, Os::Linux); - let rtlib = RtlibNames::new(&target); - assert_eq!(rtlib.longdouble_binop("add"), None); - assert_eq!(rtlib.longdouble_binop("sub"), None); - 
assert_eq!(rtlib.longdouble_binop("mul"), None); - assert_eq!(rtlib.longdouble_binop("div"), None); - } - - #[test] - fn test_longdouble_binop_aarch64_linux() { - let target = Target::new(Arch::Aarch64, Os::Linux); - let rtlib = RtlibNames::new(&target); - assert_eq!(rtlib.longdouble_binop("add"), Some("__addtf3")); - assert_eq!(rtlib.longdouble_binop("sub"), Some("__subtf3")); - assert_eq!(rtlib.longdouble_binop("mul"), Some("__multf3")); - assert_eq!(rtlib.longdouble_binop("div"), Some("__divtf3")); - } - - #[test] - fn test_longdouble_binop_aarch64_macos() { - // On macOS aarch64, long double ops use native double instructions - let target = Target::new(Arch::Aarch64, Os::MacOS); - let rtlib = RtlibNames::new(&target); - assert_eq!(rtlib.longdouble_binop("add"), None); - assert_eq!(rtlib.longdouble_binop("mul"), None); - } - - #[test] - fn test_longdouble_cmp() { - let x86 = Target::new(Arch::X86_64, Os::Linux); - let arm_linux = Target::new(Arch::Aarch64, Os::Linux); - let arm_macos = Target::new(Arch::Aarch64, Os::MacOS); - - // x86-64 uses native x87 FPU - no soft-float rtlib - assert_eq!(RtlibNames::new(&x86).longdouble_cmp("lt"), None); - assert_eq!(RtlibNames::new(&x86).longdouble_cmp("eq"), None); - - // AArch64/Linux uses tf comparison functions - assert_eq!( - RtlibNames::new(&arm_linux).longdouble_cmp("lt"), - Some("__lttf2") - ); - assert_eq!( - RtlibNames::new(&arm_linux).longdouble_cmp("le"), - Some("__letf2") - ); - assert_eq!( - RtlibNames::new(&arm_linux).longdouble_cmp("gt"), - Some("__gttf2") - ); - assert_eq!( - RtlibNames::new(&arm_linux).longdouble_cmp("ge"), - Some("__getf2") - ); - assert_eq!( - RtlibNames::new(&arm_linux).longdouble_cmp("eq"), - Some("__eqtf2") - ); - assert_eq!( - RtlibNames::new(&arm_linux).longdouble_cmp("ne"), - Some("__netf2") - ); - - // macOS aarch64: long double == double, no rtlib needed - assert_eq!(RtlibNames::new(&arm_macos).longdouble_cmp("lt"), None); - } - - #[test] - fn test_longdouble_neg() { - let x86 = 
Target::new(Arch::X86_64, Os::Linux); - let arm_linux = Target::new(Arch::Aarch64, Os::Linux); - let arm_macos = Target::new(Arch::Aarch64, Os::MacOS); - - // x86-64 uses native x87 FPU - no soft-float rtlib - assert_eq!(RtlibNames::new(&x86).longdouble_neg(), None); - - // AArch64/Linux uses tf negation function - assert_eq!( - RtlibNames::new(&arm_linux).longdouble_neg(), - Some("__negtf2") - ); - - // macOS aarch64: long double == double, no rtlib needed - assert_eq!(RtlibNames::new(&arm_macos).longdouble_neg(), None); - } - - #[test] - fn test_longdouble_convert_x86_64() { - // x86-64 uses native x87 FPU - no soft-float rtlib available - let target = Target::new(Arch::X86_64, Os::Linux); - let rtlib = RtlibNames::new(&target); - - // All conversions return None - use native x87 - assert_eq!(rtlib.longdouble_convert("sf", "xf"), None); - assert_eq!(rtlib.longdouble_convert("xf", "sf"), None); - assert_eq!(rtlib.longdouble_convert("df", "xf"), None); - assert_eq!(rtlib.longdouble_convert("xf", "df"), None); - assert_eq!(rtlib.longdouble_convert("si", "xf"), None); - assert_eq!(rtlib.longdouble_convert("xf", "si"), None); - } - - #[test] - fn test_longdouble_convert_aarch64_linux() { - let target = Target::new(Arch::Aarch64, Os::Linux); - let rtlib = RtlibNames::new(&target); - - // float <-> long double - assert_eq!(rtlib.longdouble_convert("sf", "tf"), Some("__extendsftf2")); - assert_eq!(rtlib.longdouble_convert("tf", "sf"), Some("__trunctfsf2")); - - // double <-> long double - assert_eq!(rtlib.longdouble_convert("df", "tf"), Some("__extenddftf2")); - assert_eq!(rtlib.longdouble_convert("tf", "df"), Some("__trunctfdf2")); - - // signed int32 <-> long double - assert_eq!(rtlib.longdouble_convert("si", "tf"), Some("__floatsitf")); - assert_eq!(rtlib.longdouble_convert("tf", "si"), Some("__fixtfsi")); - - // signed int64 <-> long double - assert_eq!(rtlib.longdouble_convert("di", "tf"), Some("__floatditf")); - assert_eq!(rtlib.longdouble_convert("tf", "di"), 
Some("__fixtfdi")); - - // unsigned int32 <-> long double - assert_eq!(rtlib.longdouble_convert("usi", "tf"), Some("__floatunsitf")); - assert_eq!(rtlib.longdouble_convert("tf", "usi"), Some("__fixunstfsi")); - - // unsigned int64 <-> long double - assert_eq!(rtlib.longdouble_convert("udi", "tf"), Some("__floatunditf")); - assert_eq!(rtlib.longdouble_convert("tf", "udi"), Some("__fixunstfdi")); - } - - #[test] - fn test_longdouble_convert_aarch64_macos() { - // macOS aarch64: long double == double, no rtlib needed - let target = Target::new(Arch::Aarch64, Os::MacOS); - let rtlib = RtlibNames::new(&target); - - assert_eq!(rtlib.longdouble_convert("sf", "tf"), None); - assert_eq!(rtlib.longdouble_convert("df", "tf"), None); - assert_eq!(rtlib.longdouble_convert("si", "tf"), None); - } - // ======================================================================== // Float16 (_Float16) rtlib tests // ======================================================================== @@ -615,55 +291,4 @@ mod tests { } // ======================================================================== - // Int128 (__int128 / __uint128_t) rtlib tests - // ======================================================================== - - #[test] - fn test_int128_divmod() { - let target = Target::new(Arch::X86_64, Os::Linux); - let rtlib = RtlibNames::new(&target); - - assert_eq!(rtlib.int128_divmod("div", false), "__divti3"); - assert_eq!(rtlib.int128_divmod("mod", false), "__modti3"); - assert_eq!(rtlib.int128_divmod("div", true), "__udivti3"); - assert_eq!(rtlib.int128_divmod("mod", true), "__umodti3"); - } - - #[test] - fn test_int128_convert() { - let target = Target::new(Arch::X86_64, Os::Linux); - let rtlib = RtlibNames::new(&target); - - // int128 -> float types - assert_eq!(rtlib.int128_convert("ti", "sf"), Some("__floattisf")); - assert_eq!(rtlib.int128_convert("ti", "df"), Some("__floattidf")); - assert_eq!(rtlib.int128_convert("ti", "xf"), Some("__floattixf")); - 
assert_eq!(rtlib.int128_convert("ti", "tf"), Some("__floattitf")); - // Float16 (hf) ↔ Int128: no direct rtlib function exists - assert_eq!(rtlib.int128_convert("ti", "hf"), None); - - // uint128 -> float types - assert_eq!(rtlib.int128_convert("uti", "sf"), Some("__floatuntisf")); - assert_eq!(rtlib.int128_convert("uti", "df"), Some("__floatuntidf")); - assert_eq!(rtlib.int128_convert("uti", "xf"), Some("__floatuntixf")); - assert_eq!(rtlib.int128_convert("uti", "tf"), Some("__floatuntitf")); - assert_eq!(rtlib.int128_convert("uti", "hf"), None); - - // float types -> int128 - assert_eq!(rtlib.int128_convert("sf", "ti"), Some("__fixsfti")); - assert_eq!(rtlib.int128_convert("df", "ti"), Some("__fixdfti")); - assert_eq!(rtlib.int128_convert("xf", "ti"), Some("__fixxfti")); - assert_eq!(rtlib.int128_convert("tf", "ti"), Some("__fixtfti")); - assert_eq!(rtlib.int128_convert("hf", "ti"), None); - - // float types -> uint128 - assert_eq!(rtlib.int128_convert("sf", "uti"), Some("__fixunssfti")); - assert_eq!(rtlib.int128_convert("df", "uti"), Some("__fixunsdfti")); - assert_eq!(rtlib.int128_convert("xf", "uti"), Some("__fixunsxfti")); - assert_eq!(rtlib.int128_convert("tf", "uti"), Some("__fixunstfti")); - assert_eq!(rtlib.int128_convert("hf", "uti"), None); - - // Invalid conversion - assert_eq!(rtlib.int128_convert("ti", "invalid"), None); - } } From 8303d8b191c3cf6f46bacfc6eec487af0ef03494 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 22 Mar 2026 23:03:00 +0000 Subject: [PATCH 02/18] cc: add --dump-ir= and Float16 integration test --dump-ir now accepts an optional stage name: post-linearize, post-hwmap, post-opt (default), post-lower, all Bare --dump-ir remains backward compatible (dumps post-opt). --dump-ir-func= filters output to a single function. Also add codegen_float16_mega integration test covering arithmetic, negation, comparisons, float/double conversions, and compound assignment for _Float16 on x86-64. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cc/main.rs | 55 ++++++++++++++++++++++++++--- cc/tests/codegen/misc.rs | 76 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+), 5 deletions(-) diff --git a/cc/main.rs b/cc/main.rs index f0f4d975..bd4f18ef 100644 --- a/cc/main.rs +++ b/cc/main.rs @@ -91,9 +91,16 @@ struct Args { #[arg(long = "dump-ast", help = gettext("Parse and dump AST to stdout"))] dump_ast: bool, - /// Dump IR (for debugging linearizer) - #[arg(long = "dump-ir", help = gettext("Linearize and dump IR to stdout"))] - dump_ir: bool, + /// Dump IR at a named stage (for debugging) + /// Stages: post-linearize, post-hwmap, post-opt, post-lower, all + /// Bare --dump-ir = post-opt (backward compat) + #[arg(long = "dump-ir", value_name = "stage", default_missing_value = "post-opt", + num_args = 0..=1, help = gettext("Dump IR at stage (post-linearize, post-hwmap, post-opt, post-lower, all)"))] + dump_ir: Option, + + /// Filter IR dumps to a specific function name + #[arg(long = "dump-ir-func", value_name = "name", help = gettext("Only dump IR for this function"))] + dump_ir_func: Option, /// Verbose output (include position info) #[arg( @@ -212,6 +219,33 @@ struct Args { unsupported_mflags: Vec, } +/// Check if IR should be dumped at the given stage. +fn should_dump_ir(args: &Args, stage: &str) -> bool { + match args.dump_ir.as_deref() { + Some("all") => true, + Some(s) => s == stage, + None => false, + } +} + +/// Dump IR at a named pipeline stage. 
+fn dump_ir(args: &Args, module: &ir::Module, stage: &str) { + if !should_dump_ir(args, stage) { + return; + } + eprintln!("=== {} ===", stage); + match &args.dump_ir_func { + Some(name) => { + for func in &module.functions { + if func.name == *name { + print!("{}", func); + } + } + } + None => print!("{}", module), + } +} + /// Print compilation statistics for capacity tuning fn print_stats( path: &str, @@ -433,22 +467,33 @@ fn process_file( .ok() .map(|p| p.to_string_lossy().to_string()); + dump_ir(args, &module, "post-linearize"); + // Hardware mapping pass — centralized target-specific lowering decisions ir::hwmap::hwmap_module(&mut module, &types, target); + dump_ir(args, &module, "post-hwmap"); + // Optimize IR (if enabled) if args.opt_level > 0 { opt::optimize_module(&mut module, args.opt_level); } - if args.dump_ir { - print!("{}", module); + dump_ir(args, &module, "post-opt"); + + if args.dump_ir.is_some() && !should_dump_ir(args, "post-lower") { return Ok(()); } // Lower IR (phi elimination, etc.) 
ir::lower::lower_module(&mut module); + dump_ir(args, &module, "post-lower"); + + if args.dump_ir.is_some() { + return Ok(()); + } + // Generate assembly let emit_unwind_tables = !args.no_unwind_tables; let pie_mode = pie_enabled(args, target); diff --git a/cc/tests/codegen/misc.rs b/cc/tests/codegen/misc.rs index 6869b23f..2466e5dd 100644 --- a/cc/tests/codegen/misc.rs +++ b/cc/tests/codegen/misc.rs @@ -4685,3 +4685,79 @@ int main(void) { 0 ); } + +#[test] +fn codegen_float16_mega() { + let code = r#" +int main(void) { + /* Arithmetic */ + _Float16 a = 3.5f16; + _Float16 b = 2.0f16; + + _Float16 sum = a + b; + if ((float)sum < 5.49f || (float)sum > 5.51f) return 1; + + _Float16 diff = a - b; + if ((float)diff < 1.49f || (float)diff > 1.51f) return 2; + + _Float16 prod = a * b; + if ((float)prod < 6.99f || (float)prod > 7.01f) return 3; + + _Float16 quot = a / b; + if ((float)quot < 1.74f || (float)quot > 1.76f) return 4; + + /* Negation */ + _Float16 neg = -a; + if ((float)neg > -3.49f || (float)neg < -3.51f) return 5; + + /* Comparisons */ + if (!(a == a)) return 10; + if (a != a) return 11; + if (!(a > b)) return 12; + if (!(b < a)) return 13; + if (!(a >= b)) return 14; + if (!(b <= a)) return 15; + if (a == b) return 16; + if (!(a != b)) return 17; + + /* Float16 <-> float conversions */ + float f = (float)a; + if (f < 3.49f || f > 3.51f) return 20; + + _Float16 from_float = (_Float16)f; + if ((float)from_float < 3.49f || (float)from_float > 3.51f) return 21; + + /* Float16 <-> double conversions */ + double d = (double)a; + if (d < 3.49 || d > 3.51) return 22; + + _Float16 from_double = (_Float16)d; + if ((float)from_double < 3.49f || (float)from_double > 3.51f) return 23; + + /* Float16 <-> int via float intermediary (avoids __fixhfsi) */ + float fa = (float)a; + int i = (int)fa; + if (i != 3) return 30; + + _Float16 from_int = (_Float16)(float)42; + if ((float)from_int < 41.9f || (float)from_int > 42.1f) return 31; + + /* Compound assignment */ + _Float16 
ca = 10.0f16; + ca += 5.0f16; + if ((float)ca < 14.9f || (float)ca > 15.1f) return 40; + + ca -= 3.0f16; + if ((float)ca < 11.9f || (float)ca > 12.1f) return 41; + + ca *= 2.0f16; + if ((float)ca < 23.9f || (float)ca > 24.1f) return 42; + + ca /= 4.0f16; + if ((float)ca < 5.9f || (float)ca > 6.1f) return 43; + + return 0; +} +"#; + assert_eq!(compile_and_run("codegen_float16_mega", code, &[]), 0); +} From 1dc2b7e61bc1e7c94561a44917d5badee1b342be Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 22 Mar 2026 23:08:56 +0000 Subject: [PATCH 03/18] cc: migrate Float16 soft-float from linearizer to hwmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On x86-64, Float16 arithmetic/negation/comparisons have no native hardware support. Previously the linearizer intercepted these and emitted a promote-operate-truncate pattern inline. Now the linearizer emits generic FAdd/FSub/FMul/FDiv/FNeg/FCmpO* with Float16 type, and the hwmap pass detects these on x86-64 and expands them: - Arithmetic: __extendhfsf2(left), __extendhfsf2(right), native float op, __truncsfhf2(result) - Negation: __extendhfsf2(src), native FNeg, __truncsfhf2(result) - Comparisons: __extendhfsf2(left), __extendhfsf2(right), native float compare (no truncate — result is int) AArch64 has native FP16 and remains Legal. Removes ~230 lines from linearizer (5 helper methods + 3 intercepts). Removes dead RtlibNames::float16_needs_softfloat(). 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cc/ir/hwmap.rs | 287 ++++++++++++++++++++++++++++++++++++++++- cc/ir/linearize.rs | 310 +-------------------------------------------- cc/rtlib.rs | 7 - 3 files changed, 292 insertions(+), 312 deletions(-) diff --git a/cc/ir/hwmap.rs b/cc/ir/hwmap.rs index 6de27c5d..53254b21 100644 --- a/cc/ir/hwmap.rs +++ b/cc/ir/hwmap.rs @@ -15,8 +15,9 @@ // - Expand: instruction must be expanded into multiple simpler instructions // -use crate::abi::{get_abi_for_conv, CallingConv}; +use crate::abi::{get_abi_for_conv, ArgClass, CallingConv}; use crate::ir::{CallAbiInfo, Function, Instruction, Module, Opcode, Pseudo, PseudoId}; +use crate::rtlib::{Float16Abi, RtlibNames}; use crate::target::{Arch, Os, Target}; use crate::types::{TypeId, TypeKind, TypeTable}; @@ -30,6 +31,8 @@ pub enum HwMapAction { /// Long double comparison: call rtlib, then compare result against 0. /// Contains (rtlib_func_name, int_compare_opcode). CmpLibCall(&'static str, Opcode), + /// Expand into multiple instructions (promote-operate-truncate, etc.). + Expand, } /// Classify an int128 div/mod instruction into a LibCall action. @@ -339,6 +342,77 @@ pub trait TargetHwMap { fn map_op(&self, insn: &Instruction, types: &TypeTable) -> HwMapAction; } +/// Classify a Float16 instruction that needs soft-float expansion on x86-64. +/// Returns Expand for arithmetic/neg (promote-operate-truncate) and +/// comparisons (promote-compare). Returns LibCall for conversions. 
+fn map_float16_softfloat(insn: &Instruction, types: &TypeTable) -> Option { + match insn.op { + // Arithmetic: promote-operate-truncate + Opcode::FAdd | Opcode::FSub | Opcode::FMul | Opcode::FDiv => { + let typ = insn.typ?; + if types.kind(typ) == TypeKind::Float16 { + return Some(HwMapAction::Expand); + } + None + } + // Negation: promote-negate-truncate + Opcode::FNeg => { + let typ = insn.typ?; + if types.kind(typ) == TypeKind::Float16 { + return Some(HwMapAction::Expand); + } + None + } + // Comparisons: promote both, compare (no truncate) + Opcode::FCmpOEq + | Opcode::FCmpONe + | Opcode::FCmpOLt + | Opcode::FCmpOLe + | Opcode::FCmpOGt + | Opcode::FCmpOGe => { + // Comparisons store the operand type in src_typ or check size + if let Some(src_typ) = insn.src_typ { + if types.kind(src_typ) == TypeKind::Float16 { + return Some(HwMapAction::Expand); + } + } + // Also check by size: Float16 operations have size==16 + if insn.size == 16 { + if let Some(typ) = insn.typ { + // Result type is int, but check if this is a float comparison + if matches!( + insn.op, + Opcode::FCmpOEq + | Opcode::FCmpONe + | Opcode::FCmpOLt + | Opcode::FCmpOLe + | Opcode::FCmpOGt + | Opcode::FCmpOGe + ) { + let _ = typ; + return Some(HwMapAction::Expand); + } + } + } + None + } + // Float16 conversions via rtlib + Opcode::FCvtF => { + let dst_typ = insn.typ?; + let src_typ = insn.src_typ?; + let dst_kind = types.kind(dst_typ); + let src_kind = types.kind(src_typ); + if src_kind == TypeKind::Float16 || dst_kind == TypeKind::Float16 { + // These are handled by the linearizer's Float16 conversion code + // which already emits the correct calls. No hwmap action needed + // because the linearizer emits Call instructions directly. + } + None + } + _ => None, + } +} + /// x86-64 hardware mapping. 
pub struct X86_64HwMap { target: Target, @@ -349,6 +423,9 @@ impl TargetHwMap for X86_64HwMap { if let Some(action) = map_common(insn, types, &self.target) { return action; } + if let Some(action) = map_float16_softfloat(insn, types) { + return action; + } HwMapAction::Legal } } @@ -484,6 +561,210 @@ fn build_convert_rtlib_call( build_rtlib_call(insn, func_name, arg_types, ret_type, types, target) } +/// Build a call to __extendhfsf2 (Float16 → float) with proper ABI. +fn build_f16_extend_call( + target_pseudo: PseudoId, + src: PseudoId, + pos: Option, + types: &TypeTable, + target: &Target, +) -> Instruction { + let rtlib = RtlibNames::new(target); + let f16_abi = rtlib.float16_abi(); + let float_type = types.float_id; + let float_size = types.size_bits(float_type); + + // Arg type: ushort for compiler-rt, Float16 for libgcc + let arg_type = if f16_abi == Float16Abi::Integer { + types.ushort_id + } else { + types.float16_id + }; + + // Arg classification + let param_class = if f16_abi == Float16Abi::Integer { + ArgClass::Extend { + signed: false, + size_bits: 16, + } + } else { + let abi = get_abi_for_conv(CallingConv::C, target); + abi.classify_param(types.float16_id, types) + }; + + // Return is always SSE float + let abi = get_abi_for_conv(CallingConv::C, target); + let ret_class = abi.classify_return(float_type, types); + + let call_abi_info = Box::new(CallAbiInfo::new(vec![param_class], ret_class)); + + let mut call_insn = Instruction::call( + Some(target_pseudo), + "__extendhfsf2", + vec![src], + vec![arg_type], + float_type, + float_size, + ); + call_insn.abi_info = Some(call_abi_info); + call_insn.pos = pos; + call_insn +} + +/// Build a call to __truncsfhf2 (float → Float16) with proper ABI. 
+fn build_f16_truncate_call( + target_pseudo: PseudoId, + src: PseudoId, + pos: Option, + types: &TypeTable, + target: &Target, +) -> Instruction { + let rtlib = RtlibNames::new(target); + let f16_abi = rtlib.float16_abi(); + let float_type = types.float_id; + let float16_type = types.float16_id; + let f16_size = types.size_bits(float16_type); + + // Arg is always SSE float + let abi = get_abi_for_conv(CallingConv::C, target); + let param_class = abi.classify_param(float_type, types); + + // Return: ushort for compiler-rt, Float16/SSE for libgcc + let ret_class = if f16_abi == Float16Abi::Integer { + ArgClass::Extend { + signed: false, + size_bits: 16, + } + } else { + abi.classify_return(float16_type, types) + }; + + let call_abi_info = Box::new(CallAbiInfo::new(vec![param_class], ret_class)); + + let mut call_insn = Instruction::call( + Some(target_pseudo), + "__truncsfhf2", + vec![src], + vec![float_type], + float16_type, + f16_size, + ); + call_insn.abi_info = Some(call_abi_info); + call_insn.pos = pos; + call_insn +} + +/// Expand a Float16 arithmetic/neg/cmp instruction using promote-operate-truncate. 
+fn expand_float16( + insn: &Instruction, + func: &mut Function, + new_insns: &mut Vec, + types: &TypeTable, + target: &Target, +) { + let float_type = types.float_id; + let float_size = types.size_bits(float_type); + let pos = insn.pos; + + match insn.op { + // Binary arithmetic: extend both → float op → truncate + Opcode::FAdd | Opcode::FSub | Opcode::FMul | Opcode::FDiv => { + let result = insn.target.expect("binop must have target"); + let left = insn.src[0]; + let right = insn.src[1]; + + // Extend left to float + let left_ext = func.alloc_pseudo(); + func.add_pseudo(Pseudo::reg(left_ext, left_ext.0)); + new_insns.push(build_f16_extend_call(left_ext, left, pos, types, target)); + + // Extend right to float + let right_ext = func.alloc_pseudo(); + func.add_pseudo(Pseudo::reg(right_ext, right_ext.0)); + new_insns.push(build_f16_extend_call(right_ext, right, pos, types, target)); + + // Native float operation + let float_result = func.alloc_pseudo(); + func.add_pseudo(Pseudo::reg(float_result, float_result.0)); + new_insns.push(Instruction::binop( + insn.op, + float_result, + left_ext, + right_ext, + float_type, + float_size, + )); + + // Truncate result back to Float16 + new_insns.push(build_f16_truncate_call( + result, + float_result, + pos, + types, + target, + )); + } + + // Negation: extend → negate → truncate + Opcode::FNeg => { + let result = insn.target.expect("unary must have target"); + let src = insn.src[0]; + + let src_ext = func.alloc_pseudo(); + func.add_pseudo(Pseudo::reg(src_ext, src_ext.0)); + new_insns.push(build_f16_extend_call(src_ext, src, pos, types, target)); + + let neg_result = func.alloc_pseudo(); + func.add_pseudo(Pseudo::reg(neg_result, neg_result.0)); + new_insns.push(Instruction::unop( + Opcode::FNeg, + neg_result, + src_ext, + float_type, + float_size, + )); + + new_insns.push(build_f16_truncate_call( + result, neg_result, pos, types, target, + )); + } + + // Comparison: extend both → float compare (no truncate) + Opcode::FCmpOEq + 
| Opcode::FCmpONe + | Opcode::FCmpOLt + | Opcode::FCmpOLe + | Opcode::FCmpOGt + | Opcode::FCmpOGe => { + let result = insn.target.expect("cmp must have target"); + let left = insn.src[0]; + let right = insn.src[1]; + + let left_ext = func.alloc_pseudo(); + func.add_pseudo(Pseudo::reg(left_ext, left_ext.0)); + new_insns.push(build_f16_extend_call(left_ext, left, pos, types, target)); + + let right_ext = func.alloc_pseudo(); + func.add_pseudo(Pseudo::reg(right_ext, right_ext.0)); + new_insns.push(build_f16_extend_call(right_ext, right, pos, types, target)); + + // Float comparison — result type is int, keep original type/size + let mut cmp = Instruction::binop( + insn.op, + result, + left_ext, + right_ext, + insn.typ.unwrap_or(types.int_id), + float_size, + ); + cmp.src_typ = Some(float_type); + new_insns.push(cmp); + } + + _ => panic!("expand_float16: unexpected opcode {}", insn.op), + } +} + /// Run the hardware mapping pass on a single function. /// /// Walks all instructions and transforms non-Legal ops: @@ -563,6 +844,10 @@ pub fn hwmap_function(func: &mut Function, types: &TypeTable, target: &Target) { )); block_changed = true; } + HwMapAction::Expand => { + expand_float16(insn, func, &mut new_insns, types, target); + block_changed = true; + } } } diff --git a/cc/ir/linearize.rs b/cc/ir/linearize.rs index ab4b3206..073972a1 100644 --- a/cc/ir/linearize.rs +++ b/cc/ir/linearize.rs @@ -15,7 +15,7 @@ use super::{ AsmConstraint, AsmData, BasicBlock, BasicBlockId, CallAbiInfo, Function, Initializer, Instruction, MemoryOrder, Module, Opcode, Pseudo, PseudoId, }; -use crate::abi::{get_abi_for_conv, ArgClass, CallingConv, RegClass}; +use crate::abi::{get_abi_for_conv, ArgClass, CallingConv}; use crate::diag::{error, get_all_stream_names, Position}; use crate::parse::ast::{ AsmOperand, AssignOp, BinaryOp, BlockItem, Declaration, Designator, Expr, ExprKind, @@ -7568,15 +7568,6 @@ impl<'a> Linearizer<'a> { let is_float = self.types.is_float(typ); let size = 
self.types.size_bits(typ); - // Check for Float16 negation that needs soft-float on x86-64 - if op == UnaryOp::Neg && self.types.kind(typ) == TypeKind::Float16 { - let rtlib = RtlibNames::new(self.target); - if rtlib.float16_needs_softfloat() { - return self.emit_float16_neg_via_float(src); - } - // Fall through to native FP16 for AArch64 - } - let result = self.alloc_pseudo(); let opcode = match op { @@ -7695,33 +7686,6 @@ impl<'a> Linearizer<'a> { let is_float = self.types.is_float(operand_typ); let is_unsigned = self.types.is_unsigned(operand_typ); - // Check if this is a Float16 operation that needs soft-float on x86-64 - if self.types.kind(operand_typ) == TypeKind::Float16 { - let rtlib = RtlibNames::new(self.target); - if rtlib.float16_needs_softfloat() { - // Arithmetic: promote-operate-truncate - if matches!( - op, - BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div - ) { - return self.emit_float16_arith_via_float(op, left, right); - } - // Comparisons: promote-compare (no truncate needed) - if matches!( - op, - BinaryOp::Lt - | BinaryOp::Le - | BinaryOp::Gt - | BinaryOp::Ge - | BinaryOp::Eq - | BinaryOp::Ne - ) { - return self.emit_float16_cmp_via_float(op, left, right); - } - } - // Fall through to native FP16 for AArch64 - } - let result = self.alloc_pseudo(); let opcode = match op { @@ -8103,239 +8067,6 @@ impl<'a> Linearizer<'a> { result_sym } - // ======================================================================== - // Float16 soft-float helpers (for x86-64) - // ======================================================================== - - /// Emit Float16 arithmetic using promote-operate-truncate pattern. - /// - /// For x86-64 without native FP16 instructions: - /// 1. Extend both operands from Float16 to float using __extendhfsf2 - /// 2. Perform native SSE arithmetic operation - /// 3. 
Truncate result back to Float16 using __truncsfhf2 - fn emit_float16_arith_via_float( - &mut self, - op: BinaryOp, - left: PseudoId, - right: PseudoId, - ) -> PseudoId { - let float16_typ = self.types.float16_id; - let float_typ = self.types.float_id; - let float_size = self.types.size_bits(float_typ); - - // 1. Extend left operand: Float16 -> float - let left_ext = self.emit_float16_extend_call(left, float16_typ, float_typ); - - // 2. Extend right operand: Float16 -> float - let right_ext = self.emit_float16_extend_call(right, float16_typ, float_typ); - - // 3. Perform native float operation - let result_float = self.alloc_reg_pseudo(); - let opcode = match op { - BinaryOp::Add => Opcode::FAdd, - BinaryOp::Sub => Opcode::FSub, - BinaryOp::Mul => Opcode::FMul, - BinaryOp::Div => Opcode::FDiv, - _ => unreachable!("emit_float16_arith_via_float called with non-arithmetic op"), - }; - self.emit(Instruction::binop( - opcode, - result_float, - left_ext, - right_ext, - float_typ, - float_size, - )); - - // 4. Truncate result: float -> Float16 - self.emit_float16_truncate_call(result_float, float_typ, float16_typ) - } - - /// Emit Float16 comparison using promote-compare pattern. - /// - /// For x86-64 without native FP16 instructions: - /// 1. Extend both operands from Float16 to float using __extendhfsf2 - /// 2. Perform native SSE comparison (no truncation needed - result is int) - fn emit_float16_cmp_via_float( - &mut self, - op: BinaryOp, - left: PseudoId, - right: PseudoId, - ) -> PseudoId { - let float16_typ = self.types.float16_id; - let float_typ = self.types.float_id; - let float_size = self.types.size_bits(float_typ); - - // 1. Extend left operand: Float16 -> float - let left_ext = self.emit_float16_extend_call(left, float16_typ, float_typ); - - // 2. Extend right operand: Float16 -> float - let right_ext = self.emit_float16_extend_call(right, float16_typ, float_typ); - - // 3. 
Perform native float comparison - let result = self.alloc_reg_pseudo(); - let opcode = match op { - BinaryOp::Lt => Opcode::FCmpOLt, - BinaryOp::Le => Opcode::FCmpOLe, - BinaryOp::Gt => Opcode::FCmpOGt, - BinaryOp::Ge => Opcode::FCmpOGe, - BinaryOp::Eq => Opcode::FCmpOEq, - BinaryOp::Ne => Opcode::FCmpONe, - _ => unreachable!("emit_float16_cmp_via_float called with non-comparison op"), - }; - self.emit(Instruction::binop( - opcode, result, left_ext, right_ext, float_typ, float_size, - )); - - result - } - - /// Emit Float16 negation using promote-negate-truncate pattern. - /// - /// For x86-64 without native FP16 instructions: - /// 1. Extend operand from Float16 to float using __extendhfsf2 - /// 2. Perform native SSE negation - /// 3. Truncate result back to Float16 using __truncsfhf2 - fn emit_float16_neg_via_float(&mut self, src: PseudoId) -> PseudoId { - let float16_typ = self.types.float16_id; - let float_typ = self.types.float_id; - let float_size = self.types.size_bits(float_typ); - - // 1. Extend operand: Float16 -> float - let src_ext = self.emit_float16_extend_call(src, float16_typ, float_typ); - - // 2. Perform native float negation - let result_float = self.alloc_reg_pseudo(); - self.emit(Instruction::unop( - Opcode::FNeg, - result_float, - src_ext, - float_typ, - float_size, - )); - - // 3. Truncate result: float -> Float16 - self.emit_float16_truncate_call(result_float, float_typ, float16_typ) - } - - /// Emit call to __extendhfsf2 to extend Float16 to float. 
- /// - /// The ABI for Float16 parameter depends on the runtime library: - /// - compiler-rt: Float16 passed as 16-bit integer in RDI - /// - libgcc: Float16 passed in XMM (SSE ABI) - fn emit_float16_extend_call( - &mut self, - src: PseudoId, - src_type: TypeId, - dst_type: TypeId, - ) -> PseudoId { - let result = self.alloc_pseudo(); - let dst_size = self.types.size_bits(dst_type); - - // Query rtlib for Float16 ABI - let rtlib = RtlibNames::new(self.target); - let f16_abi = rtlib.float16_abi(); - - // Set up argument type based on rtlib ABI - let arg_type_for_abi = if f16_abi == Float16Abi::Integer { - self.types.ushort_id // compiler-rt: use integer type - } else { - src_type // libgcc: use Float16 type (SSE ABI) - }; - - let arg_vals = vec![src]; - let arg_types = vec![arg_type_for_abi]; - - // Set up ABI classification based on rtlib - let param_classes = if f16_abi == Float16Abi::Integer { - // compiler-rt: argument is INTEGER (16-bit) - vec![ArgClass::Extend { - signed: false, - size_bits: 16, - }] - } else { - // libgcc: use standard SSE ABI - let abi = get_abi_for_conv(self.current_calling_conv, self.target); - vec![abi.classify_param(arg_type_for_abi, self.types)] - }; - - // Return is always SSE (float) - let ret_class = ArgClass::Direct { - classes: vec![RegClass::Sse], - size_bits: 32, - }; - let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); - - let mut call_insn = Instruction::call( - Some(result), - "__extendhfsf2", - arg_vals, - arg_types, - dst_type, - dst_size, - ); - call_insn.abi_info = Some(call_abi_info); - self.emit(call_insn); - - result - } - - /// Emit call to __truncsfhf2 to truncate float to Float16. 
- /// - /// The ABI for Float16 return depends on the runtime library: - /// - compiler-rt: Float16 returned as 16-bit integer in RAX - /// - libgcc: Float16 returned in XMM (SSE ABI) - fn emit_float16_truncate_call( - &mut self, - src: PseudoId, - src_type: TypeId, - dst_type: TypeId, - ) -> PseudoId { - let result = self.alloc_pseudo(); - let dst_size = self.types.size_bits(dst_type); - - let arg_vals = vec![src]; - let arg_types = vec![src_type]; - - // Query rtlib for Float16 ABI - let rtlib = RtlibNames::new(self.target); - let f16_abi = rtlib.float16_abi(); - - // Argument is always SSE (float) - let param_classes = vec![ArgClass::Direct { - classes: vec![RegClass::Sse], - size_bits: 32, - }]; - - // Return type depends on rtlib - let ret_class = if f16_abi == Float16Abi::Integer { - // compiler-rt: return is INTEGER (16-bit in RAX) - ArgClass::Extend { - signed: false, - size_bits: 16, - } - } else { - // libgcc: return is SSE (Float16 in XMM) - let abi = get_abi_for_conv(self.current_calling_conv, self.target); - abi.classify_return(dst_type, self.types) - }; - - let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); - - let mut call_insn = Instruction::call( - Some(result), - "__truncsfhf2", - arg_vals, - arg_types, - dst_type, // Keep Float16 type for proper subsequent handling - dst_size, - ); - call_insn.abi_info = Some(call_abi_info); - self.emit(call_insn); - - result - } - /// Emit a call to a Float16 conversion rtlib function with correct ABI for x86-64. 
/// /// On x86-64 without native FP16, Float16 values are passed/returned as integers: @@ -8717,40 +8448,11 @@ impl<'a> Linearizer<'a> { target_typ }; - // Check for Float16 soft-float on x86-64 - let needs_float16_softfloat = self.types.kind(target_typ) == TypeKind::Float16 - && RtlibNames::new(self.target).float16_needs_softfloat(); - - if needs_float16_softfloat { - // Use promote-operate-truncate pattern for Float16 - match op { - AssignOp::AddAssign => { - self.emit_float16_arith_via_float(BinaryOp::Add, lhs, rhs) - } - AssignOp::SubAssign => { - self.emit_float16_arith_via_float(BinaryOp::Sub, lhs, rhs) - } - AssignOp::MulAssign => { - self.emit_float16_arith_via_float(BinaryOp::Mul, lhs, rhs) - } - AssignOp::DivAssign => { - self.emit_float16_arith_via_float(BinaryOp::Div, lhs, rhs) - } - _ => { - let arith_size = self.types.size_bits(arith_type); - self.emit(Instruction::binop( - opcode, result, lhs, rhs, arith_type, arith_size, - )); - result - } - } - } else { - let arith_size = self.types.size_bits(arith_type); - self.emit(Instruction::binop( - opcode, result, lhs, rhs, arith_type, arith_size, - )); - result - } + let arith_size = self.types.size_bits(arith_type); + self.emit(Instruction::binop( + opcode, result, lhs, rhs, arith_type, arith_size, + )); + result } }; diff --git a/cc/rtlib.rs b/cc/rtlib.rs index 692e40b9..6a673479 100644 --- a/cc/rtlib.rs +++ b/cc/rtlib.rs @@ -71,13 +71,6 @@ impl<'a> RtlibNames<'a> { self.target.arch == Arch::Aarch64 && self.target.os == Os::MacOS } - /// Returns true if Float16 operations need soft-float emulation. - /// On x86-64, there are no native FP16 arithmetic instructions. - /// On AArch64, native FP16 instructions exist (FADD, FSUB, etc.). - pub fn float16_needs_softfloat(&self) -> bool { - self.target.arch == Arch::X86_64 - } - /// Returns the ABI used by this rtlib for Float16 parameters/returns. 
/// /// This is an rtlib attribute - different runtime libraries have different From 2fa9462d763ac4191e32a679dfa3e9aaaeb7a5a4 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 22 Mar 2026 23:16:07 +0000 Subject: [PATCH 04/18] cc: move complex mul/div rtlib name selection to hwmap complex_mul_name() and complex_div_name() centralize the target-dependent function name selection (__mulsc3/__muldc3/__mulxc3/ __multc3 etc.) in hwmap.rs. The linearizer calls these instead of RtlibNames methods. Removes complex_mul(), complex_div(), longdouble_is_double() from RtlibNames and their tests. Adds 6 unit tests in hwmap. Co-Authored-By: Claude Opus 4.6 (1M context) --- cc/ir/hwmap.rs | 102 +++++++++++++++++++++++++++++++++++++++++++++ cc/ir/linearize.rs | 8 +--- cc/rtlib.rs | 101 -------------------------------------------- 3 files changed, 104 insertions(+), 107 deletions(-) diff --git a/cc/ir/hwmap.rs b/cc/ir/hwmap.rs index 53254b21..b838996c 100644 --- a/cc/ir/hwmap.rs +++ b/cc/ir/hwmap.rs @@ -336,6 +336,50 @@ fn map_common(insn: &Instruction, types: &TypeTable, target: &Target) -> Option< map_longdouble_convert(insn, types, target) } +// ============================================================================ +// Complex number rtlib name selection +// ============================================================================ + +/// Get the rtlib function name for complex multiplication. +/// Target-dependent for long double (x87 vs IEEE quad). +pub fn complex_mul_name(base_kind: TypeKind, target: &Target) -> &'static str { + match base_kind { + TypeKind::Float => "__mulsc3", + TypeKind::Double => "__muldc3", + TypeKind::LongDouble => { + if target.arch == Arch::Aarch64 && target.os == Os::MacOS { + "__muldc3" // macOS aarch64: long double == double + } else { + match target.arch { + Arch::X86_64 => "__mulxc3", + Arch::Aarch64 => "__multc3", + } + } + } + _ => "__muldc3", + } +} + +/// Get the rtlib function name for complex division. 
+/// Target-dependent for long double (x87 vs IEEE quad). +pub fn complex_div_name(base_kind: TypeKind, target: &Target) -> &'static str { + match base_kind { + TypeKind::Float => "__divsc3", + TypeKind::Double => "__divdc3", + TypeKind::LongDouble => { + if target.arch == Arch::Aarch64 && target.os == Os::MacOS { + "__divdc3" + } else { + match target.arch { + Arch::X86_64 => "__divxc3", + Arch::Aarch64 => "__divtc3", + } + } + } + _ => "__divdc3", + } +} + /// Trait for target-specific hardware mapping decisions. pub trait TargetHwMap { /// Determine how the target handles a given instruction. @@ -1744,4 +1788,62 @@ mod tests { let insn = make_convert_insn(Opcode::FCvtF, types.longdouble_id, 80, types.float_id, 32); assert_eq!(hwmap.map_op(&insn, &types), HwMapAction::Legal); } + + // ======================================================================== + // Phase 2e: Complex mul/div rtlib name tests + // ======================================================================== + + #[test] + fn test_complex_mul_name_float() { + let target = Target::new(Arch::X86_64, Os::Linux); + assert_eq!(complex_mul_name(TypeKind::Float, &target), "__mulsc3"); + } + + #[test] + fn test_complex_mul_name_double() { + let target = Target::new(Arch::X86_64, Os::Linux); + assert_eq!(complex_mul_name(TypeKind::Double, &target), "__muldc3"); + } + + #[test] + fn test_complex_mul_name_longdouble() { + let x86 = Target::new(Arch::X86_64, Os::Linux); + assert_eq!(complex_mul_name(TypeKind::LongDouble, &x86), "__mulxc3"); + + let arm_linux = Target::new(Arch::Aarch64, Os::Linux); + assert_eq!( + complex_mul_name(TypeKind::LongDouble, &arm_linux), + "__multc3" + ); + + let arm_macos = Target::new(Arch::Aarch64, Os::MacOS); + assert_eq!( + complex_mul_name(TypeKind::LongDouble, &arm_macos), + "__muldc3" + ); + } + + #[test] + fn test_complex_div_name_float() { + let target = Target::new(Arch::X86_64, Os::Linux); + assert_eq!(complex_div_name(TypeKind::Float, &target), "__divsc3"); + } + 
+ #[test] + fn test_complex_div_name_longdouble() { + let x86 = Target::new(Arch::X86_64, Os::Linux); + assert_eq!(complex_div_name(TypeKind::LongDouble, &x86), "__divxc3"); + + let arm_linux = Target::new(Arch::Aarch64, Os::Linux); + assert_eq!( + complex_div_name(TypeKind::LongDouble, &arm_linux), + "__divtc3" + ); + + let arm_macos = Target::new(Arch::Aarch64, Os::MacOS); + assert_eq!( + complex_div_name(TypeKind::LongDouble, &arm_macos), + "__divdc3" + ); + } } diff --git a/cc/ir/linearize.rs b/cc/ir/linearize.rs index 073972a1..663e6dbe 100644 --- a/cc/ir/linearize.rs +++ b/cc/ir/linearize.rs @@ -7913,10 +7913,8 @@ impl<'a> Linearizer<'a> { } BinaryOp::Mul => { // Complex multiply via rtlib call (__mulsc3, __muldc3, etc.) - // Uses robust implementation that handles overflow correctly - let rtlib = RtlibNames::new(self.target); let base_kind = self.types.kind(base_typ); - let func_name = rtlib.complex_mul(base_kind); + let func_name = crate::ir::hwmap::complex_mul_name(base_kind, self.target); let call_result = self.emit_complex_rtlib_call( func_name, (left_real, left_imag), @@ -7939,10 +7937,8 @@ impl<'a> Linearizer<'a> { } BinaryOp::Div => { // Complex divide via rtlib call (__divsc3, __divdc3, etc.) - // Uses Smith's method for robust overflow handling - let rtlib = RtlibNames::new(self.target); let base_kind = self.types.kind(base_typ); - let func_name = rtlib.complex_div(base_kind); + let func_name = crate::ir::hwmap::complex_div_name(base_kind, self.target); let call_result = self.emit_complex_rtlib_call( func_name, (left_real, left_imag), diff --git a/cc/rtlib.rs b/cc/rtlib.rs index 6a673479..a154b783 100644 --- a/cc/rtlib.rs +++ b/cc/rtlib.rs @@ -27,7 +27,6 @@ // use crate::target::{Arch, Os, Target}; -use crate::types::TypeKind; /// ABI used for Float16 parameters/returns in rtlib functions. /// @@ -65,12 +64,6 @@ impl<'a> RtlibNames<'a> { Self { target } } - /// Returns true if long double is the same as double on this platform. 
- /// On macOS aarch64 (Apple Silicon), long double is 64-bit (same as double). - pub fn longdouble_is_double(&self) -> bool { - self.target.arch == Arch::Aarch64 && self.target.os == Os::MacOS - } - /// Returns the ABI used by this rtlib for Float16 parameters/returns. /// /// This is an rtlib attribute - different runtime libraries have different @@ -138,106 +131,12 @@ impl<'a> RtlibNames<'a> { _ => None, } } - - // ======================================================================== - // Complex operations - // ======================================================================== - - /// Get function name for complex multiplication - /// - /// Complex multiply: result = __mulXc3(a_real, a_imag, b_real, b_imag) - pub fn complex_mul(&self, base_kind: TypeKind) -> &'static str { - match base_kind { - TypeKind::Float => "__mulsc3", - TypeKind::Double => "__muldc3", - TypeKind::LongDouble => { - if self.longdouble_is_double() { - "__muldc3" // macOS aarch64: long double == double - } else { - match self.target.arch { - Arch::X86_64 => "__mulxc3", // x87 80-bit - Arch::Aarch64 => "__multc3", // IEEE quad 128-bit - } - } - } - _ => "__muldc3", // fallback - } - } - - /// Get function name for complex division - /// - /// Complex divide: result = __divXc3(a_real, a_imag, b_real, b_imag) - /// Uses Smith's method for robust overflow handling. 
- pub fn complex_div(&self, base_kind: TypeKind) -> &'static str { - match base_kind { - TypeKind::Float => "__divsc3", - TypeKind::Double => "__divdc3", - TypeKind::LongDouble => { - if self.longdouble_is_double() { - "__divdc3" - } else { - match self.target.arch { - Arch::X86_64 => "__divxc3", - Arch::Aarch64 => "__divtc3", - } - } - } - _ => "__divdc3", // fallback - } - } } #[cfg(test)] mod tests { use super::*; - #[test] - fn test_complex_mul_float() { - let target = Target::new(Arch::X86_64, Os::Linux); - let rtlib = RtlibNames::new(&target); - assert_eq!(rtlib.complex_mul(TypeKind::Float), "__mulsc3"); - } - - #[test] - fn test_complex_mul_double() { - let target = Target::new(Arch::X86_64, Os::Linux); - let rtlib = RtlibNames::new(&target); - assert_eq!(rtlib.complex_mul(TypeKind::Double), "__muldc3"); - } - - #[test] - fn test_complex_mul_longdouble_x86_64() { - let target = Target::new(Arch::X86_64, Os::Linux); - let rtlib = RtlibNames::new(&target); - assert_eq!(rtlib.complex_mul(TypeKind::LongDouble), "__mulxc3"); - } - - #[test] - fn test_complex_mul_longdouble_aarch64_linux() { - let target = Target::new(Arch::Aarch64, Os::Linux); - let rtlib = RtlibNames::new(&target); - assert_eq!(rtlib.complex_mul(TypeKind::LongDouble), "__multc3"); - } - - #[test] - fn test_complex_mul_longdouble_aarch64_macos() { - // On macOS aarch64, long double == double - let target = Target::new(Arch::Aarch64, Os::MacOS); - let rtlib = RtlibNames::new(&target); - assert_eq!(rtlib.complex_mul(TypeKind::LongDouble), "__muldc3"); - } - - #[test] - fn test_longdouble_is_double() { - let macos_arm = Target::new(Arch::Aarch64, Os::MacOS); - let linux_arm = Target::new(Arch::Aarch64, Os::Linux); - let linux_x86 = Target::new(Arch::X86_64, Os::Linux); - - assert!(RtlibNames::new(&macos_arm).longdouble_is_double()); - assert!(!RtlibNames::new(&linux_arm).longdouble_is_double()); - assert!(!RtlibNames::new(&linux_x86).longdouble_is_double()); - } - // 
======================================================================== // Float16 (_Float16) rtlib tests // ======================================================================== From 46b3b263932a03472b2d9173b0700ec78c7255c9 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 22 Mar 2026 23:40:25 +0000 Subject: [PATCH 05/18] cc: add 8 int128 decomposition opcodes + hwmap expansions New opcodes: Lo64, Hi64, Pair64, AddC, AdcC, SubC, SbcC, UMulHi. These enable shared int128 decomposition in hwmap.rs instead of duplicated per-backend code. hwmap now expands int128 operations into 64-bit sequences: - Bitwise (And/Or/Xor): Lo64+Hi64 each operand, 64-bit op, Pair64 - Neg: SubC(0,lo), SbcC(0,hi,carry), Pair64 - Not: Lo64+Hi64, Not each, Pair64 - Add: AddC(lo,lo), AdcC(hi,hi,carry), Pair64 - Sub: SubC(lo,lo), SbcC(hi,hi,borrow), Pair64 - Mul: cross-product via Mul+UMulHi+Add - Eq/Ne: xor+or reduction, then 64-bit compare - Ordered comparisons: hi compare + Select(hi_eq, lo_cmp, hi_cmp) - Zext/Sext: Pair64 with zero/sign-extended halves Backend support for all 8 opcodes on x86_64 and aarch64. Fix x86_64 int128_pseudos to not mark Lo64/Hi64/Pair64 operands incorrectly (caused Sext misrouting to emit_int128_extend). 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cc/arch/aarch64/codegen.rs | 10 + cc/arch/aarch64/expression.rs | 156 ++++++++++ cc/arch/x86_64/codegen.rs | 10 + cc/arch/x86_64/expression.rs | 191 ++++++++++++ cc/arch/x86_64/regalloc.rs | 56 ++-- cc/ir/hwmap.rs | 538 +++++++++++++++++++++++++++++++++- cc/ir/mod.rs | 18 ++ 7 files changed, 956 insertions(+), 23 deletions(-) diff --git a/cc/arch/aarch64/codegen.rs b/cc/arch/aarch64/codegen.rs index 744f2454..bffd6807 100644 --- a/cc/arch/aarch64/codegen.rs +++ b/cc/arch/aarch64/codegen.rs @@ -1745,6 +1745,16 @@ impl Aarch64CodeGen { self.emit_fence(insn); } + // Int128 decomposition ops (from hwmap expansion) + Opcode::Lo64 => self.emit_lo64(insn), + Opcode::Hi64 => self.emit_hi64(insn), + Opcode::Pair64 => self.emit_pair64(insn), + Opcode::AddC => self.emit_addc(insn, false), + Opcode::AdcC => self.emit_addc(insn, true), + Opcode::SubC => self.emit_subc(insn, false), + Opcode::SbcC => self.emit_subc(insn, true), + Opcode::UMulHi => self.emit_umulhi(insn), + // Skip no-ops and unimplemented _ => {} } diff --git a/cc/arch/aarch64/expression.rs b/cc/arch/aarch64/expression.rs index a39dbfb1..00213d72 100644 --- a/cc/arch/aarch64/expression.rs +++ b/cc/arch/aarch64/expression.rs @@ -1356,4 +1356,160 @@ impl Aarch64CodeGen { _ => {} } } + + // ======================================================================== + // Int128 decomposition ops (Lo64, Hi64, Pair64) + // ======================================================================== + + /// Lo64: extract low 64 bits from 128-bit pseudo. 
+ pub(super) fn emit_lo64(&mut self, insn: &Instruction) { + let src = insn.src[0]; + let target = insn.target.expect("Lo64 must have target"); + // Load both halves, use lo + self.load_int128(src, Reg::X9, Reg::X10); + let dst_loc = self.get_location(target); + match dst_loc { + Loc::Reg(r) => { + if r != Reg::X9 { + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::X9), + dst: r, + }); + } + } + _ => self.emit_move_to_loc(Reg::X9, &dst_loc, 64), + } + } + + /// Hi64: extract high 64 bits from 128-bit pseudo. + pub(super) fn emit_hi64(&mut self, insn: &Instruction) { + let src = insn.src[0]; + let target = insn.target.expect("Hi64 must have target"); + // Load both halves, use hi + self.load_int128(src, Reg::X9, Reg::X10); + let dst_loc = self.get_location(target); + match dst_loc { + Loc::Reg(r) => { + if r != Reg::X10 { + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::X10), + dst: r, + }); + } + } + _ => self.emit_move_to_loc(Reg::X10, &dst_loc, 64), + } + } + + /// Pair64: combine two 64-bit pseudos into 128-bit. + pub(super) fn emit_pair64(&mut self, insn: &Instruction) { + let src_lo = insn.src[0]; + let src_hi = insn.src[1]; + let target = insn.target.expect("Pair64 must have target"); + + self.emit_move(src_lo, Reg::X9, 64); + self.emit_move(src_hi, Reg::X10, 64); + self.store_int128(Reg::X9, Reg::X10, target); + } + + /// AddC/AdcC: 64-bit add with carry. 
+ /// AddC (with_carry=false): adds (sets flags) + /// AdcC (with_carry=true): adc (add with carry in) + pub(super) fn emit_addc(&mut self, insn: &Instruction, with_carry: bool) { + let target = insn.target.expect("AddC/AdcC must have target"); + let src1 = insn.src[0]; + let src2 = insn.src[1]; + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::X16, + }; + + self.emit_move(src1, dst_reg, 64); + self.emit_move(src2, Reg::X10, 64); + + if with_carry { + self.push_lir(Aarch64Inst::Adc { + size: OperandSize::B64, + src1: dst_reg, + src2: Reg::X10, + dst: dst_reg, + }); + } else { + self.push_lir(Aarch64Inst::Adds { + size: OperandSize::B64, + src1: dst_reg, + src2: GpOperand::Reg(Reg::X10), + dst: dst_reg, + }); + } + + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, 64); + } + } + + /// SubC/SbcC: 64-bit sub with borrow. + /// SubC (with_borrow=false): subs (sets flags) + /// SbcC (with_borrow=true): sbc (sub with borrow in) + pub(super) fn emit_subc(&mut self, insn: &Instruction, with_borrow: bool) { + let target = insn.target.expect("SubC/SbcC must have target"); + let src1 = insn.src[0]; + let src2 = insn.src[1]; + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::X16, + }; + + self.emit_move(src1, dst_reg, 64); + self.emit_move(src2, Reg::X10, 64); + + if with_borrow { + self.push_lir(Aarch64Inst::Sbc { + size: OperandSize::B64, + src1: dst_reg, + src2: Reg::X10, + dst: dst_reg, + }); + } else { + self.push_lir(Aarch64Inst::Subs { + size: OperandSize::B64, + src1: dst_reg, + src2: GpOperand::Reg(Reg::X10), + dst: dst_reg, + }); + } + + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, 64); + } + } + + /// UMulHi: upper 64 bits of 64×64 unsigned multiply. 
+ pub(super) fn emit_umulhi(&mut self, insn: &Instruction) { + let target = insn.target.expect("UMulHi must have target"); + let src1 = insn.src[0]; + let src2 = insn.src[1]; + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::X16, + }; + + self.emit_move(src1, Reg::X9, 64); + self.emit_move(src2, Reg::X10, 64); + + self.push_lir(Aarch64Inst::Umulh { + src1: Reg::X9, + src2: Reg::X10, + dst: dst_reg, + }); + + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, 64); + } + } } diff --git a/cc/arch/x86_64/codegen.rs b/cc/arch/x86_64/codegen.rs index 349b1bc7..efb3e7b4 100644 --- a/cc/arch/x86_64/codegen.rs +++ b/cc/arch/x86_64/codegen.rs @@ -1717,6 +1717,16 @@ impl X86_64CodeGen { self.emit_fence(insn); } + // Int128 decomposition ops (from hwmap expansion) + Opcode::Lo64 => self.emit_lo64(insn), + Opcode::Hi64 => self.emit_hi64(insn), + Opcode::Pair64 => self.emit_pair64(insn), + Opcode::AddC => self.emit_addc(insn, false), + Opcode::AdcC => self.emit_addc(insn, true), + Opcode::SubC => self.emit_subc(insn, false), + Opcode::SbcC => self.emit_subc(insn, true), + Opcode::UMulHi => self.emit_umulhi(insn), + // Skip no-ops and unimplemented _ => {} } diff --git a/cc/arch/x86_64/expression.rs b/cc/arch/x86_64/expression.rs index b0621c62..0a005d98 100644 --- a/cc/arch/x86_64/expression.rs +++ b/cc/arch/x86_64/expression.rs @@ -606,6 +606,16 @@ impl X86_64CodeGen { dst: GpOperand::Reg(dst), }); } + Loc::Reg(r) => { + // Register: treat as containing the low 64 bits + if *r != dst { + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(*r), + dst: GpOperand::Reg(dst), + }); + } + } _ => panic!("int128_load_lo: unexpected loc {:?}", loc), } } @@ -634,6 +644,14 @@ impl X86_64CodeGen { dst: GpOperand::Reg(dst), }); } + Loc::Reg(_) => { + // Register holds a scalar; hi half is 0 + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: 
GpOperand::Imm(0), + dst: GpOperand::Reg(dst), + }); + } _ => panic!("int128_load_hi: unexpected loc {:?}", loc), } } @@ -1690,4 +1708,177 @@ impl X86_64CodeGen { } } } + + // ======================================================================== + // Int128 decomposition ops (Lo64, Hi64, Pair64) + // ======================================================================== + + /// Lo64: extract low 64 bits from 128-bit pseudo. + /// target(64) = lo64(src(128)) + pub(super) fn emit_lo64(&mut self, insn: &Instruction) { + let src = insn.src[0]; + let target = insn.target.expect("Lo64 must have target"); + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::R10, + }; + self.int128_load_lo(src, dst_reg); + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, 64); + } + } + + /// Hi64: extract high 64 bits from 128-bit pseudo. + /// target(64) = hi64(src(128)) + pub(super) fn emit_hi64(&mut self, insn: &Instruction) { + let src = insn.src[0]; + let target = insn.target.expect("Hi64 must have target"); + let dst_loc = self.get_location(target); + let dst_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::R10, + }; + self.int128_load_hi(src, dst_reg); + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, 64); + } + } + + /// Pair64: combine two 64-bit pseudos into 128-bit. + /// target(128) = pair64(lo(64), hi(64)) + pub(super) fn emit_pair64(&mut self, insn: &Instruction) { + let src_lo = insn.src[0]; + let src_hi = insn.src[1]; + let target = insn.target.expect("Pair64 must have target"); + let dst_loc = self.get_location(target); + + // Store lo half + self.emit_move(src_lo, Reg::R10, 64); + self.int128_store_lo(Reg::R10, &dst_loc); + + // Store hi half + self.emit_move(src_hi, Reg::R10, 64); + self.int128_store_hi(Reg::R10, &dst_loc); + } + + /// AddC/AdcC: 64-bit add with carry. 
+ /// AddC (with_carry=false): add, sets CF + /// AdcC (with_carry=true): adc (add with carry in), sets CF + pub(super) fn emit_addc(&mut self, insn: &Instruction, with_carry: bool) { + let target = insn.target.expect("AddC/AdcC must have target"); + let src1 = insn.src[0]; + let src2 = insn.src[1]; + let dst_loc = self.get_location(target); + let work_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::R10, + }; + + self.emit_move(src1, work_reg, 64); + let src2_loc = self.get_location(src2); + let src2_op = match &src2_loc { + Loc::Reg(r) => GpOperand::Reg(*r), + Loc::Imm(v) if *v >= i32::MIN as i128 && *v <= i32::MAX as i128 => { + GpOperand::Imm(*v as i64) + } + _ => { + self.emit_move(src2, Reg::R11, 64); + GpOperand::Reg(Reg::R11) + } + }; + + if with_carry { + self.push_lir(X86Inst::Adc { + size: OperandSize::B64, + src: src2_op, + dst: work_reg, + }); + } else { + self.push_lir(X86Inst::Add { + size: OperandSize::B64, + src: src2_op, + dst: work_reg, + }); + } + + if !matches!(&dst_loc, Loc::Reg(r) if *r == work_reg) { + self.emit_move_to_loc(work_reg, &dst_loc, 64); + } + } + + /// SubC/SbcC: 64-bit sub with borrow. 
+ /// SubC (with_borrow=false): sub, sets CF + /// SbcC (with_borrow=true): sbb (sub with borrow in), sets CF + pub(super) fn emit_subc(&mut self, insn: &Instruction, with_borrow: bool) { + let target = insn.target.expect("SubC/SbcC must have target"); + let src1 = insn.src[0]; + let src2 = insn.src[1]; + let dst_loc = self.get_location(target); + let work_reg = match &dst_loc { + Loc::Reg(r) => *r, + _ => Reg::R10, + }; + + self.emit_move(src1, work_reg, 64); + let src2_loc = self.get_location(src2); + let src2_op = match &src2_loc { + Loc::Reg(r) => GpOperand::Reg(*r), + Loc::Imm(v) if *v >= i32::MIN as i128 && *v <= i32::MAX as i128 => { + GpOperand::Imm(*v as i64) + } + _ => { + self.emit_move(src2, Reg::R11, 64); + GpOperand::Reg(Reg::R11) + } + }; + + if with_borrow { + self.push_lir(X86Inst::Sbb { + size: OperandSize::B64, + src: src2_op, + dst: work_reg, + }); + } else { + self.push_lir(X86Inst::Sub { + size: OperandSize::B64, + src: src2_op, + dst: work_reg, + }); + } + + if !matches!(&dst_loc, Loc::Reg(r) if *r == work_reg) { + self.emit_move_to_loc(work_reg, &dst_loc, 64); + } + } + + /// UMulHi: upper 64 bits of 64×64 unsigned multiply. + /// Uses mul instruction which puts result in RDX:RAX. 
+ pub(super) fn emit_umulhi(&mut self, insn: &Instruction) { + let target = insn.target.expect("UMulHi must have target"); + let src1 = insn.src[0]; + let src2 = insn.src[1]; + let dst_loc = self.get_location(target); + + // mul uses RAX as implicit first operand, result in RDX:RAX + self.emit_move(src1, Reg::Rax, 64); + let src2_loc = self.get_location(src2); + let src2_op = match &src2_loc { + Loc::Reg(r) => GpOperand::Reg(*r), + _ => { + self.emit_move(src2, Reg::R11, 64); + GpOperand::Reg(Reg::R11) + } + }; + + self.push_lir(X86Inst::Mul1 { + size: OperandSize::B64, + src: src2_op, + }); + + // High result is in RDX + if !matches!(&dst_loc, Loc::Reg(r) if *r == Reg::Rdx) { + self.emit_move_to_loc(Reg::Rdx, &dst_loc, 64); + } + } } diff --git a/cc/arch/x86_64/regalloc.rs b/cc/arch/x86_64/regalloc.rs index 966371c6..e2437758 100644 --- a/cc/arch/x86_64/regalloc.rs +++ b/cc/arch/x86_64/regalloc.rs @@ -598,28 +598,44 @@ impl RegAlloc { | Opcode::SetAe ); - // For Load: target is int128, but src[0] is the address (64-bit pointer). - // For Store: src[0] is address (64-bit), src[1] is the int128 value. - // For comparisons: target is a small integer result. 
- if !is_comparison && !matches!(insn.op, Opcode::Load) { - if let Some(target) = insn.target { - self.int128_pseudos.insert(target); - } - } - if matches!(insn.op, Opcode::Load) { - // Load: only target is int128, not the address src[0] - if let Some(target) = insn.target { - self.int128_pseudos.insert(target); + // Lo64/Hi64: target is 64-bit (not int128), source is int128 + // Pair64: target is int128, sources are 64-bit (not int128) + // AddC/AdcC/SubC/SbcC/UMulHi: 64-bit ops, not int128 + match insn.op { + Opcode::Lo64 | Opcode::Hi64 => { + // Source is int128, target is 64-bit + for &src in &insn.src { + self.int128_pseudos.insert(src); + } } - } else if matches!(insn.op, Opcode::Store) { - // Store: src[0] is address (skip), src[1] is the int128 value - if let Some(&val) = insn.src.get(1) { - self.int128_pseudos.insert(val); + Opcode::Pair64 => { + // Target is int128, sources are 64-bit + if let Some(target) = insn.target { + self.int128_pseudos.insert(target); + } } - } else { - // Other ops: all sources are int128 - for &src in &insn.src { - self.int128_pseudos.insert(src); + _ => { + // For Load: target is int128, but src[0] is the address (64-bit pointer). + // For Store: src[0] is address (64-bit), src[1] is the int128 value. + // For comparisons: target is a small integer result. 
+ if !is_comparison && !matches!(insn.op, Opcode::Load) { + if let Some(target) = insn.target { + self.int128_pseudos.insert(target); + } + } + if matches!(insn.op, Opcode::Load) { + if let Some(target) = insn.target { + self.int128_pseudos.insert(target); + } + } else if matches!(insn.op, Opcode::Store) { + if let Some(&val) = insn.src.get(1) { + self.int128_pseudos.insert(val); + } + } else { + for &src in &insn.src { + self.int128_pseudos.insert(src); + } + } } } } diff --git a/cc/ir/hwmap.rs b/cc/ir/hwmap.rs index b838996c..de004b92 100644 --- a/cc/ir/hwmap.rs +++ b/cc/ir/hwmap.rs @@ -316,11 +316,59 @@ fn map_longdouble_convert( } } +/// Classify an int128 operation that needs inline expansion. +fn map_int128_expand(insn: &Instruction, types: &TypeTable) -> Option { + if insn.size != 128 { + return None; + } + + match insn.op { + // Arithmetic/bitwise/unary: result type is int128 + Opcode::And + | Opcode::Or + | Opcode::Xor + | Opcode::Neg + | Opcode::Not + | Opcode::Add + | Opcode::Sub + | Opcode::Mul => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(HwMapAction::Expand) + } + // Zext/Sext to 128: result type is int128 + Opcode::Zext | Opcode::Sext => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(HwMapAction::Expand) + } + // Comparisons: size==128 is the operand size, result is int/bool + Opcode::SetEq + | Opcode::SetNe + | Opcode::SetLt + | Opcode::SetLe + | Opcode::SetGt + | Opcode::SetGe + | Opcode::SetB + | Opcode::SetBe + | Opcode::SetA + | Opcode::SetAe => Some(HwMapAction::Expand), + _ => None, + } +} + /// Common hardware mapping logic shared by all targets. 
fn map_common(insn: &Instruction, types: &TypeTable, target: &Target) -> Option { if let Some(action) = map_int128_divmod(insn, types) { return Some(action); } + if let Some(action) = map_int128_expand(insn, types) { + return Some(action); + } if let Some(action) = map_int128_float_convert(insn, types, target) { return Some(action); } @@ -698,6 +746,490 @@ fn build_f16_truncate_call( call_insn } +/// Dispatch Expand actions to the appropriate expansion function. +fn expand_insn( + insn: &Instruction, + func: &mut Function, + new_insns: &mut Vec, + types: &TypeTable, + target: &Target, +) { + if insn.size == 128 { + if let Some(typ) = insn.typ { + if types.kind(typ) == TypeKind::Int128 { + expand_int128(insn, func, new_insns, types); + return; + } + } + } + expand_float16(insn, func, new_insns, types, target); +} + +/// Allocate a new 64-bit register pseudo. +fn alloc_reg64(func: &mut Function) -> PseudoId { + let id = func.alloc_pseudo(); + func.add_pseudo(Pseudo::reg(id, id.0)); + id +} + +/// Expand an int128 instruction into 64-bit operations using Lo64/Hi64/Pair64. 
+fn expand_int128( + insn: &Instruction, + func: &mut Function, + new_insns: &mut Vec, + types: &TypeTable, +) { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + + match insn.op { + // Bitwise: Lo64+Hi64 both operands, 64-bit op on each half, Pair64 + Opcode::And | Opcode::Or | Opcode::Xor => { + let src1 = insn.src[0]; + let src2 = insn.src[1]; + + let a_lo = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Lo64, a_lo, src1, long_type, 64)); + + let a_hi = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Hi64, a_hi, src1, long_type, 64)); + + let b_lo = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Lo64, b_lo, src2, long_type, 64)); + + let b_hi = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Hi64, b_hi, src2, long_type, 64)); + + // 64-bit op on lo halves + let r_lo = alloc_reg64(func); + new_insns.push(Instruction::binop(insn.op, r_lo, a_lo, b_lo, long_type, 64)); + + // 64-bit op on hi halves + let r_hi = alloc_reg64(func); + new_insns.push(Instruction::binop(insn.op, r_hi, a_hi, b_hi, long_type, 64)); + + // Combine into 128-bit result + let int128_type = insn.typ.unwrap(); + new_insns.push(Instruction::binop( + Opcode::Pair64, + result, + r_lo, + r_hi, + int128_type, + 128, + )); + } + + // Not: Lo64+Hi64, Not each, Pair64 + Opcode::Not => { + let src = insn.src[0]; + let s_lo = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Lo64, s_lo, src, long_type, 64)); + let s_hi = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Hi64, s_hi, src, long_type, 64)); + + let r_lo = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Not, r_lo, s_lo, long_type, 64)); + let r_hi = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Not, r_hi, s_hi, long_type, 64)); + + let int128_type = insn.typ.unwrap(); + new_insns.push(Instruction::binop( + Opcode::Pair64, + result, + r_lo, + r_hi, + int128_type, + 128, + )); + } + + 
// Neg: SubC(0, lo), SbcC(0, hi, carry), Pair64 + Opcode::Neg => { + let src = insn.src[0]; + let s_lo = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Lo64, s_lo, src, long_type, 64)); + let s_hi = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Hi64, s_hi, src, long_type, 64)); + + let zero = func.create_const_pseudo(0); + + let r_lo = alloc_reg64(func); + func.add_pseudo(Pseudo::reg(r_lo, r_lo.0)); + new_insns.push(Instruction::binop( + Opcode::SubC, + r_lo, + zero, + s_lo, + long_type, + 64, + )); + + let r_hi = alloc_reg64(func); + func.add_pseudo(Pseudo::reg(r_hi, r_hi.0)); + let mut sbc = Instruction::binop(Opcode::SbcC, r_hi, zero, s_hi, long_type, 64); + sbc.src.push(r_lo); // src[2] = borrow producer + new_insns.push(sbc); + + let int128_type = insn.typ.unwrap(); + new_insns.push(Instruction::binop( + Opcode::Pair64, + result, + r_lo, + r_hi, + int128_type, + 128, + )); + } + + // Add: AddC(lo,lo), AdcC(hi,hi,carry), Pair64 + Opcode::Add => { + let src1 = insn.src[0]; + let src2 = insn.src[1]; + + let a_lo = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Lo64, a_lo, src1, long_type, 64)); + let a_hi = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Hi64, a_hi, src1, long_type, 64)); + let b_lo = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Lo64, b_lo, src2, long_type, 64)); + let b_hi = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Hi64, b_hi, src2, long_type, 64)); + + let r_lo = alloc_reg64(func); + func.add_pseudo(Pseudo::reg(r_lo, r_lo.0)); + new_insns.push(Instruction::binop( + Opcode::AddC, + r_lo, + a_lo, + b_lo, + long_type, + 64, + )); + + let r_hi = alloc_reg64(func); + func.add_pseudo(Pseudo::reg(r_hi, r_hi.0)); + let mut adc = Instruction::binop(Opcode::AdcC, r_hi, a_hi, b_hi, long_type, 64); + adc.src.push(r_lo); // src[2] = carry producer + new_insns.push(adc); + + let int128_type = insn.typ.unwrap(); + new_insns.push(Instruction::binop( + 
Opcode::Pair64, + result, + r_lo, + r_hi, + int128_type, + 128, + )); + } + + // Sub: SubC(lo,lo), SbcC(hi,hi,borrow), Pair64 + Opcode::Sub => { + let src1 = insn.src[0]; + let src2 = insn.src[1]; + + let a_lo = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Lo64, a_lo, src1, long_type, 64)); + let a_hi = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Hi64, a_hi, src1, long_type, 64)); + let b_lo = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Lo64, b_lo, src2, long_type, 64)); + let b_hi = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Hi64, b_hi, src2, long_type, 64)); + + let r_lo = alloc_reg64(func); + func.add_pseudo(Pseudo::reg(r_lo, r_lo.0)); + new_insns.push(Instruction::binop( + Opcode::SubC, + r_lo, + a_lo, + b_lo, + long_type, + 64, + )); + + let r_hi = alloc_reg64(func); + func.add_pseudo(Pseudo::reg(r_hi, r_hi.0)); + let mut sbc = Instruction::binop(Opcode::SbcC, r_hi, a_hi, b_hi, long_type, 64); + sbc.src.push(r_lo); // src[2] = borrow producer + new_insns.push(sbc); + + let int128_type = insn.typ.unwrap(); + new_insns.push(Instruction::binop( + Opcode::Pair64, + result, + r_lo, + r_hi, + int128_type, + 128, + )); + } + + // Mul: a*b = (a_lo*b_lo) + ((a_lo*b_hi + a_hi*b_lo) << 64) + Opcode::Mul => { + let src1 = insn.src[0]; + let src2 = insn.src[1]; + + let a_lo = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Lo64, a_lo, src1, long_type, 64)); + let a_hi = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Hi64, a_hi, src1, long_type, 64)); + let b_lo = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Lo64, b_lo, src2, long_type, 64)); + let b_hi = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Hi64, b_hi, src2, long_type, 64)); + + // low_result = a_lo * b_lo (lower 64 bits) + let low_result = alloc_reg64(func); + new_insns.push(Instruction::binop( + Opcode::Mul, + low_result, + a_lo, + b_lo, + long_type, + 64, + )); + + // high_part = 
umulhi(a_lo, b_lo) (upper 64 bits of full 128-bit product) + let high_part = alloc_reg64(func); + func.add_pseudo(Pseudo::reg(high_part, high_part.0)); + new_insns.push(Instruction::binop( + Opcode::UMulHi, + high_part, + a_lo, + b_lo, + long_type, + 64, + )); + + // cross1 = a_lo * b_hi + let cross1 = alloc_reg64(func); + new_insns.push(Instruction::binop( + Opcode::Mul, + cross1, + a_lo, + b_hi, + long_type, + 64, + )); + + // cross2 = a_hi * b_lo + let cross2 = alloc_reg64(func); + new_insns.push(Instruction::binop( + Opcode::Mul, + cross2, + a_hi, + b_lo, + long_type, + 64, + )); + + // final_hi = high_part + cross1 + cross2 + let sum1 = alloc_reg64(func); + new_insns.push(Instruction::binop( + Opcode::Add, + sum1, + high_part, + cross1, + long_type, + 64, + )); + let final_hi = alloc_reg64(func); + new_insns.push(Instruction::binop( + Opcode::Add, + final_hi, + sum1, + cross2, + long_type, + 64, + )); + + let int128_type = insn.typ.unwrap(); + new_insns.push(Instruction::binop( + Opcode::Pair64, + result, + low_result, + final_hi, + int128_type, + 128, + )); + } + + // Eq/Ne: xor lo halves, xor hi halves, or results, compare to 0 + Opcode::SetEq | Opcode::SetNe => { + let src1 = insn.src[0]; + let src2 = insn.src[1]; + + let a_lo = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Lo64, a_lo, src1, long_type, 64)); + let a_hi = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Hi64, a_hi, src1, long_type, 64)); + let b_lo = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Lo64, b_lo, src2, long_type, 64)); + let b_hi = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Hi64, b_hi, src2, long_type, 64)); + + let xor_lo = alloc_reg64(func); + new_insns.push(Instruction::binop( + Opcode::Xor, + xor_lo, + a_lo, + b_lo, + long_type, + 64, + )); + let xor_hi = alloc_reg64(func); + new_insns.push(Instruction::binop( + Opcode::Xor, + xor_hi, + a_hi, + b_hi, + long_type, + 64, + )); + let or_result = alloc_reg64(func); + 
new_insns.push(Instruction::binop( + Opcode::Or, + or_result, + xor_lo, + xor_hi, + long_type, + 64, + )); + + let zero = func.create_const_pseudo(0); + // Final comparison is 64-bit (comparing reduced or-result against 0) + new_insns.push(Instruction::binop( + insn.op, result, or_result, zero, long_type, 64, + )); + } + + // Ordered comparisons: compare hi halves, if equal compare lo halves (unsigned) + Opcode::SetLt + | Opcode::SetLe + | Opcode::SetGt + | Opcode::SetGe + | Opcode::SetB + | Opcode::SetBe + | Opcode::SetA + | Opcode::SetAe => { + let src1 = insn.src[0]; + let src2 = insn.src[1]; + + let a_lo = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Lo64, a_lo, src1, long_type, 64)); + let a_hi = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Hi64, a_hi, src1, long_type, 64)); + let b_lo = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Lo64, b_lo, src2, long_type, 64)); + let b_hi = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Hi64, b_hi, src2, long_type, 64)); + + // All decomposed comparisons are 64-bit + // hi_eq = (a_hi == b_hi) + let hi_eq = alloc_reg64(func); + new_insns.push(Instruction::binop( + Opcode::SetEq, + hi_eq, + a_hi, + b_hi, + long_type, + 64, + )); + + // hi_cmp = signed/unsigned comparison on hi halves (original op) + let hi_cmp = alloc_reg64(func); + new_insns.push(Instruction::binop( + insn.op, hi_cmp, a_hi, b_hi, long_type, 64, + )); + + // lo_cmp = UNSIGNED comparison on lo halves + let lo_op = match insn.op { + Opcode::SetLt | Opcode::SetB => Opcode::SetB, + Opcode::SetLe | Opcode::SetBe => Opcode::SetBe, + Opcode::SetGt | Opcode::SetA => Opcode::SetA, + Opcode::SetGe | Opcode::SetAe => Opcode::SetAe, + _ => unreachable!(), + }; + let lo_cmp = alloc_reg64(func); + new_insns.push(Instruction::binop(lo_op, lo_cmp, a_lo, b_lo, long_type, 64)); + + // result = hi_eq ? 
lo_cmp : hi_cmp + new_insns.push(Instruction::select( + result, hi_eq, lo_cmp, hi_cmp, long_type, 64, + )); + } + + // Zext to 128: zero-extend src to 64-bit, Pair64(lo, 0) + Opcode::Zext => { + let src = insn.src[0]; + let src_size = insn.src_size; + + // Zero-extend src to 64-bit if needed + let lo = if src_size < 64 { + let ext = alloc_reg64(func); + let mut zext_insn = Instruction::unop(Opcode::Zext, ext, src, long_type, 64); + zext_insn.src_size = src_size; + new_insns.push(zext_insn); + ext + } else { + src + }; + + let zero = func.create_const_pseudo(0); + let int128_type = insn.typ.unwrap(); + new_insns.push(Instruction::binop( + Opcode::Pair64, + result, + lo, + zero, + int128_type, + 128, + )); + } + + // Sext to 128: first sext src to 64-bit, then hi = Asr(lo, 63) + Opcode::Sext => { + let src = insn.src[0]; + let src_size = insn.src_size; + + // Sign-extend src to 64-bit if needed + let lo = if src_size < 64 { + let ext = alloc_reg64(func); + let mut sext_insn = Instruction::unop(Opcode::Sext, ext, src, long_type, 64); + sext_insn.src_size = src_size; + new_insns.push(sext_insn); + ext + } else { + src + }; + + let shift_amount = func.create_const_pseudo(63); + let hi = alloc_reg64(func); + new_insns.push(Instruction::binop( + Opcode::Asr, + hi, + lo, + shift_amount, + long_type, + 64, + )); + let int128_type = insn.typ.unwrap(); + new_insns.push(Instruction::binop( + Opcode::Pair64, + result, + lo, + hi, + int128_type, + 128, + )); + } + + _ => panic!("expand_int128: unexpected opcode {}", insn.op), + } +} + /// Expand a Float16 arithmetic/neg/cmp instruction using promote-operate-truncate. 
fn expand_float16( insn: &Instruction, @@ -889,7 +1421,7 @@ pub fn hwmap_function(func: &mut Function, types: &TypeTable, target: &Target) { block_changed = true; } HwMapAction::Expand => { - expand_float16(insn, func, &mut new_insns, types, target); + expand_insn(insn, func, &mut new_insns, types, target); block_changed = true; } } @@ -1333,7 +1865,7 @@ mod tests { } #[test] - fn test_int128_add_stays_legal() { + fn test_int128_add_expands() { let target = Target::new(Arch::X86_64, Os::Linux); let types = TypeTable::new(&target); let hwmap = X86_64HwMap { @@ -1348,7 +1880,7 @@ mod tests { types.int128_id, 128, ); - assert_eq!(hwmap.map_op(&insn, &types), HwMapAction::Legal); + assert_eq!(hwmap.map_op(&insn, &types), HwMapAction::Expand); } #[test] diff --git a/cc/ir/mod.rs b/cc/ir/mod.rs index 49e75d44..8dc50d65 100644 --- a/cc/ir/mod.rs +++ b/cc/ir/mod.rs @@ -232,6 +232,16 @@ pub enum Opcode { AtomicFetchOr, // Atomic fetch-and-or AtomicFetchXor, // Atomic fetch-and-xor Fence, // Memory fence + + // Int128 decomposition ops (used by hwmap expansion) + Lo64, // Extract low 64 bits from 128-bit pseudo + Hi64, // Extract high 64 bits from 128-bit pseudo + Pair64, // Combine two 64-bit pseudos into 128-bit: target = (src[0]=lo, src[1]=hi) + AddC, // 64-bit add with carry output: target = src[0] + src[1], sets carry + AdcC, // 64-bit add with carry in+out: target = src[0] + src[1] + carry; src[2] = carry producer + SubC, // 64-bit sub with borrow output: target = src[0] - src[1], sets borrow + SbcC, // 64-bit sub with borrow in+out: target = src[0] - src[1] - borrow; src[2] = borrow producer + UMulHi, // Upper 64 bits of unsigned 64×64 multiply: target = (src[0] * src[1]) >> 64 } impl Opcode { @@ -384,6 +394,14 @@ impl Opcode { Opcode::AtomicFetchOr => "atomic_fetch_or", Opcode::AtomicFetchXor => "atomic_fetch_xor", Opcode::Fence => "fence", + Opcode::Lo64 => "lo64", + Opcode::Hi64 => "hi64", + Opcode::Pair64 => "pair64", + Opcode::AddC => "addc", + Opcode::AdcC => 
"adcc", + Opcode::SubC => "subc", + Opcode::SbcC => "sbcc", + Opcode::UMulHi => "umulhi", } } } From a7ae7f9d151ed20919032aabd7bb1c7caba2275e Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 22 Mar 2026 23:41:53 +0000 Subject: [PATCH 06/18] cc: show src_size in IR Display for conversion ops Conversion opcodes (Sext, Zext, Trunc, FCvtS, FCvtU, SCvtF, UCvtF, FCvtF) now display as e.g. sext.32to64 instead of sext.64, making the source width visible in --dump-ir output. Co-Authored-By: Claude Opus 4.6 (1M context) --- cc/ir/mod.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/cc/ir/mod.rs b/cc/ir/mod.rs index 8dc50d65..3bf63713 100644 --- a/cc/ir/mod.rs +++ b/cc/ir/mod.rs @@ -1043,8 +1043,23 @@ impl fmt::Display for Instruction { write!(f, "{}", self.op.name())?; - // Size suffix - if self.size > 0 { + // Size suffix (for conversions, show src_size→size) + if self.src_size > 0 + && self.src_size != self.size + && matches!( + self.op, + Opcode::Sext + | Opcode::Zext + | Opcode::Trunc + | Opcode::FCvtS + | Opcode::FCvtU + | Opcode::SCvtF + | Opcode::UCvtF + | Opcode::FCvtF + ) + { + write!(f, ".{}to{}", self.src_size, self.size)?; + } else if self.size > 0 { write!(f, ".{}", self.size)?; } From a921630455343ec8f693c5bd6296d3c891e29c0d Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 22 Mar 2026 23:53:18 +0000 Subject: [PATCH 07/18] cc: remove ~1200 lines of duplicated int128 backend code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that hwmap expands int128 add/sub/mul/bitwise/neg/not/comparisons/ zext/sext into 64-bit sequences using Lo64/Hi64/Pair64/AddC/etc., the per-backend implementations are dead code. x86_64: removed emit_int128_mul (~120 lines), emit_int128_div (~50), emit_int128_compare (~145), emit_int128_unary (~55), Zext/Sext-to-128 in emit_int128_extend (~75), Add/Sub/And/Or/Xor in emit_int128_binop (~100), int128_src2_lo/hi_operand helpers (~50). 
Total: ~645 lines. aarch64: removed Add/Sub/And/Or/Xor/Mul from emit_int128_binop (~140), emit_int128_div (~50), emit_int128_compare (~120), emit_int128_unary (~50), Zext/Sext-to-128 in emit_extend (~75). Removed dead LIR variants MAdd/Negs/Ngc (~60). Total: ~550 lines. Only int128 shifts (Shl/Lsr/Asr) remain in the backends — these require arch-specific branching (SHLD/SHRD vs LSL+LSR+ORR). Co-Authored-By: Claude Opus 4.6 (1M context) --- cc/arch/aarch64/expression.rs | 564 +++-------------------------- cc/arch/aarch64/lir.rs | 61 ---- cc/arch/x86_64/expression.rs | 645 ++-------------------------------- 3 files changed, 73 insertions(+), 1197 deletions(-) diff --git a/cc/arch/aarch64/expression.rs b/cc/arch/aarch64/expression.rs index 00213d72..710c4bc9 100644 --- a/cc/arch/aarch64/expression.rs +++ b/cc/arch/aarch64/expression.rs @@ -24,8 +24,11 @@ impl Aarch64CodeGen { .map(|t| types.size_bits(t).max(32)) .unwrap_or(insn.size.max(32)); + // 128-bit shifts are still handled by the backend (hwmap doesn't expand them) if size == 128 { - self.emit_int128_binop(insn); + if matches!(insn.op, Opcode::Shl | Opcode::Lsr | Opcode::Asr) { + self.emit_int128_binop(insn); + } return; } @@ -122,11 +125,6 @@ impl Aarch64CodeGen { .map(|t| types.size_bits(t).max(32)) .unwrap_or(insn.size.max(32)); - if size == 128 { - self.emit_int128_unary(insn, op); - return; - } - let op_size = OperandSize::from_bits(size); let src = match insn.src.first() { Some(&s) => s, @@ -167,11 +165,6 @@ impl Aarch64CodeGen { .map(|t| types.size_bits(t).max(32)) .unwrap_or(insn.size.max(32)); - if size == 128 { - self.emit_int128_binop(insn); - return; - } - let op_size = OperandSize::from_bits(size); let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { (Some(&s1), Some(&s2)) => (s1, s2), @@ -208,11 +201,6 @@ impl Aarch64CodeGen { .map(|t| types.size_bits(t).max(32)) .unwrap_or(insn.size.max(32)); - if size == 128 { - self.emit_int128_div(insn); - return; - } - let op_size = 
OperandSize::from_bits(size); let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { (Some(&s1), Some(&s2)) => (s1, s2), @@ -273,11 +261,6 @@ impl Aarch64CodeGen { .map(|t| types.size_bits(t).max(32)) .unwrap_or(insn.size.max(32)); - if size == 128 { - self.emit_int128_compare(insn); - return; - } - let op_size = OperandSize::from_bits(size); let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { (Some(&s1), Some(&s2)) => (s1, s2), @@ -347,9 +330,9 @@ impl Aarch64CodeGen { None => return, }; - // Handle 128-bit extensions and truncations - if insn.size == 128 || insn.src_size == 128 { - self.emit_int128_extend(insn); + // Handle truncation FROM 128-bit (Zext/Sext TO 128 handled by hwmap) + if insn.src_size == 128 && insn.op == Opcode::Trunc { + self.emit_int128_trunc(insn); return; } @@ -504,7 +487,7 @@ impl Aarch64CodeGen { } } - /// Emit 128-bit binary operation (add, sub, and, or, xor, shl, lsr, asr, mul) + /// Emit 128-bit shift operation (shl, lsr, asr) fn emit_int128_binop(&mut self, insn: &Instruction) { let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { (Some(&s1), Some(&s2)) => (s1, s2), @@ -518,144 +501,13 @@ impl Aarch64CodeGen { // Load src1 as 128-bit: X9=lo1, X10=hi1 self.load_int128(src1, Reg::X9, Reg::X10); - // For shift ops, src2 is the shift amount (a regular small integer, not int128). - // Load it as a 64-bit value into X11 only. For all other ops, load as 128-bit. - let is_shift = matches!(insn.op, Opcode::Shl | Opcode::Lsr | Opcode::Asr); - if is_shift { - self.emit_move(src2, Reg::X11, 64); - } else { - self.load_int128(src2, Reg::X11, Reg::X16); - } + // Shift amount is a regular small integer, not int128. 
+ self.emit_move(src2, Reg::X11, 64); match insn.op { - Opcode::Add => { - // adds x9, x9, x11 (lo + lo, set carry) - // adc x10, x10, x16 (hi + hi + carry) - self.push_lir(Aarch64Inst::Adds { - size: OperandSize::B64, - src1: Reg::X9, - src2: GpOperand::Reg(Reg::X11), - dst: Reg::X9, - }); - self.push_lir(Aarch64Inst::Adc { - size: OperandSize::B64, - src1: Reg::X10, - src2: Reg::X16, - dst: Reg::X10, - }); - } - Opcode::Sub => { - // subs x9, x9, x11 (lo - lo, set borrow) - // sbc x10, x10, x16 (hi - hi - borrow) - self.push_lir(Aarch64Inst::Subs { - size: OperandSize::B64, - src1: Reg::X9, - src2: GpOperand::Reg(Reg::X11), - dst: Reg::X9, - }); - self.push_lir(Aarch64Inst::Sbc { - size: OperandSize::B64, - src1: Reg::X10, - src2: Reg::X16, - dst: Reg::X10, - }); - } - Opcode::And => { - self.push_lir(Aarch64Inst::And { - size: OperandSize::B64, - src1: Reg::X9, - src2: GpOperand::Reg(Reg::X11), - dst: Reg::X9, - }); - self.push_lir(Aarch64Inst::And { - size: OperandSize::B64, - src1: Reg::X10, - src2: GpOperand::Reg(Reg::X16), - dst: Reg::X10, - }); - } - Opcode::Or => { - self.push_lir(Aarch64Inst::Orr { - size: OperandSize::B64, - src1: Reg::X9, - src2: GpOperand::Reg(Reg::X11), - dst: Reg::X9, - }); - self.push_lir(Aarch64Inst::Orr { - size: OperandSize::B64, - src1: Reg::X10, - src2: GpOperand::Reg(Reg::X16), - dst: Reg::X10, - }); - } - Opcode::Xor => { - self.push_lir(Aarch64Inst::Eor { - size: OperandSize::B64, - src1: Reg::X9, - src2: GpOperand::Reg(Reg::X11), - dst: Reg::X9, - }); - self.push_lir(Aarch64Inst::Eor { - size: OperandSize::B64, - src1: Reg::X10, - src2: GpOperand::Reg(Reg::X16), - dst: Reg::X10, - }); - } - Opcode::Mul => { - // 128-bit multiply: (lo1, hi1) * (lo2, hi2) - // result_lo = lo1 * lo2 (lower 64 bits) - // result_hi = umulh(lo1, lo2) + hi1*lo2 + lo1*hi2 - // - // X9=lo1, X10=hi1, X11=lo2, X16=hi2 - // X17 = umulh(lo1, lo2) - self.push_lir(Aarch64Inst::Umulh { - src1: Reg::X9, - src2: Reg::X11, - dst: Reg::X17, - }); - // X17 = 
X17 + hi1*lo2 = madd(X10, X11, X17) - self.push_lir(Aarch64Inst::MAdd { - size: OperandSize::B64, - src1: Reg::X10, - src2: Reg::X11, - acc: Reg::X17, - dst: Reg::X17, - }); - // X17 = X17 + lo1*hi2 = madd(X9, X16, X17) - self.push_lir(Aarch64Inst::MAdd { - size: OperandSize::B64, - src1: Reg::X9, - src2: Reg::X16, - acc: Reg::X17, - dst: Reg::X17, - }); - // X9 = lo1 * lo2 (lower 64 bits) - self.push_lir(Aarch64Inst::Mul { - size: OperandSize::B64, - src1: Reg::X9, - src2: Reg::X11, - dst: Reg::X9, - }); - // hi result in X10 - self.push_lir(Aarch64Inst::Mov { - size: OperandSize::B64, - src: GpOperand::Reg(Reg::X17), - dst: Reg::X10, - }); - } - Opcode::Shl => { - // 128-bit left shift: shift amount in X11 (lo half of src2) - self.emit_int128_shl(); - } - Opcode::Lsr => { - // 128-bit logical right shift - self.emit_int128_lsr(); - } - Opcode::Asr => { - // 128-bit arithmetic right shift - self.emit_int128_asr(); - } + Opcode::Shl => self.emit_int128_shl(), + Opcode::Lsr => self.emit_int128_lsr(), + Opcode::Asr => self.emit_int128_asr(), _ => return, } @@ -976,385 +828,59 @@ impl Aarch64CodeGen { self.push_lir(Aarch64Inst::Directive(Directive::BlockLabel(label_done))); } - /// Emit 128-bit division (calls __udivti3/__divti3/__umodti3/__modti3 runtime helpers) - fn emit_int128_div(&mut self, insn: &Instruction) { - let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { - (Some(&s1), Some(&s2)) => (s1, s2), - _ => return, - }; - let target = match insn.target { - Some(t) => t, + /// Emit truncation from 128-bit to a smaller type + fn emit_int128_trunc(&mut self, insn: &Instruction) { + let src = match insn.src.first() { + Some(&s) => s, None => return, }; - - // AAPCS64: 128-bit args passed in X0:X1 (first) and X2:X3 (second) - // Return value in X0:X1 - self.load_int128(src1, Reg::X0, Reg::X1); - self.load_int128(src2, Reg::X2, Reg::X3); - - let func_name = match insn.op { - Opcode::DivS => "__divti3", - Opcode::DivU => "__udivti3", - Opcode::ModS => 
"__modti3", - Opcode::ModU => "__umodti3", - _ => return, - }; - - use crate::arch::lir::CallTarget; - self.push_lir(Aarch64Inst::Bl { - target: CallTarget::Direct(crate::arch::lir::Symbol::extern_sym(func_name)), - }); - - // Result in X0:X1 -> store to target - // Move to X9:X10 first to avoid clobbering if target overlaps arg regs - self.push_lir(Aarch64Inst::Mov { - size: OperandSize::B64, - src: GpOperand::Reg(Reg::X0), - dst: Reg::X9, - }); - self.push_lir(Aarch64Inst::Mov { - size: OperandSize::B64, - src: GpOperand::Reg(Reg::X1), - dst: Reg::X10, - }); - self.store_int128(Reg::X9, Reg::X10, target); - } - - /// Emit 128-bit comparison - fn emit_int128_compare(&mut self, insn: &Instruction) { - let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { - (Some(&s1), Some(&s2)) => (s1, s2), - _ => return, - }; let target = match insn.target { Some(t) => t, None => return, }; - self.load_int128(src1, Reg::X9, Reg::X10); - self.load_int128(src2, Reg::X11, Reg::X16); - + // Truncate from 128: just take lo half (or part of it) + self.load_int128(src, Reg::X9, Reg::X10); + // X9 has lo half, which is what we want + let target_size = insn.size; let dst_loc = self.get_location(target); let dst_reg = match &dst_loc { Loc::Reg(r) => *r, - _ => Reg::X17, + _ => Reg::X9, // already in X9 }; - - match insn.op { - Opcode::SetEq | Opcode::SetNe => { - // Eq/Ne: eor both halves, orr results, compare with zero - // X9 = lo1 ^ lo2 - self.push_lir(Aarch64Inst::Eor { - size: OperandSize::B64, - src1: Reg::X9, - src2: GpOperand::Reg(Reg::X11), - dst: Reg::X9, - }); - // X10 = hi1 ^ hi2 - self.push_lir(Aarch64Inst::Eor { - size: OperandSize::B64, - src1: Reg::X10, - src2: GpOperand::Reg(Reg::X16), - dst: Reg::X10, - }); - // X9 = X9 | X10 - self.push_lir(Aarch64Inst::Orr { - size: OperandSize::B64, - src1: Reg::X9, - src2: GpOperand::Reg(Reg::X10), - dst: Reg::X9, - }); - // cmp X9, #0 - self.push_lir(Aarch64Inst::Cmp { - size: OperandSize::B64, - src1: Reg::X9, - src2: 
GpOperand::Imm(0), - }); - let cond = if insn.op == Opcode::SetEq { - CondCode::Eq - } else { - CondCode::Ne - }; - self.push_lir(Aarch64Inst::Cset { cond, dst: dst_reg }); - } - _ => { - // Ordered comparisons: branch-based approach to avoid ccmp nzcv=0 bug. - // Compare hi halves first; if not equal, hi comparison determines result. - // If hi halves are equal, compare lo halves (always unsigned tiebreaker). - let label_hi_gt = self.next_unique_label("i128cmp"); - let label_hi_lt = self.next_unique_label("i128cmp"); - let label_done = self.next_unique_label("i128cmp"); - - // Determine signedness and what result to produce in each case. - // For signed: compare hi with signed conditions. - // For unsigned: compare hi with unsigned conditions. - let (hi_gt_cond, hi_lt_cond, lo_cond, hi_gt_val, hi_lt_val) = match insn.op { - // SetLt: result=1 when src1 < src2 - Opcode::SetLt => (CondCode::Sgt, CondCode::Slt, CondCode::Ult, 0i64, 1i64), - // SetLe: result=1 when src1 <= src2 - Opcode::SetLe => (CondCode::Sgt, CondCode::Slt, CondCode::Ule, 0, 1), - // SetGt: result=1 when src1 > src2 - Opcode::SetGt => (CondCode::Sgt, CondCode::Slt, CondCode::Ugt, 1, 0), - // SetGe: result=1 when src1 >= src2 - Opcode::SetGe => (CondCode::Sgt, CondCode::Slt, CondCode::Uge, 1, 0), - // SetB (unsigned <): result=1 when src1 < src2 - Opcode::SetB => (CondCode::Ugt, CondCode::Ult, CondCode::Ult, 0, 1), - // SetBe (unsigned <=) - Opcode::SetBe => (CondCode::Ugt, CondCode::Ult, CondCode::Ule, 0, 1), - // SetA (unsigned >) - Opcode::SetA => (CondCode::Ugt, CondCode::Ult, CondCode::Ugt, 1, 0), - // SetAe (unsigned >=) - Opcode::SetAe => (CondCode::Ugt, CondCode::Ult, CondCode::Uge, 1, 0), - _ => return, - }; - - // Compare hi halves (X10=hi1, X16=hi2) - self.push_lir(Aarch64Inst::Cmp { - size: OperandSize::B64, - src1: Reg::X10, - src2: GpOperand::Reg(Reg::X16), - }); - self.push_lir(Aarch64Inst::BCond { - cond: hi_gt_cond, - target: label_hi_gt.clone(), - }); - 
self.push_lir(Aarch64Inst::BCond { - cond: hi_lt_cond, - target: label_hi_lt.clone(), - }); - - // Hi halves equal: compare lo halves (unsigned tiebreaker) - self.push_lir(Aarch64Inst::Cmp { - size: OperandSize::B64, - src1: Reg::X9, - src2: GpOperand::Reg(Reg::X11), - }); - self.push_lir(Aarch64Inst::Cset { - cond: lo_cond, - dst: dst_reg, - }); - self.push_lir(Aarch64Inst::B { - target: label_done.clone(), - }); - - // Hi1 > Hi2 (signed or unsigned depending on comparison) - self.push_lir(Aarch64Inst::Directive(Directive::BlockLabel(label_hi_gt))); - self.push_lir(Aarch64Inst::Mov { + if dst_reg != Reg::X9 { + self.push_lir(Aarch64Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::X9), + dst: dst_reg, + }); + } + // Mask to target size if needed + match target_size { + 8 => { + self.push_lir(Aarch64Inst::And { size: OperandSize::B32, - src: GpOperand::Imm(hi_gt_val), + src1: dst_reg, + src2: GpOperand::Imm(0xff), dst: dst_reg, }); - self.push_lir(Aarch64Inst::B { - target: label_done.clone(), - }); - - // Hi1 < Hi2 (signed or unsigned depending on comparison) - self.push_lir(Aarch64Inst::Directive(Directive::BlockLabel(label_hi_lt))); - self.push_lir(Aarch64Inst::Mov { + } + 16 => { + self.push_lir(Aarch64Inst::And { size: OperandSize::B32, - src: GpOperand::Imm(hi_lt_val), + src1: dst_reg, + src2: GpOperand::Imm(0xffff), dst: dst_reg, }); - - // Done - self.push_lir(Aarch64Inst::Directive(Directive::BlockLabel(label_done))); - } - } - - // Store as 64-bit so CBR's 64-bit load doesn't read stack garbage - if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { - self.emit_move_to_loc(dst_reg, &dst_loc, 64); - } - } - - /// Emit 128-bit unary operation (neg, not) - fn emit_int128_unary(&mut self, insn: &Instruction, op: UnaryOp) { - let src = match insn.src.first() { - Some(&s) => s, - None => return, - }; - let target = match insn.target { - Some(t) => t, - None => return, - }; - - self.load_int128(src, Reg::X9, Reg::X10); - - match op { - 
UnaryOp::Neg => { - // negs x9, x9 (negate lo, set flags) - // ngc x10, x10 (negate hi with borrow) - self.push_lir(Aarch64Inst::Negs { - size: OperandSize::B64, - src: Reg::X9, - dst: Reg::X9, - }); - self.push_lir(Aarch64Inst::Ngc { - size: OperandSize::B64, - src: Reg::X10, - dst: Reg::X10, - }); - } - UnaryOp::Not => { - // mvn x9, x9 - // mvn x10, x10 - self.push_lir(Aarch64Inst::Mvn { - size: OperandSize::B64, - src: Reg::X9, - dst: Reg::X9, - }); - self.push_lir(Aarch64Inst::Mvn { - size: OperandSize::B64, - src: Reg::X10, - dst: Reg::X10, - }); - } - } - - self.store_int128(Reg::X9, Reg::X10, target); - } - - /// Emit 128-bit extend/truncate operations - fn emit_int128_extend(&mut self, insn: &Instruction) { - let src = match insn.src.first() { - Some(&s) => s, - None => return, - }; - let target = match insn.target { - Some(t) => t, - None => return, - }; - - match insn.op { - Opcode::Zext => { - // Zero extend to 128: lo = src, hi = 0 - let dst_loc = self.get_location(target); - if let Loc::Stack(dst_offset) = dst_loc { - self.emit_move(src, Reg::X9, 64); - // Zero-extend from smaller source if needed - match insn.src_size { - 8 => { - self.push_lir(Aarch64Inst::Uxtb { - src: Reg::X9, - dst: Reg::X9, - }); - } - 16 => { - self.push_lir(Aarch64Inst::Uxth { - src: Reg::X9, - dst: Reg::X9, - }); - } - 32 => { - // Writing to w9 zeroes upper 32 bits - self.push_lir(Aarch64Inst::Mov { - size: OperandSize::B32, - src: GpOperand::Reg(Reg::X9), - dst: Reg::X9, - }); - } - _ => {} // 64-bit: nothing extra needed - } - let mem = self.stack_mem(dst_offset); - self.push_lir(Aarch64Inst::Stp { - size: OperandSize::B64, - src1: Reg::X9, - src2: Reg::Xzr, - addr: mem, - }); - } } - Opcode::Sext => { - // Sign extend to 128: lo = src, hi = src >> 63 (sign bit) - let dst_loc = self.get_location(target); - if let Loc::Stack(dst_offset) = dst_loc { - self.emit_move(src, Reg::X9, 64); - // Sign-extend from smaller source if needed - match insn.src_size { - 8 => { - 
self.push_lir(Aarch64Inst::Sxtb { - dst_size: OperandSize::B64, - src: Reg::X9, - dst: Reg::X9, - }); - } - 16 => { - self.push_lir(Aarch64Inst::Sxth { - dst_size: OperandSize::B64, - src: Reg::X9, - dst: Reg::X9, - }); - } - 32 => { - self.push_lir(Aarch64Inst::Sxtw { - src: Reg::X9, - dst: Reg::X9, - }); - } - _ => {} // 64-bit: nothing extra needed - } - // hi = lo >> 63 (arithmetic) - self.push_lir(Aarch64Inst::Asr { - size: OperandSize::B64, - src: Reg::X9, - amount: GpOperand::Imm(63), - dst: Reg::X10, - }); - let mem = self.stack_mem(dst_offset); - self.push_lir(Aarch64Inst::Stp { - size: OperandSize::B64, - src1: Reg::X9, - src2: Reg::X10, - addr: mem, - }); - } - } - Opcode::Trunc => { - // Truncate from 128: just take lo half (or part of it) - self.load_int128(src, Reg::X9, Reg::X10); - // X9 has lo half, which is what we want - let target_size = insn.size; - let dst_loc = self.get_location(target); - let dst_reg = match &dst_loc { - Loc::Reg(r) => *r, - _ => Reg::X9, // already in X9 - }; - if dst_reg != Reg::X9 { - self.push_lir(Aarch64Inst::Mov { - size: OperandSize::B64, - src: GpOperand::Reg(Reg::X9), - dst: dst_reg, - }); - } - // Mask to target size if needed - match target_size { - 8 => { - self.push_lir(Aarch64Inst::And { - size: OperandSize::B32, - src1: dst_reg, - src2: GpOperand::Imm(0xff), - dst: dst_reg, - }); - } - 16 => { - self.push_lir(Aarch64Inst::And { - size: OperandSize::B32, - src1: dst_reg, - src2: GpOperand::Imm(0xffff), - dst: dst_reg, - }); - } - 32 | 64 => { - // Already correct width - } - _ => {} - } - if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { - self.emit_move_to_loc(dst_reg, &dst_loc, target_size); - } + 32 | 64 => { + // Already correct width } _ => {} } + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, target_size); + } } // ======================================================================== diff --git a/cc/arch/aarch64/lir.rs b/cc/arch/aarch64/lir.rs 
index 26566e69..68a4d4e2 100644 --- a/cc/arch/aarch64/lir.rs +++ b/cc/arch/aarch64/lir.rs @@ -642,29 +642,6 @@ pub enum Aarch64Inst { /// UMULH - Unsigned multiply high (upper 64 bits of 64x64->128 multiply) Umulh { src1: Reg, src2: Reg, dst: Reg }, - /// MADD - Multiply-add: dst = acc + (src1 * src2) - MAdd { - size: OperandSize, - src1: Reg, - src2: Reg, - acc: Reg, - dst: Reg, - }, - - /// NEGS - Negate and set flags (used for 128-bit negate: lo half) - Negs { - size: OperandSize, - src: Reg, - dst: Reg, - }, - - /// NGC - Negate with carry (used for 128-bit negate: hi half) - Ngc { - size: OperandSize, - src: Reg, - dst: Reg, - }, - // ======================================================================== // Directives (Architecture-Independent) // ======================================================================== @@ -1789,44 +1766,6 @@ impl EmitAsm for Aarch64Inst { ); } - Aarch64Inst::MAdd { - size, - src1, - src2, - acc, - dst, - } => { - let sz = size.bits().max(32); - let _ = writeln!( - out, - " madd {}, {}, {}, {}", - dst.name_for_size(sz), - src1.name_for_size(sz), - src2.name_for_size(sz), - acc.name_for_size(sz) - ); - } - - Aarch64Inst::Negs { size, src, dst } => { - let sz = size.bits().max(32); - let _ = writeln!( - out, - " negs {}, {}", - dst.name_for_size(sz), - src.name_for_size(sz) - ); - } - - Aarch64Inst::Ngc { size, src, dst } => { - let sz = size.bits().max(32); - let _ = writeln!( - out, - " ngc {}, {}", - dst.name_for_size(sz), - src.name_for_size(sz) - ); - } - // Directives - delegate to shared implementation Aarch64Inst::Directive(dir) => { dir.emit(target, out); diff --git a/cc/arch/x86_64/expression.rs b/cc/arch/x86_64/expression.rs index 0a005d98..2e356e4a 100644 --- a/cc/arch/x86_64/expression.rs +++ b/cc/arch/x86_64/expression.rs @@ -172,10 +172,6 @@ impl X86_64CodeGen { .typ .map(|t| types.size_bits(t).max(32)) .unwrap_or(insn.size.max(32)); - if insn.typ.is_some_and(|t| types.kind(t) == TypeKind::Int128) { - 
self.emit_int128_unary(insn, op); - return; - } let op_size = OperandSize::from_bits(size); let src = match insn.src.first() { Some(&s) => s, @@ -211,10 +207,6 @@ impl X86_64CodeGen { .typ .map(|t| types.size_bits(t).max(32)) .unwrap_or(insn.size.max(32)); - if insn.typ.is_some_and(|t| types.kind(t) == TypeKind::Int128) { - self.emit_int128_mul(insn); - return; - } let op_size = OperandSize::from_bits(size); let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { (Some(&s1), Some(&s2)) => (s1, s2), @@ -259,10 +251,6 @@ impl X86_64CodeGen { .typ .map(|t| types.size_bits(t).max(32)) .unwrap_or(insn.size.max(32)); - if insn.typ.is_some_and(|t| types.kind(t) == TypeKind::Int128) { - self.emit_int128_div(insn); - return; - } let op_size = OperandSize::from_bits(size); let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { (Some(&s1), Some(&s2)) => (s1, s2), @@ -339,10 +327,6 @@ impl X86_64CodeGen { .typ .map(|t| types.size_bits(t).max(32)) .unwrap_or(insn.size.max(32)); - if insn.typ.is_some_and(|t| types.kind(t) == TypeKind::Int128) { - self.emit_int128_compare(insn); - return; - } let op_size = OperandSize::from_bits(size); let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { (Some(&s1), Some(&s2)) => (s1, s2), @@ -721,50 +705,9 @@ impl X86_64CodeGen { Label::new(prefix, suffix) } - /// Get the GpOperand for the lo half of src2 (for use in add/sub/etc). - /// If the operand is an immediate, returns GpOperand::Imm or loads into R11. 
- fn int128_src2_lo_operand(&mut self, src2: PseudoId) -> GpOperand { - let loc = self.get_location(src2); - match &loc { - Loc::Imm(v) => { - let lo = *v as i64; - if lo > i32::MAX as i64 || lo < i32::MIN as i64 { - self.push_lir(X86Inst::MovAbs { - imm: lo, - dst: Reg::R11, - }); - GpOperand::Reg(Reg::R11) - } else { - GpOperand::Imm(lo) - } - } - Loc::Stack(_) | Loc::IncomingArg(_) => GpOperand::Mem(self.int128_lo_mem(&loc)), - _ => panic!("int128_src2_lo_operand: unexpected loc {:?}", loc), - } - } - - /// Get the GpOperand for the hi half of src2. - fn int128_src2_hi_operand(&mut self, src2: PseudoId) -> GpOperand { - let loc = self.get_location(src2); - match &loc { - Loc::Imm(v) => { - let hi = (*v >> 64) as i64; - if hi > i32::MAX as i64 || hi < i32::MIN as i64 { - self.push_lir(X86Inst::MovAbs { - imm: hi, - dst: Reg::R11, - }); - GpOperand::Reg(Reg::R11) - } else { - GpOperand::Imm(hi) - } - } - Loc::Stack(_) | Loc::IncomingArg(_) => GpOperand::Mem(self.int128_hi_mem(&loc)), - _ => panic!("int128_src2_hi_operand: unexpected loc {:?}", loc), - } - } - - /// Emit 128-bit binary operation (Add, Sub, And, Or, Xor, Shl, Lsr, Asr). + /// Emit 128-bit shift operations (Shl, Lsr, Asr). + /// Other int128 ops (Add, Sub, And, Or, Xor, Mul, Neg, Not, comparisons) + /// are expanded by the hwmap pass into 64-bit sequences. 
fn emit_int128_binop(&mut self, insn: &Instruction) { let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { (Some(&s1), Some(&s2)) => (s1, s2), @@ -777,91 +720,6 @@ impl X86_64CodeGen { let dst_loc = self.get_location(target); match insn.op { - Opcode::Add => { - // lo: addq src2_lo, src1_lo → dst_lo (sets CF) - // hi: adcq src2_hi, src1_hi → dst_hi (uses CF) - self.int128_load_lo(src1, Reg::R10); - let src2_lo = self.int128_src2_lo_operand(src2); - self.push_lir(X86Inst::Add { - size: OperandSize::B64, - src: src2_lo, - dst: Reg::R10, - }); - self.int128_store_lo(Reg::R10, &dst_loc); - self.int128_load_hi(src1, Reg::R10); - let src2_hi = self.int128_src2_hi_operand(src2); - self.push_lir(X86Inst::Adc { - size: OperandSize::B64, - src: src2_hi, - dst: Reg::R10, - }); - self.int128_store_hi(Reg::R10, &dst_loc); - } - Opcode::Sub => { - // lo: subq src2_lo, src1_lo → dst_lo (sets CF) - // hi: sbbq src2_hi, src1_hi → dst_hi (uses CF) - self.int128_load_lo(src1, Reg::R10); - let src2_lo = self.int128_src2_lo_operand(src2); - self.push_lir(X86Inst::Sub { - size: OperandSize::B64, - src: src2_lo, - dst: Reg::R10, - }); - self.int128_store_lo(Reg::R10, &dst_loc); - self.int128_load_hi(src1, Reg::R10); - let src2_hi = self.int128_src2_hi_operand(src2); - self.push_lir(X86Inst::Sbb { - size: OperandSize::B64, - src: src2_hi, - dst: Reg::R10, - }); - self.int128_store_hi(Reg::R10, &dst_loc); - } - Opcode::And | Opcode::Or | Opcode::Xor => { - // Independent 64-bit ops on lo and hi halves (no carry) - self.int128_load_lo(src1, Reg::R10); - let src2_lo = self.int128_src2_lo_operand(src2); - match insn.op { - Opcode::And => self.push_lir(X86Inst::And { - size: OperandSize::B64, - src: src2_lo, - dst: Reg::R10, - }), - Opcode::Or => self.push_lir(X86Inst::Or { - size: OperandSize::B64, - src: src2_lo, - dst: Reg::R10, - }), - Opcode::Xor => self.push_lir(X86Inst::Xor { - size: OperandSize::B64, - src: src2_lo, - dst: Reg::R10, - }), - _ => unreachable!(), - } - 
self.int128_store_lo(Reg::R10, &dst_loc); - self.int128_load_hi(src1, Reg::R10); - let src2_hi = self.int128_src2_hi_operand(src2); - match insn.op { - Opcode::And => self.push_lir(X86Inst::And { - size: OperandSize::B64, - src: src2_hi, - dst: Reg::R10, - }), - Opcode::Or => self.push_lir(X86Inst::Or { - size: OperandSize::B64, - src: src2_hi, - dst: Reg::R10, - }), - Opcode::Xor => self.push_lir(X86Inst::Xor { - size: OperandSize::B64, - src: src2_hi, - dst: Reg::R10, - }), - _ => unreachable!(), - } - self.int128_store_hi(Reg::R10, &dst_loc); - } Opcode::Shl => { self.emit_int128_shl(src1, src2, &dst_loc); } @@ -1211,501 +1069,54 @@ impl X86_64CodeGen { self.push_lir(X86Inst::Directive(Directive::BlockLabel(done_label))); } - /// Emit 128-bit multiply. - /// result_lo = lo(src1_lo * src2_lo) - /// result_hi = hi(src1_lo * src2_lo) + src1_lo * src2_hi + src1_hi * src2_lo - /// - /// Uses RAX, RDX (for mul), R10, R11 as scratch. RAX/RDX are allocatable - /// but the regalloc ensures they are not live across this instruction. - fn emit_int128_mul(&mut self, insn: &Instruction) { - let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { - (Some(&s1), Some(&s2)) => (s1, s2), - _ => return, - }; - let target = match insn.target { - Some(t) => t, + /// Emit 128-bit extend/truncate operations. 
+ fn emit_int128_extend(&mut self, insn: &Instruction) { + let src = match insn.src.first() { + Some(&s) => s, None => return, }; - let dst_loc = self.get_location(target); - - // Step 1: RAX = src1_lo, mulq src2_lo → RDX:RAX = src1_lo * src2_lo - self.int128_load_lo(src1, Reg::Rax); - let src2_lo_loc = self.get_location(src2); - let src2_lo_op = match &src2_lo_loc { - Loc::Stack(_) | Loc::IncomingArg(_) => GpOperand::Mem(self.int128_lo_mem(&src2_lo_loc)), - Loc::Imm(v) => { - let lo = *v as i64; - if lo > i32::MAX as i64 || lo < i32::MIN as i64 { - self.push_lir(X86Inst::MovAbs { - imm: lo, - dst: Reg::R10, - }); - } else { - self.push_lir(X86Inst::Mov { - size: OperandSize::B64, - src: GpOperand::Imm(lo), - dst: GpOperand::Reg(Reg::R10), - }); - } - GpOperand::Reg(Reg::R10) - } - _ => panic!("int128_mul: unexpected src2 loc {:?}", src2_lo_loc), - }; - self.push_lir(X86Inst::Mul1 { - size: OperandSize::B64, - src: src2_lo_op, - }); - // RAX = result_lo, RDX = partial_hi - self.int128_store_lo(Reg::Rax, &dst_loc); - // Save partial_hi in R11 - self.push_lir(X86Inst::Mov { - size: OperandSize::B64, - src: GpOperand::Reg(Reg::Rdx), - dst: GpOperand::Reg(Reg::R11), - }); - - // Step 2: R10 = src1_hi * src2_lo (only lo 64 bits matter) - self.int128_load_hi(src1, Reg::R10); - // We need src2_lo in a register for imulq - let src2_lo_loc2 = self.get_location(src2); - let src2_lo_gp = match &src2_lo_loc2 { - Loc::Stack(_) | Loc::IncomingArg(_) => { - GpOperand::Mem(self.int128_lo_mem(&src2_lo_loc2)) - } - Loc::Imm(v) => { - let lo = *v as i64; - if lo > i32::MAX as i64 || lo < i32::MIN as i64 { - self.push_lir(X86Inst::MovAbs { - imm: lo, - dst: Reg::Rax, - }); - GpOperand::Reg(Reg::Rax) - } else { - GpOperand::Imm(lo) - } - } - _ => panic!("int128_mul: unexpected src2 loc {:?}", src2_lo_loc2), - }; - self.push_lir(X86Inst::IMul2 { - size: OperandSize::B64, - src: src2_lo_gp, - dst: Reg::R10, - }); - // R11 += R10 - self.push_lir(X86Inst::Add { - size: OperandSize::B64, - 
src: GpOperand::Reg(Reg::R10), - dst: Reg::R11, - }); - - // Step 3: R10 = src1_lo * src2_hi (only lo 64 bits matter) - self.int128_load_lo(src1, Reg::R10); - let src2_hi_loc = self.get_location(src2); - let src2_hi_gp = match &src2_hi_loc { - Loc::Stack(_) | Loc::IncomingArg(_) => GpOperand::Mem(self.int128_hi_mem(&src2_hi_loc)), - Loc::Imm(v) => { - let hi = (*v >> 64) as i64; - if hi > i32::MAX as i64 || hi < i32::MIN as i64 { - self.push_lir(X86Inst::MovAbs { - imm: hi, - dst: Reg::Rax, - }); - GpOperand::Reg(Reg::Rax) - } else { - GpOperand::Imm(hi) - } - } - _ => panic!("int128_mul: unexpected src2 loc {:?}", src2_hi_loc), - }; - self.push_lir(X86Inst::IMul2 { - size: OperandSize::B64, - src: src2_hi_gp, - dst: Reg::R10, - }); - // R11 += R10 - self.push_lir(X86Inst::Add { - size: OperandSize::B64, - src: GpOperand::Reg(Reg::R10), - dst: Reg::R11, - }); - - // Store result_hi - self.int128_store_hi(Reg::R11, &dst_loc); - } - - /// Emit 128-bit division. - /// For __int128 division, we call the compiler runtime functions - /// __divti3 (signed) or __udivti3 (unsigned). - /// Args: (lo1, hi1, lo2, hi2) in RDI, RSI, RDX, RCX - /// Returns: (lo, hi) in RAX, RDX - fn emit_int128_div(&mut self, insn: &Instruction) { - let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { - (Some(&s1), Some(&s2)) => (s1, s2), - _ => return, - }; let target = match insn.target { Some(t) => t, None => return, }; - let dst_loc = self.get_location(target); - let func_name = match insn.op { - Opcode::DivS => "__divti3", - Opcode::DivU => "__udivti3", - Opcode::ModS => "__modti3", - Opcode::ModU => "__umodti3", - _ => return, - }; - - // SysV ABI: __int128 args passed as (lo, hi) pairs in GP registers - // arg1 = (RDI=lo1, RSI=hi1), arg2 = (RDX=lo2, RCX=hi2) - // BUT: we must be careful about order because loading src2_lo into RDX - // could clobber a register we need. Load src2 first into RCX/R10, - // then src1, then move src2_lo to RDX. 
- - // Load src2_hi into RCX first - self.int128_load_hi(src2, Reg::Rcx); - // Load src2_lo into R10 (temporary, will move to RDX later) - self.int128_load_lo(src2, Reg::R10); - // Load src1 - self.int128_load_lo(src1, Reg::Rdi); - self.int128_load_hi(src1, Reg::Rsi); - // Now move src2_lo to RDX - self.push_lir(X86Inst::Mov { - size: OperandSize::B64, - src: GpOperand::Reg(Reg::R10), - dst: GpOperand::Reg(Reg::Rdx), - }); - - // Call the runtime function - let sym = crate::arch::lir::Symbol::global(func_name.to_string()); - self.push_lir(X86Inst::Call { - target: crate::arch::lir::CallTarget::Direct(sym), - }); - - // Result in RAX (lo), RDX (hi) - self.int128_store_lo(Reg::Rax, &dst_loc); - self.int128_store_hi(Reg::Rdx, &dst_loc); - } - - /// Emit 128-bit comparison. - fn emit_int128_compare(&mut self, insn: &Instruction) { - let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { - (Some(&s1), Some(&s2)) => (s1, s2), - _ => return, - }; - let target = match insn.target { - Some(t) => t, - None => return, - }; + // Truncating FROM 128-bit (insn.src_size == 128) + // Just load the lo half and truncate let dst_loc = self.get_location(target); - let work_reg = match &dst_loc { + let dst_reg = match &dst_loc { Loc::Reg(r) => *r, _ => Reg::R10, }; - - match insn.op { - Opcode::SetEq | Opcode::SetNe => { - // XOR both halves and OR them. Result is zero iff equal. 
- self.int128_load_lo(src1, Reg::R10); - let src2_lo = self.int128_src2_lo_operand(src2); - self.push_lir(X86Inst::Xor { - size: OperandSize::B64, - src: src2_lo, - dst: Reg::R10, - }); - self.int128_load_hi(src1, Reg::R11); - let src2_hi = self.int128_src2_hi_operand(src2); - self.push_lir(X86Inst::Xor { - size: OperandSize::B64, - src: src2_hi, - dst: Reg::R11, - }); - self.push_lir(X86Inst::Or { - size: OperandSize::B64, - src: GpOperand::Reg(Reg::R11), - dst: Reg::R10, - }); - let cc = if insn.op == Opcode::SetEq { - CondCode::Eq - } else { - CondCode::Ne - }; - self.push_lir(X86Inst::SetCC { cc, dst: work_reg }); + self.int128_load_lo(src, dst_reg); + // Truncate to target size + match insn.size { + 8 => { self.push_lir(X86Inst::Movzx { src_size: OperandSize::B8, dst_size: OperandSize::B32, - src: GpOperand::Reg(work_reg), - dst: work_reg, + src: GpOperand::Reg(dst_reg), + dst: dst_reg, }); } - _ => { - // Ordered comparisons: compare hi first, then lo if hi equal. - // For signed: hi compared signed, lo compared unsigned. - // For unsigned: both compared unsigned. 
- let is_signed = matches!( - insn.op, - Opcode::SetLt | Opcode::SetLe | Opcode::SetGt | Opcode::SetGe - ); - - let hi_decides_label = self.int128_label("i128cmp_hi"); - let done_label = self.int128_label("i128cmp_done"); - - // Compare hi halves - self.int128_load_hi(src1, Reg::R10); - let src2_hi = self.int128_src2_hi_operand(src2); - self.push_lir(X86Inst::Cmp { - size: OperandSize::B64, - src: src2_hi, - dst: GpOperand::Reg(Reg::R10), - }); - // If hi halves are not equal, the hi comparison decides - self.push_lir(X86Inst::Jcc { - cc: CondCode::Ne, - target: hi_decides_label.clone(), - }); - - // Hi halves are equal: compare lo halves (always unsigned) - self.int128_load_lo(src1, Reg::R10); - let src2_lo = self.int128_src2_lo_operand(src2); - self.push_lir(X86Inst::Cmp { - size: OperandSize::B64, - src: src2_lo, - dst: GpOperand::Reg(Reg::R10), - }); - // Use unsigned comparison for lo half - let lo_cc = match insn.op { - Opcode::SetLt | Opcode::SetB => CondCode::Ult, - Opcode::SetLe | Opcode::SetBe => CondCode::Ule, - Opcode::SetGt | Opcode::SetA => CondCode::Ugt, - Opcode::SetGe | Opcode::SetAe => CondCode::Uge, - _ => CondCode::Ult, - }; - self.push_lir(X86Inst::SetCC { - cc: lo_cc, - dst: work_reg, - }); - self.push_lir(X86Inst::Movzx { - src_size: OperandSize::B8, - dst_size: OperandSize::B32, - src: GpOperand::Reg(work_reg), - dst: work_reg, - }); - self.push_lir(X86Inst::Jmp { - target: done_label.clone(), - }); - - // Hi decides the comparison - self.push_lir(X86Inst::Directive(Directive::BlockLabel(hi_decides_label))); - let hi_cc = if is_signed { - match insn.op { - Opcode::SetLt => CondCode::Slt, - Opcode::SetLe => CondCode::Sle, - Opcode::SetGt => CondCode::Sgt, - Opcode::SetGe => CondCode::Sge, - _ => CondCode::Slt, - } - } else { - match insn.op { - Opcode::SetB => CondCode::Ult, - Opcode::SetBe => CondCode::Ule, - Opcode::SetA => CondCode::Ugt, - Opcode::SetAe => CondCode::Uge, - _ => CondCode::Ult, - } - }; - self.push_lir(X86Inst::SetCC { - cc: 
hi_cc, - dst: work_reg, - }); + 16 => { self.push_lir(X86Inst::Movzx { - src_size: OperandSize::B8, + src_size: OperandSize::B16, dst_size: OperandSize::B32, - src: GpOperand::Reg(work_reg), - dst: work_reg, - }); - - self.push_lir(X86Inst::Directive(Directive::BlockLabel(done_label))); - } - } - - if !matches!(&dst_loc, Loc::Reg(r) if *r == work_reg) { - self.emit_move_to_loc(work_reg, &dst_loc, u32::BITS); - } - } - - /// Emit 128-bit unary operation (Neg, Not). - fn emit_int128_unary(&mut self, insn: &Instruction, op: UnaryOp) { - let src = match insn.src.first() { - Some(&s) => s, - None => return, - }; - let target = match insn.target { - Some(t) => t, - None => return, - }; - let dst_loc = self.get_location(target); - - match op { - UnaryOp::Not => { - // Bitwise NOT: not lo; not hi - self.int128_load_lo(src, Reg::R10); - self.push_lir(X86Inst::Not { - size: OperandSize::B64, - dst: Reg::R10, + src: GpOperand::Reg(dst_reg), + dst: dst_reg, }); - self.int128_store_lo(Reg::R10, &dst_loc); - self.int128_load_hi(src, Reg::R10); - self.push_lir(X86Inst::Not { - size: OperandSize::B64, - dst: Reg::R10, - }); - self.int128_store_hi(Reg::R10, &dst_loc); } - UnaryOp::Neg => { - // Two's complement negate: not lo; not hi; add $1, lo; adc $0, hi - self.int128_load_lo(src, Reg::R10); - self.push_lir(X86Inst::Not { - size: OperandSize::B64, - dst: Reg::R10, - }); - self.push_lir(X86Inst::Add { - size: OperandSize::B64, - src: GpOperand::Imm(1), - dst: Reg::R10, - }); - self.int128_store_lo(Reg::R10, &dst_loc); - self.int128_load_hi(src, Reg::R10); - self.push_lir(X86Inst::Not { - size: OperandSize::B64, - dst: Reg::R10, - }); - self.push_lir(X86Inst::Adc { - size: OperandSize::B64, - src: GpOperand::Imm(0), - dst: Reg::R10, + 32 => { + self.push_lir(X86Inst::Mov { + size: OperandSize::B32, + src: GpOperand::Reg(dst_reg), + dst: GpOperand::Reg(dst_reg), }); - self.int128_store_hi(Reg::R10, &dst_loc); } + _ => {} // 64-bit: lo half is the result } - } - - /// Emit 128-bit 
extend/truncate operations. - fn emit_int128_extend(&mut self, insn: &Instruction) { - let src = match insn.src.first() { - Some(&s) => s, - None => return, - }; - let target = match insn.target { - Some(t) => t, - None => return, - }; - - if insn.size == 128 { - // Extending TO 128-bit - let dst_loc = self.get_location(target); - match insn.op { - Opcode::Zext => { - // Zero-extend: lo = src, hi = 0 - self.emit_move(src, Reg::R10, insn.src_size.max(32)); - // If src_size < 64, ensure upper bits are zeroed - if insn.src_size < 32 { - let mask = (1i64 << insn.src_size) - 1; - self.push_lir(X86Inst::And { - size: OperandSize::B32, - src: GpOperand::Imm(mask), - dst: Reg::R10, - }); - } - self.int128_store_lo(Reg::R10, &dst_loc); - self.int128_store_hi_imm(0, &dst_loc); - } - Opcode::Sext => { - // Sign-extend: lo = src, hi = src >> 63 (sign extension) - self.emit_move(src, Reg::R10, insn.src_size.max(32)); - // Sign-extend src to 64 bits first if needed - match insn.src_size { - 8 => { - self.push_lir(X86Inst::Movsx { - src_size: OperandSize::B8, - dst_size: OperandSize::B64, - src: GpOperand::Reg(Reg::R10), - dst: Reg::R10, - }); - } - 16 => { - self.push_lir(X86Inst::Movsx { - src_size: OperandSize::B16, - dst_size: OperandSize::B64, - src: GpOperand::Reg(Reg::R10), - dst: Reg::R10, - }); - } - 32 => { - self.push_lir(X86Inst::Movsx { - src_size: OperandSize::B32, - dst_size: OperandSize::B64, - src: GpOperand::Reg(Reg::R10), - dst: Reg::R10, - }); - } - _ => {} // 64-bit: already correct - } - self.int128_store_lo(Reg::R10, &dst_loc); - // hi = sign extension of lo - self.push_lir(X86Inst::Mov { - size: OperandSize::B64, - src: GpOperand::Reg(Reg::R10), - dst: GpOperand::Reg(Reg::R11), - }); - self.push_lir(X86Inst::Sar { - size: OperandSize::B64, - count: ShiftCount::Imm(63), - dst: Reg::R11, - }); - self.int128_store_hi(Reg::R11, &dst_loc); - } - _ => {} - } - } else { - // Truncating FROM 128-bit (insn.src_size == 128) - // Just load the lo half and truncate - 
let dst_loc = self.get_location(target); - let dst_reg = match &dst_loc { - Loc::Reg(r) => *r, - _ => Reg::R10, - }; - self.int128_load_lo(src, dst_reg); - // Truncate to target size - match insn.size { - 8 => { - self.push_lir(X86Inst::Movzx { - src_size: OperandSize::B8, - dst_size: OperandSize::B32, - src: GpOperand::Reg(dst_reg), - dst: dst_reg, - }); - } - 16 => { - self.push_lir(X86Inst::Movzx { - src_size: OperandSize::B16, - dst_size: OperandSize::B32, - src: GpOperand::Reg(dst_reg), - dst: dst_reg, - }); - } - 32 => { - self.push_lir(X86Inst::Mov { - size: OperandSize::B32, - src: GpOperand::Reg(dst_reg), - dst: GpOperand::Reg(dst_reg), - }); - } - _ => {} // 64-bit: lo half is the result - } - if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { - self.emit_move_to_loc(dst_reg, &dst_loc, insn.size); - } + if !matches!(&dst_loc, Loc::Reg(r) if *r == dst_reg) { + self.emit_move_to_loc(dst_reg, &dst_loc, insn.size); } } From 05468f1631ab3f6d7d87d86367fcaa9287ac1e8f Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Mon, 23 Mar 2026 00:42:11 +0000 Subject: [PATCH 08/18] cc: address hwmap code review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes from self-review: 1. expand_insn: explicit dispatch instead of fallthrough to expand_float16. Panics on unhandled Expand with context. 2. map_int128_expand: comparison arms now verify insn.typ is Int128, preventing misrouting of 128-bit long double comparisons if map_common ordering ever changes. 3. Remove 7 duplicate func.add_pseudo calls after alloc_reg64 (which already registers the pseudo). 4. Factor out extract_halves() helper — replaces ~120 lines of repeated Lo64+Hi64 pairs across 8 expansion arms. 5. Simplify Float16 comparison detection — remove redundant inner matches!() and dead `let _ = typ`. 6. Validate --dump-ir stage names early with helpful error message. 7. 
Add codegen_int128_carry_chain_optimized integration test that exercises AddC→AdcC carry propagation, SubC→SbcC borrow, and cross-boundary multiply under -O2. Confirms optimizer does not break the flag-chain invariant. 8. Add explanatory comments on int128_load_lo/hi Loc::Reg handling. Co-Authored-By: Claude Opus 4.6 (1M context) --- cc/arch/x86_64/expression.rs | 6 +- cc/ir/hwmap.rs | 218 +++++++++++++---------------------- cc/main.rs | 28 +++++ cc/tests/codegen/misc.rs | 49 ++++++++ 4 files changed, 161 insertions(+), 140 deletions(-) diff --git a/cc/arch/x86_64/expression.rs b/cc/arch/x86_64/expression.rs index 2e356e4a..e896bbd0 100644 --- a/cc/arch/x86_64/expression.rs +++ b/cc/arch/x86_64/expression.rs @@ -591,7 +591,8 @@ impl X86_64CodeGen { }); } Loc::Reg(r) => { - // Register: treat as containing the low 64 bits + // After optimization, a 64-bit value feeding Pair64 may be + // register-allocated. Lo64 of such a value is the register itself. if *r != dst { self.push_lir(X86Inst::Mov { size: OperandSize::B64, @@ -629,7 +630,8 @@ impl X86_64CodeGen { }); } Loc::Reg(_) => { - // Register holds a scalar; hi half is 0 + // After optimization, a 64-bit value feeding Pair64 may be + // register-allocated. Hi64 of such a value is always 0. self.push_lir(X86Inst::Mov { size: OperandSize::B64, src: GpOperand::Imm(0), diff --git a/cc/ir/hwmap.rs b/cc/ir/hwmap.rs index de004b92..576f766f 100644 --- a/cc/ir/hwmap.rs +++ b/cc/ir/hwmap.rs @@ -346,7 +346,8 @@ fn map_int128_expand(insn: &Instruction, types: &TypeTable) -> Option Option Some(HwMapAction::Expand), + | Opcode::SetAe => { + // insn.typ on comparisons is the operand type (not result type). + // Check it's actually Int128, not some other 128-bit type. 
+ let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(HwMapAction::Expand) + } _ => None, } } @@ -462,29 +471,14 @@ fn map_float16_softfloat(insn: &Instruction, types: &TypeTable) -> Option { - // Comparisons store the operand type in src_typ or check size if let Some(src_typ) = insn.src_typ { if types.kind(src_typ) == TypeKind::Float16 { return Some(HwMapAction::Expand); } } - // Also check by size: Float16 operations have size==16 + // Fallback: check operand size (Float16 = 16 bits) if insn.size == 16 { - if let Some(typ) = insn.typ { - // Result type is int, but check if this is a float comparison - if matches!( - insn.op, - Opcode::FCmpOEq - | Opcode::FCmpONe - | Opcode::FCmpOLt - | Opcode::FCmpOLe - | Opcode::FCmpOGt - | Opcode::FCmpOGe - ) { - let _ = typ; - return Some(HwMapAction::Expand); - } - } + return Some(HwMapAction::Expand); } None } @@ -754,6 +748,7 @@ fn expand_insn( types: &TypeTable, target: &Target, ) { + // Int128 expansion: typ is Int128 for arith/bitwise, or operand type for comparisons if insn.size == 128 { if let Some(typ) = insn.typ { if types.kind(typ) == TypeKind::Int128 { @@ -762,7 +757,30 @@ fn expand_insn( } } } - expand_float16(insn, func, new_insns, types, target); + + // Float16 soft-float expansion (x86-64 only) + if matches!( + insn.op, + Opcode::FAdd + | Opcode::FSub + | Opcode::FMul + | Opcode::FDiv + | Opcode::FNeg + | Opcode::FCmpOEq + | Opcode::FCmpONe + | Opcode::FCmpOLt + | Opcode::FCmpOLe + | Opcode::FCmpOGt + | Opcode::FCmpOGe + ) { + expand_float16(insn, func, new_insns, types, target); + return; + } + + panic!( + "expand_insn: unhandled Expand for {} (size={}) in function {}", + insn.op, insn.size, func.name + ); } /// Allocate a new 64-bit register pseudo. @@ -772,6 +790,20 @@ fn alloc_reg64(func: &mut Function) -> PseudoId { id } +/// Extract lo and hi 64-bit halves from a 128-bit pseudo. 
+fn extract_halves( + func: &mut Function, + new_insns: &mut Vec, + src: PseudoId, + long_type: TypeId, +) -> (PseudoId, PseudoId) { + let lo = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Lo64, lo, src, long_type, 64)); + let hi = alloc_reg64(func); + new_insns.push(Instruction::unop(Opcode::Hi64, hi, src, long_type, 64)); + (lo, hi) +} + /// Expand an int128 instruction into 64-bit operations using Lo64/Hi64/Pair64. fn expand_int128( insn: &Instruction, @@ -783,32 +815,16 @@ fn expand_int128( let long_type = types.ulong_id; match insn.op { - // Bitwise: Lo64+Hi64 both operands, 64-bit op on each half, Pair64 + // Bitwise: independent 64-bit ops on lo/hi halves Opcode::And | Opcode::Or | Opcode::Xor => { - let src1 = insn.src[0]; - let src2 = insn.src[1]; - - let a_lo = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Lo64, a_lo, src1, long_type, 64)); - - let a_hi = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Hi64, a_hi, src1, long_type, 64)); - - let b_lo = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Lo64, b_lo, src2, long_type, 64)); + let (a_lo, a_hi) = extract_halves(func, new_insns, insn.src[0], long_type); + let (b_lo, b_hi) = extract_halves(func, new_insns, insn.src[1], long_type); - let b_hi = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Hi64, b_hi, src2, long_type, 64)); - - // 64-bit op on lo halves let r_lo = alloc_reg64(func); new_insns.push(Instruction::binop(insn.op, r_lo, a_lo, b_lo, long_type, 64)); - - // 64-bit op on hi halves let r_hi = alloc_reg64(func); new_insns.push(Instruction::binop(insn.op, r_hi, a_hi, b_hi, long_type, 64)); - // Combine into 128-bit result let int128_type = insn.typ.unwrap(); new_insns.push(Instruction::binop( Opcode::Pair64, @@ -820,13 +836,9 @@ fn expand_int128( )); } - // Not: Lo64+Hi64, Not each, Pair64 + // Not: decompose, not each half Opcode::Not => { - let src = insn.src[0]; - let s_lo = alloc_reg64(func); - 
new_insns.push(Instruction::unop(Opcode::Lo64, s_lo, src, long_type, 64)); - let s_hi = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Hi64, s_hi, src, long_type, 64)); + let (s_lo, s_hi) = extract_halves(func, new_insns, insn.src[0], long_type); let r_lo = alloc_reg64(func); new_insns.push(Instruction::unop(Opcode::Not, r_lo, s_lo, long_type, 64)); @@ -844,18 +856,12 @@ fn expand_int128( )); } - // Neg: SubC(0, lo), SbcC(0, hi, carry), Pair64 + // Neg: 0 - value with borrow chain Opcode::Neg => { - let src = insn.src[0]; - let s_lo = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Lo64, s_lo, src, long_type, 64)); - let s_hi = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Hi64, s_hi, src, long_type, 64)); - + let (s_lo, s_hi) = extract_halves(func, new_insns, insn.src[0], long_type); let zero = func.create_const_pseudo(0); let r_lo = alloc_reg64(func); - func.add_pseudo(Pseudo::reg(r_lo, r_lo.0)); new_insns.push(Instruction::binop( Opcode::SubC, r_lo, @@ -864,11 +870,9 @@ fn expand_int128( long_type, 64, )); - let r_hi = alloc_reg64(func); - func.add_pseudo(Pseudo::reg(r_hi, r_hi.0)); let mut sbc = Instruction::binop(Opcode::SbcC, r_hi, zero, s_hi, long_type, 64); - sbc.src.push(r_lo); // src[2] = borrow producer + sbc.src.push(r_lo); new_insns.push(sbc); let int128_type = insn.typ.unwrap(); @@ -882,22 +886,12 @@ fn expand_int128( )); } - // Add: AddC(lo,lo), AdcC(hi,hi,carry), Pair64 + // Add: carry chain Opcode::Add => { - let src1 = insn.src[0]; - let src2 = insn.src[1]; - - let a_lo = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Lo64, a_lo, src1, long_type, 64)); - let a_hi = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Hi64, a_hi, src1, long_type, 64)); - let b_lo = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Lo64, b_lo, src2, long_type, 64)); - let b_hi = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Hi64, b_hi, src2, long_type, 64)); + let (a_lo, 
a_hi) = extract_halves(func, new_insns, insn.src[0], long_type); + let (b_lo, b_hi) = extract_halves(func, new_insns, insn.src[1], long_type); let r_lo = alloc_reg64(func); - func.add_pseudo(Pseudo::reg(r_lo, r_lo.0)); new_insns.push(Instruction::binop( Opcode::AddC, r_lo, @@ -906,11 +900,9 @@ fn expand_int128( long_type, 64, )); - let r_hi = alloc_reg64(func); - func.add_pseudo(Pseudo::reg(r_hi, r_hi.0)); let mut adc = Instruction::binop(Opcode::AdcC, r_hi, a_hi, b_hi, long_type, 64); - adc.src.push(r_lo); // src[2] = carry producer + adc.src.push(r_lo); new_insns.push(adc); let int128_type = insn.typ.unwrap(); @@ -924,22 +916,12 @@ fn expand_int128( )); } - // Sub: SubC(lo,lo), SbcC(hi,hi,borrow), Pair64 + // Sub: borrow chain Opcode::Sub => { - let src1 = insn.src[0]; - let src2 = insn.src[1]; - - let a_lo = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Lo64, a_lo, src1, long_type, 64)); - let a_hi = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Hi64, a_hi, src1, long_type, 64)); - let b_lo = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Lo64, b_lo, src2, long_type, 64)); - let b_hi = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Hi64, b_hi, src2, long_type, 64)); + let (a_lo, a_hi) = extract_halves(func, new_insns, insn.src[0], long_type); + let (b_lo, b_hi) = extract_halves(func, new_insns, insn.src[1], long_type); let r_lo = alloc_reg64(func); - func.add_pseudo(Pseudo::reg(r_lo, r_lo.0)); new_insns.push(Instruction::binop( Opcode::SubC, r_lo, @@ -948,11 +930,9 @@ fn expand_int128( long_type, 64, )); - let r_hi = alloc_reg64(func); - func.add_pseudo(Pseudo::reg(r_hi, r_hi.0)); let mut sbc = Instruction::binop(Opcode::SbcC, r_hi, a_hi, b_hi, long_type, 64); - sbc.src.push(r_lo); // src[2] = borrow producer + sbc.src.push(r_lo); new_insns.push(sbc); let int128_type = insn.typ.unwrap(); @@ -966,21 +946,11 @@ fn expand_int128( )); } - // Mul: a*b = (a_lo*b_lo) + ((a_lo*b_hi + a_hi*b_lo) << 64) + // 
Mul: cross-product decomposition Opcode::Mul => { - let src1 = insn.src[0]; - let src2 = insn.src[1]; - - let a_lo = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Lo64, a_lo, src1, long_type, 64)); - let a_hi = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Hi64, a_hi, src1, long_type, 64)); - let b_lo = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Lo64, b_lo, src2, long_type, 64)); - let b_hi = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Hi64, b_hi, src2, long_type, 64)); - - // low_result = a_lo * b_lo (lower 64 bits) + let (a_lo, a_hi) = extract_halves(func, new_insns, insn.src[0], long_type); + let (b_lo, b_hi) = extract_halves(func, new_insns, insn.src[1], long_type); + let low_result = alloc_reg64(func); new_insns.push(Instruction::binop( Opcode::Mul, @@ -991,9 +961,7 @@ fn expand_int128( 64, )); - // high_part = umulhi(a_lo, b_lo) (upper 64 bits of full 128-bit product) let high_part = alloc_reg64(func); - func.add_pseudo(Pseudo::reg(high_part, high_part.0)); new_insns.push(Instruction::binop( Opcode::UMulHi, high_part, @@ -1003,7 +971,6 @@ fn expand_int128( 64, )); - // cross1 = a_lo * b_hi let cross1 = alloc_reg64(func); new_insns.push(Instruction::binop( Opcode::Mul, @@ -1014,7 +981,6 @@ fn expand_int128( 64, )); - // cross2 = a_hi * b_lo let cross2 = alloc_reg64(func); new_insns.push(Instruction::binop( Opcode::Mul, @@ -1025,7 +991,6 @@ fn expand_int128( 64, )); - // final_hi = high_part + cross1 + cross2 let sum1 = alloc_reg64(func); new_insns.push(Instruction::binop( Opcode::Add, @@ -1056,19 +1021,10 @@ fn expand_int128( )); } - // Eq/Ne: xor lo halves, xor hi halves, or results, compare to 0 + // Eq/Ne: xor+or reduction, then 64-bit compare against 0 Opcode::SetEq | Opcode::SetNe => { - let src1 = insn.src[0]; - let src2 = insn.src[1]; - - let a_lo = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Lo64, a_lo, src1, long_type, 64)); - let a_hi = alloc_reg64(func); - 
new_insns.push(Instruction::unop(Opcode::Hi64, a_hi, src1, long_type, 64)); - let b_lo = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Lo64, b_lo, src2, long_type, 64)); - let b_hi = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Hi64, b_hi, src2, long_type, 64)); + let (a_lo, a_hi) = extract_halves(func, new_insns, insn.src[0], long_type); + let (b_lo, b_hi) = extract_halves(func, new_insns, insn.src[1], long_type); let xor_lo = alloc_reg64(func); new_insns.push(Instruction::binop( @@ -1099,13 +1055,12 @@ fn expand_int128( )); let zero = func.create_const_pseudo(0); - // Final comparison is 64-bit (comparing reduced or-result against 0) new_insns.push(Instruction::binop( insn.op, result, or_result, zero, long_type, 64, )); } - // Ordered comparisons: compare hi halves, if equal compare lo halves (unsigned) + // Ordered comparisons: hi compare + Select(hi_eq, lo_cmp, hi_cmp) Opcode::SetLt | Opcode::SetLe | Opcode::SetGt @@ -1114,20 +1069,9 @@ fn expand_int128( | Opcode::SetBe | Opcode::SetA | Opcode::SetAe => { - let src1 = insn.src[0]; - let src2 = insn.src[1]; - - let a_lo = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Lo64, a_lo, src1, long_type, 64)); - let a_hi = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Hi64, a_hi, src1, long_type, 64)); - let b_lo = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Lo64, b_lo, src2, long_type, 64)); - let b_hi = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Hi64, b_hi, src2, long_type, 64)); - - // All decomposed comparisons are 64-bit - // hi_eq = (a_hi == b_hi) + let (a_lo, a_hi) = extract_halves(func, new_insns, insn.src[0], long_type); + let (b_lo, b_hi) = extract_halves(func, new_insns, insn.src[1], long_type); + let hi_eq = alloc_reg64(func); new_insns.push(Instruction::binop( Opcode::SetEq, @@ -1138,13 +1082,12 @@ fn expand_int128( 64, )); - // hi_cmp = signed/unsigned comparison on hi halves (original op) let hi_cmp = 
alloc_reg64(func); new_insns.push(Instruction::binop( insn.op, hi_cmp, a_hi, b_hi, long_type, 64, )); - // lo_cmp = UNSIGNED comparison on lo halves + // Low halves always use unsigned compare let lo_op = match insn.op { Opcode::SetLt | Opcode::SetB => Opcode::SetB, Opcode::SetLe | Opcode::SetBe => Opcode::SetBe, @@ -1155,7 +1098,6 @@ fn expand_int128( let lo_cmp = alloc_reg64(func); new_insns.push(Instruction::binop(lo_op, lo_cmp, a_lo, b_lo, long_type, 64)); - // result = hi_eq ? lo_cmp : hi_cmp new_insns.push(Instruction::select( result, hi_eq, lo_cmp, hi_cmp, long_type, 64, )); diff --git a/cc/main.rs b/cc/main.rs index bd4f18ef..ff6a96c5 100644 --- a/cc/main.rs +++ b/cc/main.rs @@ -219,6 +219,28 @@ struct Args { unsupported_mflags: Vec, } +/// Valid stage names for --dump-ir. +const DUMP_IR_STAGES: &[&str] = &[ + "post-linearize", + "post-hwmap", + "post-opt", + "post-lower", + "all", +]; + +/// Validate --dump-ir stage name. Returns error message if invalid. +fn validate_dump_ir_stage(stage: &str) -> Result<(), String> { + if DUMP_IR_STAGES.contains(&stage) { + Ok(()) + } else { + Err(format!( + "unknown --dump-ir stage '{}'. Valid stages: {}", + stage, + DUMP_IR_STAGES.join(", ") + )) + } +} + /// Check if IR should be dumped at the given stage. fn should_dump_ir(args: &Args, stage: &str) -> bool { match args.dump_ir.as_deref() { @@ -439,6 +461,12 @@ fn process_file( )); } + if let Some(stage) = &args.dump_ir { + if let Err(msg) = validate_dump_ir_stage(stage) { + return Err(io::Error::new(io::ErrorKind::InvalidInput, msg)); + } + } + if args.dump_ast { println!("{:#?}", ast); return Ok(()); diff --git a/cc/tests/codegen/misc.rs b/cc/tests/codegen/misc.rs index 2466e5dd..999eee64 100644 --- a/cc/tests/codegen/misc.rs +++ b/cc/tests/codegen/misc.rs @@ -4761,3 +4761,52 @@ int main(void) { "#; assert_eq!(compile_and_run("codegen_float16_mega", code, &[]), 0); } + +/// Test that the AddC→AdcC carry chain survives optimization. 
+/// The optimizer must not insert flag-clobbering instructions between +/// the add-with-carry pair. This test exercises large int128 values +/// that require actual carry propagation. +#[test] +fn codegen_int128_carry_chain_optimized() { + let code = r#" +typedef __int128 int128; +typedef unsigned __int128 uint128; + +int main(void) { + /* Add with carry: build 0xFFFFFFFFFFFFFFFF via runtime to avoid + constant-folding into a negative i128 literal */ + unsigned long long max64 = ~0ULL; + uint128 a = (uint128)max64; + uint128 b = 1; + uint128 sum = a + b; + /* sum should be 0x0000000000000001_0000000000000000 */ + if ((unsigned long long)sum != 0) return 1; + if ((unsigned long long)(sum >> 64) != 1) return 2; + + /* Sub with borrow: 0x1_0000000000000000 - 1 must borrow */ + uint128 f = (uint128)1 << 64; + uint128 g = f - 1; + if ((unsigned long long)g != 0xFFFFFFFFFFFFFFFFULL) return 5; + if ((unsigned long long)(g >> 64) != 0) return 6; + + /* Negation of 1: should produce all-1s */ + int128 h = 1; + int128 neg_h = -h; + if (neg_h != -1) return 7; + + /* Multiply with carry: (2^63) * 2 = 2^64 (crosses lo/hi boundary) */ + unsigned long long half = 0x8000000000000000ULL; + uint128 i = (uint128)half; + uint128 j = 2; + uint128 prod = i * j; + if ((unsigned long long)prod != 0) return 8; + if ((unsigned long long)(prod >> 64) != 1) return 9; + + return 0; +} +"#; + assert_eq!( + compile_and_run_optimized("codegen_int128_carry_chain_optimized", code), + 0 + ); +} From 3c9a8c8c291bf68e06efcd864d23e1407cb93840 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Mon, 23 Mar 2026 02:46:53 +0000 Subject: [PATCH 09/18] cc: refactor hwmap into arch-specific mapping pass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move arch-specific instruction mapping logic from the monolithic cc/ir/hwmap.rs into cc/arch/{x86_64,aarch64}/mapping.rs, with shared trait and helpers in cc/arch/mapping.rs. 
This follows the existing cc/arch/codegen.rs pattern of shared trait + per-arch implementations. Key design changes: - Replace HwMapAction enum + shared dispatcher with ArchMapper trait where arch code directly builds replacement IR via MappedInsn::Replace - Split expand_int128 monolith into 10 individual functions - Split expand_float16 into 3 individual functions - Float16 soft-float handling stays in x86_64 mapper only - Long double rtlib handling stays in aarch64 mapper only - Shared test helpers extracted into test_helpers module Pure refactoring — zero behavioral changes. All 161 tests pass. Co-Authored-By: Claude Opus 4.6 (1M context) --- cc/arch/aarch64/codegen.rs | 2 +- cc/arch/aarch64/expression.rs | 4 +- cc/arch/aarch64/mapping.rs | 482 +++++++ cc/arch/aarch64/mod.rs | 1 + cc/arch/mapping.rs | 1644 +++++++++++++++++++++++ cc/arch/mod.rs | 1 + cc/arch/x86_64/codegen.rs | 2 +- cc/arch/x86_64/expression.rs | 2 +- cc/arch/x86_64/mapping.rs | 467 +++++++ cc/arch/x86_64/mod.rs | 1 + cc/ir/hwmap.rs | 2323 --------------------------------- cc/ir/linearize.rs | 4 +- cc/ir/mod.rs | 3 +- cc/main.rs | 10 +- 14 files changed, 2609 insertions(+), 2337 deletions(-) create mode 100644 cc/arch/aarch64/mapping.rs create mode 100644 cc/arch/mapping.rs create mode 100644 cc/arch/x86_64/mapping.rs delete mode 100644 cc/ir/hwmap.rs diff --git a/cc/arch/aarch64/codegen.rs b/cc/arch/aarch64/codegen.rs index bffd6807..e359fdbd 100644 --- a/cc/arch/aarch64/codegen.rs +++ b/cc/arch/aarch64/codegen.rs @@ -1745,7 +1745,7 @@ impl Aarch64CodeGen { self.emit_fence(insn); } - // Int128 decomposition ops (from hwmap expansion) + // Int128 decomposition ops (from mapping pass expansion) Opcode::Lo64 => self.emit_lo64(insn), Opcode::Hi64 => self.emit_hi64(insn), Opcode::Pair64 => self.emit_pair64(insn), diff --git a/cc/arch/aarch64/expression.rs b/cc/arch/aarch64/expression.rs index 710c4bc9..216554a2 100644 --- a/cc/arch/aarch64/expression.rs +++ b/cc/arch/aarch64/expression.rs @@ -24,7 
+24,7 @@ impl Aarch64CodeGen { .map(|t| types.size_bits(t).max(32)) .unwrap_or(insn.size.max(32)); - // 128-bit shifts are still handled by the backend (hwmap doesn't expand them) + // 128-bit shifts are still handled by the backend (mapping pass doesn't expand them) if size == 128 { if matches!(insn.op, Opcode::Shl | Opcode::Lsr | Opcode::Asr) { self.emit_int128_binop(insn); @@ -330,7 +330,7 @@ impl Aarch64CodeGen { None => return, }; - // Handle truncation FROM 128-bit (Zext/Sext TO 128 handled by hwmap) + // Handle truncation FROM 128-bit (Zext/Sext TO 128 handled by mapping pass) if insn.src_size == 128 && insn.op == Opcode::Trunc { self.emit_int128_trunc(insn); return; diff --git a/cc/arch/aarch64/mapping.rs b/cc/arch/aarch64/mapping.rs new file mode 100644 index 00000000..576d153d --- /dev/null +++ b/cc/arch/aarch64/mapping.rs @@ -0,0 +1,482 @@ +// +// Copyright (c) 2025-2026 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// AArch64 instruction mapping +// + +use crate::arch::mapping::{ + alloc_reg64, build_binop_rtlib_call, build_convert_rtlib_call, build_rtlib_call_explicit, + int_suffix_for_longdouble, longdouble_needs_rtlib, map_int128_divmod, map_int128_expand, + map_int128_float_convert, ArchMapper, MappedInsn, MappingCtx, RtlibCallParams, +}; +use crate::ir::{Instruction, Opcode}; +use crate::types::TypeKind; + +/// AArch64 instruction mapper. 
+pub struct Aarch64Mapper; + +impl ArchMapper for Aarch64Mapper { + fn map_insn(&self, insn: &Instruction, ctx: &mut MappingCtx<'_>) -> MappedInsn { + // Shared: int128 div/mod → rtlib + if let Some(r) = map_int128_divmod(insn, ctx) { + return r; + } + // Shared: int128 expand (add/sub/mul/bitwise/neg/not/cmp/zext/sext) + if let Some(r) = map_int128_expand(insn, ctx) { + return r; + } + // Shared: int128↔float → rtlib + if let Some(r) = map_int128_float_convert(insn, ctx) { + return r; + } + // aarch64 only: long double → rtlib (Linux, not macOS) + if let Some(r) = self.map_longdouble(insn, ctx) { + return r; + } + MappedInsn::Legal + } +} + +impl Aarch64Mapper { + /// Classify and expand long double operations via rtlib calls. + /// Only applies on aarch64/Linux where long double is 128-bit IEEE quad. + fn map_longdouble(&self, insn: &Instruction, ctx: &mut MappingCtx<'_>) -> Option { + if !longdouble_needs_rtlib(ctx.target) { + return None; + } + + match insn.op { + // Binary arithmetic: FAdd/FSub/FMul/FDiv → single rtlib call + Opcode::FAdd | Opcode::FSub | Opcode::FMul | Opcode::FDiv => { + let typ = insn.typ?; + if ctx.types.kind(typ) != TypeKind::LongDouble { + return None; + } + let name = match insn.op { + Opcode::FAdd => "__addtf3", + Opcode::FSub => "__subtf3", + Opcode::FMul => "__multf3", + Opcode::FDiv => "__divtf3", + _ => unreachable!(), + }; + let call = build_binop_rtlib_call(insn, name, ctx.types, ctx.target); + Some(MappedInsn::Replace(vec![call])) + } + + // Negation: FNeg → single rtlib call + Opcode::FNeg => { + let typ = insn.typ?; + if ctx.types.kind(typ) != TypeKind::LongDouble { + return None; + } + let call = build_binop_rtlib_call(insn, "__negtf2", ctx.types, ctx.target); + Some(MappedInsn::Replace(vec![call])) + } + + // Comparisons: call rtlib cmp, then compare result against 0 + Opcode::FCmpOLt + | Opcode::FCmpOLe + | Opcode::FCmpOGt + | Opcode::FCmpOGe + | Opcode::FCmpOEq + | Opcode::FCmpONe => { + if insn.size != 128 { + return 
None; + } + // Also check src_typ if available + if let Some(src_typ) = insn.src_typ { + if ctx.types.kind(src_typ) != TypeKind::LongDouble { + return None; + } + } + let (name, cmp_op) = match insn.op { + Opcode::FCmpOLt => ("__lttf2", Opcode::SetLt), + Opcode::FCmpOLe => ("__letf2", Opcode::SetLe), + Opcode::FCmpOGt => ("__gttf2", Opcode::SetGt), + Opcode::FCmpOGe => ("__getf2", Opcode::SetGe), + Opcode::FCmpOEq => ("__eqtf2", Opcode::SetEq), + Opcode::FCmpONe => ("__netf2", Opcode::SetNe), + _ => unreachable!(), + }; + + let result_pseudo = insn.target.expect("cmp must have target"); + let int_type = ctx.types.int_id; + let int_size = ctx.types.size_bits(int_type); + let ld_type = ctx.types.longdouble_id; + + // Allocate pseudo for cmp call result + let cmp_result = alloc_reg64(ctx.func); + let zero = ctx.func.create_const_pseudo(0); + + // Build the rtlib call: cmp_result = __lttf2(left, right) + let arg_vals = insn.src.clone(); + let arg_types = vec![ld_type; arg_vals.len()]; + let call = build_rtlib_call_explicit( + RtlibCallParams { + target_pseudo: cmp_result, + arg_vals: &arg_vals, + func_name: name, + arg_types, + ret_type: int_type, + pos: insn.pos, + }, + ctx.types, + ctx.target, + ); + + // Build the int comparison: result = cmp_op(cmp_result, 0) + let cmp = + Instruction::binop(cmp_op, result_pseudo, cmp_result, zero, int_type, int_size); + + Some(MappedInsn::Replace(vec![call, cmp])) + } + + // Float-to-float conversions involving long double + Opcode::FCvtF => { + let dst_typ = insn.typ?; + let src_typ = insn.src_typ?; + let dst_kind = ctx.types.kind(dst_typ); + let src_kind = ctx.types.kind(src_typ); + if src_kind == TypeKind::LongDouble { + // longdouble → float/double + let name = match dst_kind { + TypeKind::Float => "__trunctfsf2", + TypeKind::Double => "__trunctfdf2", + _ => return None, + }; + let call = build_convert_rtlib_call(insn, name, ctx.types, ctx.target); + Some(MappedInsn::Replace(vec![call])) + } else if dst_kind == 
TypeKind::LongDouble { + // float/double → longdouble + let name = match src_kind { + TypeKind::Float => "__extendsftf2", + TypeKind::Double => "__extenddftf2", + _ => return None, + }; + let call = build_convert_rtlib_call(insn, name, ctx.types, ctx.target); + Some(MappedInsn::Replace(vec![call])) + } else { + None + } + } + + // Int-to-float: int → longdouble + Opcode::SCvtF | Opcode::UCvtF => { + let dst_typ = insn.typ?; + if ctx.types.kind(dst_typ) != TypeKind::LongDouble { + return None; + } + let src_typ = insn.src_typ?; + // Skip int128 (handled by map_int128_float_convert) + if ctx.types.kind(src_typ) == TypeKind::Int128 { + return None; + } + let isuf = int_suffix_for_longdouble(ctx.types, src_typ); + let name: &'static str = match isuf { + "si" => "__floatsitf", + "di" => "__floatditf", + "usi" => "__floatunsitf", + "udi" => "__floatunditf", + _ => return None, + }; + let call = build_convert_rtlib_call(insn, name, ctx.types, ctx.target); + Some(MappedInsn::Replace(vec![call])) + } + + // Float-to-int: longdouble → int + Opcode::FCvtS | Opcode::FCvtU => { + let src_typ = insn.src_typ?; + if ctx.types.kind(src_typ) != TypeKind::LongDouble { + return None; + } + let dst_typ = insn.typ?; + // Skip int128 (handled by map_int128_float_convert) + if ctx.types.kind(dst_typ) == TypeKind::Int128 { + return None; + } + let isuf = int_suffix_for_longdouble(ctx.types, dst_typ); + let name: &'static str = match isuf { + "si" => "__fixtfsi", + "di" => "__fixtfdi", + "usi" => "__fixunstfsi", + "udi" => "__fixunstfdi", + _ => return None, + }; + let call = build_convert_rtlib_call(insn, name, ctx.types, ctx.target); + Some(MappedInsn::Replace(vec![call])) + } + + _ => None, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::arch::mapping::test_helpers::*; + use crate::arch::mapping::MappingCtx; + use crate::ir::{Instruction, Opcode, PseudoId}; + use crate::target::{Arch, Os, Target}; + use crate::types::TypeTable; + + #[test] + fn 
test_aarch64_legal_insns() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = Aarch64Mapper; + + let func_template = make_test_func(&types); + for block in &func_template.blocks { + for insn in &block.insns { + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + let result = mapper.map_insn(insn, &mut ctx); + assert_legal(&result); + } + } + } + + // ======================================================================== + // Int128 div/mod + // ======================================================================== + + #[test] + fn test_aarch64_int128_divmod() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = Aarch64Mapper; + + let insn = Instruction::binop( + Opcode::DivS, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int128_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__divti3"); + + let insn = Instruction::binop( + Opcode::ModU, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.uint128_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__umodti3"); + } + + // ======================================================================== + // Long double → rtlib (aarch64/Linux only) + // ======================================================================== + + #[test] + fn test_aarch64_longdouble_binop() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = Aarch64Mapper; + + for (op, name) in [ + (Opcode::FAdd, "__addtf3"), + (Opcode::FSub, "__subtf3"), + (Opcode::FMul, "__multf3"), + (Opcode::FDiv, 
"__divtf3"), + ] { + let insn = Instruction::binop( + op, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.longdouble_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), name); + } + } + + #[test] + fn test_aarch64_longdouble_binop_macos_legal() { + let target = Target::new(Arch::Aarch64, Os::MacOS); + let types = TypeTable::new(&target); + let mapper = Aarch64Mapper; + + // macOS aarch64: long double == double, native + let insn = Instruction::binop( + Opcode::FAdd, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.longdouble_id, + 64, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_legal(&mapper.map_insn(&insn, &mut ctx)); + } + + #[test] + fn test_aarch64_longdouble_neg() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = Aarch64Mapper; + + let insn = Instruction::unop( + Opcode::FNeg, + PseudoId(2), + PseudoId(0), + types.longdouble_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__negtf2"); + } + + #[test] + fn test_aarch64_longdouble_cmp() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = Aarch64Mapper; + + let mut insn = Instruction::binop( + Opcode::FCmpOLt, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 128, + ); + insn.src_typ = Some(types.longdouble_id); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_cmp_libcall(&mapper.map_insn(&insn, &mut ctx), "__lttf2", Opcode::SetLt); + + let mut insn = Instruction::binop( + 
Opcode::FCmpOEq, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 128, + ); + insn.src_typ = Some(types.longdouble_id); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_cmp_libcall(&mapper.map_insn(&insn, &mut ctx), "__eqtf2", Opcode::SetEq); + } + + #[test] + fn test_aarch64_longdouble_convert() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = Aarch64Mapper; + + // float → longdouble + let insn = make_convert_insn(Opcode::FCvtF, types.longdouble_id, 128, types.float_id, 32); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__extendsftf2"); + + // longdouble → double + let insn = make_convert_insn(Opcode::FCvtF, types.double_id, 64, types.longdouble_id, 128); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__trunctfdf2"); + + // int32 → longdouble + let insn = make_convert_insn(Opcode::SCvtF, types.longdouble_id, 128, types.int_id, 32); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__floatsitf"); + + // longdouble → int64 + let insn = make_convert_insn(Opcode::FCvtS, types.long_id, 64, types.longdouble_id, 128); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__fixtfdi"); + } + + #[test] + fn test_aarch64_int128_longdouble() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = Aarch64Mapper; + + // 
aarch64 long double uses "tf" suffix + let insn = make_convert_insn( + Opcode::SCvtF, + types.longdouble_id, + 128, + types.int128_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__floattitf"); + } +} diff --git a/cc/arch/aarch64/mod.rs b/cc/arch/aarch64/mod.rs index 76d8effe..bcfb71fb 100644 --- a/cc/arch/aarch64/mod.rs +++ b/cc/arch/aarch64/mod.rs @@ -16,6 +16,7 @@ mod features; mod float; pub mod lir; pub mod macros; +pub(crate) mod mapping; pub mod regalloc; pub use macros::get_macros; diff --git a/cc/arch/mapping.rs b/cc/arch/mapping.rs new file mode 100644 index 00000000..3ea73be7 --- /dev/null +++ b/cc/arch/mapping.rs @@ -0,0 +1,1644 @@ +// +// Copyright (c) 2025-2026 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Architecture-independent instruction mapping interface +// +// This pass runs after SSA construction and before optimization. +// It handles target-specific lowering: expanding unsupported operations +// into sequences of simpler instructions or runtime library calls. +// + +use crate::abi::{get_abi_for_conv, ArgClass, CallingConv}; +use crate::ir::{CallAbiInfo, Function, Instruction, Module, Opcode, Pseudo, PseudoId}; +use crate::rtlib::{Float16Abi, RtlibNames}; +use crate::target::{Arch, Os, Target}; +use crate::types::{TypeId, TypeKind, TypeTable}; + +// ============================================================================ +// Trait and types +// ============================================================================ + +/// Context passed to arch mapper. Provides mutable access to the +/// function for pseudo allocation, plus type/target info. 
+pub struct MappingCtx<'a> { + pub func: &'a mut Function, + pub types: &'a TypeTable, + pub target: &'a Target, +} + +/// Result of mapping a single instruction. +pub enum MappedInsn { + /// Instruction is natively supported — keep unchanged. + Legal, + /// Replace with these instructions in the same basic block. + Replace(Vec), +} + +/// Per-architecture instruction mapper. +pub trait ArchMapper { + /// Map one instruction. The arch impl calls shared helpers + /// to build replacement IR, then returns it in MappedInsn::Replace. + fn map_insn(&self, insn: &Instruction, ctx: &mut MappingCtx<'_>) -> MappedInsn; +} + +// ============================================================================ +// Complex number rtlib name selection +// ============================================================================ + +/// Get the rtlib function name for complex multiplication. +/// Target-dependent for long double (x87 vs IEEE quad). +pub fn complex_mul_name(base_kind: TypeKind, target: &Target) -> &'static str { + match base_kind { + TypeKind::Float => "__mulsc3", + TypeKind::Double => "__muldc3", + TypeKind::LongDouble => { + if target.arch == Arch::Aarch64 && target.os == Os::MacOS { + "__muldc3" // macOS aarch64: long double == double + } else { + match target.arch { + Arch::X86_64 => "__mulxc3", + Arch::Aarch64 => "__multc3", + } + } + } + _ => "__muldc3", + } +} + +/// Get the rtlib function name for complex division. +/// Target-dependent for long double (x87 vs IEEE quad). 
+pub fn complex_div_name(base_kind: TypeKind, target: &Target) -> &'static str { + match base_kind { + TypeKind::Float => "__divsc3", + TypeKind::Double => "__divdc3", + TypeKind::LongDouble => { + if target.arch == Arch::Aarch64 && target.os == Os::MacOS { + "__divdc3" + } else { + match target.arch { + Arch::X86_64 => "__divxc3", + Arch::Aarch64 => "__divtc3", + } + } + } + _ => "__divdc3", + } +} + +// ============================================================================ +// Utility helpers +// ============================================================================ + +/// Get the rtlib suffix for a float type kind on the given target. +pub(crate) fn float_suffix(kind: TypeKind, target: &Target) -> &'static str { + match kind { + TypeKind::Float => "sf", + TypeKind::Double => "df", + TypeKind::LongDouble => { + if target.arch == Arch::X86_64 { + "xf" + } else { + "tf" + } + } + _ => "", + } +} + +/// Check if long double needs soft-float rtlib on this target. +/// Returns true only for aarch64/Linux (128-bit IEEE quad). +/// x86_64 uses native x87; macOS aarch64 long double == double. +pub(crate) fn longdouble_needs_rtlib(target: &Target) -> bool { + target.arch == Arch::Aarch64 && target.os != Os::MacOS +} + +/// Get the integer suffix for a long double↔int conversion. +pub(crate) fn int_suffix_for_longdouble(types: &TypeTable, int_type: TypeId) -> &'static str { + let size = types.size_bits(int_type); + let is_unsigned = types.is_unsigned(int_type); + match (is_unsigned, size <= 32) { + (true, true) => "usi", + (true, false) => "udi", + (false, true) => "si", + (false, false) => "di", + } +} + +// ============================================================================ +// Pseudo/instruction helpers +// ============================================================================ + +/// Allocate a new 64-bit register pseudo. 
+pub(crate) fn alloc_reg64(func: &mut Function) -> PseudoId { + let id = func.alloc_pseudo(); + func.add_pseudo(Pseudo::reg(id, id.0)); + id +} + +/// Extract lo and hi 64-bit halves from a 128-bit pseudo. +fn extract_halves( + func: &mut Function, + insns: &mut Vec, + src: PseudoId, + long_type: TypeId, +) -> (PseudoId, PseudoId) { + let lo = alloc_reg64(func); + insns.push(Instruction::unop(Opcode::Lo64, lo, src, long_type, 64)); + let hi = alloc_reg64(func); + insns.push(Instruction::unop(Opcode::Hi64, hi, src, long_type, 64)); + (lo, hi) +} + +// ============================================================================ +// Rtlib call builders +// ============================================================================ + +/// Parameters for building an explicit rtlib call. +pub(crate) struct RtlibCallParams<'a> { + pub target_pseudo: PseudoId, + pub arg_vals: &'a [PseudoId], + pub func_name: &'a str, + pub arg_types: Vec, + pub ret_type: TypeId, + pub pos: Option, +} + +/// Build a runtime library call instruction replacing an IR instruction. +/// +/// Creates a Call instruction with proper ABI classification using the +/// C calling convention, mirroring the linearizer's `emit_rtlib_call`. 
+fn build_rtlib_call( + insn: &Instruction, + func_name: &str, + arg_types: Vec, + ret_type: TypeId, + types: &TypeTable, + target: &Target, +) -> Instruction { + let target_pseudo = insn.target.expect("insn must have target"); + let ret_size = types.size_bits(ret_type); + + let arg_vals = insn.src.clone(); + + let abi = get_abi_for_conv(CallingConv::C, target); + let param_classes: Vec<_> = arg_types + .iter() + .map(|&t| abi.classify_param(t, types)) + .collect(); + let ret_class = abi.classify_return(ret_type, types); + let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); + + let mut call_insn = Instruction::call( + Some(target_pseudo), + func_name, + arg_vals, + arg_types, + ret_type, + ret_size, + ); + call_insn.abi_info = Some(call_abi_info); + call_insn.pos = insn.pos; + call_insn +} + +/// Build a rtlib call with explicit parameters. +/// Used for expansion patterns where the call target differs from +/// the original instruction's target. +pub(crate) fn build_rtlib_call_explicit( + params: RtlibCallParams<'_>, + types: &TypeTable, + target: &Target, +) -> Instruction { + let ret_size = types.size_bits(params.ret_type); + + let abi = get_abi_for_conv(CallingConv::C, target); + let param_classes: Vec<_> = params + .arg_types + .iter() + .map(|&t| abi.classify_param(t, types)) + .collect(); + let ret_class = abi.classify_return(params.ret_type, types); + let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); + + let mut call_insn = Instruction::call( + Some(params.target_pseudo), + params.func_name, + params.arg_vals.to_vec(), + params.arg_types, + params.ret_type, + ret_size, + ); + call_insn.abi_info = Some(call_abi_info); + call_insn.pos = params.pos; + call_insn +} + +/// Build a rtlib call for a binop (both args same type as result). 
+pub(crate) fn build_binop_rtlib_call( + insn: &Instruction, + func_name: &str, + types: &TypeTable, + target: &Target, +) -> Instruction { + let ret_type = insn.typ.expect("binop must have type"); + let arg_types = vec![ret_type; insn.src.len()]; + build_rtlib_call(insn, func_name, arg_types, ret_type, types, target) +} + +/// Build a rtlib call for a conversion (single arg, different src/dst types). +pub(crate) fn build_convert_rtlib_call( + insn: &Instruction, + func_name: &str, + types: &TypeTable, + target: &Target, +) -> Instruction { + let ret_type = insn.typ.expect("conversion must have type"); + let src_type = insn.src_typ.expect("conversion must have src_typ"); + let arg_types = vec![src_type]; + build_rtlib_call(insn, func_name, arg_types, ret_type, types, target) +} + +/// Build a call to __extendhfsf2 (Float16 → float) with proper ABI. +fn build_f16_extend_call( + target_pseudo: PseudoId, + src: PseudoId, + pos: Option, + types: &TypeTable, + target: &Target, +) -> Instruction { + let rtlib = RtlibNames::new(target); + let f16_abi = rtlib.float16_abi(); + let float_type = types.float_id; + let float_size = types.size_bits(float_type); + + // Arg type: ushort for compiler-rt, Float16 for libgcc + let arg_type = if f16_abi == Float16Abi::Integer { + types.ushort_id + } else { + types.float16_id + }; + + // Arg classification + let param_class = if f16_abi == Float16Abi::Integer { + ArgClass::Extend { + signed: false, + size_bits: 16, + } + } else { + let abi = get_abi_for_conv(CallingConv::C, target); + abi.classify_param(types.float16_id, types) + }; + + // Return is always SSE float + let abi = get_abi_for_conv(CallingConv::C, target); + let ret_class = abi.classify_return(float_type, types); + + let call_abi_info = Box::new(CallAbiInfo::new(vec![param_class], ret_class)); + + let mut call_insn = Instruction::call( + Some(target_pseudo), + "__extendhfsf2", + vec![src], + vec![arg_type], + float_type, + float_size, + ); + call_insn.abi_info = 
Some(call_abi_info); + call_insn.pos = pos; + call_insn +} + +/// Build a call to __truncsfhf2 (float → Float16) with proper ABI. +fn build_f16_truncate_call( + target_pseudo: PseudoId, + src: PseudoId, + pos: Option, + types: &TypeTable, + target: &Target, +) -> Instruction { + let rtlib = RtlibNames::new(target); + let f16_abi = rtlib.float16_abi(); + let float_type = types.float_id; + let float16_type = types.float16_id; + let f16_size = types.size_bits(float16_type); + + // Arg is always SSE float + let abi = get_abi_for_conv(CallingConv::C, target); + let param_class = abi.classify_param(float_type, types); + + // Return: ushort for compiler-rt, Float16/SSE for libgcc + let ret_class = if f16_abi == Float16Abi::Integer { + ArgClass::Extend { + signed: false, + size_bits: 16, + } + } else { + abi.classify_return(float16_type, types) + }; + + let call_abi_info = Box::new(CallAbiInfo::new(vec![param_class], ret_class)); + + let mut call_insn = Instruction::call( + Some(target_pseudo), + "__truncsfhf2", + vec![src], + vec![float_type], + float16_type, + f16_size, + ); + call_insn.abi_info = Some(call_abi_info); + call_insn.pos = pos; + call_insn +} + +// ============================================================================ +// Int128 expansion helpers +// ============================================================================ + +/// Expand int128 bitwise op (And/Or/Xor) into 64-bit operations. 
+fn expand_int128_bitwise( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let (a_lo, a_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let (b_lo, b_hi) = extract_halves(func, &mut insns, insn.src[1], long_type); + + let r_lo = alloc_reg64(func); + insns.push(Instruction::binop(insn.op, r_lo, a_lo, b_lo, long_type, 64)); + let r_hi = alloc_reg64(func); + insns.push(Instruction::binop(insn.op, r_hi, a_hi, b_hi, long_type, 64)); + + let int128_type = insn.typ.unwrap(); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + r_lo, + r_hi, + int128_type, + 128, + )); + insns +} + +/// Expand int128 Not into 64-bit operations. +fn expand_int128_not( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let (s_lo, s_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + + let r_lo = alloc_reg64(func); + insns.push(Instruction::unop(Opcode::Not, r_lo, s_lo, long_type, 64)); + let r_hi = alloc_reg64(func); + insns.push(Instruction::unop(Opcode::Not, r_hi, s_hi, long_type, 64)); + + let int128_type = insn.typ.unwrap(); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + r_lo, + r_hi, + int128_type, + 128, + )); + insns +} + +/// Expand int128 Neg (0 - value with borrow chain). 
+fn expand_int128_neg( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let (s_lo, s_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let zero = func.create_const_pseudo(0); + + let r_lo = alloc_reg64(func); + insns.push(Instruction::binop( + Opcode::SubC, + r_lo, + zero, + s_lo, + long_type, + 64, + )); + let r_hi = alloc_reg64(func); + let mut sbc = Instruction::binop(Opcode::SbcC, r_hi, zero, s_hi, long_type, 64); + sbc.src.push(r_lo); + insns.push(sbc); + + let int128_type = insn.typ.unwrap(); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + r_lo, + r_hi, + int128_type, + 128, + )); + insns +} + +/// Expand int128 Add (carry chain). +fn expand_int128_add( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let (a_lo, a_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let (b_lo, b_hi) = extract_halves(func, &mut insns, insn.src[1], long_type); + + let r_lo = alloc_reg64(func); + insns.push(Instruction::binop( + Opcode::AddC, + r_lo, + a_lo, + b_lo, + long_type, + 64, + )); + let r_hi = alloc_reg64(func); + let mut adc = Instruction::binop(Opcode::AdcC, r_hi, a_hi, b_hi, long_type, 64); + adc.src.push(r_lo); + insns.push(adc); + + let int128_type = insn.typ.unwrap(); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + r_lo, + r_hi, + int128_type, + 128, + )); + insns +} + +/// Expand int128 Sub (borrow chain). 
+fn expand_int128_sub( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let (a_lo, a_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let (b_lo, b_hi) = extract_halves(func, &mut insns, insn.src[1], long_type); + + let r_lo = alloc_reg64(func); + insns.push(Instruction::binop( + Opcode::SubC, + r_lo, + a_lo, + b_lo, + long_type, + 64, + )); + let r_hi = alloc_reg64(func); + let mut sbc = Instruction::binop(Opcode::SbcC, r_hi, a_hi, b_hi, long_type, 64); + sbc.src.push(r_lo); + insns.push(sbc); + + let int128_type = insn.typ.unwrap(); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + r_lo, + r_hi, + int128_type, + 128, + )); + insns +} + +/// Expand int128 Mul (cross-product decomposition). +fn expand_int128_mul( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let (a_lo, a_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let (b_lo, b_hi) = extract_halves(func, &mut insns, insn.src[1], long_type); + + let low_result = alloc_reg64(func); + insns.push(Instruction::binop( + Opcode::Mul, + low_result, + a_lo, + b_lo, + long_type, + 64, + )); + + let high_part = alloc_reg64(func); + insns.push(Instruction::binop( + Opcode::UMulHi, + high_part, + a_lo, + b_lo, + long_type, + 64, + )); + + let cross1 = alloc_reg64(func); + insns.push(Instruction::binop( + Opcode::Mul, + cross1, + a_lo, + b_hi, + long_type, + 64, + )); + + let cross2 = alloc_reg64(func); + insns.push(Instruction::binop( + Opcode::Mul, + cross2, + a_hi, + b_lo, + long_type, + 64, + )); + + let sum1 = alloc_reg64(func); + insns.push(Instruction::binop( + Opcode::Add, + sum1, + high_part, + cross1, + long_type, + 64, + )); + let final_hi = 
alloc_reg64(func); + insns.push(Instruction::binop( + Opcode::Add, + final_hi, + sum1, + cross2, + long_type, + 64, + )); + + let int128_type = insn.typ.unwrap(); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + low_result, + final_hi, + int128_type, + 128, + )); + insns +} + +/// Expand int128 equality comparison (SetEq/SetNe). +fn expand_int128_cmp_eq( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let (a_lo, a_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let (b_lo, b_hi) = extract_halves(func, &mut insns, insn.src[1], long_type); + + let xor_lo = alloc_reg64(func); + insns.push(Instruction::binop( + Opcode::Xor, + xor_lo, + a_lo, + b_lo, + long_type, + 64, + )); + let xor_hi = alloc_reg64(func); + insns.push(Instruction::binop( + Opcode::Xor, + xor_hi, + a_hi, + b_hi, + long_type, + 64, + )); + let or_result = alloc_reg64(func); + insns.push(Instruction::binop( + Opcode::Or, + or_result, + xor_lo, + xor_hi, + long_type, + 64, + )); + + let zero = func.create_const_pseudo(0); + insns.push(Instruction::binop( + insn.op, result, or_result, zero, long_type, 64, + )); + insns +} + +/// Expand int128 ordered comparison (SetLt/SetLe/SetGt/SetGe/SetB/SetBe/SetA/SetAe). 
+fn expand_int128_cmp_ord( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let (a_lo, a_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let (b_lo, b_hi) = extract_halves(func, &mut insns, insn.src[1], long_type); + + let hi_eq = alloc_reg64(func); + insns.push(Instruction::binop( + Opcode::SetEq, + hi_eq, + a_hi, + b_hi, + long_type, + 64, + )); + + let hi_cmp = alloc_reg64(func); + insns.push(Instruction::binop( + insn.op, hi_cmp, a_hi, b_hi, long_type, 64, + )); + + // Low halves always use unsigned compare + let lo_op = match insn.op { + Opcode::SetLt | Opcode::SetB => Opcode::SetB, + Opcode::SetLe | Opcode::SetBe => Opcode::SetBe, + Opcode::SetGt | Opcode::SetA => Opcode::SetA, + Opcode::SetGe | Opcode::SetAe => Opcode::SetAe, + _ => unreachable!(), + }; + let lo_cmp = alloc_reg64(func); + insns.push(Instruction::binop(lo_op, lo_cmp, a_lo, b_lo, long_type, 64)); + + insns.push(Instruction::select( + result, hi_eq, lo_cmp, hi_cmp, long_type, 64, + )); + insns +} + +/// Expand int128 Zext (zero-extend to 128 bits). 
+fn expand_int128_zext( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let src = insn.src[0]; + let src_size = insn.src_size; + + // Zero-extend src to 64-bit if needed + let lo = if src_size < 64 { + let ext = alloc_reg64(func); + let mut zext_insn = Instruction::unop(Opcode::Zext, ext, src, long_type, 64); + zext_insn.src_size = src_size; + insns.push(zext_insn); + ext + } else { + src + }; + + let zero = func.create_const_pseudo(0); + let int128_type = insn.typ.unwrap(); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + lo, + zero, + int128_type, + 128, + )); + insns +} + +/// Expand int128 Sext (sign-extend to 128 bits). +fn expand_int128_sext( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let mut insns = Vec::new(); + + let src = insn.src[0]; + let src_size = insn.src_size; + + // Sign-extend src to 64-bit if needed + let lo = if src_size < 64 { + let ext = alloc_reg64(func); + let mut sext_insn = Instruction::unop(Opcode::Sext, ext, src, long_type, 64); + sext_insn.src_size = src_size; + insns.push(sext_insn); + ext + } else { + src + }; + + let shift_amount = func.create_const_pseudo(63); + let hi = alloc_reg64(func); + insns.push(Instruction::binop( + Opcode::Asr, + hi, + lo, + shift_amount, + long_type, + 64, + )); + let int128_type = insn.typ.unwrap(); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + lo, + hi, + int128_type, + 128, + )); + insns +} + +// ============================================================================ +// Float16 expansion helpers +// ============================================================================ + +/// Expand Float16 binary arithmetic (promote-operate-truncate). 
+pub(crate) fn expand_float16_arith( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, + target: &Target, +) -> Vec { + let float_type = types.float_id; + let float_size = types.size_bits(float_type); + let pos = insn.pos; + let result = insn.target.expect("binop must have target"); + let left = insn.src[0]; + let right = insn.src[1]; + let mut insns = Vec::new(); + + // Extend left to float + let left_ext = alloc_reg64(func); + insns.push(build_f16_extend_call(left_ext, left, pos, types, target)); + + // Extend right to float + let right_ext = alloc_reg64(func); + insns.push(build_f16_extend_call(right_ext, right, pos, types, target)); + + // Native float operation + let float_result = alloc_reg64(func); + insns.push(Instruction::binop( + insn.op, + float_result, + left_ext, + right_ext, + float_type, + float_size, + )); + + // Truncate result back to Float16 + insns.push(build_f16_truncate_call( + result, + float_result, + pos, + types, + target, + )); + insns +} + +/// Expand Float16 negation (promote-negate-truncate). +pub(crate) fn expand_float16_neg( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, + target: &Target, +) -> Vec { + let float_type = types.float_id; + let float_size = types.size_bits(float_type); + let pos = insn.pos; + let result = insn.target.expect("unary must have target"); + let src = insn.src[0]; + let mut insns = Vec::new(); + + let src_ext = alloc_reg64(func); + insns.push(build_f16_extend_call(src_ext, src, pos, types, target)); + + let neg_result = alloc_reg64(func); + insns.push(Instruction::unop( + Opcode::FNeg, + neg_result, + src_ext, + float_type, + float_size, + )); + + insns.push(build_f16_truncate_call( + result, neg_result, pos, types, target, + )); + insns +} + +/// Expand Float16 comparison (promote both, compare — no truncate). 
+pub(crate) fn expand_float16_cmp( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, + target: &Target, +) -> Vec { + let float_type = types.float_id; + let float_size = types.size_bits(float_type); + let pos = insn.pos; + let result = insn.target.expect("cmp must have target"); + let left = insn.src[0]; + let right = insn.src[1]; + let mut insns = Vec::new(); + + let left_ext = alloc_reg64(func); + insns.push(build_f16_extend_call(left_ext, left, pos, types, target)); + + let right_ext = alloc_reg64(func); + insns.push(build_f16_extend_call(right_ext, right, pos, types, target)); + + // Float comparison — result type is int, keep original type/size + let mut cmp = Instruction::binop( + insn.op, + result, + left_ext, + right_ext, + insn.typ.unwrap_or(types.int_id), + float_size, + ); + cmp.src_typ = Some(float_type); + insns.push(cmp); + insns +} + +// ============================================================================ +// Shared mapping decision functions +// ============================================================================ + +/// Classify and expand an int128 div/mod instruction into a rtlib call. +pub(crate) fn map_int128_divmod( + insn: &Instruction, + ctx: &mut MappingCtx<'_>, +) -> Option { + if insn.size != 128 { + return None; + } + let typ = insn.typ?; + if ctx.types.kind(typ) != TypeKind::Int128 { + return None; + } + let name = match insn.op { + Opcode::DivS => "__divti3", + Opcode::DivU => "__udivti3", + Opcode::ModS => "__modti3", + Opcode::ModU => "__umodti3", + _ => return None, + }; + let call = build_binop_rtlib_call(insn, name, ctx.types, ctx.target); + Some(MappedInsn::Replace(vec![call])) +} + +/// Classify and expand an int128 operation into 64-bit sequences. 
+pub(crate) fn map_int128_expand( + insn: &Instruction, + ctx: &mut MappingCtx<'_>, +) -> Option { + if insn.size != 128 { + return None; + } + let types = ctx.types; + + match insn.op { + // Arithmetic/bitwise/unary: result type is int128 + Opcode::And | Opcode::Or | Opcode::Xor => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_bitwise( + insn, ctx.func, types, + ))) + } + Opcode::Not => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_not( + insn, ctx.func, types, + ))) + } + Opcode::Neg => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_neg( + insn, ctx.func, types, + ))) + } + Opcode::Add => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_add( + insn, ctx.func, types, + ))) + } + Opcode::Sub => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_sub( + insn, ctx.func, types, + ))) + } + Opcode::Mul => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_mul( + insn, ctx.func, types, + ))) + } + // Extensions to 128: result type is int128 + Opcode::Zext => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_zext( + insn, ctx.func, types, + ))) + } + Opcode::Sext => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_sext( + insn, ctx.func, types, + ))) + } + // Equality comparisons + Opcode::SetEq | Opcode::SetNe => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_cmp_eq( + 
insn, ctx.func, types, + ))) + } + // Ordered comparisons + Opcode::SetLt + | Opcode::SetLe + | Opcode::SetGt + | Opcode::SetGe + | Opcode::SetB + | Opcode::SetBe + | Opcode::SetA + | Opcode::SetAe => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + Some(MappedInsn::Replace(expand_int128_cmp_ord( + insn, ctx.func, types, + ))) + } + _ => None, + } +} + +/// Classify and expand an int128↔float conversion into a rtlib call. +pub(crate) fn map_int128_float_convert( + insn: &Instruction, + ctx: &mut MappingCtx<'_>, +) -> Option { + let types = ctx.types; + let target = ctx.target; + match insn.op { + // int128 → float + Opcode::SCvtF | Opcode::UCvtF => { + if insn.src_size != 128 { + return None; + } + let src_typ = insn.src_typ?; + if types.kind(src_typ) != TypeKind::Int128 { + return None; + } + let dst_typ = insn.typ?; + let dst_kind = types.kind(dst_typ); + let fsuf = float_suffix(dst_kind, target); + if fsuf.is_empty() { + return None; + } + let is_unsigned = insn.op == Opcode::UCvtF; + let func_name: &'static str = match (is_unsigned, fsuf) { + (false, "sf") => "__floattisf", + (false, "df") => "__floattidf", + (false, "xf") => "__floattixf", + (false, "tf") => "__floattitf", + (true, "sf") => "__floatuntisf", + (true, "df") => "__floatuntidf", + (true, "xf") => "__floatuntixf", + (true, "tf") => "__floatuntitf", + _ => return None, + }; + let call = build_convert_rtlib_call(insn, func_name, types, target); + Some(MappedInsn::Replace(vec![call])) + } + // float → int128 + Opcode::FCvtS | Opcode::FCvtU => { + if insn.size != 128 { + return None; + } + let dst_typ = insn.typ?; + if types.kind(dst_typ) != TypeKind::Int128 { + return None; + } + let src_typ = insn.src_typ?; + let src_kind = types.kind(src_typ); + let fsuf = float_suffix(src_kind, target); + if fsuf.is_empty() { + return None; + } + let is_unsigned = insn.op == Opcode::FCvtU; + let func_name: &'static str = match (is_unsigned, fsuf) { + (false, "sf") => 
"__fixsfti", + (false, "df") => "__fixdfti", + (false, "xf") => "__fixxfti", + (false, "tf") => "__fixtfti", + (true, "sf") => "__fixunssfti", + (true, "df") => "__fixunsdfti", + (true, "xf") => "__fixunsxfti", + (true, "tf") => "__fixunstfti", + _ => return None, + }; + let call = build_convert_rtlib_call(insn, func_name, types, target); + Some(MappedInsn::Replace(vec![call])) + } + _ => None, + } +} + +// ============================================================================ +// Pass infrastructure +// ============================================================================ + +/// Create the appropriate ArchMapper for the given target. +fn create_mapper(target: &Target) -> Box { + match target.arch { + Arch::X86_64 => Box::new(crate::arch::x86_64::mapping::X86_64Mapper), + Arch::Aarch64 => Box::new(crate::arch::aarch64::mapping::Aarch64Mapper), + } +} + +/// Run the instruction mapping pass on a single function. +fn map_function(func: &mut Function, types: &TypeTable, target: &Target, mapper: &dyn ArchMapper) { + for block_idx in 0..func.blocks.len() { + // Take the insns out of the block to avoid borrow conflicts + let old_insns = std::mem::take(&mut func.blocks[block_idx].insns); + let mut new_insns = Vec::with_capacity(old_insns.len()); + let mut changed = false; + + for insn in &old_insns { + let mut ctx = MappingCtx { + func: &mut *func, + types, + target, + }; + match mapper.map_insn(insn, &mut ctx) { + MappedInsn::Legal => new_insns.push(insn.clone()), + MappedInsn::Replace(replacements) => { + new_insns.extend(replacements); + changed = true; + } + } + } + + if changed { + func.blocks[block_idx].insns = new_insns; + } else { + func.blocks[block_idx].insns = old_insns; + } + } +} + +/// Run the instruction mapping pass on an entire module. 
+pub fn run_mapping(module: &mut Module, types: &TypeTable, target: &Target) { + let mapper = create_mapper(target); + for func in &mut module.functions { + map_function(func, types, target, mapper.as_ref()); + } +} + +// ============================================================================ +// Shared test helpers +// ============================================================================ + +#[cfg(test)] +pub(crate) mod test_helpers { + use super::*; + use crate::ir::{BasicBlock, BasicBlockId, Instruction, Opcode, Pseudo, PseudoId}; + use crate::types::{TypeId, TypeTable}; + + /// Create a function with 3 pseudos for classification tests. + pub fn make_minimal_func(types: &TypeTable) -> Function { + let mut func = Function::new("test", types.int_id); + func.add_pseudo(Pseudo::reg(PseudoId(0), 0)); + func.add_pseudo(Pseudo::reg(PseudoId(1), 1)); + func.add_pseudo(Pseudo::reg(PseudoId(2), 2)); + func.next_pseudo = 3; + func + } + + /// Create a function with various legal instructions for pass runner tests. 
+ pub fn make_test_func(types: &TypeTable) -> Function { + let mut func = Function::new("test_mapping", types.int_id); + + func.add_pseudo(Pseudo::reg(PseudoId(0), 0)); + func.add_pseudo(Pseudo::reg(PseudoId(1), 1)); + func.add_pseudo(Pseudo::reg(PseudoId(2), 2)); + + let mut bb = BasicBlock::new(BasicBlockId(0)); + bb.add_insn(Instruction::new(Opcode::Entry)); + + // Integer arithmetic + bb.add_insn(Instruction::binop( + Opcode::Add, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + bb.add_insn(Instruction::binop( + Opcode::Sub, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + bb.add_insn(Instruction::binop( + Opcode::Mul, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + + // Bitwise + bb.add_insn(Instruction::binop( + Opcode::And, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + bb.add_insn(Instruction::binop( + Opcode::Or, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + bb.add_insn(Instruction::binop( + Opcode::Xor, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + + // Comparisons + bb.add_insn(Instruction::binop( + Opcode::SetEq, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + bb.add_insn(Instruction::binop( + Opcode::SetLt, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + )); + + // Unary + bb.add_insn(Instruction::unop( + Opcode::Neg, + PseudoId(2), + PseudoId(0), + types.int_id, + 32, + )); + bb.add_insn(Instruction::unop( + Opcode::Not, + PseudoId(2), + PseudoId(0), + types.int_id, + 32, + )); + + // Float ops + bb.add_insn(Instruction::binop( + Opcode::FAdd, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.double_id, + 64, + )); + bb.add_insn(Instruction::unop( + Opcode::FNeg, + PseudoId(2), + PseudoId(0), + types.double_id, + 64, + )); + + // Conversions + let mut sext = Instruction::unop(Opcode::Sext, PseudoId(2), PseudoId(0), types.long_id, 64); + 
sext.src_size = 32; + bb.add_insn(sext); + let mut zext = + Instruction::unop(Opcode::Zext, PseudoId(2), PseudoId(0), types.ulong_id, 64); + zext.src_size = 32; + bb.add_insn(zext); + + // Memory + bb.add_insn(Instruction::load( + PseudoId(2), + PseudoId(0), + 0, + types.int_id, + 32, + )); + bb.add_insn(Instruction::store( + PseudoId(1), + PseudoId(0), + 0, + types.int_id, + 32, + )); + + // Terminator + bb.add_insn(Instruction::ret(Some(PseudoId(2)))); + + func.add_block(bb); + func.entry = BasicBlockId(0); + func + } + + /// Create a conversion instruction for testing. + pub fn make_convert_insn( + op: Opcode, + dst_type: TypeId, + dst_size: u32, + src_type: TypeId, + src_size: u32, + ) -> Instruction { + let mut insn = Instruction::new(op) + .with_target(PseudoId(2)) + .with_src(PseudoId(0)) + .with_type_and_size(dst_type, dst_size); + insn.src_size = src_size; + insn.src_typ = Some(src_type); + insn + } + + /// Assert the mapping result is Legal. + pub fn assert_legal(result: &MappedInsn) { + assert!(matches!(result, MappedInsn::Legal)); + } + + /// Assert the mapping result is a single LibCall replacement. + pub fn assert_libcall(result: &MappedInsn, expected_name: &str) { + match result { + MappedInsn::Replace(insns) => { + assert_eq!(insns.len(), 1, "expected single Call replacement"); + assert_eq!(insns[0].op, Opcode::Call); + assert_eq!(insns[0].func_name.as_deref(), Some(expected_name)); + } + MappedInsn::Legal => { + panic!("expected Replace with LibCall to {expected_name}, got Legal") + } + } + } + + /// Assert the mapping result is a multi-instruction expansion. + pub fn assert_expand(result: &MappedInsn) { + match result { + MappedInsn::Replace(insns) => { + assert!(!insns.is_empty(), "expected non-empty expansion"); + } + MappedInsn::Legal => panic!("expected Replace with expansion, got Legal"), + } + } + + /// Assert the mapping result is a CmpLibCall (call + int compare). 
+ pub fn assert_cmp_libcall(result: &MappedInsn, expected_name: &str, expected_cmp_op: Opcode) { + match result { + MappedInsn::Replace(insns) => { + assert_eq!(insns.len(), 2, "expected Call + compare"); + assert_eq!(insns[0].op, Opcode::Call); + assert_eq!(insns[0].func_name.as_deref(), Some(expected_name)); + assert_eq!(insns[1].op, expected_cmp_op); + } + MappedInsn::Legal => { + panic!("expected Replace with CmpLibCall to {expected_name}, got Legal") + } + } + } +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::test_helpers::*; + use super::*; + use crate::ir::{BasicBlock, BasicBlockId, Instruction, Opcode, Pseudo, PseudoId}; + use crate::target::{Arch, Os, Target}; + use crate::types::TypeTable; + + // ======================================================================== + // Pass runner tests + // ======================================================================== + + #[test] + fn test_run_mapping_empty() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mut module = Module::new(); + + run_mapping(&mut module, &types, &target); + } + + #[test] + fn test_run_mapping_with_functions() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + + let mut module = Module::new(); + module.add_function(make_test_func(&types)); + module.add_function(make_test_func(&types)); + + run_mapping(&mut module, &types, &target); + } + + #[test] + fn test_mapping_idempotent() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + + let mut module = Module::new(); + module.add_function(make_test_func(&types)); + + run_mapping(&mut module, &types, &target); + run_mapping(&mut module, &types, &target); + } + + #[test] + fn test_mapping_all_targets() { + let targets = vec![ + 
Target::new(Arch::X86_64, Os::Linux), + Target::new(Arch::X86_64, Os::MacOS), + Target::new(Arch::X86_64, Os::FreeBSD), + Target::new(Arch::Aarch64, Os::Linux), + Target::new(Arch::Aarch64, Os::MacOS), + ]; + + for target in &targets { + let types = TypeTable::new(target); + let mut module = Module::new(); + module.add_function(make_test_func(&types)); + run_mapping(&mut module, &types, target); + } + } + + #[test] + fn test_mapping_all_legal_x86_64() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + + let mut module = Module::new(); + module.add_function(make_test_func(&types)); + let orig_insn_count = module.functions[0].blocks[0].insns.len(); + + run_mapping(&mut module, &types, &target); + + // All instructions should be unchanged (all legal) + assert_eq!(module.functions[0].blocks[0].insns.len(), orig_insn_count); + } + + #[test] + fn test_mapping_all_legal_aarch64() { + let target = Target::new(Arch::Aarch64, Os::Linux); + let types = TypeTable::new(&target); + + let mut module = Module::new(); + module.add_function(make_test_func(&types)); + let orig_insn_count = module.functions[0].blocks[0].insns.len(); + + run_mapping(&mut module, &types, &target); + + assert_eq!(module.functions[0].blocks[0].insns.len(), orig_insn_count); + } + + // ======================================================================== + // Integration: int128 div/mod transformation + // ======================================================================== + + #[test] + fn test_mapping_transforms_int128_divmod() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + + let mut func = Function::new("test_divmod", types.int128_id); + func.add_pseudo(Pseudo::reg(PseudoId(0), 0)); + func.add_pseudo(Pseudo::reg(PseudoId(1), 1)); + func.add_pseudo(Pseudo::reg(PseudoId(2), 2)); + func.next_pseudo = 3; + + let mut bb = BasicBlock::new(BasicBlockId(0)); + bb.add_insn(Instruction::new(Opcode::Entry)); + 
bb.add_insn(Instruction::binop( + Opcode::DivS, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int128_id, + 128, + )); + bb.add_insn(Instruction::ret(Some(PseudoId(2)))); + func.add_block(bb); + func.entry = BasicBlockId(0); + + let mut module = Module::new(); + module.add_function(func); + run_mapping(&mut module, &types, &target); + + // After mapping, the DivS should be replaced with a Call to __divti3 + let block = &module.functions[0].blocks[0]; + assert_eq!(block.insns.len(), 3); // Entry, Call, Ret + assert_eq!(block.insns[1].op, Opcode::Call); + assert_eq!(block.insns[1].func_name.as_deref(), Some("__divti3")); + assert_eq!(block.insns[1].target, Some(PseudoId(2))); + assert_eq!(block.insns[1].src, vec![PseudoId(0), PseudoId(1)]); + assert!(block.insns[1].abi_info.is_some()); + } + + // ======================================================================== + // Integration: int128↔float conversion transformation + // ======================================================================== + + #[test] + fn test_mapping_transforms_int128_conversion() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + + let mut func = Function::new("test_convert", types.double_id); + func.add_pseudo(Pseudo::reg(PseudoId(0), 0)); + func.add_pseudo(Pseudo::reg(PseudoId(1), 1)); + func.next_pseudo = 2; + + let mut bb = BasicBlock::new(BasicBlockId(0)); + bb.add_insn(Instruction::new(Opcode::Entry)); + bb.add_insn(make_convert_insn( + Opcode::SCvtF, + types.double_id, + 64, + types.int128_id, + 128, + )); + bb.add_insn(Instruction::ret(Some(PseudoId(1)))); + func.add_block(bb); + func.entry = BasicBlockId(0); + + let mut module = Module::new(); + module.add_function(func); + run_mapping(&mut module, &types, &target); + + let block = &module.functions[0].blocks[0]; + assert_eq!(block.insns.len(), 3); + assert_eq!(block.insns[1].op, Opcode::Call); + assert_eq!(block.insns[1].func_name.as_deref(), Some("__floattidf")); + 
assert!(block.insns[1].abi_info.is_some()); + } + + // ======================================================================== + // Complex mul/div rtlib name tests + // ======================================================================== + + #[test] + fn test_complex_mul_name_float() { + let target = Target::new(Arch::X86_64, Os::Linux); + assert_eq!(complex_mul_name(TypeKind::Float, &target), "__mulsc3"); + } + + #[test] + fn test_complex_mul_name_double() { + let target = Target::new(Arch::X86_64, Os::Linux); + assert_eq!(complex_mul_name(TypeKind::Double, &target), "__muldc3"); + } + + #[test] + fn test_complex_mul_name_longdouble() { + let x86 = Target::new(Arch::X86_64, Os::Linux); + assert_eq!(complex_mul_name(TypeKind::LongDouble, &x86), "__mulxc3"); + + let arm_linux = Target::new(Arch::Aarch64, Os::Linux); + assert_eq!( + complex_mul_name(TypeKind::LongDouble, &arm_linux), + "__multc3" + ); + + let arm_macos = Target::new(Arch::Aarch64, Os::MacOS); + assert_eq!( + complex_mul_name(TypeKind::LongDouble, &arm_macos), + "__muldc3" + ); + } + + #[test] + fn test_complex_div_name_float() { + let target = Target::new(Arch::X86_64, Os::Linux); + assert_eq!(complex_div_name(TypeKind::Float, &target), "__divsc3"); + } + + #[test] + fn test_complex_div_name_longdouble() { + let x86 = Target::new(Arch::X86_64, Os::Linux); + assert_eq!(complex_div_name(TypeKind::LongDouble, &x86), "__divxc3"); + + let arm_linux = Target::new(Arch::Aarch64, Os::Linux); + assert_eq!( + complex_div_name(TypeKind::LongDouble, &arm_linux), + "__divtc3" + ); + + let arm_macos = Target::new(Arch::Aarch64, Os::MacOS); + assert_eq!( + complex_div_name(TypeKind::LongDouble, &arm_macos), + "__divdc3" + ); + } +} diff --git a/cc/arch/mod.rs b/cc/arch/mod.rs index cf94972c..a6cb923a 100644 --- a/cc/arch/mod.rs +++ b/cc/arch/mod.rs @@ -15,6 +15,7 @@ pub mod aarch64; pub mod codegen; pub mod dwarf; pub mod lir; +pub mod mapping; pub mod regalloc; pub mod x86_64; diff --git 
a/cc/arch/x86_64/codegen.rs b/cc/arch/x86_64/codegen.rs index efb3e7b4..5ee4f475 100644 --- a/cc/arch/x86_64/codegen.rs +++ b/cc/arch/x86_64/codegen.rs @@ -1717,7 +1717,7 @@ impl X86_64CodeGen { self.emit_fence(insn); } - // Int128 decomposition ops (from hwmap expansion) + // Int128 decomposition ops (from mapping pass expansion) Opcode::Lo64 => self.emit_lo64(insn), Opcode::Hi64 => self.emit_hi64(insn), Opcode::Pair64 => self.emit_pair64(insn), diff --git a/cc/arch/x86_64/expression.rs b/cc/arch/x86_64/expression.rs index e896bbd0..091133f4 100644 --- a/cc/arch/x86_64/expression.rs +++ b/cc/arch/x86_64/expression.rs @@ -709,7 +709,7 @@ impl X86_64CodeGen { /// Emit 128-bit shift operations (Shl, Lsr, Asr). /// Other int128 ops (Add, Sub, And, Or, Xor, Mul, Neg, Not, comparisons) - /// are expanded by the hwmap pass into 64-bit sequences. + /// are expanded by the mapping pass into 64-bit sequences. fn emit_int128_binop(&mut self, insn: &Instruction) { let (src1, src2) = match (insn.src.first(), insn.src.get(1)) { (Some(&s1), Some(&s2)) => (s1, s2), diff --git a/cc/arch/x86_64/mapping.rs b/cc/arch/x86_64/mapping.rs new file mode 100644 index 00000000..616567dc --- /dev/null +++ b/cc/arch/x86_64/mapping.rs @@ -0,0 +1,467 @@ +// +// Copyright (c) 2025-2026 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// x86-64 instruction mapping +// + +use crate::arch::mapping::{ + expand_float16_arith, expand_float16_cmp, expand_float16_neg, map_int128_divmod, + map_int128_expand, map_int128_float_convert, ArchMapper, MappedInsn, MappingCtx, +}; +use crate::ir::{Instruction, Opcode}; +use crate::types::TypeKind; + +/// x86-64 instruction mapper. 
+pub struct X86_64Mapper; + +impl ArchMapper for X86_64Mapper { + fn map_insn(&self, insn: &Instruction, ctx: &mut MappingCtx<'_>) -> MappedInsn { + // Shared: int128 div/mod → rtlib + if let Some(r) = map_int128_divmod(insn, ctx) { + return r; + } + // Shared: int128 expand (add/sub/mul/bitwise/neg/not/cmp/zext/sext) + if let Some(r) = map_int128_expand(insn, ctx) { + return r; + } + // Shared: int128↔float → rtlib + if let Some(r) = map_int128_float_convert(insn, ctx) { + return r; + } + // x86-64 only: Float16 soft-float → expand + if let Some(r) = self.map_float16(insn, ctx) { + return r; + } + MappedInsn::Legal + } +} + +impl X86_64Mapper { + /// Classify and expand Float16 operations via promote-operate-truncate. + fn map_float16(&self, insn: &Instruction, ctx: &mut MappingCtx<'_>) -> Option { + let types = ctx.types; + match insn.op { + // Arithmetic: promote-operate-truncate + Opcode::FAdd | Opcode::FSub | Opcode::FMul | Opcode::FDiv => { + let typ = insn.typ?; + if types.kind(typ) == TypeKind::Float16 { + Some(MappedInsn::Replace(expand_float16_arith( + insn, ctx.func, types, ctx.target, + ))) + } else { + None + } + } + // Negation: promote-negate-truncate + Opcode::FNeg => { + let typ = insn.typ?; + if types.kind(typ) == TypeKind::Float16 { + Some(MappedInsn::Replace(expand_float16_neg( + insn, ctx.func, types, ctx.target, + ))) + } else { + None + } + } + // Comparisons: promote both, compare (no truncate) + Opcode::FCmpOEq + | Opcode::FCmpONe + | Opcode::FCmpOLt + | Opcode::FCmpOLe + | Opcode::FCmpOGt + | Opcode::FCmpOGe => { + if let Some(src_typ) = insn.src_typ { + if types.kind(src_typ) == TypeKind::Float16 { + return Some(MappedInsn::Replace(expand_float16_cmp( + insn, ctx.func, types, ctx.target, + ))); + } + } + // Fallback: check operand size (Float16 = 16 bits) + if insn.size == 16 { + return Some(MappedInsn::Replace(expand_float16_cmp( + insn, ctx.func, types, ctx.target, + ))); + } + None + } + // Float16 conversions handled by linearizer, no 
mapping needed + Opcode::FCvtF => None, + _ => None, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::arch::mapping::test_helpers::*; + use crate::arch::mapping::MappingCtx; + use crate::ir::{Instruction, Opcode, PseudoId}; + use crate::target::{Arch, Os, Target}; + use crate::types::TypeTable; + + #[test] + fn test_x86_64_legal_insns() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + let func_template = make_test_func(&types); + for block in &func_template.blocks { + for insn in &block.insns { + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + let result = mapper.map_insn(insn, &mut ctx); + assert_legal(&result); + } + } + } + + // ======================================================================== + // Int128 div/mod + // ======================================================================== + + #[test] + fn test_x86_64_int128_divs() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + let insn = Instruction::binop( + Opcode::DivS, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int128_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__divti3"); + } + + #[test] + fn test_x86_64_int128_divu() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + let insn = Instruction::binop( + Opcode::DivU, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.uint128_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__udivti3"); + } + + #[test] + 
fn test_x86_64_int128_mods() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + let insn = Instruction::binop( + Opcode::ModS, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int128_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__modti3"); + } + + #[test] + fn test_x86_64_int128_modu() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + let insn = Instruction::binop( + Opcode::ModU, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.uint128_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__umodti3"); + } + + #[test] + fn test_x86_64_int32_div_stays_legal() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + let insn = Instruction::binop( + Opcode::DivS, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int_id, + 32, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_legal(&mapper.map_insn(&insn, &mut ctx)); + } + + // ======================================================================== + // Int128 expand + // ======================================================================== + + #[test] + fn test_x86_64_int128_add_expands() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + let insn = Instruction::binop( + Opcode::Add, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int128_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx 
= MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_expand(&mapper.map_insn(&insn, &mut ctx)); + } + + // ======================================================================== + // Int128↔float conversion + // ======================================================================== + + #[test] + fn test_x86_64_int128_to_float() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // signed int128 → float + let insn = make_convert_insn(Opcode::SCvtF, types.float_id, 32, types.int128_id, 128); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__floattisf"); + + // signed int128 → double + let insn = make_convert_insn(Opcode::SCvtF, types.double_id, 64, types.int128_id, 128); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__floattidf"); + + // unsigned int128 → float + let insn = make_convert_insn(Opcode::UCvtF, types.float_id, 32, types.uint128_id, 128); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__floatuntisf"); + + // unsigned int128 → double + let insn = make_convert_insn(Opcode::UCvtF, types.double_id, 64, types.uint128_id, 128); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__floatuntidf"); + } + + #[test] + fn test_x86_64_float_to_int128() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // float → signed int128 + let insn = 
make_convert_insn(Opcode::FCvtS, types.int128_id, 128, types.float_id, 32); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__fixsfti"); + + // double → signed int128 + let insn = make_convert_insn(Opcode::FCvtS, types.int128_id, 128, types.double_id, 64); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__fixdfti"); + + // float → unsigned int128 + let insn = make_convert_insn(Opcode::FCvtU, types.uint128_id, 128, types.float_id, 32); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__fixunssfti"); + + // double → unsigned int128 + let insn = make_convert_insn(Opcode::FCvtU, types.uint128_id, 128, types.double_id, 64); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__fixunsdfti"); + } + + #[test] + fn test_x86_64_int128_longdouble() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // x86-64 long double uses "xf" suffix + let insn = make_convert_insn(Opcode::SCvtF, types.longdouble_id, 80, types.int128_id, 128); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__floattixf"); + + let insn = make_convert_insn(Opcode::FCvtS, types.int128_id, 128, types.longdouble_id, 80); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + 
assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__fixxfti"); + } + + #[test] + fn test_x86_64_non_int128_conversion_legal() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // int32 → double should remain Legal + let insn = make_convert_insn(Opcode::SCvtF, types.double_id, 64, types.int_id, 32); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_legal(&mapper.map_insn(&insn, &mut ctx)); + + // double → int32 should remain Legal + let insn = make_convert_insn(Opcode::FCvtS, types.int_id, 32, types.double_id, 64); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_legal(&mapper.map_insn(&insn, &mut ctx)); + } + + #[test] + fn test_x86_64_longdouble_binop_legal() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // x86_64 long double (x87) is native — should be Legal + let insn = Instruction::binop( + Opcode::FAdd, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.longdouble_id, + 80, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_legal(&mapper.map_insn(&insn, &mut ctx)); + } + + #[test] + fn test_x86_64_longdouble_convert_legal() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // x86_64 long double conversions are native + let insn = make_convert_insn(Opcode::FCvtF, types.longdouble_id, 80, types.float_id, 32); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_legal(&mapper.map_insn(&insn, &mut ctx)); + } +} diff --git a/cc/arch/x86_64/mod.rs 
b/cc/arch/x86_64/mod.rs index 5ae9eb98..4550db9a 100644 --- a/cc/arch/x86_64/mod.rs +++ b/cc/arch/x86_64/mod.rs @@ -16,6 +16,7 @@ mod features; mod float; pub mod lir; pub mod macros; +pub(crate) mod mapping; pub mod regalloc; mod x87; diff --git a/cc/ir/hwmap.rs b/cc/ir/hwmap.rs deleted file mode 100644 index 576f766f..00000000 --- a/cc/ir/hwmap.rs +++ /dev/null @@ -1,2323 +0,0 @@ -// -// Copyright (c) 2025-2026 Jeff Garzik -// -// This file is part of the posixutils-rs project covered under -// the MIT License. For the full license text, please see the LICENSE -// file in the root directory of this project. -// SPDX-License-Identifier: MIT -// -// Hardware Mapping Pass for pcc C99 compiler -// -// This pass runs after SSA construction and before optimization. -// It centralizes decisions about how each IR instruction maps to hardware: -// - Legal: instruction is directly supported by the target -// - LibCall: instruction must be lowered to a runtime library call -// - Expand: instruction must be expanded into multiple simpler instructions -// - -use crate::abi::{get_abi_for_conv, ArgClass, CallingConv}; -use crate::ir::{CallAbiInfo, Function, Instruction, Module, Opcode, Pseudo, PseudoId}; -use crate::rtlib::{Float16Abi, RtlibNames}; -use crate::target::{Arch, Os, Target}; -use crate::types::{TypeId, TypeKind, TypeTable}; - -/// Action the hwmap pass should take for a given instruction. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum HwMapAction { - /// Instruction is directly supported by the target hardware. - Legal, - /// Instruction must be replaced with a call to a runtime library function. - LibCall(&'static str), - /// Long double comparison: call rtlib, then compare result against 0. - /// Contains (rtlib_func_name, int_compare_opcode). - CmpLibCall(&'static str, Opcode), - /// Expand into multiple instructions (promote-operate-truncate, etc.). - Expand, -} - -/// Classify an int128 div/mod instruction into a LibCall action. 
-fn map_int128_divmod(insn: &Instruction, types: &TypeTable) -> Option { - if insn.size != 128 { - return None; - } - let typ = insn.typ?; - if types.kind(typ) != TypeKind::Int128 { - return None; - } - match insn.op { - Opcode::DivS => Some(HwMapAction::LibCall("__divti3")), - Opcode::DivU => Some(HwMapAction::LibCall("__udivti3")), - Opcode::ModS => Some(HwMapAction::LibCall("__modti3")), - Opcode::ModU => Some(HwMapAction::LibCall("__umodti3")), - _ => None, - } -} - -/// Get the rtlib suffix for a float type kind on the given target. -fn float_suffix(kind: TypeKind, target: &Target) -> &'static str { - match kind { - TypeKind::Float => "sf", - TypeKind::Double => "df", - TypeKind::LongDouble => { - if target.arch == Arch::X86_64 { - "xf" - } else { - "tf" - } - } - _ => "", - } -} - -/// Classify an int128↔float conversion instruction into a LibCall action. -/// -/// Handles: -/// - SCvtF/UCvtF with src_size==128: int128 → float (e.g. __floattisf) -/// - FCvtS/FCvtU with size==128: float → int128 (e.g. 
__fixsfti) -fn map_int128_float_convert( - insn: &Instruction, - types: &TypeTable, - target: &Target, -) -> Option { - match insn.op { - // int128 → float - Opcode::SCvtF | Opcode::UCvtF => { - if insn.src_size != 128 { - return None; - } - let src_typ = insn.src_typ?; - if types.kind(src_typ) != TypeKind::Int128 { - return None; - } - let dst_typ = insn.typ?; - let dst_kind = types.kind(dst_typ); - let fsuf = float_suffix(dst_kind, target); - if fsuf.is_empty() { - return None; - } - let is_unsigned = insn.op == Opcode::UCvtF; - let func_name: &'static str = match (is_unsigned, fsuf) { - (false, "sf") => "__floattisf", - (false, "df") => "__floattidf", - (false, "xf") => "__floattixf", - (false, "tf") => "__floattitf", - (true, "sf") => "__floatuntisf", - (true, "df") => "__floatuntidf", - (true, "xf") => "__floatuntixf", - (true, "tf") => "__floatuntitf", - _ => return None, - }; - Some(HwMapAction::LibCall(func_name)) - } - // float → int128 - Opcode::FCvtS | Opcode::FCvtU => { - if insn.size != 128 { - return None; - } - let dst_typ = insn.typ?; - if types.kind(dst_typ) != TypeKind::Int128 { - return None; - } - let src_typ = insn.src_typ?; - let src_kind = types.kind(src_typ); - let fsuf = float_suffix(src_kind, target); - if fsuf.is_empty() { - return None; - } - let is_unsigned = insn.op == Opcode::FCvtU; - let func_name: &'static str = match (is_unsigned, fsuf) { - (false, "sf") => "__fixsfti", - (false, "df") => "__fixdfti", - (false, "xf") => "__fixxfti", - (false, "tf") => "__fixtfti", - (true, "sf") => "__fixunssfti", - (true, "df") => "__fixunsdfti", - (true, "xf") => "__fixunsxfti", - (true, "tf") => "__fixunstfti", - _ => return None, - }; - Some(HwMapAction::LibCall(func_name)) - } - _ => None, - } -} - -/// Check if long double needs soft-float rtlib on this target. -/// Returns true only for aarch64/Linux (128-bit IEEE quad). -/// x86_64 uses native x87; macOS aarch64 long double == double. 
-fn longdouble_needs_rtlib(target: &Target) -> bool { - target.arch == Arch::Aarch64 && target.os != Os::MacOS -} - -/// Classify a long double binary op (FAdd/FSub/FMul/FDiv) into a LibCall. -fn map_longdouble_binop( - insn: &Instruction, - types: &TypeTable, - target: &Target, -) -> Option { - if !longdouble_needs_rtlib(target) { - return None; - } - let typ = insn.typ?; - if types.kind(typ) != TypeKind::LongDouble { - return None; - } - match insn.op { - Opcode::FAdd => Some(HwMapAction::LibCall("__addtf3")), - Opcode::FSub => Some(HwMapAction::LibCall("__subtf3")), - Opcode::FMul => Some(HwMapAction::LibCall("__multf3")), - Opcode::FDiv => Some(HwMapAction::LibCall("__divtf3")), - _ => None, - } -} - -/// Classify a long double negation (FNeg) into a LibCall. -fn map_longdouble_neg( - insn: &Instruction, - types: &TypeTable, - target: &Target, -) -> Option { - if !longdouble_needs_rtlib(target) { - return None; - } - if insn.op != Opcode::FNeg { - return None; - } - let typ = insn.typ?; - if types.kind(typ) != TypeKind::LongDouble { - return None; - } - Some(HwMapAction::LibCall("__negtf2")) -} - -/// Classify a long double comparison (FCmpO*) into a CmpLibCall. -/// The rtlib cmp function returns int; caller must compare vs 0. -fn map_longdouble_cmp( - insn: &Instruction, - types: &TypeTable, - target: &Target, -) -> Option { - if !longdouble_needs_rtlib(target) { - return None; - } - // FCmpO* instructions don't store the operand type in insn.typ (that's the - // result type, which is int). Check src_typ or fall back to size check. - // The comparison has size == size of the operands being compared. - // For long double on aarch64/Linux, size == 128. 
- if insn.size != 128 { - return None; - } - // Also check src_typ if available - if let Some(src_typ) = insn.src_typ { - if types.kind(src_typ) != TypeKind::LongDouble { - return None; - } - } - match insn.op { - Opcode::FCmpOLt => Some(HwMapAction::CmpLibCall("__lttf2", Opcode::SetLt)), - Opcode::FCmpOLe => Some(HwMapAction::CmpLibCall("__letf2", Opcode::SetLe)), - Opcode::FCmpOGt => Some(HwMapAction::CmpLibCall("__gttf2", Opcode::SetGt)), - Opcode::FCmpOGe => Some(HwMapAction::CmpLibCall("__getf2", Opcode::SetGe)), - Opcode::FCmpOEq => Some(HwMapAction::CmpLibCall("__eqtf2", Opcode::SetEq)), - Opcode::FCmpONe => Some(HwMapAction::CmpLibCall("__netf2", Opcode::SetNe)), - _ => None, - } -} - -/// Get the integer suffix for a long double↔int conversion. -fn int_suffix_for_longdouble(types: &TypeTable, int_type: TypeId) -> &'static str { - let size = types.size_bits(int_type); - let is_unsigned = types.is_unsigned(int_type); - match (is_unsigned, size <= 32) { - (true, true) => "usi", - (true, false) => "udi", - (false, true) => "si", - (false, false) => "di", - } -} - -/// Classify a long double conversion into a LibCall. 
-fn map_longdouble_convert( - insn: &Instruction, - types: &TypeTable, - target: &Target, -) -> Option { - if !longdouble_needs_rtlib(target) { - return None; - } - match insn.op { - // Float-to-float: longdouble ↔ float/double - Opcode::FCvtF => { - let dst_typ = insn.typ?; - let src_typ = insn.src_typ?; - let dst_kind = types.kind(dst_typ); - let src_kind = types.kind(src_typ); - if src_kind == TypeKind::LongDouble { - // longdouble → float/double - match dst_kind { - TypeKind::Float => Some(HwMapAction::LibCall("__trunctfsf2")), - TypeKind::Double => Some(HwMapAction::LibCall("__trunctfdf2")), - _ => None, - } - } else if dst_kind == TypeKind::LongDouble { - // float/double → longdouble - match src_kind { - TypeKind::Float => Some(HwMapAction::LibCall("__extendsftf2")), - TypeKind::Double => Some(HwMapAction::LibCall("__extenddftf2")), - _ => None, - } - } else { - None - } - } - // Int-to-float: int → longdouble - Opcode::SCvtF | Opcode::UCvtF => { - let dst_typ = insn.typ?; - if types.kind(dst_typ) != TypeKind::LongDouble { - return None; - } - let src_typ = insn.src_typ?; - // Skip int128 (handled by map_int128_float_convert) - if types.kind(src_typ) == TypeKind::Int128 { - return None; - } - let isuf = int_suffix_for_longdouble(types, src_typ); - let func_name: &'static str = match isuf { - "si" => "__floatsitf", - "di" => "__floatditf", - "usi" => "__floatunsitf", - "udi" => "__floatunditf", - _ => return None, - }; - Some(HwMapAction::LibCall(func_name)) - } - // Float-to-int: longdouble → int - Opcode::FCvtS | Opcode::FCvtU => { - let src_typ = insn.src_typ?; - if types.kind(src_typ) != TypeKind::LongDouble { - return None; - } - let dst_typ = insn.typ?; - // Skip int128 (handled by map_int128_float_convert) - if types.kind(dst_typ) == TypeKind::Int128 { - return None; - } - let isuf = int_suffix_for_longdouble(types, dst_typ); - let func_name: &'static str = match isuf { - "si" => "__fixtfsi", - "di" => "__fixtfdi", - "usi" => "__fixunstfsi", - "udi" => 
"__fixunstfdi", - _ => return None, - }; - Some(HwMapAction::LibCall(func_name)) - } - _ => None, - } -} - -/// Classify an int128 operation that needs inline expansion. -fn map_int128_expand(insn: &Instruction, types: &TypeTable) -> Option { - if insn.size != 128 { - return None; - } - - match insn.op { - // Arithmetic/bitwise/unary: result type is int128 - Opcode::And - | Opcode::Or - | Opcode::Xor - | Opcode::Neg - | Opcode::Not - | Opcode::Add - | Opcode::Sub - | Opcode::Mul => { - let typ = insn.typ?; - if types.kind(typ) != TypeKind::Int128 { - return None; - } - Some(HwMapAction::Expand) - } - // Zext/Sext to 128: result type is int128 - Opcode::Zext | Opcode::Sext => { - let typ = insn.typ?; - if types.kind(typ) != TypeKind::Int128 { - return None; - } - Some(HwMapAction::Expand) - } - // Comparisons: size==128 is the operand size, result is int/bool. - // Verify via src_typ that operands are actually int128 (not long double). - Opcode::SetEq - | Opcode::SetNe - | Opcode::SetLt - | Opcode::SetLe - | Opcode::SetGt - | Opcode::SetGe - | Opcode::SetB - | Opcode::SetBe - | Opcode::SetA - | Opcode::SetAe => { - // insn.typ on comparisons is the operand type (not result type). - // Check it's actually Int128, not some other 128-bit type. - let typ = insn.typ?; - if types.kind(typ) != TypeKind::Int128 { - return None; - } - Some(HwMapAction::Expand) - } - _ => None, - } -} - -/// Common hardware mapping logic shared by all targets. 
-fn map_common(insn: &Instruction, types: &TypeTable, target: &Target) -> Option { - if let Some(action) = map_int128_divmod(insn, types) { - return Some(action); - } - if let Some(action) = map_int128_expand(insn, types) { - return Some(action); - } - if let Some(action) = map_int128_float_convert(insn, types, target) { - return Some(action); - } - if let Some(action) = map_longdouble_binop(insn, types, target) { - return Some(action); - } - if let Some(action) = map_longdouble_neg(insn, types, target) { - return Some(action); - } - if let Some(action) = map_longdouble_cmp(insn, types, target) { - return Some(action); - } - map_longdouble_convert(insn, types, target) -} - -// ============================================================================ -// Complex number rtlib name selection -// ============================================================================ - -/// Get the rtlib function name for complex multiplication. -/// Target-dependent for long double (x87 vs IEEE quad). -pub fn complex_mul_name(base_kind: TypeKind, target: &Target) -> &'static str { - match base_kind { - TypeKind::Float => "__mulsc3", - TypeKind::Double => "__muldc3", - TypeKind::LongDouble => { - if target.arch == Arch::Aarch64 && target.os == Os::MacOS { - "__muldc3" // macOS aarch64: long double == double - } else { - match target.arch { - Arch::X86_64 => "__mulxc3", - Arch::Aarch64 => "__multc3", - } - } - } - _ => "__muldc3", - } -} - -/// Get the rtlib function name for complex division. -/// Target-dependent for long double (x87 vs IEEE quad). 
-pub fn complex_div_name(base_kind: TypeKind, target: &Target) -> &'static str { - match base_kind { - TypeKind::Float => "__divsc3", - TypeKind::Double => "__divdc3", - TypeKind::LongDouble => { - if target.arch == Arch::Aarch64 && target.os == Os::MacOS { - "__divdc3" - } else { - match target.arch { - Arch::X86_64 => "__divxc3", - Arch::Aarch64 => "__divtc3", - } - } - } - _ => "__divdc3", - } -} - -/// Trait for target-specific hardware mapping decisions. -pub trait TargetHwMap { - /// Determine how the target handles a given instruction. - fn map_op(&self, insn: &Instruction, types: &TypeTable) -> HwMapAction; -} - -/// Classify a Float16 instruction that needs soft-float expansion on x86-64. -/// Returns Expand for arithmetic/neg (promote-operate-truncate) and -/// comparisons (promote-compare). Returns LibCall for conversions. -fn map_float16_softfloat(insn: &Instruction, types: &TypeTable) -> Option { - match insn.op { - // Arithmetic: promote-operate-truncate - Opcode::FAdd | Opcode::FSub | Opcode::FMul | Opcode::FDiv => { - let typ = insn.typ?; - if types.kind(typ) == TypeKind::Float16 { - return Some(HwMapAction::Expand); - } - None - } - // Negation: promote-negate-truncate - Opcode::FNeg => { - let typ = insn.typ?; - if types.kind(typ) == TypeKind::Float16 { - return Some(HwMapAction::Expand); - } - None - } - // Comparisons: promote both, compare (no truncate) - Opcode::FCmpOEq - | Opcode::FCmpONe - | Opcode::FCmpOLt - | Opcode::FCmpOLe - | Opcode::FCmpOGt - | Opcode::FCmpOGe => { - if let Some(src_typ) = insn.src_typ { - if types.kind(src_typ) == TypeKind::Float16 { - return Some(HwMapAction::Expand); - } - } - // Fallback: check operand size (Float16 = 16 bits) - if insn.size == 16 { - return Some(HwMapAction::Expand); - } - None - } - // Float16 conversions via rtlib - Opcode::FCvtF => { - let dst_typ = insn.typ?; - let src_typ = insn.src_typ?; - let dst_kind = types.kind(dst_typ); - let src_kind = types.kind(src_typ); - if src_kind == 
TypeKind::Float16 || dst_kind == TypeKind::Float16 { - // These are handled by the linearizer's Float16 conversion code - // which already emits the correct calls. No hwmap action needed - // because the linearizer emits Call instructions directly. - } - None - } - _ => None, - } -} - -/// x86-64 hardware mapping. -pub struct X86_64HwMap { - target: Target, -} - -impl TargetHwMap for X86_64HwMap { - fn map_op(&self, insn: &Instruction, types: &TypeTable) -> HwMapAction { - if let Some(action) = map_common(insn, types, &self.target) { - return action; - } - if let Some(action) = map_float16_softfloat(insn, types) { - return action; - } - HwMapAction::Legal - } -} - -/// AArch64 hardware mapping. -pub struct Aarch64HwMap { - target: Target, -} - -impl TargetHwMap for Aarch64HwMap { - fn map_op(&self, insn: &Instruction, types: &TypeTable) -> HwMapAction { - if let Some(action) = map_common(insn, types, &self.target) { - return action; - } - HwMapAction::Legal - } -} - -/// Get the appropriate TargetHwMap implementation for the given target. -fn get_target_hwmap(target: &Target) -> Box { - match target.arch { - Arch::X86_64 => Box::new(X86_64HwMap { - target: target.clone(), - }), - Arch::Aarch64 => Box::new(Aarch64HwMap { - target: target.clone(), - }), - } -} - -/// Build a runtime library call instruction replacing an IR instruction. -/// -/// Creates a Call instruction with proper ABI classification using the -/// C calling convention, mirroring the linearizer's `emit_rtlib_call`. 
-fn build_rtlib_call( - insn: &Instruction, - func_name: &str, - arg_types: Vec, - ret_type: TypeId, - types: &TypeTable, - target: &Target, -) -> Instruction { - let target_pseudo = insn.target.expect("insn must have target"); - let ret_size = types.size_bits(ret_type); - - let arg_vals = insn.src.clone(); - - let abi = get_abi_for_conv(CallingConv::C, target); - let param_classes: Vec<_> = arg_types - .iter() - .map(|&t| abi.classify_param(t, types)) - .collect(); - let ret_class = abi.classify_return(ret_type, types); - let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); - - let mut call_insn = Instruction::call( - Some(target_pseudo), - func_name, - arg_vals, - arg_types, - ret_type, - ret_size, - ); - call_insn.abi_info = Some(call_abi_info); - call_insn.pos = insn.pos; - call_insn -} - -/// Parameters for building an explicit rtlib call. -struct RtlibCallParams<'a> { - target_pseudo: PseudoId, - arg_vals: &'a [PseudoId], - func_name: &'a str, - arg_types: Vec, - ret_type: TypeId, - pos: Option, -} - -/// Build a rtlib call with explicit parameters. -/// Used for expansion patterns where the call target differs from -/// the original instruction's target. 
-fn build_rtlib_call_explicit( - params: RtlibCallParams<'_>, - types: &TypeTable, - target: &Target, -) -> Instruction { - let ret_size = types.size_bits(params.ret_type); - - let abi = get_abi_for_conv(CallingConv::C, target); - let param_classes: Vec<_> = params - .arg_types - .iter() - .map(|&t| abi.classify_param(t, types)) - .collect(); - let ret_class = abi.classify_return(params.ret_type, types); - let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); - - let mut call_insn = Instruction::call( - Some(params.target_pseudo), - params.func_name, - params.arg_vals.to_vec(), - params.arg_types, - params.ret_type, - ret_size, - ); - call_insn.abi_info = Some(call_abi_info); - call_insn.pos = params.pos; - call_insn -} - -/// Build a rtlib call for a binop (both args same type as result). -fn build_binop_rtlib_call( - insn: &Instruction, - func_name: &str, - types: &TypeTable, - target: &Target, -) -> Instruction { - let ret_type = insn.typ.expect("binop must have type"); - let arg_types = vec![ret_type; insn.src.len()]; - build_rtlib_call(insn, func_name, arg_types, ret_type, types, target) -} - -/// Build a rtlib call for a conversion (single arg, different src/dst types). -fn build_convert_rtlib_call( - insn: &Instruction, - func_name: &str, - types: &TypeTable, - target: &Target, -) -> Instruction { - let ret_type = insn.typ.expect("conversion must have type"); - let src_type = insn.src_typ.expect("conversion must have src_typ"); - let arg_types = vec![src_type]; - build_rtlib_call(insn, func_name, arg_types, ret_type, types, target) -} - -/// Build a call to __extendhfsf2 (Float16 → float) with proper ABI. 
-fn build_f16_extend_call( - target_pseudo: PseudoId, - src: PseudoId, - pos: Option, - types: &TypeTable, - target: &Target, -) -> Instruction { - let rtlib = RtlibNames::new(target); - let f16_abi = rtlib.float16_abi(); - let float_type = types.float_id; - let float_size = types.size_bits(float_type); - - // Arg type: ushort for compiler-rt, Float16 for libgcc - let arg_type = if f16_abi == Float16Abi::Integer { - types.ushort_id - } else { - types.float16_id - }; - - // Arg classification - let param_class = if f16_abi == Float16Abi::Integer { - ArgClass::Extend { - signed: false, - size_bits: 16, - } - } else { - let abi = get_abi_for_conv(CallingConv::C, target); - abi.classify_param(types.float16_id, types) - }; - - // Return is always SSE float - let abi = get_abi_for_conv(CallingConv::C, target); - let ret_class = abi.classify_return(float_type, types); - - let call_abi_info = Box::new(CallAbiInfo::new(vec![param_class], ret_class)); - - let mut call_insn = Instruction::call( - Some(target_pseudo), - "__extendhfsf2", - vec![src], - vec![arg_type], - float_type, - float_size, - ); - call_insn.abi_info = Some(call_abi_info); - call_insn.pos = pos; - call_insn -} - -/// Build a call to __truncsfhf2 (float → Float16) with proper ABI. 
-fn build_f16_truncate_call( - target_pseudo: PseudoId, - src: PseudoId, - pos: Option, - types: &TypeTable, - target: &Target, -) -> Instruction { - let rtlib = RtlibNames::new(target); - let f16_abi = rtlib.float16_abi(); - let float_type = types.float_id; - let float16_type = types.float16_id; - let f16_size = types.size_bits(float16_type); - - // Arg is always SSE float - let abi = get_abi_for_conv(CallingConv::C, target); - let param_class = abi.classify_param(float_type, types); - - // Return: ushort for compiler-rt, Float16/SSE for libgcc - let ret_class = if f16_abi == Float16Abi::Integer { - ArgClass::Extend { - signed: false, - size_bits: 16, - } - } else { - abi.classify_return(float16_type, types) - }; - - let call_abi_info = Box::new(CallAbiInfo::new(vec![param_class], ret_class)); - - let mut call_insn = Instruction::call( - Some(target_pseudo), - "__truncsfhf2", - vec![src], - vec![float_type], - float16_type, - f16_size, - ); - call_insn.abi_info = Some(call_abi_info); - call_insn.pos = pos; - call_insn -} - -/// Dispatch Expand actions to the appropriate expansion function. 
-fn expand_insn( - insn: &Instruction, - func: &mut Function, - new_insns: &mut Vec, - types: &TypeTable, - target: &Target, -) { - // Int128 expansion: typ is Int128 for arith/bitwise, or operand type for comparisons - if insn.size == 128 { - if let Some(typ) = insn.typ { - if types.kind(typ) == TypeKind::Int128 { - expand_int128(insn, func, new_insns, types); - return; - } - } - } - - // Float16 soft-float expansion (x86-64 only) - if matches!( - insn.op, - Opcode::FAdd - | Opcode::FSub - | Opcode::FMul - | Opcode::FDiv - | Opcode::FNeg - | Opcode::FCmpOEq - | Opcode::FCmpONe - | Opcode::FCmpOLt - | Opcode::FCmpOLe - | Opcode::FCmpOGt - | Opcode::FCmpOGe - ) { - expand_float16(insn, func, new_insns, types, target); - return; - } - - panic!( - "expand_insn: unhandled Expand for {} (size={}) in function {}", - insn.op, insn.size, func.name - ); -} - -/// Allocate a new 64-bit register pseudo. -fn alloc_reg64(func: &mut Function) -> PseudoId { - let id = func.alloc_pseudo(); - func.add_pseudo(Pseudo::reg(id, id.0)); - id -} - -/// Extract lo and hi 64-bit halves from a 128-bit pseudo. -fn extract_halves( - func: &mut Function, - new_insns: &mut Vec, - src: PseudoId, - long_type: TypeId, -) -> (PseudoId, PseudoId) { - let lo = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Lo64, lo, src, long_type, 64)); - let hi = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Hi64, hi, src, long_type, 64)); - (lo, hi) -} - -/// Expand an int128 instruction into 64-bit operations using Lo64/Hi64/Pair64. 
-fn expand_int128( - insn: &Instruction, - func: &mut Function, - new_insns: &mut Vec, - types: &TypeTable, -) { - let result = insn.target.expect("int128 op must have target"); - let long_type = types.ulong_id; - - match insn.op { - // Bitwise: independent 64-bit ops on lo/hi halves - Opcode::And | Opcode::Or | Opcode::Xor => { - let (a_lo, a_hi) = extract_halves(func, new_insns, insn.src[0], long_type); - let (b_lo, b_hi) = extract_halves(func, new_insns, insn.src[1], long_type); - - let r_lo = alloc_reg64(func); - new_insns.push(Instruction::binop(insn.op, r_lo, a_lo, b_lo, long_type, 64)); - let r_hi = alloc_reg64(func); - new_insns.push(Instruction::binop(insn.op, r_hi, a_hi, b_hi, long_type, 64)); - - let int128_type = insn.typ.unwrap(); - new_insns.push(Instruction::binop( - Opcode::Pair64, - result, - r_lo, - r_hi, - int128_type, - 128, - )); - } - - // Not: decompose, not each half - Opcode::Not => { - let (s_lo, s_hi) = extract_halves(func, new_insns, insn.src[0], long_type); - - let r_lo = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Not, r_lo, s_lo, long_type, 64)); - let r_hi = alloc_reg64(func); - new_insns.push(Instruction::unop(Opcode::Not, r_hi, s_hi, long_type, 64)); - - let int128_type = insn.typ.unwrap(); - new_insns.push(Instruction::binop( - Opcode::Pair64, - result, - r_lo, - r_hi, - int128_type, - 128, - )); - } - - // Neg: 0 - value with borrow chain - Opcode::Neg => { - let (s_lo, s_hi) = extract_halves(func, new_insns, insn.src[0], long_type); - let zero = func.create_const_pseudo(0); - - let r_lo = alloc_reg64(func); - new_insns.push(Instruction::binop( - Opcode::SubC, - r_lo, - zero, - s_lo, - long_type, - 64, - )); - let r_hi = alloc_reg64(func); - let mut sbc = Instruction::binop(Opcode::SbcC, r_hi, zero, s_hi, long_type, 64); - sbc.src.push(r_lo); - new_insns.push(sbc); - - let int128_type = insn.typ.unwrap(); - new_insns.push(Instruction::binop( - Opcode::Pair64, - result, - r_lo, - r_hi, - int128_type, - 128, - )); 
- } - - // Add: carry chain - Opcode::Add => { - let (a_lo, a_hi) = extract_halves(func, new_insns, insn.src[0], long_type); - let (b_lo, b_hi) = extract_halves(func, new_insns, insn.src[1], long_type); - - let r_lo = alloc_reg64(func); - new_insns.push(Instruction::binop( - Opcode::AddC, - r_lo, - a_lo, - b_lo, - long_type, - 64, - )); - let r_hi = alloc_reg64(func); - let mut adc = Instruction::binop(Opcode::AdcC, r_hi, a_hi, b_hi, long_type, 64); - adc.src.push(r_lo); - new_insns.push(adc); - - let int128_type = insn.typ.unwrap(); - new_insns.push(Instruction::binop( - Opcode::Pair64, - result, - r_lo, - r_hi, - int128_type, - 128, - )); - } - - // Sub: borrow chain - Opcode::Sub => { - let (a_lo, a_hi) = extract_halves(func, new_insns, insn.src[0], long_type); - let (b_lo, b_hi) = extract_halves(func, new_insns, insn.src[1], long_type); - - let r_lo = alloc_reg64(func); - new_insns.push(Instruction::binop( - Opcode::SubC, - r_lo, - a_lo, - b_lo, - long_type, - 64, - )); - let r_hi = alloc_reg64(func); - let mut sbc = Instruction::binop(Opcode::SbcC, r_hi, a_hi, b_hi, long_type, 64); - sbc.src.push(r_lo); - new_insns.push(sbc); - - let int128_type = insn.typ.unwrap(); - new_insns.push(Instruction::binop( - Opcode::Pair64, - result, - r_lo, - r_hi, - int128_type, - 128, - )); - } - - // Mul: cross-product decomposition - Opcode::Mul => { - let (a_lo, a_hi) = extract_halves(func, new_insns, insn.src[0], long_type); - let (b_lo, b_hi) = extract_halves(func, new_insns, insn.src[1], long_type); - - let low_result = alloc_reg64(func); - new_insns.push(Instruction::binop( - Opcode::Mul, - low_result, - a_lo, - b_lo, - long_type, - 64, - )); - - let high_part = alloc_reg64(func); - new_insns.push(Instruction::binop( - Opcode::UMulHi, - high_part, - a_lo, - b_lo, - long_type, - 64, - )); - - let cross1 = alloc_reg64(func); - new_insns.push(Instruction::binop( - Opcode::Mul, - cross1, - a_lo, - b_hi, - long_type, - 64, - )); - - let cross2 = alloc_reg64(func); - 
new_insns.push(Instruction::binop( - Opcode::Mul, - cross2, - a_hi, - b_lo, - long_type, - 64, - )); - - let sum1 = alloc_reg64(func); - new_insns.push(Instruction::binop( - Opcode::Add, - sum1, - high_part, - cross1, - long_type, - 64, - )); - let final_hi = alloc_reg64(func); - new_insns.push(Instruction::binop( - Opcode::Add, - final_hi, - sum1, - cross2, - long_type, - 64, - )); - - let int128_type = insn.typ.unwrap(); - new_insns.push(Instruction::binop( - Opcode::Pair64, - result, - low_result, - final_hi, - int128_type, - 128, - )); - } - - // Eq/Ne: xor+or reduction, then 64-bit compare against 0 - Opcode::SetEq | Opcode::SetNe => { - let (a_lo, a_hi) = extract_halves(func, new_insns, insn.src[0], long_type); - let (b_lo, b_hi) = extract_halves(func, new_insns, insn.src[1], long_type); - - let xor_lo = alloc_reg64(func); - new_insns.push(Instruction::binop( - Opcode::Xor, - xor_lo, - a_lo, - b_lo, - long_type, - 64, - )); - let xor_hi = alloc_reg64(func); - new_insns.push(Instruction::binop( - Opcode::Xor, - xor_hi, - a_hi, - b_hi, - long_type, - 64, - )); - let or_result = alloc_reg64(func); - new_insns.push(Instruction::binop( - Opcode::Or, - or_result, - xor_lo, - xor_hi, - long_type, - 64, - )); - - let zero = func.create_const_pseudo(0); - new_insns.push(Instruction::binop( - insn.op, result, or_result, zero, long_type, 64, - )); - } - - // Ordered comparisons: hi compare + Select(hi_eq, lo_cmp, hi_cmp) - Opcode::SetLt - | Opcode::SetLe - | Opcode::SetGt - | Opcode::SetGe - | Opcode::SetB - | Opcode::SetBe - | Opcode::SetA - | Opcode::SetAe => { - let (a_lo, a_hi) = extract_halves(func, new_insns, insn.src[0], long_type); - let (b_lo, b_hi) = extract_halves(func, new_insns, insn.src[1], long_type); - - let hi_eq = alloc_reg64(func); - new_insns.push(Instruction::binop( - Opcode::SetEq, - hi_eq, - a_hi, - b_hi, - long_type, - 64, - )); - - let hi_cmp = alloc_reg64(func); - new_insns.push(Instruction::binop( - insn.op, hi_cmp, a_hi, b_hi, long_type, 64, 
- )); - - // Low halves always use unsigned compare - let lo_op = match insn.op { - Opcode::SetLt | Opcode::SetB => Opcode::SetB, - Opcode::SetLe | Opcode::SetBe => Opcode::SetBe, - Opcode::SetGt | Opcode::SetA => Opcode::SetA, - Opcode::SetGe | Opcode::SetAe => Opcode::SetAe, - _ => unreachable!(), - }; - let lo_cmp = alloc_reg64(func); - new_insns.push(Instruction::binop(lo_op, lo_cmp, a_lo, b_lo, long_type, 64)); - - new_insns.push(Instruction::select( - result, hi_eq, lo_cmp, hi_cmp, long_type, 64, - )); - } - - // Zext to 128: zero-extend src to 64-bit, Pair64(lo, 0) - Opcode::Zext => { - let src = insn.src[0]; - let src_size = insn.src_size; - - // Zero-extend src to 64-bit if needed - let lo = if src_size < 64 { - let ext = alloc_reg64(func); - let mut zext_insn = Instruction::unop(Opcode::Zext, ext, src, long_type, 64); - zext_insn.src_size = src_size; - new_insns.push(zext_insn); - ext - } else { - src - }; - - let zero = func.create_const_pseudo(0); - let int128_type = insn.typ.unwrap(); - new_insns.push(Instruction::binop( - Opcode::Pair64, - result, - lo, - zero, - int128_type, - 128, - )); - } - - // Sext to 128: first sext src to 64-bit, then hi = Asr(lo, 63) - Opcode::Sext => { - let src = insn.src[0]; - let src_size = insn.src_size; - - // Sign-extend src to 64-bit if needed - let lo = if src_size < 64 { - let ext = alloc_reg64(func); - let mut sext_insn = Instruction::unop(Opcode::Sext, ext, src, long_type, 64); - sext_insn.src_size = src_size; - new_insns.push(sext_insn); - ext - } else { - src - }; - - let shift_amount = func.create_const_pseudo(63); - let hi = alloc_reg64(func); - new_insns.push(Instruction::binop( - Opcode::Asr, - hi, - lo, - shift_amount, - long_type, - 64, - )); - let int128_type = insn.typ.unwrap(); - new_insns.push(Instruction::binop( - Opcode::Pair64, - result, - lo, - hi, - int128_type, - 128, - )); - } - - _ => panic!("expand_int128: unexpected opcode {}", insn.op), - } -} - -/// Expand a Float16 arithmetic/neg/cmp 
instruction using promote-operate-truncate. -fn expand_float16( - insn: &Instruction, - func: &mut Function, - new_insns: &mut Vec, - types: &TypeTable, - target: &Target, -) { - let float_type = types.float_id; - let float_size = types.size_bits(float_type); - let pos = insn.pos; - - match insn.op { - // Binary arithmetic: extend both → float op → truncate - Opcode::FAdd | Opcode::FSub | Opcode::FMul | Opcode::FDiv => { - let result = insn.target.expect("binop must have target"); - let left = insn.src[0]; - let right = insn.src[1]; - - // Extend left to float - let left_ext = func.alloc_pseudo(); - func.add_pseudo(Pseudo::reg(left_ext, left_ext.0)); - new_insns.push(build_f16_extend_call(left_ext, left, pos, types, target)); - - // Extend right to float - let right_ext = func.alloc_pseudo(); - func.add_pseudo(Pseudo::reg(right_ext, right_ext.0)); - new_insns.push(build_f16_extend_call(right_ext, right, pos, types, target)); - - // Native float operation - let float_result = func.alloc_pseudo(); - func.add_pseudo(Pseudo::reg(float_result, float_result.0)); - new_insns.push(Instruction::binop( - insn.op, - float_result, - left_ext, - right_ext, - float_type, - float_size, - )); - - // Truncate result back to Float16 - new_insns.push(build_f16_truncate_call( - result, - float_result, - pos, - types, - target, - )); - } - - // Negation: extend → negate → truncate - Opcode::FNeg => { - let result = insn.target.expect("unary must have target"); - let src = insn.src[0]; - - let src_ext = func.alloc_pseudo(); - func.add_pseudo(Pseudo::reg(src_ext, src_ext.0)); - new_insns.push(build_f16_extend_call(src_ext, src, pos, types, target)); - - let neg_result = func.alloc_pseudo(); - func.add_pseudo(Pseudo::reg(neg_result, neg_result.0)); - new_insns.push(Instruction::unop( - Opcode::FNeg, - neg_result, - src_ext, - float_type, - float_size, - )); - - new_insns.push(build_f16_truncate_call( - result, neg_result, pos, types, target, - )); - } - - // Comparison: extend both → 
float compare (no truncate) - Opcode::FCmpOEq - | Opcode::FCmpONe - | Opcode::FCmpOLt - | Opcode::FCmpOLe - | Opcode::FCmpOGt - | Opcode::FCmpOGe => { - let result = insn.target.expect("cmp must have target"); - let left = insn.src[0]; - let right = insn.src[1]; - - let left_ext = func.alloc_pseudo(); - func.add_pseudo(Pseudo::reg(left_ext, left_ext.0)); - new_insns.push(build_f16_extend_call(left_ext, left, pos, types, target)); - - let right_ext = func.alloc_pseudo(); - func.add_pseudo(Pseudo::reg(right_ext, right_ext.0)); - new_insns.push(build_f16_extend_call(right_ext, right, pos, types, target)); - - // Float comparison — result type is int, keep original type/size - let mut cmp = Instruction::binop( - insn.op, - result, - left_ext, - right_ext, - insn.typ.unwrap_or(types.int_id), - float_size, - ); - cmp.src_typ = Some(float_type); - new_insns.push(cmp); - } - - _ => panic!("expand_float16: unexpected opcode {}", insn.op), - } -} - -/// Run the hardware mapping pass on a single function. 
-/// -/// Walks all instructions and transforms non-Legal ops: -/// - LibCall: replace with a runtime library call instruction -/// - Expand: replace with multiple simpler instructions -pub fn hwmap_function(func: &mut Function, types: &TypeTable, target: &Target) { - let hwmap = get_target_hwmap(target); - - for block_idx in 0..func.blocks.len() { - // Take the insns out of the block to avoid borrow conflicts - let old_insns = std::mem::take(&mut func.blocks[block_idx].insns); - let mut new_insns = Vec::with_capacity(old_insns.len()); - let mut block_changed = false; - - for insn in &old_insns { - match hwmap.map_op(insn, types) { - HwMapAction::Legal => { - new_insns.push(insn.clone()); - } - HwMapAction::LibCall(name) => { - let call = match insn.op { - Opcode::FCvtS - | Opcode::FCvtU - | Opcode::SCvtF - | Opcode::UCvtF - | Opcode::FCvtF => build_convert_rtlib_call(insn, name, types, target), - Opcode::FNeg => { - // Unary: single arg, same type as result - build_binop_rtlib_call(insn, name, types, target) - } - _ => build_binop_rtlib_call(insn, name, types, target), - }; - new_insns.push(call); - block_changed = true; - } - HwMapAction::CmpLibCall(name, cmp_op) => { - // Long double comparison expansion: - // 1. Call rtlib cmp function (returns int) - // 2. 
Compare result against 0 - let result_pseudo = insn.target.expect("cmp must have target"); - let int_type = types.int_id; - let int_size = types.size_bits(int_type); - let ld_type = types.longdouble_id; - - // Allocate pseudo for cmp call result - let cmp_result = func.alloc_pseudo(); - func.add_pseudo(Pseudo::reg(cmp_result, cmp_result.0)); - - // Allocate pseudo for zero constant - let zero = func.create_const_pseudo(0); - - // Build the rtlib call: cmp_result = __lttf2(left, right) - let arg_vals = insn.src.clone(); - let arg_types = vec![ld_type; arg_vals.len()]; - let call = build_rtlib_call_explicit( - RtlibCallParams { - target_pseudo: cmp_result, - arg_vals: &arg_vals, - func_name: name, - arg_types, - ret_type: int_type, - pos: insn.pos, - }, - types, - target, - ); - new_insns.push(call); - - // Build the int comparison: result = cmp_op(cmp_result, 0) - new_insns.push(Instruction::binop( - cmp_op, - result_pseudo, - cmp_result, - zero, - int_type, - int_size, - )); - block_changed = true; - } - HwMapAction::Expand => { - expand_insn(insn, func, &mut new_insns, types, target); - block_changed = true; - } - } - } - - if block_changed { - func.blocks[block_idx].insns = new_insns; - } else { - // Put the original insns back (no change) - func.blocks[block_idx].insns = old_insns; - } - } -} - -/// Run the hardware mapping pass on an entire module. 
-pub fn hwmap_module(module: &mut Module, types: &TypeTable, target: &Target) { - for func in &mut module.functions { - hwmap_function(func, types, target); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::ir::{BasicBlock, BasicBlockId, Instruction, Opcode, Pseudo, PseudoId}; - use crate::target::{Arch, Os, Target}; - use crate::types::TypeTable; - - fn make_test_func(types: &TypeTable) -> Function { - let mut func = Function::new("test_hwmap", types.int_id); - - func.add_pseudo(Pseudo::reg(PseudoId(0), 0)); - func.add_pseudo(Pseudo::reg(PseudoId(1), 1)); - func.add_pseudo(Pseudo::reg(PseudoId(2), 2)); - - let mut bb = BasicBlock::new(BasicBlockId(0)); - bb.add_insn(Instruction::new(Opcode::Entry)); - - // Integer arithmetic - bb.add_insn(Instruction::binop( - Opcode::Add, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.int_id, - 32, - )); - bb.add_insn(Instruction::binop( - Opcode::Sub, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.int_id, - 32, - )); - bb.add_insn(Instruction::binop( - Opcode::Mul, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.int_id, - 32, - )); - - // Bitwise - bb.add_insn(Instruction::binop( - Opcode::And, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.int_id, - 32, - )); - bb.add_insn(Instruction::binop( - Opcode::Or, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.int_id, - 32, - )); - bb.add_insn(Instruction::binop( - Opcode::Xor, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.int_id, - 32, - )); - - // Comparisons - bb.add_insn(Instruction::binop( - Opcode::SetEq, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.int_id, - 32, - )); - bb.add_insn(Instruction::binop( - Opcode::SetLt, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.int_id, - 32, - )); - - // Unary - bb.add_insn(Instruction::unop( - Opcode::Neg, - PseudoId(2), - PseudoId(0), - types.int_id, - 32, - )); - bb.add_insn(Instruction::unop( - Opcode::Not, - PseudoId(2), - PseudoId(0), - types.int_id, - 32, - )); - - // 
Float ops - bb.add_insn(Instruction::binop( - Opcode::FAdd, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.double_id, - 64, - )); - bb.add_insn(Instruction::unop( - Opcode::FNeg, - PseudoId(2), - PseudoId(0), - types.double_id, - 64, - )); - - // Conversions - let mut sext = Instruction::unop(Opcode::Sext, PseudoId(2), PseudoId(0), types.long_id, 64); - sext.src_size = 32; - bb.add_insn(sext); - let mut zext = - Instruction::unop(Opcode::Zext, PseudoId(2), PseudoId(0), types.ulong_id, 64); - zext.src_size = 32; - bb.add_insn(zext); - - // Memory - bb.add_insn(Instruction::load( - PseudoId(2), - PseudoId(0), - 0, - types.int_id, - 32, - )); - bb.add_insn(Instruction::store( - PseudoId(1), - PseudoId(0), - 0, - types.int_id, - 32, - )); - - // Terminator - bb.add_insn(Instruction::ret(Some(PseudoId(2)))); - - func.add_block(bb); - func.entry = BasicBlockId(0); - func - } - - #[test] - fn test_map_op_returns_legal_x86_64() { - let target = Target::new(Arch::X86_64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = X86_64HwMap { - target: target.clone(), - }; - - let func = make_test_func(&types); - for block in &func.blocks { - for insn in &block.insns { - assert_eq!( - hwmap.map_op(insn, &types), - HwMapAction::Legal, - "expected Legal for {} on x86_64", - insn.op - ); - } - } - } - - #[test] - fn test_map_op_returns_legal_aarch64() { - let target = Target::new(Arch::Aarch64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = Aarch64HwMap { - target: target.clone(), - }; - - let func = make_test_func(&types); - for block in &func.blocks { - for insn in &block.insns { - assert_eq!( - hwmap.map_op(insn, &types), - HwMapAction::Legal, - "expected Legal for {} on aarch64", - insn.op - ); - } - } - } - - #[test] - fn test_hwmap_function_all_legal() { - let target = Target::new(Arch::X86_64, Os::Linux); - let types = TypeTable::new(&target); - let mut func = make_test_func(&types); - - hwmap_function(&mut func, &types, &target); - } - - 
#[test] - fn test_hwmap_function_aarch64() { - let target = Target::new(Arch::Aarch64, Os::Linux); - let types = TypeTable::new(&target); - let mut func = make_test_func(&types); - - hwmap_function(&mut func, &types, &target); - } - - #[test] - fn test_hwmap_module_empty() { - let target = Target::new(Arch::X86_64, Os::Linux); - let types = TypeTable::new(&target); - let mut module = Module::new(); - - hwmap_module(&mut module, &types, &target); - } - - #[test] - fn test_hwmap_module_with_functions() { - let target = Target::new(Arch::X86_64, Os::Linux); - let types = TypeTable::new(&target); - - let mut module = Module::new(); - module.add_function(make_test_func(&types)); - module.add_function(make_test_func(&types)); - - hwmap_module(&mut module, &types, &target); - } - - #[test] - fn test_hwmap_idempotent() { - let target = Target::new(Arch::X86_64, Os::Linux); - let types = TypeTable::new(&target); - let mut func = make_test_func(&types); - - hwmap_function(&mut func, &types, &target); - hwmap_function(&mut func, &types, &target); - } - - #[test] - fn test_hwmap_action_enum() { - assert_eq!(HwMapAction::Legal, HwMapAction::Legal); - assert_ne!(HwMapAction::Legal, HwMapAction::LibCall("__divti3")); - assert_eq!( - HwMapAction::LibCall("__divti3"), - HwMapAction::LibCall("__divti3") - ); - assert_ne!( - HwMapAction::LibCall("__divti3"), - HwMapAction::LibCall("__modti3") - ); - } - - #[test] - fn test_hwmap_all_targets() { - let targets = vec![ - Target::new(Arch::X86_64, Os::Linux), - Target::new(Arch::X86_64, Os::MacOS), - Target::new(Arch::X86_64, Os::FreeBSD), - Target::new(Arch::Aarch64, Os::Linux), - Target::new(Arch::Aarch64, Os::MacOS), - ]; - - for target in &targets { - let types = TypeTable::new(target); - let mut func = make_test_func(&types); - hwmap_function(&mut func, &types, target); - } - } - - // ======================================================================== - // Phase 2a: Int128 div/mod → LibCall tests - // 
======================================================================== - - #[test] - fn test_int128_divs_libcall_x86_64() { - let target = Target::new(Arch::X86_64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = X86_64HwMap { - target: target.clone(), - }; - - let insn = Instruction::binop( - Opcode::DivS, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.int128_id, - 128, - ); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__divti3") - ); - } - - #[test] - fn test_int128_divu_libcall_x86_64() { - let target = Target::new(Arch::X86_64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = X86_64HwMap { - target: target.clone(), - }; - - let insn = Instruction::binop( - Opcode::DivU, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.uint128_id, - 128, - ); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__udivti3") - ); - } - - #[test] - fn test_int128_mods_libcall_x86_64() { - let target = Target::new(Arch::X86_64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = X86_64HwMap { - target: target.clone(), - }; - - let insn = Instruction::binop( - Opcode::ModS, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.int128_id, - 128, - ); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__modti3") - ); - } - - #[test] - fn test_int128_modu_libcall_x86_64() { - let target = Target::new(Arch::X86_64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = X86_64HwMap { - target: target.clone(), - }; - - let insn = Instruction::binop( - Opcode::ModU, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.uint128_id, - 128, - ); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__umodti3") - ); - } - - #[test] - fn test_int128_divmod_libcall_aarch64() { - let target = Target::new(Arch::Aarch64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = Aarch64HwMap { - target: target.clone(), - }; - - let insn = 
Instruction::binop( - Opcode::DivS, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.int128_id, - 128, - ); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__divti3") - ); - - let insn = Instruction::binop( - Opcode::ModU, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.uint128_id, - 128, - ); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__umodti3") - ); - } - - #[test] - fn test_int32_div_stays_legal() { - let target = Target::new(Arch::X86_64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = X86_64HwMap { - target: target.clone(), - }; - - let insn = Instruction::binop( - Opcode::DivS, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.int_id, - 32, - ); - assert_eq!(hwmap.map_op(&insn, &types), HwMapAction::Legal); - } - - #[test] - fn test_int128_add_expands() { - let target = Target::new(Arch::X86_64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = X86_64HwMap { - target: target.clone(), - }; - - let insn = Instruction::binop( - Opcode::Add, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.int128_id, - 128, - ); - assert_eq!(hwmap.map_op(&insn, &types), HwMapAction::Expand); - } - - #[test] - fn test_hwmap_transforms_int128_divmod() { - let target = Target::new(Arch::X86_64, Os::Linux); - let types = TypeTable::new(&target); - - let mut func = Function::new("test_divmod", types.int128_id); - func.add_pseudo(Pseudo::reg(PseudoId(0), 0)); - func.add_pseudo(Pseudo::reg(PseudoId(1), 1)); - func.add_pseudo(Pseudo::reg(PseudoId(2), 2)); - func.next_pseudo = 3; - - let mut bb = BasicBlock::new(BasicBlockId(0)); - bb.add_insn(Instruction::new(Opcode::Entry)); - bb.add_insn(Instruction::binop( - Opcode::DivS, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.int128_id, - 128, - )); - bb.add_insn(Instruction::ret(Some(PseudoId(2)))); - func.add_block(bb); - func.entry = BasicBlockId(0); - - hwmap_function(&mut func, &types, &target); - - // After hwmap, the DivS should be 
replaced with a Call to __divti3 - let block = &func.blocks[0]; - assert_eq!(block.insns.len(), 3); // Entry, Call, Ret - assert_eq!(block.insns[1].op, Opcode::Call); - assert_eq!(block.insns[1].func_name.as_deref(), Some("__divti3")); - assert_eq!(block.insns[1].target, Some(PseudoId(2))); - assert_eq!(block.insns[1].src, vec![PseudoId(0), PseudoId(1)]); - assert!(block.insns[1].abi_info.is_some()); - } - - // ======================================================================== - // Phase 2b: Int128↔float conversion → LibCall tests - // ======================================================================== - - /// Helper to create a conversion instruction. - fn make_convert_insn( - op: Opcode, - dst_type: TypeId, - dst_size: u32, - src_type: TypeId, - src_size: u32, - ) -> Instruction { - let mut insn = Instruction::new(op) - .with_target(PseudoId(2)) - .with_src(PseudoId(0)) - .with_type_and_size(dst_type, dst_size); - insn.src_size = src_size; - insn.src_typ = Some(src_type); - insn - } - - #[test] - fn test_int128_to_float_libcall() { - let target = Target::new(Arch::X86_64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = X86_64HwMap { - target: target.clone(), - }; - - // signed int128 → float - let insn = make_convert_insn(Opcode::SCvtF, types.float_id, 32, types.int128_id, 128); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__floattisf") - ); - - // signed int128 → double - let insn = make_convert_insn(Opcode::SCvtF, types.double_id, 64, types.int128_id, 128); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__floattidf") - ); - - // unsigned int128 → float - let insn = make_convert_insn(Opcode::UCvtF, types.float_id, 32, types.uint128_id, 128); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__floatuntisf") - ); - - // unsigned int128 → double - let insn = make_convert_insn(Opcode::UCvtF, types.double_id, 64, types.uint128_id, 128); - assert_eq!( - hwmap.map_op(&insn, 
&types), - HwMapAction::LibCall("__floatuntidf") - ); - } - - #[test] - fn test_float_to_int128_libcall() { - let target = Target::new(Arch::X86_64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = X86_64HwMap { - target: target.clone(), - }; - - // float → signed int128 - let insn = make_convert_insn(Opcode::FCvtS, types.int128_id, 128, types.float_id, 32); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__fixsfti") - ); - - // double → signed int128 - let insn = make_convert_insn(Opcode::FCvtS, types.int128_id, 128, types.double_id, 64); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__fixdfti") - ); - - // float → unsigned int128 - let insn = make_convert_insn(Opcode::FCvtU, types.uint128_id, 128, types.float_id, 32); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__fixunssfti") - ); - - // double → unsigned int128 - let insn = make_convert_insn(Opcode::FCvtU, types.uint128_id, 128, types.double_id, 64); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__fixunsdfti") - ); - } - - #[test] - fn test_int128_longdouble_x86_64() { - let target = Target::new(Arch::X86_64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = X86_64HwMap { - target: target.clone(), - }; - - // x86-64 long double uses "xf" suffix - let insn = make_convert_insn(Opcode::SCvtF, types.longdouble_id, 80, types.int128_id, 128); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__floattixf") - ); - - let insn = make_convert_insn(Opcode::FCvtS, types.int128_id, 128, types.longdouble_id, 80); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__fixxfti") - ); - } - - #[test] - fn test_int128_longdouble_aarch64() { - let target = Target::new(Arch::Aarch64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = Aarch64HwMap { - target: target.clone(), - }; - - // aarch64 long double uses "tf" suffix - let insn = make_convert_insn( - 
Opcode::SCvtF, - types.longdouble_id, - 128, - types.int128_id, - 128, - ); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__floattitf") - ); - } - - #[test] - fn test_non_int128_conversion_stays_legal() { - let target = Target::new(Arch::X86_64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = X86_64HwMap { - target: target.clone(), - }; - - // int32 → double should remain Legal - let insn = make_convert_insn(Opcode::SCvtF, types.double_id, 64, types.int_id, 32); - assert_eq!(hwmap.map_op(&insn, &types), HwMapAction::Legal); - - // double → int32 should remain Legal - let insn = make_convert_insn(Opcode::FCvtS, types.int_id, 32, types.double_id, 64); - assert_eq!(hwmap.map_op(&insn, &types), HwMapAction::Legal); - } - - #[test] - fn test_hwmap_transforms_int128_conversion() { - let target = Target::new(Arch::X86_64, Os::Linux); - let types = TypeTable::new(&target); - - let mut func = Function::new("test_convert", types.double_id); - func.add_pseudo(Pseudo::reg(PseudoId(0), 0)); - func.add_pseudo(Pseudo::reg(PseudoId(1), 1)); - func.next_pseudo = 2; - - let mut bb = BasicBlock::new(BasicBlockId(0)); - bb.add_insn(Instruction::new(Opcode::Entry)); - bb.add_insn(make_convert_insn( - Opcode::SCvtF, - types.double_id, - 64, - types.int128_id, - 128, - )); - bb.add_insn(Instruction::ret(Some(PseudoId(1)))); - func.add_block(bb); - func.entry = BasicBlockId(0); - - hwmap_function(&mut func, &types, &target); - - let block = &func.blocks[0]; - assert_eq!(block.insns.len(), 3); - assert_eq!(block.insns[1].op, Opcode::Call); - assert_eq!(block.insns[1].func_name.as_deref(), Some("__floattidf")); - assert!(block.insns[1].abi_info.is_some()); - } - - // ======================================================================== - // Phase 2c: Long double → LibCall/CmpLibCall tests (aarch64/Linux only) - // ======================================================================== - - #[test] - fn test_longdouble_binop_aarch64_linux() { - let 
target = Target::new(Arch::Aarch64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = Aarch64HwMap { - target: target.clone(), - }; - - let insn = Instruction::binop( - Opcode::FAdd, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.longdouble_id, - 128, - ); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__addtf3") - ); - - let insn = Instruction::binop( - Opcode::FSub, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.longdouble_id, - 128, - ); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__subtf3") - ); - - let insn = Instruction::binop( - Opcode::FMul, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.longdouble_id, - 128, - ); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__multf3") - ); - - let insn = Instruction::binop( - Opcode::FDiv, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.longdouble_id, - 128, - ); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__divtf3") - ); - } - - #[test] - fn test_longdouble_binop_x86_64_legal() { - let target = Target::new(Arch::X86_64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = X86_64HwMap { - target: target.clone(), - }; - - // x86_64 long double (x87) is native — should be Legal - let insn = Instruction::binop( - Opcode::FAdd, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.longdouble_id, - 80, - ); - assert_eq!(hwmap.map_op(&insn, &types), HwMapAction::Legal); - } - - #[test] - fn test_longdouble_binop_macos_legal() { - let target = Target::new(Arch::Aarch64, Os::MacOS); - let types = TypeTable::new(&target); - let hwmap = Aarch64HwMap { - target: target.clone(), - }; - - // macOS aarch64: long double == double, native - let insn = Instruction::binop( - Opcode::FAdd, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.longdouble_id, - 64, - ); - assert_eq!(hwmap.map_op(&insn, &types), HwMapAction::Legal); - } - - #[test] - fn test_longdouble_neg_aarch64_linux() { - let target 
= Target::new(Arch::Aarch64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = Aarch64HwMap { - target: target.clone(), - }; - - let insn = Instruction::unop( - Opcode::FNeg, - PseudoId(2), - PseudoId(0), - types.longdouble_id, - 128, - ); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__negtf2") - ); - } - - #[test] - fn test_longdouble_cmp_aarch64_linux() { - let target = Target::new(Arch::Aarch64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = Aarch64HwMap { - target: target.clone(), - }; - - let mut insn = Instruction::binop( - Opcode::FCmpOLt, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.int_id, - 128, - ); - insn.src_typ = Some(types.longdouble_id); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::CmpLibCall("__lttf2", Opcode::SetLt) - ); - - let mut insn = Instruction::binop( - Opcode::FCmpOEq, - PseudoId(2), - PseudoId(0), - PseudoId(1), - types.int_id, - 128, - ); - insn.src_typ = Some(types.longdouble_id); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::CmpLibCall("__eqtf2", Opcode::SetEq) - ); - } - - #[test] - fn test_longdouble_convert_aarch64_linux() { - let target = Target::new(Arch::Aarch64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = Aarch64HwMap { - target: target.clone(), - }; - - // float → longdouble - let insn = make_convert_insn(Opcode::FCvtF, types.longdouble_id, 128, types.float_id, 32); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__extendsftf2") - ); - - // longdouble → double - let insn = make_convert_insn(Opcode::FCvtF, types.double_id, 64, types.longdouble_id, 128); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__trunctfdf2") - ); - - // int32 → longdouble - let insn = make_convert_insn(Opcode::SCvtF, types.longdouble_id, 128, types.int_id, 32); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__floatsitf") - ); - - // longdouble → int64 - let insn = 
make_convert_insn(Opcode::FCvtS, types.long_id, 64, types.longdouble_id, 128); - assert_eq!( - hwmap.map_op(&insn, &types), - HwMapAction::LibCall("__fixtfdi") - ); - } - - #[test] - fn test_longdouble_convert_x86_64_legal() { - let target = Target::new(Arch::X86_64, Os::Linux); - let types = TypeTable::new(&target); - let hwmap = X86_64HwMap { - target: target.clone(), - }; - - // x86_64 long double conversions are native - let insn = make_convert_insn(Opcode::FCvtF, types.longdouble_id, 80, types.float_id, 32); - assert_eq!(hwmap.map_op(&insn, &types), HwMapAction::Legal); - } - - // ======================================================================== - // Phase 2e: Complex mul/div rtlib name tests - // ======================================================================== - - #[test] - fn test_complex_mul_name_float() { - let target = Target::new(Arch::X86_64, Os::Linux); - assert_eq!(complex_mul_name(TypeKind::Float, &target), "__mulsc3"); - } - - #[test] - fn test_complex_mul_name_double() { - let target = Target::new(Arch::X86_64, Os::Linux); - assert_eq!(complex_mul_name(TypeKind::Double, &target), "__muldc3"); - } - - #[test] - fn test_complex_mul_name_longdouble() { - let x86 = Target::new(Arch::X86_64, Os::Linux); - assert_eq!(complex_mul_name(TypeKind::LongDouble, &x86), "__mulxc3"); - - let arm_linux = Target::new(Arch::Aarch64, Os::Linux); - assert_eq!( - complex_mul_name(TypeKind::LongDouble, &arm_linux), - "__multc3" - ); - - let arm_macos = Target::new(Arch::Aarch64, Os::MacOS); - assert_eq!( - complex_mul_name(TypeKind::LongDouble, &arm_macos), - "__muldc3" - ); - } - - #[test] - fn test_complex_div_name_float() { - let target = Target::new(Arch::X86_64, Os::Linux); - assert_eq!(complex_div_name(TypeKind::Float, &target), "__divsc3"); - } - - #[test] - fn test_complex_div_name_longdouble() { - let x86 = Target::new(Arch::X86_64, Os::Linux); - assert_eq!(complex_div_name(TypeKind::LongDouble, &x86), "__divxc3"); - - let arm_linux = 
Target::new(Arch::Aarch64, Os::Linux); - assert_eq!( - complex_div_name(TypeKind::LongDouble, &arm_linux), - "__divtc3" - ); - - let arm_macos = Target::new(Arch::Aarch64, Os::MacOS); - assert_eq!( - complex_div_name(TypeKind::LongDouble, &arm_macos), - "__divdc3" - ); - } -} diff --git a/cc/ir/linearize.rs b/cc/ir/linearize.rs index 663e6dbe..cdf250da 100644 --- a/cc/ir/linearize.rs +++ b/cc/ir/linearize.rs @@ -7914,7 +7914,7 @@ impl<'a> Linearizer<'a> { BinaryOp::Mul => { // Complex multiply via rtlib call (__mulsc3, __muldc3, etc.) let base_kind = self.types.kind(base_typ); - let func_name = crate::ir::hwmap::complex_mul_name(base_kind, self.target); + let func_name = crate::arch::mapping::complex_mul_name(base_kind, self.target); let call_result = self.emit_complex_rtlib_call( func_name, (left_real, left_imag), @@ -7938,7 +7938,7 @@ impl<'a> Linearizer<'a> { BinaryOp::Div => { // Complex divide via rtlib call (__divsc3, __divdc3, etc.) let base_kind = self.types.kind(base_typ); - let func_name = crate::ir::hwmap::complex_div_name(base_kind, self.target); + let func_name = crate::arch::mapping::complex_div_name(base_kind, self.target); let call_result = self.emit_complex_rtlib_call( func_name, (left_real, left_imag), diff --git a/cc/ir/mod.rs b/cc/ir/mod.rs index 3bf63713..0fb0f0ff 100644 --- a/cc/ir/mod.rs +++ b/cc/ir/mod.rs @@ -16,7 +16,6 @@ pub mod dce; pub mod dominate; -pub mod hwmap; pub mod inline; pub mod instcombine; pub mod linearize; @@ -233,7 +232,7 @@ pub enum Opcode { AtomicFetchXor, // Atomic fetch-and-xor Fence, // Memory fence - // Int128 decomposition ops (used by hwmap expansion) + // Int128 decomposition ops (used by mapping pass expansion) Lo64, // Extract low 64 bits from 128-bit pseudo Hi64, // Extract high 64 bits from 128-bit pseudo Pair64, // Combine two 64-bit pseudos into 128-bit: target = (src[0]=lo, src[1]=hi) diff --git a/cc/main.rs b/cc/main.rs index ff6a96c5..a89237da 100644 --- a/cc/main.rs +++ b/cc/main.rs @@ -92,10 +92,10 @@ 
struct Args { dump_ast: bool, /// Dump IR at a named stage (for debugging) - /// Stages: post-linearize, post-hwmap, post-opt, post-lower, all + /// Stages: post-linearize, post-mapping, post-opt, post-lower, all /// Bare --dump-ir = post-opt (backward compat) #[arg(long = "dump-ir", value_name = "stage", default_missing_value = "post-opt", - num_args = 0..=1, help = gettext("Dump IR at stage (post-linearize, post-hwmap, post-opt, post-lower, all)"))] + num_args = 0..=1, help = gettext("Dump IR at stage (post-linearize, post-mapping, post-opt, post-lower, all)"))] dump_ir: Option, /// Filter IR dumps to a specific function name @@ -222,7 +222,7 @@ struct Args { /// Valid stage names for --dump-ir. const DUMP_IR_STAGES: &[&str] = &[ "post-linearize", - "post-hwmap", + "post-mapping", "post-opt", "post-lower", "all", @@ -498,9 +498,9 @@ fn process_file( dump_ir(args, &module, "post-linearize"); // Hardware mapping pass — centralized target-specific lowering decisions - ir::hwmap::hwmap_module(&mut module, &types, target); + arch::mapping::run_mapping(&mut module, &types, target); - dump_ir(args, &module, "post-hwmap"); + dump_ir(args, &module, "post-mapping"); // Optimize IR (if enabled) if args.opt_level > 0 { From 28c8fbfd79671c681308dc0aaad2f99f2c624645 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Mon, 23 Mar 2026 06:16:30 +0000 Subject: [PATCH 10/18] cc: redesign IR pass interface, migrate shifts + Float16 to mapping pass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Function::create_reg_pseudo() and Instruction::call_with_abi() as canonical IR primitives for passes that synthesize instructions. These replace duplicated patterns across mapping.rs (alloc_reg64), lower.rs (open-coded pseudo alloc), and the 6+ copies of the ABI-classify-and- build-call sequence (build_rtlib_call, build_rtlib_call_explicit, RtlibCallParams — all deleted). 
Migrate int128 constant shifts (Shl/Lsr/Asr) from backend codegen to the mapping pass as expand_int128_const_shl/lsr/asr. Variable shifts remain in backends (require arch-specific branching). Migrate Float16 conversions from linearizer (emit_float16_convert_call) to the x86_64 mapping pass. The linearizer now emits standard FCvtF/ FCvtS/FCvtU/SCvtF/UCvtF ops; the mapper lowers Float16 variants to rtlib calls with proper compiler-rt vs libgcc ABI dispatch. Fix uint128 large constant sign-extension: (*v >> 64) as i64 sign- extends for values like 0xFFFFFFFFFFFFFFFF; change to as u64 as i64 in three x86_64 locations. Co-Authored-By: Claude Opus 4.6 (1M context) --- cc/arch/aarch64/mapping.rs | 26 +- cc/arch/codegen.rs | 2 +- cc/arch/mapping.rs | 659 ++++++++++++++++++++++++++++------- cc/arch/x86_64/codegen.rs | 2 +- cc/arch/x86_64/expression.rs | 2 +- cc/arch/x86_64/mapping.rs | 310 +++++++++++++++- cc/ir/linearize.rs | 161 +-------- cc/ir/lower.rs | 5 +- cc/ir/mod.rs | 148 +++++++- cc/ir/test_linearize.rs | 60 ++-- cc/tests/codegen/misc.rs | 175 ++++++++++ 11 files changed, 1214 insertions(+), 336 deletions(-) diff --git a/cc/arch/aarch64/mapping.rs b/cc/arch/aarch64/mapping.rs index 576d153d..94346ad2 100644 --- a/cc/arch/aarch64/mapping.rs +++ b/cc/arch/aarch64/mapping.rs @@ -9,10 +9,11 @@ // AArch64 instruction mapping // +use crate::abi::CallingConv; use crate::arch::mapping::{ - alloc_reg64, build_binop_rtlib_call, build_convert_rtlib_call, build_rtlib_call_explicit, - int_suffix_for_longdouble, longdouble_needs_rtlib, map_int128_divmod, map_int128_expand, - map_int128_float_convert, ArchMapper, MappedInsn, MappingCtx, RtlibCallParams, + build_binop_rtlib_call, build_convert_rtlib_call, int_suffix_for_longdouble, + longdouble_needs_rtlib, map_int128_divmod, map_int128_expand, map_int128_float_convert, + ArchMapper, MappedInsn, MappingCtx, }; use crate::ir::{Instruction, Opcode}; use crate::types::TypeKind; @@ -110,24 +111,23 @@ impl Aarch64Mapper { let ld_type 
= ctx.types.longdouble_id; // Allocate pseudo for cmp call result - let cmp_result = alloc_reg64(ctx.func); + let cmp_result = ctx.func.create_reg_pseudo(); let zero = ctx.func.create_const_pseudo(0); // Build the rtlib call: cmp_result = __lttf2(left, right) let arg_vals = insn.src.clone(); let arg_types = vec![ld_type; arg_vals.len()]; - let call = build_rtlib_call_explicit( - RtlibCallParams { - target_pseudo: cmp_result, - arg_vals: &arg_vals, - func_name: name, - arg_types, - ret_type: int_type, - pos: insn.pos, - }, + let mut call = Instruction::call_with_abi( + Some(cmp_result), + name, + arg_vals, + arg_types, + int_type, + CallingConv::C, ctx.types, ctx.target, ); + call.pos = insn.pos; // Build the int comparison: result = cmp_op(cmp_result, 0) let cmp = diff --git a/cc/arch/codegen.rs b/cc/arch/codegen.rs index 355742e4..3a4fe664 100644 --- a/cc/arch/codegen.rs +++ b/cc/arch/codegen.rs @@ -284,7 +284,7 @@ impl CodeGenBase { if size > 8 { // 128-bit: emit as two quads (little-endian: lo then hi) let lo = *val as i64; - let hi = (*val >> 64) as i64; + let hi = (*val >> 64) as u64 as i64; self.push_directive(Directive::Quad(lo)); self.push_directive(Directive::Quad(hi)); } else { diff --git a/cc/arch/mapping.rs b/cc/arch/mapping.rs index 3ea73be7..031e58c2 100644 --- a/cc/arch/mapping.rs +++ b/cc/arch/mapping.rs @@ -14,7 +14,7 @@ // use crate::abi::{get_abi_for_conv, ArgClass, CallingConv}; -use crate::ir::{CallAbiInfo, Function, Instruction, Module, Opcode, Pseudo, PseudoId}; +use crate::ir::{CallAbiInfo, Function, Instruction, Module, Opcode, PseudoId}; use crate::rtlib::{Float16Abi, RtlibNames}; use crate::target::{Arch, Os, Target}; use crate::types::{TypeId, TypeKind, TypeTable}; @@ -130,16 +130,9 @@ pub(crate) fn int_suffix_for_longdouble(types: &TypeTable, int_type: TypeId) -> } // ============================================================================ -// Pseudo/instruction helpers +// Instruction helpers // 
============================================================================ -/// Allocate a new 64-bit register pseudo. -pub(crate) fn alloc_reg64(func: &mut Function) -> PseudoId { - let id = func.alloc_pseudo(); - func.add_pseudo(Pseudo::reg(id, id.0)); - id -} - /// Extract lo and hi 64-bit halves from a 128-bit pseudo. fn extract_halves( func: &mut Function, @@ -147,120 +140,126 @@ fn extract_halves( src: PseudoId, long_type: TypeId, ) -> (PseudoId, PseudoId) { - let lo = alloc_reg64(func); + let lo = func.create_reg_pseudo(); insns.push(Instruction::unop(Opcode::Lo64, lo, src, long_type, 64)); - let hi = alloc_reg64(func); + let hi = func.create_reg_pseudo(); insns.push(Instruction::unop(Opcode::Hi64, hi, src, long_type, 64)); (lo, hi) } // ============================================================================ -// Rtlib call builders +// Rtlib call builders (convenience wrappers over Instruction::call_with_abi) // ============================================================================ -/// Parameters for building an explicit rtlib call. -pub(crate) struct RtlibCallParams<'a> { - pub target_pseudo: PseudoId, - pub arg_vals: &'a [PseudoId], - pub func_name: &'a str, - pub arg_types: Vec, - pub ret_type: TypeId, - pub pos: Option, -} - -/// Build a runtime library call instruction replacing an IR instruction. -/// -/// Creates a Call instruction with proper ABI classification using the -/// C calling convention, mirroring the linearizer's `emit_rtlib_call`. -fn build_rtlib_call( +/// Build a rtlib call replacing a binop (both args same type as result). 
+pub(crate) fn build_binop_rtlib_call( insn: &Instruction, func_name: &str, - arg_types: Vec, - ret_type: TypeId, types: &TypeTable, target: &Target, ) -> Instruction { - let target_pseudo = insn.target.expect("insn must have target"); - let ret_size = types.size_bits(ret_type); - - let arg_vals = insn.src.clone(); - - let abi = get_abi_for_conv(CallingConv::C, target); - let param_classes: Vec<_> = arg_types - .iter() - .map(|&t| abi.classify_param(t, types)) - .collect(); - let ret_class = abi.classify_return(ret_type, types); - let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); - - let mut call_insn = Instruction::call( - Some(target_pseudo), + let ret_type = insn.typ.expect("binop must have type"); + let arg_types = vec![ret_type; insn.src.len()]; + let mut call = Instruction::call_with_abi( + insn.target, func_name, - arg_vals, + insn.src.clone(), arg_types, ret_type, - ret_size, - ); - call_insn.abi_info = Some(call_abi_info); - call_insn.pos = insn.pos; - call_insn -} - -/// Build a rtlib call with explicit parameters. -/// Used for expansion patterns where the call target differs from -/// the original instruction's target. 
-pub(crate) fn build_rtlib_call_explicit( - params: RtlibCallParams<'_>, - types: &TypeTable, - target: &Target, -) -> Instruction { - let ret_size = types.size_bits(params.ret_type); - - let abi = get_abi_for_conv(CallingConv::C, target); - let param_classes: Vec<_> = params - .arg_types - .iter() - .map(|&t| abi.classify_param(t, types)) - .collect(); - let ret_class = abi.classify_return(params.ret_type, types); - let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); - - let mut call_insn = Instruction::call( - Some(params.target_pseudo), - params.func_name, - params.arg_vals.to_vec(), - params.arg_types, - params.ret_type, - ret_size, + CallingConv::C, + types, + target, ); - call_insn.abi_info = Some(call_abi_info); - call_insn.pos = params.pos; - call_insn + call.pos = insn.pos; + call } -/// Build a rtlib call for a binop (both args same type as result). -pub(crate) fn build_binop_rtlib_call( +/// Build a rtlib call replacing a conversion (single arg, different src/dst types). +pub(crate) fn build_convert_rtlib_call( insn: &Instruction, func_name: &str, types: &TypeTable, target: &Target, ) -> Instruction { - let ret_type = insn.typ.expect("binop must have type"); - let arg_types = vec![ret_type; insn.src.len()]; - build_rtlib_call(insn, func_name, arg_types, ret_type, types, target) + let ret_type = insn.typ.expect("conversion must have type"); + let src_type = insn.src_typ.expect("conversion must have src_typ"); + let mut call = Instruction::call_with_abi( + insn.target, + func_name, + insn.src.clone(), + vec![src_type], + ret_type, + CallingConv::C, + types, + target, + ); + call.pos = insn.pos; + call } -/// Build a rtlib call for a conversion (single arg, different src/dst types). -pub(crate) fn build_convert_rtlib_call( +/// Build a call to a Float16 conversion rtlib function with correct ABI. 
+/// +/// Handles the ABI difference between compiler-rt (Integer ABI: Float16 +/// passed/returned as u16 in GP registers) and libgcc (SSE ABI: Float16 +/// passed/returned in XMM registers). +pub(crate) fn build_f16_convert_call( insn: &Instruction, func_name: &str, + src_type: TypeId, + dst_type: TypeId, types: &TypeTable, target: &Target, ) -> Instruction { - let ret_type = insn.typ.expect("conversion must have type"); - let src_type = insn.src_typ.expect("conversion must have src_typ"); - let arg_types = vec![src_type]; - build_rtlib_call(insn, func_name, arg_types, ret_type, types, target) + let target_pseudo = insn.target.expect("conversion must have target"); + let dst_size = types.size_bits(dst_type); + let src_kind = types.kind(src_type); + let dst_kind = types.kind(dst_type); + + let rtlib = RtlibNames::new(target); + let f16_abi = rtlib.float16_abi(); + + // Arg type: ushort for compiler-rt if src is Float16, otherwise use actual type + let arg_type = if f16_abi == Float16Abi::Integer && src_kind == TypeKind::Float16 { + types.ushort_id + } else { + src_type + }; + + // Arg classification + let param_class = if f16_abi == Float16Abi::Integer && src_kind == TypeKind::Float16 { + ArgClass::Extend { + signed: false, + size_bits: 16, + } + } else { + let abi = get_abi_for_conv(CallingConv::C, target); + abi.classify_param(arg_type, types) + }; + + // Return classification + let ret_class = if f16_abi == Float16Abi::Integer && dst_kind == TypeKind::Float16 { + ArgClass::Extend { + signed: false, + size_bits: 16, + } + } else { + let abi = get_abi_for_conv(CallingConv::C, target); + abi.classify_return(dst_type, types) + }; + + let call_abi_info = Box::new(CallAbiInfo::new(vec![param_class], ret_class)); + + let mut call_insn = Instruction::call( + Some(target_pseudo), + func_name, + insn.src.clone(), + vec![arg_type], + dst_type, + dst_size, + ); + call_insn.abi_info = Some(call_abi_info); + call_insn.pos = insn.pos; + call_insn } /// Build a call to 
__extendhfsf2 (Float16 → float) with proper ABI. @@ -373,9 +372,9 @@ fn expand_int128_bitwise( let (a_lo, a_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); let (b_lo, b_hi) = extract_halves(func, &mut insns, insn.src[1], long_type); - let r_lo = alloc_reg64(func); + let r_lo = func.create_reg_pseudo(); insns.push(Instruction::binop(insn.op, r_lo, a_lo, b_lo, long_type, 64)); - let r_hi = alloc_reg64(func); + let r_hi = func.create_reg_pseudo(); insns.push(Instruction::binop(insn.op, r_hi, a_hi, b_hi, long_type, 64)); let int128_type = insn.typ.unwrap(); @@ -402,9 +401,9 @@ fn expand_int128_not( let (s_lo, s_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); - let r_lo = alloc_reg64(func); + let r_lo = func.create_reg_pseudo(); insns.push(Instruction::unop(Opcode::Not, r_lo, s_lo, long_type, 64)); - let r_hi = alloc_reg64(func); + let r_hi = func.create_reg_pseudo(); insns.push(Instruction::unop(Opcode::Not, r_hi, s_hi, long_type, 64)); let int128_type = insn.typ.unwrap(); @@ -432,7 +431,7 @@ fn expand_int128_neg( let (s_lo, s_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); let zero = func.create_const_pseudo(0); - let r_lo = alloc_reg64(func); + let r_lo = func.create_reg_pseudo(); insns.push(Instruction::binop( Opcode::SubC, r_lo, @@ -441,7 +440,7 @@ fn expand_int128_neg( long_type, 64, )); - let r_hi = alloc_reg64(func); + let r_hi = func.create_reg_pseudo(); let mut sbc = Instruction::binop(Opcode::SbcC, r_hi, zero, s_hi, long_type, 64); sbc.src.push(r_lo); insns.push(sbc); @@ -471,7 +470,7 @@ fn expand_int128_add( let (a_lo, a_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); let (b_lo, b_hi) = extract_halves(func, &mut insns, insn.src[1], long_type); - let r_lo = alloc_reg64(func); + let r_lo = func.create_reg_pseudo(); insns.push(Instruction::binop( Opcode::AddC, r_lo, @@ -480,7 +479,7 @@ fn expand_int128_add( long_type, 64, )); - let r_hi = alloc_reg64(func); + let r_hi = func.create_reg_pseudo(); 
let mut adc = Instruction::binop(Opcode::AdcC, r_hi, a_hi, b_hi, long_type, 64); adc.src.push(r_lo); insns.push(adc); @@ -510,7 +509,7 @@ fn expand_int128_sub( let (a_lo, a_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); let (b_lo, b_hi) = extract_halves(func, &mut insns, insn.src[1], long_type); - let r_lo = alloc_reg64(func); + let r_lo = func.create_reg_pseudo(); insns.push(Instruction::binop( Opcode::SubC, r_lo, @@ -519,7 +518,7 @@ fn expand_int128_sub( long_type, 64, )); - let r_hi = alloc_reg64(func); + let r_hi = func.create_reg_pseudo(); let mut sbc = Instruction::binop(Opcode::SbcC, r_hi, a_hi, b_hi, long_type, 64); sbc.src.push(r_lo); insns.push(sbc); @@ -549,7 +548,7 @@ fn expand_int128_mul( let (a_lo, a_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); let (b_lo, b_hi) = extract_halves(func, &mut insns, insn.src[1], long_type); - let low_result = alloc_reg64(func); + let low_result = func.create_reg_pseudo(); insns.push(Instruction::binop( Opcode::Mul, low_result, @@ -559,7 +558,7 @@ fn expand_int128_mul( 64, )); - let high_part = alloc_reg64(func); + let high_part = func.create_reg_pseudo(); insns.push(Instruction::binop( Opcode::UMulHi, high_part, @@ -569,7 +568,7 @@ fn expand_int128_mul( 64, )); - let cross1 = alloc_reg64(func); + let cross1 = func.create_reg_pseudo(); insns.push(Instruction::binop( Opcode::Mul, cross1, @@ -579,7 +578,7 @@ fn expand_int128_mul( 64, )); - let cross2 = alloc_reg64(func); + let cross2 = func.create_reg_pseudo(); insns.push(Instruction::binop( Opcode::Mul, cross2, @@ -589,7 +588,7 @@ fn expand_int128_mul( 64, )); - let sum1 = alloc_reg64(func); + let sum1 = func.create_reg_pseudo(); insns.push(Instruction::binop( Opcode::Add, sum1, @@ -598,7 +597,7 @@ fn expand_int128_mul( long_type, 64, )); - let final_hi = alloc_reg64(func); + let final_hi = func.create_reg_pseudo(); insns.push(Instruction::binop( Opcode::Add, final_hi, @@ -633,7 +632,7 @@ fn expand_int128_cmp_eq( let (a_lo, a_hi) = 
extract_halves(func, &mut insns, insn.src[0], long_type); let (b_lo, b_hi) = extract_halves(func, &mut insns, insn.src[1], long_type); - let xor_lo = alloc_reg64(func); + let xor_lo = func.create_reg_pseudo(); insns.push(Instruction::binop( Opcode::Xor, xor_lo, @@ -642,7 +641,7 @@ fn expand_int128_cmp_eq( long_type, 64, )); - let xor_hi = alloc_reg64(func); + let xor_hi = func.create_reg_pseudo(); insns.push(Instruction::binop( Opcode::Xor, xor_hi, @@ -651,7 +650,7 @@ fn expand_int128_cmp_eq( long_type, 64, )); - let or_result = alloc_reg64(func); + let or_result = func.create_reg_pseudo(); insns.push(Instruction::binop( Opcode::Or, or_result, @@ -681,7 +680,7 @@ fn expand_int128_cmp_ord( let (a_lo, a_hi) = extract_halves(func, &mut insns, insn.src[0], long_type); let (b_lo, b_hi) = extract_halves(func, &mut insns, insn.src[1], long_type); - let hi_eq = alloc_reg64(func); + let hi_eq = func.create_reg_pseudo(); insns.push(Instruction::binop( Opcode::SetEq, hi_eq, @@ -691,7 +690,7 @@ fn expand_int128_cmp_ord( 64, )); - let hi_cmp = alloc_reg64(func); + let hi_cmp = func.create_reg_pseudo(); insns.push(Instruction::binop( insn.op, hi_cmp, a_hi, b_hi, long_type, 64, )); @@ -704,7 +703,7 @@ fn expand_int128_cmp_ord( Opcode::SetGe | Opcode::SetAe => Opcode::SetAe, _ => unreachable!(), }; - let lo_cmp = alloc_reg64(func); + let lo_cmp = func.create_reg_pseudo(); insns.push(Instruction::binop(lo_op, lo_cmp, a_lo, b_lo, long_type, 64)); insns.push(Instruction::select( @@ -728,7 +727,7 @@ fn expand_int128_zext( // Zero-extend src to 64-bit if needed let lo = if src_size < 64 { - let ext = alloc_reg64(func); + let ext = func.create_reg_pseudo(); let mut zext_insn = Instruction::unop(Opcode::Zext, ext, src, long_type, 64); zext_insn.src_size = src_size; insns.push(zext_insn); @@ -765,7 +764,7 @@ fn expand_int128_sext( // Sign-extend src to 64-bit if needed let lo = if src_size < 64 { - let ext = alloc_reg64(func); + let ext = func.create_reg_pseudo(); let mut sext_insn = 
Instruction::unop(Opcode::Sext, ext, src, long_type, 64); sext_insn.src_size = src_size; insns.push(sext_insn); @@ -775,7 +774,7 @@ fn expand_int128_sext( }; let shift_amount = func.create_const_pseudo(63); - let hi = alloc_reg64(func); + let hi = func.create_reg_pseudo(); insns.push(Instruction::binop( Opcode::Asr, hi, @@ -796,6 +795,401 @@ fn expand_int128_sext( insns } +// ============================================================================ +// Int128 constant shift expansion helpers +// ============================================================================ + +/// Expand int128 Shl by a constant amount into 64-bit operations. +fn expand_int128_const_shl( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, + n: u32, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let int128_type = insn.typ.unwrap(); + let mut insns = Vec::new(); + + if n == 0 { + let (lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + lo, + hi, + int128_type, + 128, + )); + } else if n < 64 { + let (lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let shift_n = func.create_const_pseudo(n as i128); + let shift_compl = func.create_const_pseudo((64 - n) as i128); + + // new_hi = (hi << n) | (lo >> (64-n)) + let hi_shifted = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Shl, + hi_shifted, + hi, + shift_n, + long_type, + 64, + )); + let lo_shifted = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Lsr, + lo_shifted, + lo, + shift_compl, + long_type, + 64, + )); + let new_hi = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Or, + new_hi, + hi_shifted, + lo_shifted, + long_type, + 64, + )); + + // new_lo = lo << n + let new_lo = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Shl, + new_lo, + lo, + shift_n, + long_type, + 64, + 
)); + + insns.push(Instruction::binop( + Opcode::Pair64, + result, + new_lo, + new_hi, + int128_type, + 128, + )); + } else if n == 64 { + let (lo, _hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let zero = func.create_const_pseudo(0); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + zero, + lo, + int128_type, + 128, + )); + } else if n < 128 { + let (lo, _hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let shift_n = func.create_const_pseudo((n - 64) as i128); + let new_hi = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Shl, + new_hi, + lo, + shift_n, + long_type, + 64, + )); + let zero = func.create_const_pseudo(0); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + zero, + new_hi, + int128_type, + 128, + )); + } else { + let zero = func.create_const_pseudo(0); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + zero, + zero, + int128_type, + 128, + )); + } + insns +} + +/// Expand int128 Lsr (logical shift right) by a constant amount into 64-bit operations. 
+fn expand_int128_const_lsr( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, + n: u32, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let int128_type = insn.typ.unwrap(); + let mut insns = Vec::new(); + + if n == 0 { + let (lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + lo, + hi, + int128_type, + 128, + )); + } else if n < 64 { + let (lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let shift_n = func.create_const_pseudo(n as i128); + let shift_compl = func.create_const_pseudo((64 - n) as i128); + + // new_lo = (lo >> n) | (hi << (64-n)) + let lo_shifted = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Lsr, + lo_shifted, + lo, + shift_n, + long_type, + 64, + )); + let hi_shifted = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Shl, + hi_shifted, + hi, + shift_compl, + long_type, + 64, + )); + let new_lo = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Or, + new_lo, + lo_shifted, + hi_shifted, + long_type, + 64, + )); + + // new_hi = hi >> n (logical) + let new_hi = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Lsr, + new_hi, + hi, + shift_n, + long_type, + 64, + )); + + insns.push(Instruction::binop( + Opcode::Pair64, + result, + new_lo, + new_hi, + int128_type, + 128, + )); + } else if n == 64 { + let (_lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let zero = func.create_const_pseudo(0); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + hi, + zero, + int128_type, + 128, + )); + } else if n < 128 { + let (_lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let shift_n = func.create_const_pseudo((n - 64) as i128); + let new_lo = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Lsr, + new_lo, + hi, + shift_n, + 
long_type, + 64, + )); + let zero = func.create_const_pseudo(0); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + new_lo, + zero, + int128_type, + 128, + )); + } else { + let zero = func.create_const_pseudo(0); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + zero, + zero, + int128_type, + 128, + )); + } + insns +} + +/// Expand int128 Asr (arithmetic shift right) by a constant amount into 64-bit operations. +fn expand_int128_const_asr( + insn: &Instruction, + func: &mut Function, + types: &TypeTable, + n: u32, +) -> Vec { + let result = insn.target.expect("int128 op must have target"); + let long_type = types.ulong_id; + let int128_type = insn.typ.unwrap(); + let mut insns = Vec::new(); + + if n == 0 { + let (lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + lo, + hi, + int128_type, + 128, + )); + } else if n < 64 { + let (lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let shift_n = func.create_const_pseudo(n as i128); + let shift_compl = func.create_const_pseudo((64 - n) as i128); + + // new_lo = (lo >> n) | (hi << (64-n)) + let lo_shifted = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Lsr, + lo_shifted, + lo, + shift_n, + long_type, + 64, + )); + let hi_shifted = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Shl, + hi_shifted, + hi, + shift_compl, + long_type, + 64, + )); + let new_lo = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Or, + new_lo, + lo_shifted, + hi_shifted, + long_type, + 64, + )); + + // new_hi = hi >>> n (arithmetic) + let new_hi = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Asr, + new_hi, + hi, + shift_n, + long_type, + 64, + )); + + insns.push(Instruction::binop( + Opcode::Pair64, + result, + new_lo, + new_hi, + int128_type, + 128, + )); + } else if n == 64 { + let (_lo, hi) = extract_halves(func, &mut insns, 
insn.src[0], long_type); + let shift_63 = func.create_const_pseudo(63); + let sign = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Asr, + sign, + hi, + shift_63, + long_type, + 64, + )); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + hi, + sign, + int128_type, + 128, + )); + } else if n < 128 { + let (_lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let shift_n = func.create_const_pseudo((n - 64) as i128); + let new_lo = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Asr, + new_lo, + hi, + shift_n, + long_type, + 64, + )); + let shift_63 = func.create_const_pseudo(63); + let sign = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Asr, + sign, + hi, + shift_63, + long_type, + 64, + )); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + new_lo, + sign, + int128_type, + 128, + )); + } else { + let (_lo, hi) = extract_halves(func, &mut insns, insn.src[0], long_type); + let shift_63 = func.create_const_pseudo(63); + let sign = func.create_reg_pseudo(); + insns.push(Instruction::binop( + Opcode::Asr, + sign, + hi, + shift_63, + long_type, + 64, + )); + insns.push(Instruction::binop( + Opcode::Pair64, + result, + sign, + sign, + int128_type, + 128, + )); + } + insns +} + // ============================================================================ // Float16 expansion helpers // ============================================================================ @@ -816,15 +1210,15 @@ pub(crate) fn expand_float16_arith( let mut insns = Vec::new(); // Extend left to float - let left_ext = alloc_reg64(func); + let left_ext = func.create_reg_pseudo(); insns.push(build_f16_extend_call(left_ext, left, pos, types, target)); // Extend right to float - let right_ext = alloc_reg64(func); + let right_ext = func.create_reg_pseudo(); insns.push(build_f16_extend_call(right_ext, right, pos, types, target)); // Native float operation - let float_result = alloc_reg64(func); + let 
float_result = func.create_reg_pseudo(); insns.push(Instruction::binop( insn.op, float_result, @@ -859,10 +1253,10 @@ pub(crate) fn expand_float16_neg( let src = insn.src[0]; let mut insns = Vec::new(); - let src_ext = alloc_reg64(func); + let src_ext = func.create_reg_pseudo(); insns.push(build_f16_extend_call(src_ext, src, pos, types, target)); - let neg_result = alloc_reg64(func); + let neg_result = func.create_reg_pseudo(); insns.push(Instruction::unop( Opcode::FNeg, neg_result, @@ -892,10 +1286,10 @@ pub(crate) fn expand_float16_cmp( let right = insn.src[1]; let mut insns = Vec::new(); - let left_ext = alloc_reg64(func); + let left_ext = func.create_reg_pseudo(); insns.push(build_f16_extend_call(left_ext, left, pos, types, target)); - let right_ext = alloc_reg64(func); + let right_ext = func.create_reg_pseudo(); insns.push(build_f16_extend_call(right_ext, right, pos, types, target)); // Float comparison — result type is int, keep original type/size @@ -1005,6 +1399,23 @@ pub(crate) fn map_int128_expand( insn, ctx.func, types, ))) } + // Constant-amount shifts: expand if shift amount is a known constant + Opcode::Shl | Opcode::Lsr | Opcode::Asr => { + let typ = insn.typ?; + if types.kind(typ) != TypeKind::Int128 { + return None; + } + // Only expand if shift amount is a compile-time constant + let shift_val = ctx.func.const_val(insn.src[1])?; + let n = shift_val as u32; + let expanded = match insn.op { + Opcode::Shl => expand_int128_const_shl(insn, ctx.func, types, n), + Opcode::Lsr => expand_int128_const_lsr(insn, ctx.func, types, n), + Opcode::Asr => expand_int128_const_asr(insn, ctx.func, types, n), + _ => unreachable!(), + }; + Some(MappedInsn::Replace(expanded)) + } // Extensions to 128: result type is int128 Opcode::Zext => { let typ = insn.typ?; diff --git a/cc/arch/x86_64/codegen.rs b/cc/arch/x86_64/codegen.rs index 5ee4f475..87a28e9e 100644 --- a/cc/arch/x86_64/codegen.rs +++ b/cc/arch/x86_64/codegen.rs @@ -2846,7 +2846,7 @@ impl X86_64CodeGen { match 
&src_loc { Loc::Imm(v) => { let lo = *v as i64; - let hi = (*v >> 64) as i64; + let hi = (*v >> 64) as u64 as i64; // Store lo half if lo > i32::MAX as i64 || lo < i32::MIN as i64 { self.push_lir(X86Inst::MovAbs { diff --git a/cc/arch/x86_64/expression.rs b/cc/arch/x86_64/expression.rs index 091133f4..46fa2a8e 100644 --- a/cc/arch/x86_64/expression.rs +++ b/cc/arch/x86_64/expression.rs @@ -610,7 +610,7 @@ impl X86_64CodeGen { let loc = self.get_location(pseudo); match &loc { Loc::Imm(v) => { - let hi = (*v >> 64) as i64; + let hi = (*v >> 64) as u64 as i64; if hi > i32::MAX as i64 || hi < i32::MIN as i64 { self.push_lir(X86Inst::MovAbs { imm: hi, dst }); } else { diff --git a/cc/arch/x86_64/mapping.rs b/cc/arch/x86_64/mapping.rs index 616567dc..f0982e0c 100644 --- a/cc/arch/x86_64/mapping.rs +++ b/cc/arch/x86_64/mapping.rs @@ -10,10 +10,12 @@ // use crate::arch::mapping::{ - expand_float16_arith, expand_float16_cmp, expand_float16_neg, map_int128_divmod, - map_int128_expand, map_int128_float_convert, ArchMapper, MappedInsn, MappingCtx, + build_f16_convert_call, expand_float16_arith, expand_float16_cmp, expand_float16_neg, + float_suffix, map_int128_divmod, map_int128_expand, map_int128_float_convert, ArchMapper, + MappedInsn, MappingCtx, }; use crate::ir::{Instruction, Opcode}; +use crate::rtlib::RtlibNames; use crate::types::TypeKind; /// x86-64 instruction mapper. 
@@ -90,8 +92,85 @@ impl X86_64Mapper { } None } - // Float16 conversions handled by linearizer, no mapping needed - Opcode::FCvtF => None, + // Float16↔float/double/longdouble conversions + Opcode::FCvtF => { + let src_typ = insn.src_typ?; + let dst_typ = insn.typ?; + let src_kind = types.kind(src_typ); + let dst_kind = types.kind(dst_typ); + if src_kind == TypeKind::Float16 { + let to_suffix = float_suffix(dst_kind, ctx.target); + let rtlib = RtlibNames::new(ctx.target); + let func_name = rtlib.float16_convert("hf", to_suffix)?; + let call = build_f16_convert_call( + insn, func_name, src_typ, dst_typ, types, ctx.target, + ); + Some(MappedInsn::Replace(vec![call])) + } else if dst_kind == TypeKind::Float16 { + let from_suffix = float_suffix(src_kind, ctx.target); + let rtlib = RtlibNames::new(ctx.target); + let func_name = rtlib.float16_convert(from_suffix, "hf")?; + let call = build_f16_convert_call( + insn, func_name, src_typ, dst_typ, types, ctx.target, + ); + Some(MappedInsn::Replace(vec![call])) + } else { + None + } + } + // Float16↔integer conversions + Opcode::FCvtS | Opcode::FCvtU => { + // Float16 → int + let src_typ = insn.src_typ?; + if types.kind(src_typ) != TypeKind::Float16 { + return None; + } + let dst_typ = insn.typ?; + let dst_size = types.size_bits(dst_typ); + let is_unsigned = insn.op == Opcode::FCvtU; + let to_suffix = if is_unsigned { + if dst_size <= 32 { + "usi" + } else { + "udi" + } + } else if dst_size <= 32 { + "si" + } else { + "di" + }; + let rtlib = RtlibNames::new(ctx.target); + let func_name = rtlib.float16_convert("hf", to_suffix)?; + let call = + build_f16_convert_call(insn, func_name, src_typ, dst_typ, types, ctx.target); + Some(MappedInsn::Replace(vec![call])) + } + Opcode::SCvtF | Opcode::UCvtF => { + // int → Float16 + let dst_typ = insn.typ?; + if types.kind(dst_typ) != TypeKind::Float16 { + return None; + } + let src_typ = insn.src_typ?; + let src_size = types.size_bits(src_typ); + let is_unsigned = insn.op == 
Opcode::UCvtF; + let from_suffix = if is_unsigned { + if src_size <= 32 { + "usi" + } else { + "udi" + } + } else if src_size <= 32 { + "si" + } else { + "di" + }; + let rtlib = RtlibNames::new(ctx.target); + let func_name = rtlib.float16_convert(from_suffix, "hf")?; + let call = + build_f16_convert_call(insn, func_name, src_typ, dst_typ, types, ctx.target); + Some(MappedInsn::Replace(vec![call])) + } _ => None, } } @@ -464,4 +543,227 @@ mod tests { }; assert_legal(&mapper.map_insn(&insn, &mut ctx)); } + + // ======================================================================== + // Int128 constant shifts + // ======================================================================== + + #[test] + fn test_x86_64_int128_const_shl_expands() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // Create Shl.128 with constant shift amount + let mut func = make_minimal_func(&types); + let shift_const = func.create_const_pseudo(5); + let insn = Instruction::binop( + Opcode::Shl, + PseudoId(2), + PseudoId(0), + shift_const, + types.int128_id, + 128, + ); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_expand(&mapper.map_insn(&insn, &mut ctx)); + } + + #[test] + fn test_x86_64_int128_const_lsr_expands() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + let mut func = make_minimal_func(&types); + let shift_const = func.create_const_pseudo(64); + let insn = Instruction::binop( + Opcode::Lsr, + PseudoId(2), + PseudoId(0), + shift_const, + types.uint128_id, + 128, + ); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_expand(&mapper.map_insn(&insn, &mut ctx)); + } + + #[test] + fn test_x86_64_int128_const_asr_expands() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = 
X86_64Mapper; + + let mut func = make_minimal_func(&types); + let shift_const = func.create_const_pseudo(127); + let insn = Instruction::binop( + Opcode::Asr, + PseudoId(2), + PseudoId(0), + shift_const, + types.int128_id, + 128, + ); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_expand(&mapper.map_insn(&insn, &mut ctx)); + } + + #[test] + fn test_x86_64_int128_variable_shift_legal() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // Variable shift (PseudoId(1) is a register, not a constant) → stays Legal + let insn = Instruction::binop( + Opcode::Shl, + PseudoId(2), + PseudoId(0), + PseudoId(1), + types.int128_id, + 128, + ); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_legal(&mapper.map_insn(&insn, &mut ctx)); + } + + // ======================================================================== + // Float16 conversions + // ======================================================================== + + #[test] + fn test_x86_64_float16_to_float_conversion() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // Float16 → float should expand to __extendhfsf2 + let insn = make_convert_insn(Opcode::FCvtF, types.float_id, 32, types.float16_id, 16); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__extendhfsf2"); + } + + #[test] + fn test_x86_64_float_to_float16_conversion() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // float → Float16 should expand to __truncsfhf2 + let insn = make_convert_insn(Opcode::FCvtF, types.float16_id, 16, types.float_id, 32); + let 
mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__truncsfhf2"); + } + + #[test] + fn test_x86_64_float16_to_double_conversion() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + let insn = make_convert_insn(Opcode::FCvtF, types.double_id, 64, types.float16_id, 16); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__extendhfdf2"); + } + + #[test] + fn test_x86_64_float16_to_int_conversion() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // Float16 → int (signed) should call __fixhfsi + let insn = make_convert_insn(Opcode::FCvtS, types.int_id, 32, types.float16_id, 16); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__fixhfsi"); + } + + #[test] + fn test_x86_64_int_to_float16_conversion() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // int (signed) → Float16 should call __floatsihf + let insn = make_convert_insn(Opcode::SCvtF, types.float16_id, 16, types.int_id, 32); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__floatsihf"); + } + + #[test] + fn test_x86_64_float16_to_uint_conversion() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // Float16 → unsigned int should call __fixunshfsi + let insn = 
make_convert_insn(Opcode::FCvtU, types.uint_id, 32, types.float16_id, 16); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__fixunshfsi"); + } + + #[test] + fn test_x86_64_uint_to_float16_conversion() { + let target = Target::new(Arch::X86_64, Os::Linux); + let types = TypeTable::new(&target); + let mapper = X86_64Mapper; + + // unsigned int → Float16 should call __floatunsihf + let insn = make_convert_insn(Opcode::UCvtF, types.float16_id, 16, types.uint_id, 32); + let mut func = make_minimal_func(&types); + let mut ctx = MappingCtx { + func: &mut func, + types: &types, + target: &target, + }; + assert_libcall(&mapper.map_insn(&insn, &mut ctx), "__floatunsihf"); + } } diff --git a/cc/ir/linearize.rs b/cc/ir/linearize.rs index cdf250da..b3e813f1 100644 --- a/cc/ir/linearize.rs +++ b/cc/ir/linearize.rs @@ -15,13 +15,12 @@ use super::{ AsmConstraint, AsmData, BasicBlock, BasicBlockId, CallAbiInfo, Function, Initializer, Instruction, MemoryOrder, Module, Opcode, Pseudo, PseudoId, }; -use crate::abi::{get_abi_for_conv, ArgClass, CallingConv}; +use crate::abi::{get_abi_for_conv, CallingConv}; use crate::diag::{error, get_all_stream_names, Position}; use crate::parse::ast::{ AsmOperand, AssignOp, BinaryOp, BlockItem, Declaration, Designator, Expr, ExprKind, ExternalDecl, ForInit, FunctionDef, InitElement, OffsetOfPath, Stmt, TranslationUnit, UnaryOp, }; -use crate::rtlib::{Float16Abi, RtlibNames}; use crate::strings::{StringId, StringTable}; use crate::symbol::{SymbolId, SymbolTable}; use crate::target::Target; @@ -4652,87 +4651,6 @@ impl<'a> Linearizer<'a> { // Emit conversion if needed let src_is_float = self.types.is_float(src_type); let dst_is_float = self.types.is_float(cast_type); - let src_kind = self.types.kind(src_type); - let dst_kind = self.types.kind(cast_type); - - // Check for Float16 conversions that need rtlib - let 
src_is_float16 = src_kind == TypeKind::Float16; - let dst_is_float16 = dst_kind == TypeKind::Float16; - - // Get long double suffix based on target architecture - let ld_suffix = if self.target.arch == crate::target::Arch::X86_64 { - "xf" - } else { - "tf" - }; - - // Skip Float16 handling for Int128 operands — no direct hf↔ti rtlib exists. - let src_is_int128 = src_kind == TypeKind::Int128; - let dst_is_int128 = dst_kind == TypeKind::Int128; - if (src_is_float16 || dst_is_float16) && !src_is_int128 && !dst_is_int128 { - let rtlib = RtlibNames::new(self.target); - - let (from_suffix, to_suffix) = if src_is_float16 && dst_is_float { - // Float16 -> float/double/long double - let to = match dst_kind { - TypeKind::Float => "sf", - TypeKind::Double => "df", - TypeKind::LongDouble => ld_suffix, - _ => "", - }; - ("hf", to) - } else if dst_is_float16 && src_is_float { - // float/double/long double -> Float16 - let from = match src_kind { - TypeKind::Float => "sf", - TypeKind::Double => "df", - TypeKind::LongDouble => ld_suffix, - _ => "", - }; - (from, "hf") - } else if src_is_float16 && !dst_is_float { - // Float16 -> integer - let dst_size = self.types.size_bits(cast_type); - let is_unsigned = self.types.is_unsigned(cast_type); - let to = if is_unsigned { - if dst_size <= 32 { - "usi" - } else { - "udi" - } - } else if dst_size <= 32 { - "si" - } else { - "di" - }; - ("hf", to) - } else if dst_is_float16 && !src_is_float { - // Integer -> Float16 - let src_size = self.types.size_bits(src_type); - let is_unsigned = self.types.is_unsigned(src_type); - let from = if is_unsigned { - if src_size <= 32 { - "usi" - } else { - "udi" - } - } else if src_size <= 32 { - "si" - } else { - "di" - }; - (from, "hf") - } else { - ("", "") - }; - - if !from_suffix.is_empty() && !to_suffix.is_empty() { - if let Some(func_name) = rtlib.float16_convert(from_suffix, to_suffix) { - // Use Float16-specific call that handles x86-64 soft-float ABI - return 
self.emit_float16_convert_call(func_name, src, src_type, cast_type); - } - } - } if src_is_float && !dst_is_float { // Float to integer conversion @@ -8063,83 +7981,6 @@ impl<'a> Linearizer<'a> { result_sym } - /// Emit a call to a Float16 conversion rtlib function with correct ABI for x86-64. - /// - /// On x86-64 without native FP16, Float16 values are passed/returned as integers: - /// - Float16 argument: 16-bit value in RDI (zero-extended) - /// - Float16 return: 16-bit value in AX - /// - Other float types: standard SSE ABI (XMM0) - /// - Integer types: standard integer ABI - fn emit_float16_convert_call( - &mut self, - func_name: &str, - src: PseudoId, - src_type: TypeId, - dst_type: TypeId, - ) -> PseudoId { - let result = self.alloc_pseudo(); - let dst_size = self.types.size_bits(dst_type); - let src_kind = self.types.kind(src_type); - let dst_kind = self.types.kind(dst_type); - - // Query rtlib for Float16 ABI - this is an rtlib attribute - let rtlib = RtlibNames::new(self.target); - let f16_abi = rtlib.float16_abi(); - - // For argument type, use u16 if src is Float16 with integer ABI (compiler-rt) - let arg_type_for_abi = if f16_abi == Float16Abi::Integer && src_kind == TypeKind::Float16 { - self.types.ushort_id - } else { - src_type - }; - - let arg_vals = vec![src]; - let arg_types = vec![arg_type_for_abi]; - - // Compute ABI classification based on rtlib requirements - let param_classes = if f16_abi == Float16Abi::Integer && src_kind == TypeKind::Float16 { - // compiler-rt: Float16 passed as 16-bit integer (zero-extended) - vec![ArgClass::Extend { - signed: false, - size_bits: 16, - }] - } else { - // libgcc or non-Float16: use standard ABI classification - let abi = get_abi_for_conv(self.current_calling_conv, self.target); - arg_types - .iter() - .map(|&t| abi.classify_param(t, self.types)) - .collect() - }; - - let ret_class = if f16_abi == Float16Abi::Integer && dst_kind == TypeKind::Float16 { - // compiler-rt: Float16 returned as 16-bit integer - 
ArgClass::Extend { - signed: false, - size_bits: 16, - } - } else { - // libgcc or non-Float16: use standard ABI classification - let abi = get_abi_for_conv(self.current_calling_conv, self.target); - abi.classify_return(dst_type, self.types) - }; - - let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); - - let mut call_insn = Instruction::call( - Some(result), - func_name, - arg_vals, - arg_types, - dst_type, - dst_size, - ); - call_insn.abi_info = Some(call_abi_info); - self.emit(call_insn); - - result - } - fn emit_compare_zero(&mut self, val: PseudoId, operand_typ: TypeId) -> PseudoId { let result = self.alloc_pseudo(); let zero = self.emit_const(0, operand_typ); diff --git a/cc/ir/lower.rs b/cc/ir/lower.rs index fefc8e1a..d9a006b7 100644 --- a/cc/ir/lower.rs +++ b/cc/ir/lower.rs @@ -202,10 +202,7 @@ fn sequentialize_copies(copies: &[CopyInfo], func: &mut Function) -> Vec, + func_name: &str, + args: Vec, + arg_types: Vec, + ret_type: TypeId, + conv: CallingConv, + types: &TypeTable, + target_info: &Target, + ) -> Self { + let ret_size = types.size_bits(ret_type); + let abi = get_abi_for_conv(conv, target_info); + let param_classes: Vec<_> = arg_types + .iter() + .map(|&t| abi.classify_param(t, types)) + .collect(); + let ret_class = abi.classify_return(ret_type, types); + let call_abi_info = Box::new(CallAbiInfo::new(param_classes, ret_class)); + + let mut insn = Self::call(target, func_name, args, arg_types, ret_type, ret_size); + insn.abi_info = Some(call_abi_info); + insn + } + /// Create an indirect call instruction (call through function pointer) pub fn call_indirect( target: Option, @@ -1473,8 +1505,16 @@ impl Function { id } - /// Create a new constant integer pseudo and return its ID - /// The pseudo is added to self.pseudos + /// Create a new register pseudo and return its ID. + /// The pseudo is added to self.pseudos. 
+ pub fn create_reg_pseudo(&mut self) -> PseudoId { + let id = self.alloc_pseudo(); + self.add_pseudo(Pseudo::reg(id, id.0)); + id + } + + /// Create a new constant integer pseudo and return its ID. + /// The pseudo is added to self.pseudos. pub fn create_const_pseudo(&mut self, value: i128) -> PseudoId { let id = self.alloc_pseudo(); let pseudo = Pseudo::val(id, value); @@ -2308,4 +2348,104 @@ mod tests { ))); assert!(insn.returns_two_regs()); } + + // ======================================================================== + // Function::create_reg_pseudo + // ======================================================================== + + #[test] + fn test_create_reg_pseudo() { + let types = TypeTable::new(&Target::host()); + let mut func = Function::new("test", types.int_id); + func.next_pseudo = 10; + + let id1 = func.create_reg_pseudo(); + assert_eq!(id1, PseudoId(10)); + assert_eq!(func.next_pseudo, 11); + + // Verify the pseudo was registered + let pseudo = func.get_pseudo(id1).expect("pseudo must exist"); + assert!(matches!(pseudo.kind, PseudoKind::Reg(_))); + + let id2 = func.create_reg_pseudo(); + assert_eq!(id2, PseudoId(11)); + assert_eq!(func.next_pseudo, 12); + + // IDs must be distinct + assert_ne!(id1, id2); + } + + // ======================================================================== + // Instruction::call_with_abi + // ======================================================================== + + #[test] + fn test_call_with_abi_basic() { + let target = Target::host(); + let types = TypeTable::new(&target); + + let insn = Instruction::call_with_abi( + Some(PseudoId(2)), + "__divti3", + vec![PseudoId(0), PseudoId(1)], + vec![types.int128_id, types.int128_id], + types.int128_id, + CallingConv::C, + &types, + &target, + ); + + assert_eq!(insn.op, Opcode::Call); + assert_eq!(insn.target, Some(PseudoId(2))); + assert_eq!(insn.func_name.as_deref(), Some("__divti3")); + assert_eq!(insn.src.len(), 2); + assert_eq!(insn.arg_types.len(), 2); + 
assert!(insn.abi_info.is_some()); + + let abi = insn.abi_info.as_ref().unwrap(); + assert_eq!(abi.params.len(), 2); + } + + #[test] + fn test_call_with_abi_conversion() { + let target = Target::host(); + let types = TypeTable::new(&target); + + // float → signed int128 (__fixsfti) + let insn = Instruction::call_with_abi( + Some(PseudoId(1)), + "__fixsfti", + vec![PseudoId(0)], + vec![types.float_id], + types.int128_id, + CallingConv::C, + &types, + &target, + ); + + assert_eq!(insn.op, Opcode::Call); + assert_eq!(insn.func_name.as_deref(), Some("__fixsfti")); + assert!(insn.abi_info.is_some()); + assert_eq!(insn.abi_info.as_ref().unwrap().params.len(), 1); + } + + #[test] + fn test_call_with_abi_sets_size() { + let target = Target::host(); + let types = TypeTable::new(&target); + + let insn = Instruction::call_with_abi( + Some(PseudoId(1)), + "__addtf3", + vec![PseudoId(0)], + vec![types.double_id], + types.double_id, + CallingConv::C, + &types, + &target, + ); + + // Size should be set from ret_type + assert_eq!(insn.size, types.size_bits(types.double_id)); + } } diff --git a/cc/ir/test_linearize.rs b/cc/ir/test_linearize.rs index 710ee9e3..52ab2e6f 100644 --- a/cc/ir/test_linearize.rs +++ b/cc/ir/test_linearize.rs @@ -3789,16 +3789,19 @@ fn test_float16_to_float_conversion() { }; let module = ctx.linearize(&tu); - // Find call to __extendhfsf2 + // Float16→float now emits FCvtF with src_typ=Float16 (mapping pass lowers to rtlib) let func = &module.functions[0]; - let has_rtlib_call = func.blocks.iter().any(|bb| { + let has_fcvtf = func.blocks.iter().any(|bb| { bb.insns.iter().any(|insn| { - insn.op == Opcode::Call && insn.func_name.as_deref() == Some("__extendhfsf2") + insn.op == Opcode::FCvtF + && insn + .src_typ + .is_some_and(|t| ctx.types.kind(t) == TypeKind::Float16) }) }); assert!( - has_rtlib_call, - "Float16 to float conversion should call __extendhfsf2" + has_fcvtf, + "Float16 to float conversion should emit FCvtF with Float16 src_typ" ); } @@ -3838,16 
+3841,19 @@ fn test_float_to_float16_conversion() { }; let module = ctx.linearize(&tu); - // Find call to __truncsfhf2 + // float→Float16 now emits FCvtF with Float16 dst type (mapping pass lowers to rtlib) let func = &module.functions[0]; - let has_rtlib_call = func.blocks.iter().any(|bb| { + let has_fcvtf = func.blocks.iter().any(|bb| { bb.insns.iter().any(|insn| { - insn.op == Opcode::Call && insn.func_name.as_deref() == Some("__truncsfhf2") + insn.op == Opcode::FCvtF + && insn + .typ + .is_some_and(|t| ctx.types.kind(t) == TypeKind::Float16) }) }); assert!( - has_rtlib_call, - "Float to Float16 conversion should call __truncsfhf2" + has_fcvtf, + "Float to Float16 conversion should emit FCvtF with Float16 dst type" ); } @@ -3887,16 +3893,19 @@ fn test_float16_to_int_conversion() { }; let module = ctx.linearize(&tu); - // Find call to __fixhfsi + // Float16→int now emits FCvtS with Float16 src_typ (mapping pass lowers to rtlib) let func = &module.functions[0]; - let has_rtlib_call = func.blocks.iter().any(|bb| { - bb.insns - .iter() - .any(|insn| insn.op == Opcode::Call && insn.func_name.as_deref() == Some("__fixhfsi")) + let has_fcvts = func.blocks.iter().any(|bb| { + bb.insns.iter().any(|insn| { + insn.op == Opcode::FCvtS + && insn + .src_typ + .is_some_and(|t| ctx.types.kind(t) == TypeKind::Float16) + }) }); assert!( - has_rtlib_call, - "Float16 to int conversion should call __fixhfsi" + has_fcvts, + "Float16 to int conversion should emit FCvtS with Float16 src_typ" ); } @@ -3936,16 +3945,19 @@ fn test_int_to_float16_conversion() { }; let module = ctx.linearize(&tu); - // Find call to __floatsihf + // int→Float16 now emits SCvtF with Float16 dst type (mapping pass lowers to rtlib) let func = &module.functions[0]; - let has_rtlib_call = func.blocks.iter().any(|bb| { - bb.insns - .iter() - .any(|insn| insn.op == Opcode::Call && insn.func_name.as_deref() == Some("__floatsihf")) + let has_scvtf = func.blocks.iter().any(|bb| { + bb.insns.iter().any(|insn| { + 
insn.op == Opcode::SCvtF + && insn + .typ + .is_some_and(|t| ctx.types.kind(t) == TypeKind::Float16) + }) }); assert!( - has_rtlib_call, - "Int to Float16 conversion should call __floatsihf" + has_scvtf, + "Int to Float16 conversion should emit SCvtF with Float16 dst type" ); } diff --git a/cc/tests/codegen/misc.rs b/cc/tests/codegen/misc.rs index 999eee64..4239a1b2 100644 --- a/cc/tests/codegen/misc.rs +++ b/cc/tests/codegen/misc.rs @@ -4810,3 +4810,178 @@ int main(void) { 0 ); } + +/// Test uint128 large constant sign-extension bug fix. +/// Verifies that (uint128)0xFFFFFFFFFFFFFFFFULL has hi=0, lo=max64. +#[test] +fn codegen_uint128_large_constant() { + let code = r#" +typedef unsigned __int128 uint128; + +int main(void) { + /* Build 0xFFFFFFFFFFFFFFFF via runtime to ensure it's not constant-folded + differently. */ + unsigned long long max64 = ~0ULL; + uint128 val = (uint128)max64; + + /* lo half should be all 1s, hi half should be 0 */ + unsigned long long lo = (unsigned long long)val; + unsigned long long hi = (unsigned long long)(val >> 64); + if (lo != max64) return 1; + if (hi != 0) return 2; + + /* Zero */ + uint128 z = 0; + if ((unsigned long long)z != 0) return 3; + if ((unsigned long long)(z >> 64) != 0) return 4; + + /* Value 1 */ + uint128 one = 1; + if ((unsigned long long)one != 1) return 5; + if ((unsigned long long)(one >> 64) != 0) return 6; + + /* Constant that fills both halves */ + uint128 full = ((uint128)max64 << 64) | (uint128)max64; + if ((unsigned long long)full != max64) return 7; + if ((unsigned long long)(full >> 64) != max64) return 8; + + /* Value that fits in 64 bits exactly */ + uint128 mid = (uint128)0x123456789ABCDEF0ULL; + if ((unsigned long long)mid != 0x123456789ABCDEF0ULL) return 9; + if ((unsigned long long)(mid >> 64) != 0) return 10; + + return 0; +} +"#; + assert_eq!( + compile_and_run("codegen_uint128_large_constant", code, &[]), + 0 + ); +} + +/// Test int128 constant shifts (Shl/Lsr/Asr) decomposed in the mapping 
pass. +#[test] +fn codegen_int128_const_shifts() { + let code = r#" +typedef unsigned __int128 uint128; +typedef __int128 int128; + +int main(void) { + unsigned long long max64 = ~0ULL; + + /* ===== SHL tests (returns 1-19) ===== */ + { + uint128 a = 1; + + /* shift by 0: identity */ + uint128 r = a << 0; + if ((unsigned long long)r != 1) return 1; + if ((unsigned long long)(r >> 64) != 0) return 2; + + /* shift by 1 */ + r = a << 1; + if ((unsigned long long)r != 2) return 3; + + /* shift by 32 */ + r = a << 32; + if ((unsigned long long)r != (1ULL << 32)) return 4; + + /* shift by 63: crosses lo/hi boundary */ + r = a << 63; + if ((unsigned long long)r != (1ULL << 63)) return 5; + if ((unsigned long long)(r >> 64) != 0) return 6; + + /* shift by 64: lo moves to hi entirely */ + r = a << 64; + if ((unsigned long long)r != 0) return 7; + if ((unsigned long long)(r >> 64) != 1) return 8; + + /* shift by 65 */ + r = a << 65; + if ((unsigned long long)r != 0) return 9; + if ((unsigned long long)(r >> 64) != 2) return 10; + + /* shift by 127 */ + r = a << 127; + if ((unsigned long long)r != 0) return 11; + if ((unsigned long long)(r >> 64) != (1ULL << 63)) return 12; + } + + /* ===== LSR tests (returns 20-39) ===== */ + { + /* Start with hi bit set */ + uint128 a = (uint128)1 << 127; + + /* shift by 0: identity */ + uint128 r = a >> 0; + if ((unsigned long long)(r >> 64) != (1ULL << 63)) return 20; + + /* shift by 1 */ + r = a >> 1; + if ((unsigned long long)(r >> 64) != (1ULL << 62)) return 21; + + /* shift by 32 */ + r = a >> 32; + if ((unsigned long long)(r >> 64) != (1ULL << 31)) return 22; + + /* shift by 63 */ + r = a >> 63; + if ((unsigned long long)(r >> 64) != 1) return 23; + if ((unsigned long long)r != 0) return 24; + + /* shift by 64 */ + r = a >> 64; + if ((unsigned long long)(r >> 64) != 0) return 25; + if ((unsigned long long)r != (1ULL << 63)) return 26; + + /* shift by 65 */ + r = a >> 65; + if ((unsigned long long)r != (1ULL << 62)) return 27; + + /* 
shift by 127 */ + r = a >> 127; + if ((unsigned long long)r != 1) return 28; + if ((unsigned long long)(r >> 64) != 0) return 29; + } + + /* ===== ASR tests (returns 40-59) ===== */ + { + /* Negative int128 */ + int128 neg = -1; + + /* shift by 0: identity */ + int128 r = neg >> 0; + if (r != -1) return 40; + + /* shift by 1: still all 1s */ + r = neg >> 1; + if (r != -1) return 41; + + /* shift by 63 */ + r = neg >> 63; + if (r != -1) return 42; + + /* shift by 64 */ + r = neg >> 64; + if (r != -1) return 43; + + /* shift by 127 */ + r = neg >> 127; + if (r != -1) return 44; + + /* Negative with specific pattern: -2 = 0xFFF...FFFE */ + int128 neg2 = -2; + r = neg2 >> 1; + if (r != -1) return 45; + + /* Large positive shifted right arithmetically stays positive */ + int128 big = (int128)1 << 126; /* 0x40...0 */ + r = big >> 1; + if ((unsigned long long)(r >> 64) != (1ULL << 61)) return 46; + } + + return 0; +} +"#; + assert_eq!(compile_and_run("codegen_int128_const_shifts", code, &[]), 0); +} From 2fb14de6c50640078b4418ad763614114ceeed16 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Mon, 23 Mar 2026 06:25:02 +0000 Subject: [PATCH 11/18] cc: panic on unexpected int128 binop in x86_64 backend The mapping pass now expands all int128 binops except variable shifts (Shl/Lsr/Asr). Replace the silent `_ => {}` catch-all in emit_int128_binop with a panic so any unexpanded opcode reaching the backend is caught immediately instead of silently dropped. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cc/arch/x86_64/expression.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cc/arch/x86_64/expression.rs b/cc/arch/x86_64/expression.rs index 46fa2a8e..c3403e94 100644 --- a/cc/arch/x86_64/expression.rs +++ b/cc/arch/x86_64/expression.rs @@ -731,7 +731,10 @@ impl X86_64CodeGen { Opcode::Asr => { self.emit_int128_asr(src1, src2, &dst_loc); } - _ => {} + _ => panic!( + "emit_int128_binop: unexpected opcode {:?} (mapping pass should have expanded it)", + insn.op + ), } } From 541f92c214e1518c61fd27c4bb56dd625ffd5e9a Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Mon, 23 Mar 2026 06:39:30 +0000 Subject: [PATCH 12/18] cc: replace lexer char_class() match with compile-time 256-byte lookup table Add QUOTE and COMMENT flags to the character classification, build the table via const fn at compile time, and use pre-computed class bits in get_special() to dispatch string literals and comments. Includes unit tests covering every byte category. Co-Authored-By: Claude Opus 4.6 (1M context) --- cc/token/lexer.rs | 159 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 138 insertions(+), 21 deletions(-) diff --git a/cc/token/lexer.rs b/cc/token/lexer.rs index 13f089f4..f387d6af 100644 --- a/cc/token/lexer.rs +++ b/cc/token/lexer.rs @@ -177,15 +177,17 @@ const HEX: u8 = 4; const EXP: u8 = 8; const DOT: u8 = 16; const VALID_SECOND: u8 = 32; // Can be second char of 2-char operator +const QUOTE: u8 = 64; // ' " +const COMMENT: u8 = 128; // / -/// Character classification table -fn char_class(c: u8) -> u8 { +/// Classify a single byte (mirrors the old match arms exactly, plus QUOTE and COMMENT). 
+const fn classify_char(c: u8) -> u8 { match c { b'0'..=b'9' => DIGIT | HEX, b'A'..=b'D' | b'F' => LETTER | HEX, - b'E' => LETTER | HEX | EXP, // E for exponent + b'E' => LETTER | HEX | EXP, b'G'..=b'O' => LETTER, - b'P' => LETTER | EXP, // P for hex float exponent + b'P' => LETTER | EXP, b'Q'..=b'Z' => LETTER, b'a'..=b'd' | b'f' => LETTER | HEX, b'e' => LETTER | HEX | EXP, @@ -195,10 +197,32 @@ fn char_class(c: u8) -> u8 { b'_' => LETTER, b'.' => DOT | VALID_SECOND, b'=' | b'+' | b'-' | b'>' | b'<' | b'&' | b'|' | b'#' => VALID_SECOND, + b'\'' | b'"' => QUOTE, + b'/' => COMMENT, _ => 0, } } +/// Build the 256-byte lookup table at compile time. +const fn build_char_table() -> [u8; 256] { + let mut table = [0u8; 256]; + let mut i: usize = 0; + while i < 256 { + table[i] = classify_char(i as u8); + i += 1; + } + table +} + +/// Compile-time character classification table. +const CHAR_TABLE: [u8; 256] = build_char_table(); + +/// Character classification via table lookup. +#[inline(always)] +fn char_class(c: u8) -> u8 { + CHAR_TABLE[c as usize] +} + #[inline] fn is_digit(c: u8) -> bool { char_class(c) & DIGIT != 0 @@ -770,15 +794,12 @@ impl<'a, 'b> Tokenizer<'a, 'b> { } /// Get a special token (operator/punctuator) - fn get_special(&mut self, first: u8) -> Option { + fn get_special(&mut self, first: u8, class: u8) -> Option { let pos = self.pos(); // Check for string/char literals - if first == b'"' { - return Some(self.get_string_or_char(b'"', false)); - } - if first == b'\'' { - return Some(self.get_string_or_char(b'\'', false)); + if class & QUOTE != 0 { + return Some(self.get_string_or_char(first, false)); } // Check for .digit (floating point number) @@ -790,10 +811,10 @@ impl<'a, 'b> Tokenizer<'a, 'b> { } // Check for comments (mode-dependent) - match self.mode { - LexerMode::C => { - // C mode: // and /* */ comments - if first == b'/' { + if class & COMMENT != 0 { + match self.mode { + LexerMode::C => { + // C mode: // and /* */ comments let next = 
self.peekchar(); if next == b'/' as i32 { self.nextchar(); @@ -806,12 +827,9 @@ impl<'a, 'b> Tokenizer<'a, 'b> { return None; // No token, continue tokenizing } } - } - LexerMode::Assembly => { - // Assembly mode: do not treat ';' as a line comment delimiter. - // Different assemblers (e.g., GAS, Apple as) use ';' with different - // meanings (statement separator vs. comment). Comment handling is - // left to the assembler. + LexerMode::Assembly => { + // Assembly mode: comment handling is left to the assembler. + } } } @@ -996,7 +1014,7 @@ impl<'a, 'b> Tokenizer<'a, 'b> { } } - self.get_special(c) + self.get_special(c, class) } /// Tokenize the entire input, returning all tokens @@ -2105,4 +2123,103 @@ mod tests { matches!(&tokens[1].value, TokenValue::Special(c) if *c == SpecialToken::HashHash as u32) ); } + + // ======================================================================== + // Character classification table tests + // ======================================================================== + + #[test] + fn test_char_table_digits() { + for c in b'0'..=b'9' { + let cl = char_class(c); + assert_eq!(cl & DIGIT, DIGIT, "digit {}", c as char); + assert_eq!(cl & HEX, HEX, "digit hex {}", c as char); + assert_eq!(cl & LETTER, 0, "digit not letter {}", c as char); + } + } + + #[test] + fn test_char_table_hex_letters() { + for c in [b'A', b'B', b'C', b'D', b'F', b'a', b'b', b'c', b'd', b'f'] { + let cl = char_class(c); + assert_eq!(cl & LETTER, LETTER, "hex letter {}", c as char); + assert_eq!(cl & HEX, HEX, "hex flag {}", c as char); + } + } + + #[test] + fn test_char_table_exp_letters() { + for c in [b'E', b'e', b'P', b'p'] { + let cl = char_class(c); + assert_eq!(cl & EXP, EXP, "exp {}", c as char); + assert_eq!(cl & LETTER, LETTER, "exp letter {}", c as char); + } + // E and e are also hex + assert_ne!(char_class(b'E') & HEX, 0); + assert_ne!(char_class(b'e') & HEX, 0); + // P and p are NOT hex + assert_eq!(char_class(b'P') & HEX, 0); + 
assert_eq!(char_class(b'p') & HEX, 0); + } + + #[test] + fn test_char_table_plain_letters() { + // Non-hex, non-exp uppercase + for c in b'G'..=b'O' { + let cl = char_class(c); + assert_eq!(cl, LETTER, "plain upper {}", c as char); + } + for c in b'Q'..=b'Z' { + let cl = char_class(c); + assert_eq!(cl, LETTER, "plain upper {}", c as char); + } + // Non-hex, non-exp lowercase + for c in b'g'..=b'o' { + let cl = char_class(c); + assert_eq!(cl, LETTER, "plain lower {}", c as char); + } + for c in b'q'..=b'z' { + let cl = char_class(c); + assert_eq!(cl, LETTER, "plain lower {}", c as char); + } + assert_eq!(char_class(b'_'), LETTER); + } + + #[test] + fn test_char_table_dot() { + let cl = char_class(b'.'); + assert_ne!(cl & DOT, 0); + assert_ne!(cl & VALID_SECOND, 0); + } + + #[test] + fn test_char_table_valid_second() { + for c in [b'=', b'+', b'-', b'>', b'<', b'&', b'|', b'#'] { + assert_ne!( + char_class(c) & VALID_SECOND, + 0, + "valid_second {}", + c as char + ); + } + } + + #[test] + fn test_char_table_quote() { + assert_ne!(char_class(b'\'') & QUOTE, 0); + assert_ne!(char_class(b'"') & QUOTE, 0); + } + + #[test] + fn test_char_table_comment() { + assert_ne!(char_class(b'/') & COMMENT, 0); + } + + #[test] + fn test_char_table_zero_for_others() { + // Control characters, whitespace, misc punctuation not in the table + for c in [0u8, b' ', b'\t', b'\n', b'@', b'$', b'`', b'~', 0x80, 0xFF] { + assert_eq!(char_class(c), 0, "zero for byte {:#x}", c); + } + } } From be1492927e90d86a308474aa07aee9074e6701d4 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Mon, 23 Mar 2026 08:12:34 +0000 Subject: [PATCH 13/18] cc: add pre-interned keyword system with tag-based O(1) classification Pre-intern ~240 well-known strings (C keywords, builtins, attribute names, preprocessor directives) at StringTable creation time. Each gets a deterministic StringId and a u32 tag bitmask, replacing string comparisons with integer comparisons in all hot parser/preprocessor paths. 
New file cc/kw.rs provides: define_keywords! macro, 14 tag bit constants, DECL_START composite, has_tag()/tags() query API, and 11 unit tests. Converted dispatch sites: is_declaration_start (43 arms), parse_type_specifiers (35 arms), parse_statement (12 arms), try_parse_type_name (25 arms), consume_type_qualifiers (5 arms), builtin dispatch (60 arms), handle_directive (14 arms), is_type_keyword, is_nullability_qualifier, is_attribute_keyword, is_asm_keyword, is_static_assert, is_builtin, is_supported_attribute, sizeof/alignof, pointer/array qualifier parsing (4 sites), and parse_asm_statement qualifiers. Co-Authored-By: Claude Opus 4.6 (1M context) --- cc/builtins.rs | 8 +- cc/kw.rs | 671 +++++++++++++++++++++++++++++++++++++++++ cc/lib.rs | 3 + cc/main.rs | 3 + cc/parse/expression.rs | 332 ++++++++++---------- cc/parse/mod.rs | 14 +- cc/parse/parser.rs | 238 ++++++--------- cc/strings.rs | 7 + cc/token/preprocess.rs | 79 +++-- 9 files changed, 991 insertions(+), 364 deletions(-) create mode 100644 cc/kw.rs diff --git a/cc/builtins.rs b/cc/builtins.rs index 601647de..b4a7d27c 100644 --- a/cc/builtins.rs +++ b/cc/builtins.rs @@ -113,12 +113,18 @@ pub const SUPPORTED_BUILTINS: &[&str] = &[ ]; /// Check if a name is a supported builtin function. -/// Used by __has_builtin() in the preprocessor. +/// Used by __has_builtin() in the preprocessor when only a string is available. #[inline] pub fn is_builtin(name: &str) -> bool { SUPPORTED_BUILTINS.contains(&name) } +/// Check if a StringId is a supported builtin function (O(1) via tag lookup). +#[inline] +pub fn is_builtin_id(id: crate::strings::StringId) -> bool { + crate::kw::has_tag(id, crate::kw::BUILTIN) +} + #[cfg(test)] mod tests { use super::*; diff --git a/cc/kw.rs b/cc/kw.rs new file mode 100644 index 00000000..f3db3369 --- /dev/null +++ b/cc/kw.rs @@ -0,0 +1,671 @@ +// +// Copyright (c) 2025-2026 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. 
For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Pre-interned keyword system for pcc C99 compiler +// +// All well-known strings (C keywords, builtins, attribute names, preprocessor +// directives) are pre-interned at StringTable creation time. Each gets a +// deterministic StringId and a u32 tag bitmask for O(1) set-membership queries. +// +// This eliminates string comparisons in hot paths (is_declaration_start, +// parse_type_specifiers, parse_statement, is_builtin, etc.) by replacing them +// with integer comparisons. +// + +use crate::strings::StringId; + +// ============================================================================ +// Tag bit constants (u32, 14 of 32 used) +// ============================================================================ + +pub const TYPE_SPEC: u32 = 1 << 0; +pub const STORAGE: u32 = 1 << 1; +pub const QUALIFIER: u32 = 1 << 2; +pub const INLINE_KW: u32 = 1 << 3; +pub const NORETURN_KW: u32 = 1 << 4; +pub const ATTR_KW: u32 = 1 << 5; +pub const ASM_KW: u32 = 1 << 6; +pub const ASSERT_KW: u32 = 1 << 7; +pub const NULLABILITY: u32 = 1 << 8; +pub const STMT_KW: u32 = 1 << 9; +pub const BUILTIN: u32 = 1 << 10; +pub const SUPPORTED_ATTR: u32 = 1 << 11; +pub const ALIGNAS_KW: u32 = 1 << 12; +pub const TYPE_KEYWORD: u32 = 1 << 13; + +/// Composite: all tags that start a declaration +pub const DECL_START: u32 = + TYPE_SPEC | STORAGE | QUALIFIER | INLINE_KW | NORETURN_KW | ATTR_KW | ASSERT_KW | ALIGNAS_KW; + +// ============================================================================ +// Keyword definition macros +// ============================================================================ + +/// Helper macro: recursive counter that assigns sequential StringId values starting from 1. +macro_rules! 
define_ids { + // Base case: no more entries + ($counter:expr; ) => {}; + // Recursive case: emit one const, recurse with counter+1 + ($counter:expr; ($name:ident, $str:literal, $tags:expr) $(, ($name_rest:ident, $str_rest:literal, $tags_rest:expr))* $(,)? ) => { + pub const $name: StringId = StringId($counter); + define_ids!($counter + 1; $(($name_rest, $str_rest, $tags_rest)),*); + }; +} + +/// Main keyword definition macro. Generates: +/// - KEYWORD_COUNT: total number of keywords +/// - One `pub const NAME: StringId` per keyword +/// - KEYWORD_STRINGS: array of string literals +/// - KEYWORD_TAGS: array of tag bitmasks +macro_rules! define_keywords { + ( $( ($name:ident, $str:literal, $tags:expr) ),* $(,)? ) => { + pub const KEYWORD_COUNT: usize = [ $( $str ),* ].len(); + define_ids!(1u32; $( ($name, $str, $tags) ),* ); + pub(crate) const KEYWORD_STRINGS: [&str; KEYWORD_COUNT] = [ $( $str ),* ]; + pub(crate) const KEYWORD_TAGS: [u32; KEYWORD_COUNT] = [ $( $tags ),* ]; + }; +} + +// ============================================================================ +// Keyword table — single source of truth +// ============================================================================ +// +// Naming convention: +// FOO — standard C keyword: const, inline, _Noreturn +// GNU_FOO — __foo__ (double-underscore-wrapped GNU spelling) +// GNU_FOO2 — __foo (leading-underscore-only GNU spelling) +// FOO_C23 — C23 spelling: static_assert +// BUILTIN_* — __builtin_* compiler builtins +// C11_ATOMIC_* — __c11_atomic_* builtins +// ATTR_* — attribute names (plain) +// GNU_ATTR_* — attribute names (__foo__ form) +// PP_* — preprocessor directives that conflict with Rust keywords + +define_keywords! 
{ + // ---- Type specifiers (TYPE_SPEC) ---- + (VOID, "void", TYPE_SPEC | TYPE_KEYWORD), + (CHAR, "char", TYPE_SPEC | TYPE_KEYWORD), + (SHORT, "short", TYPE_SPEC | TYPE_KEYWORD), + (INT, "int", TYPE_SPEC | TYPE_KEYWORD), + (LONG, "long", TYPE_SPEC | TYPE_KEYWORD), + (FLOAT, "float", TYPE_SPEC | TYPE_KEYWORD), + (DOUBLE, "double", TYPE_SPEC | TYPE_KEYWORD), + (SIGNED, "signed", TYPE_SPEC | TYPE_KEYWORD), + (UNSIGNED, "unsigned", TYPE_SPEC | TYPE_KEYWORD), + (BOOL, "_Bool", TYPE_SPEC | TYPE_KEYWORD), + (COMPLEX, "_Complex", TYPE_SPEC | TYPE_KEYWORD), + (FLOAT16, "_Float16", TYPE_SPEC | TYPE_KEYWORD), + (FLOAT32, "_Float32", TYPE_SPEC | TYPE_KEYWORD), + (FLOAT64, "_Float64", TYPE_SPEC | TYPE_KEYWORD), + (INT128, "__int128", TYPE_SPEC | TYPE_KEYWORD), + (INT128_T, "__int128_t", TYPE_SPEC | TYPE_KEYWORD), + (UINT128_T, "__uint128_t", TYPE_SPEC | TYPE_KEYWORD), + (BUILTIN_VA_LIST, "__builtin_va_list", TYPE_SPEC | TYPE_KEYWORD | BUILTIN), + (STRUCT, "struct", TYPE_SPEC | TYPE_KEYWORD), + (UNION, "union", TYPE_SPEC | TYPE_KEYWORD), + (ENUM, "enum", TYPE_SPEC | TYPE_KEYWORD), + (TYPEOF, "typeof", TYPE_SPEC | TYPE_KEYWORD), + (GNU_TYPEOF, "__typeof__", TYPE_SPEC | TYPE_KEYWORD), + (GNU_TYPEOF2, "__typeof", TYPE_SPEC | TYPE_KEYWORD), + (ATOMIC, "_Atomic", TYPE_SPEC | QUALIFIER | TYPE_KEYWORD), + + // ---- Storage class (STORAGE) ---- + (STATIC, "static", STORAGE), + (EXTERN, "extern", STORAGE), + (AUTO, "auto", STORAGE), + (REGISTER, "register", STORAGE), + (TYPEDEF, "typedef", STORAGE), + (THREAD_LOCAL, "_Thread_local", STORAGE), + (GNU_THREAD, "__thread", STORAGE), + + // ---- Type qualifiers (QUALIFIER) ---- + (CONST, "const", QUALIFIER | TYPE_KEYWORD), + (VOLATILE, "volatile", QUALIFIER | TYPE_KEYWORD), + (RESTRICT, "restrict", QUALIFIER), + (GNU_CONST, "__const__", QUALIFIER), + (GNU_CONST2, "__const", QUALIFIER), + (GNU_VOLATILE, "__volatile__", QUALIFIER), + (GNU_VOLATILE2, "__volatile", QUALIFIER), + (GNU_RESTRICT, "__restrict__", QUALIFIER), + (GNU_RESTRICT2, 
"__restrict", QUALIFIER), + + // ---- Inline (INLINE_KW) ---- + (INLINE, "inline", INLINE_KW), + (GNU_INLINE, "__inline__", INLINE_KW), + (GNU_INLINE2, "__inline", INLINE_KW), + + // ---- Noreturn (NORETURN_KW) ---- + (NORETURN, "_Noreturn", NORETURN_KW), + (GNU_NORETURN, "__noreturn__", NORETURN_KW | SUPPORTED_ATTR), + + // ---- Attribute keyword (ATTR_KW) ---- + (GNU_ATTRIBUTE, "__attribute__", ATTR_KW), + (GNU_ATTRIBUTE2, "__attribute", ATTR_KW), + + // ---- Asm keyword (ASM_KW) ---- + (ASM, "asm", ASM_KW), + (GNU_ASM, "__asm__", ASM_KW), + (GNU_ASM2, "__asm", ASM_KW), + + // ---- Static assert (ASSERT_KW) ---- + (STATIC_ASSERT, "_Static_assert", ASSERT_KW), + (STATIC_ASSERT_C23, "static_assert", ASSERT_KW), + + // ---- Alignas (ALIGNAS_KW) ---- + (ALIGNAS, "_Alignas", ALIGNAS_KW), + + // ---- Nullability qualifiers (NULLABILITY) ---- + (NONNULL, "_Nonnull", NULLABILITY), + (GNU_NONNULL, "__nonnull", NULLABILITY), + (NULLABLE, "_Nullable", NULLABILITY), + (GNU_NULLABLE, "__nullable", NULLABILITY), + (NULL_UNSPECIFIED, "_Null_unspecified", NULLABILITY), + (GNU_NULL_UNSPECIFIED, "__null_unspecified", NULLABILITY), + + // ---- Statement keywords (STMT_KW) ---- + (IF, "if", STMT_KW), + (ELSE, "else", STMT_KW), + (WHILE, "while", STMT_KW), + (DO, "do", STMT_KW), + (FOR, "for", STMT_KW), + (RETURN, "return", STMT_KW), + (BREAK, "break", STMT_KW), + (CONTINUE, "continue", STMT_KW), + (GOTO, "goto", STMT_KW), + (SWITCH, "switch", STMT_KW), + (CASE, "case", STMT_KW), + (DEFAULT, "default", STMT_KW), + + // ---- Sizeof / Alignof ---- + (SIZEOF, "sizeof", 0), + (ALIGNOF, "_Alignof", 0), + (GNU_ALIGNOF, "__alignof__", 0), + (GNU_ALIGNOF2, "__alignof", 0), + (ALIGNOF_C23, "alignof", 0), + + // ---- Wide char prefix ---- + (WIDE_PREFIX, "L", 0), + + // ---- Preprocessor directives ---- + (DEFINE, "define", 0), + (UNDEF, "undef", 0), + (IFDEF, "ifdef", 0), + (IFNDEF, "ifndef", 0), + (ELIF, "elif", 0), + (ENDIF, "endif", 0), + (INCLUDE, "include", 0), + (INCLUDE_NEXT, 
"include_next", 0), + (PP_ERROR, "error", 0), + (WARNING, "warning", 0), + (PRAGMA, "pragma", 0), + (LINE, "line", 0), + + // ---- Preprocessor special names ---- + (DEFINED, "defined", 0), + (VA_ARGS, "__VA_ARGS__", 0), + (ONCE, "once", 0), + + // ---- Predefined identifiers ---- + (FUNC, "__func__", 0), + (FUNCTION, "__FUNCTION__", 0), + (PRETTY_FUNCTION, "__PRETTY_FUNCTION__", 0), + + // ---- Builtins (BUILTIN) ---- + (BUILTIN_VA_START, "__builtin_va_start", BUILTIN), + (BUILTIN_VA_END, "__builtin_va_end", BUILTIN), + (BUILTIN_VA_ARG, "__builtin_va_arg", BUILTIN), + (BUILTIN_VA_COPY, "__builtin_va_copy", BUILTIN), + (BUILTIN_BSWAP16, "__builtin_bswap16", BUILTIN), + (BUILTIN_BSWAP32, "__builtin_bswap32", BUILTIN), + (BUILTIN_BSWAP64, "__builtin_bswap64", BUILTIN), + (BUILTIN_CTZ, "__builtin_ctz", BUILTIN), + (BUILTIN_CTZL, "__builtin_ctzl", BUILTIN), + (BUILTIN_CTZLL, "__builtin_ctzll", BUILTIN), + (BUILTIN_CLZ, "__builtin_clz", BUILTIN), + (BUILTIN_CLZL, "__builtin_clzl", BUILTIN), + (BUILTIN_CLZLL, "__builtin_clzll", BUILTIN), + (BUILTIN_POPCOUNT, "__builtin_popcount", BUILTIN), + (BUILTIN_POPCOUNTL, "__builtin_popcountl", BUILTIN), + (BUILTIN_POPCOUNTLL, "__builtin_popcountll", BUILTIN), + (BUILTIN_ALLOCA, "__builtin_alloca", BUILTIN), + (BUILTIN_MEMSET, "__builtin_memset", BUILTIN), + (BUILTIN_MEMCPY, "__builtin_memcpy", BUILTIN), + (BUILTIN_MEMMOVE, "__builtin_memmove", BUILTIN), + (BUILTIN_CONSTANT_P, "__builtin_constant_p", BUILTIN), + (BUILTIN_TYPES_COMPATIBLE_P, "__builtin_types_compatible_p", BUILTIN), + (BUILTIN_UNREACHABLE, "__builtin_unreachable", BUILTIN), + (BUILTIN_OFFSETOF, "__builtin_offsetof", BUILTIN), + (OFFSETOF, "offsetof", BUILTIN), + (BUILTIN_INF, "__builtin_inf", BUILTIN), + (BUILTIN_INFF, "__builtin_inff", BUILTIN), + (BUILTIN_INFL, "__builtin_infl", BUILTIN), + (BUILTIN_HUGE_VAL, "__builtin_huge_val", BUILTIN), + (BUILTIN_HUGE_VALF, "__builtin_huge_valf", BUILTIN), + (BUILTIN_HUGE_VALL, "__builtin_huge_vall", BUILTIN), + 
(BUILTIN_FABS, "__builtin_fabs", BUILTIN), + (BUILTIN_FABSF, "__builtin_fabsf", BUILTIN), + (BUILTIN_FABSL, "__builtin_fabsl", BUILTIN), + (BUILTIN_SIGNBIT, "__builtin_signbit", BUILTIN), + (BUILTIN_SIGNBITF, "__builtin_signbitf", BUILTIN), + (BUILTIN_SIGNBITL, "__builtin_signbitl", BUILTIN), + (BUILTIN_NAN, "__builtin_nan", BUILTIN), + (BUILTIN_NANF, "__builtin_nanf", BUILTIN), + (BUILTIN_NANL, "__builtin_nanl", BUILTIN), + (BUILTIN_NANS, "__builtin_nans", BUILTIN), + (BUILTIN_NANSF, "__builtin_nansf", BUILTIN), + (BUILTIN_NANSL, "__builtin_nansl", BUILTIN), + (BUILTIN_EXPECT, "__builtin_expect", BUILTIN), + (BUILTIN_ASSUME_ALIGNED, "__builtin_assume_aligned", BUILTIN), + (BUILTIN_PREFETCH, "__builtin_prefetch", BUILTIN), + (BUILTIN_FLT_ROUNDS, "__builtin_flt_rounds", BUILTIN), + (BUILTIN_FRAME_ADDRESS, "__builtin_frame_address", BUILTIN), + (BUILTIN_RETURN_ADDRESS, "__builtin_return_address", BUILTIN), + (BUILTIN_OBJECT_SIZE, "__builtin_object_size", BUILTIN), + (BUILTIN_SNPRINTF_CHK, "__builtin___snprintf_chk", BUILTIN), + (BUILTIN_VSNPRINTF_CHK, "__builtin___vsnprintf_chk", BUILTIN), + (BUILTIN_SPRINTF_CHK, "__builtin___sprintf_chk", BUILTIN), + (BUILTIN_FPRINTF_CHK, "__builtin___fprintf_chk", BUILTIN), + (BUILTIN_PRINTF_CHK, "__builtin___printf_chk", BUILTIN), + (BUILTIN_MEMCPY_CHK, "__builtin___memcpy_chk", BUILTIN), + (BUILTIN_MEMMOVE_CHK, "__builtin___memmove_chk", BUILTIN), + (BUILTIN_MEMSET_CHK, "__builtin___memset_chk", BUILTIN), + (BUILTIN_STPCPY_CHK, "__builtin___stpcpy_chk", BUILTIN), + (BUILTIN_STRCAT_CHK, "__builtin___strcat_chk", BUILTIN), + (BUILTIN_STRCPY_CHK, "__builtin___strcpy_chk", BUILTIN), + (BUILTIN_STRNCAT_CHK, "__builtin___strncat_chk", BUILTIN), + (BUILTIN_STRNCPY_CHK, "__builtin___strncpy_chk", BUILTIN), + + // ---- C11 atomic builtins (BUILTIN) ---- + (C11_ATOMIC_INIT, "__c11_atomic_init", BUILTIN), + (C11_ATOMIC_LOAD, "__c11_atomic_load", BUILTIN), + (C11_ATOMIC_STORE, "__c11_atomic_store", BUILTIN), + (C11_ATOMIC_EXCHANGE, 
"__c11_atomic_exchange", BUILTIN), + (C11_ATOMIC_COMPARE_EXCHANGE_STRONG, "__c11_atomic_compare_exchange_strong", BUILTIN), + (C11_ATOMIC_COMPARE_EXCHANGE_WEAK, "__c11_atomic_compare_exchange_weak", BUILTIN), + (C11_ATOMIC_FETCH_ADD, "__c11_atomic_fetch_add", BUILTIN), + (C11_ATOMIC_FETCH_SUB, "__c11_atomic_fetch_sub", BUILTIN), + (C11_ATOMIC_FETCH_AND, "__c11_atomic_fetch_and", BUILTIN), + (C11_ATOMIC_FETCH_OR, "__c11_atomic_fetch_or", BUILTIN), + (C11_ATOMIC_FETCH_XOR, "__c11_atomic_fetch_xor", BUILTIN), + (C11_ATOMIC_THREAD_FENCE, "__c11_atomic_thread_fence", BUILTIN), + (C11_ATOMIC_SIGNAL_FENCE, "__c11_atomic_signal_fence", BUILTIN), + + // ---- setjmp/longjmp (special-cased in parser, not true builtins) ---- + (SETJMP, "setjmp", 0), + (SETJMP2, "_setjmp", 0), + (LONGJMP, "longjmp", 0), + (LONGJMP2, "_longjmp", 0), + + // ---- Supported attribute names (SUPPORTED_ATTR) ---- + // Plain forms + (ATTR_NORETURN, "noreturn", SUPPORTED_ATTR), + (ATTR_UNUSED, "unused", SUPPORTED_ATTR), + (ATTR_ALIGNED, "aligned", SUPPORTED_ATTR), + (ATTR_PACKED, "packed", SUPPORTED_ATTR), + (ATTR_DEPRECATED, "deprecated", SUPPORTED_ATTR), + (ATTR_WEAK, "weak", SUPPORTED_ATTR), + (ATTR_SECTION, "section", SUPPORTED_ATTR), + (ATTR_VISIBILITY, "visibility", SUPPORTED_ATTR), + (ATTR_CONSTRUCTOR, "constructor", SUPPORTED_ATTR), + (ATTR_DESTRUCTOR, "destructor", SUPPORTED_ATTR), + (ATTR_USED, "used", SUPPORTED_ATTR), + (ATTR_NOINLINE, "noinline", SUPPORTED_ATTR), + (ATTR_ALWAYS_INLINE, "always_inline", SUPPORTED_ATTR), + (ATTR_HOT, "hot", SUPPORTED_ATTR), + (ATTR_COLD, "cold", SUPPORTED_ATTR), + (ATTR_WARN_UNUSED_RESULT, "warn_unused_result", SUPPORTED_ATTR), + (ATTR_FORMAT, "format", SUPPORTED_ATTR), + (ATTR_FALLTHROUGH, "fallthrough", SUPPORTED_ATTR), + (ATTR_NONSTRING, "nonstring", SUPPORTED_ATTR), + (ATTR_MALLOC, "malloc", SUPPORTED_ATTR), + (ATTR_PURE, "pure", SUPPORTED_ATTR), + (ATTR_SENTINEL, "sentinel", SUPPORTED_ATTR), + (ATTR_NO_SANITIZE_MEMORY, "no_sanitize_memory", 
SUPPORTED_ATTR), + (ATTR_NO_SANITIZE_ADDRESS, "no_sanitize_address", SUPPORTED_ATTR), + (ATTR_NO_SANITIZE_THREAD, "no_sanitize_thread", SUPPORTED_ATTR), + // GNU forms (__foo__) + // Note: __noreturn__ is already defined above with NORETURN_KW | SUPPORTED_ATTR + (GNU_ATTR_UNUSED, "__unused__", SUPPORTED_ATTR), + (GNU_ATTR_ALIGNED, "__aligned__", SUPPORTED_ATTR), + (GNU_ATTR_PACKED, "__packed__", SUPPORTED_ATTR), + (GNU_ATTR_DEPRECATED, "__deprecated__", SUPPORTED_ATTR), + (GNU_ATTR_WEAK, "__weak__", SUPPORTED_ATTR), + (GNU_ATTR_SECTION, "__section__", SUPPORTED_ATTR), + (GNU_ATTR_VISIBILITY, "__visibility__", SUPPORTED_ATTR), + (GNU_ATTR_CONSTRUCTOR, "__constructor__", SUPPORTED_ATTR), + (GNU_ATTR_DESTRUCTOR, "__destructor__", SUPPORTED_ATTR), + (GNU_ATTR_USED, "__used__", SUPPORTED_ATTR), + (GNU_ATTR_NOINLINE, "__noinline__", SUPPORTED_ATTR), + (GNU_ATTR_ALWAYS_INLINE, "__always_inline__", SUPPORTED_ATTR), + (GNU_ATTR_HOT, "__hot__", SUPPORTED_ATTR), + (GNU_ATTR_COLD, "__cold__", SUPPORTED_ATTR), + (GNU_ATTR_WARN_UNUSED_RESULT, "__warn_unused_result__", SUPPORTED_ATTR), + (GNU_ATTR_FORMAT, "__format__", SUPPORTED_ATTR), + (GNU_ATTR_FALLTHROUGH, "__fallthrough__", SUPPORTED_ATTR), + (GNU_ATTR_NONSTRING, "__nonstring__", SUPPORTED_ATTR), + (GNU_ATTR_MALLOC, "__malloc__", SUPPORTED_ATTR), + (GNU_ATTR_PURE, "__pure__", SUPPORTED_ATTR), + (GNU_ATTR_SENTINEL, "__sentinel__", SUPPORTED_ATTR), +} + +// ============================================================================ +// Tag query API +// ============================================================================ + +/// Check if a StringId has any of the given tag bits set. +/// Returns false for non-keyword IDs (dynamic strings interned after keywords). +#[inline] +pub fn has_tag(id: StringId, mask: u32) -> bool { + let idx = id.0 as usize; + idx > 0 && idx <= KEYWORD_COUNT && KEYWORD_TAGS[idx - 1] & mask != 0 +} + +/// Get the full tag bitmask for a StringId. +/// Returns 0 for non-keyword IDs. 
+#[inline] +pub fn tags(id: StringId) -> u32 { + let idx = id.0 as usize; + if idx > 0 && idx <= KEYWORD_COUNT { + KEYWORD_TAGS[idx - 1] + } else { + 0 + } +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::strings::StringTable; + use std::collections::HashSet; + + #[test] + fn test_keyword_ids_deterministic() { + let table = StringTable::new(); + assert_eq!(table.get(VOID), "void"); + assert_eq!(table.get(CHAR), "char"); + assert_eq!(table.get(INT), "int"); + assert_eq!(table.get(STATIC), "static"); + assert_eq!(table.get(CONST), "const"); + assert_eq!(table.get(INLINE), "inline"); + assert_eq!(table.get(IF), "if"); + assert_eq!(table.get(RETURN), "return"); + assert_eq!(table.get(BUILTIN_VA_START), "__builtin_va_start"); + assert_eq!(table.get(C11_ATOMIC_LOAD), "__c11_atomic_load"); + assert_eq!(table.get(ATTR_NORETURN), "noreturn"); + assert_eq!(table.get(GNU_ATTR_PACKED), "__packed__"); + } + + #[test] + fn test_no_duplicate_strings() { + let mut seen = HashSet::new(); + for (i, &s) in KEYWORD_STRINGS.iter().enumerate() { + assert!( + seen.insert(s), + "duplicate keyword string '{}' at index {}", + s, + i + ); + } + } + + #[test] + fn test_tags_type_spec() { + let type_specs = [ + VOID, + CHAR, + SHORT, + INT, + LONG, + FLOAT, + DOUBLE, + SIGNED, + UNSIGNED, + BOOL, + COMPLEX, + FLOAT16, + FLOAT32, + FLOAT64, + INT128, + INT128_T, + UINT128_T, + BUILTIN_VA_LIST, + STRUCT, + UNION, + ENUM, + TYPEOF, + GNU_TYPEOF, + GNU_TYPEOF2, + ATOMIC, + ]; + for &id in &type_specs { + assert!( + has_tag(id, TYPE_SPEC), + "'{}' (id={}) should have TYPE_SPEC", + KEYWORD_STRINGS[id.0 as usize - 1], + id.0 + ); + } + } + + #[test] + fn test_tags_qualifier() { + let qualifiers = [ + CONST, + VOLATILE, + RESTRICT, + ATOMIC, + GNU_CONST, + GNU_CONST2, + GNU_VOLATILE, + GNU_VOLATILE2, + GNU_RESTRICT, + 
GNU_RESTRICT2, + ]; + for &id in &qualifiers { + assert!( + has_tag(id, QUALIFIER), + "'{}' should have QUALIFIER", + KEYWORD_STRINGS[id.0 as usize - 1] + ); + } + } + + #[test] + fn test_tags_type_keyword() { + // Exact 27 members that match is_type_keyword() + let type_keywords = [ + VOID, + BOOL, + COMPLEX, + ATOMIC, + CHAR, + SHORT, + INT, + LONG, + FLOAT, + DOUBLE, + FLOAT16, + FLOAT32, + FLOAT64, + SIGNED, + UNSIGNED, + CONST, + VOLATILE, + STRUCT, + UNION, + ENUM, + INT128, + INT128_T, + UINT128_T, + BUILTIN_VA_LIST, + TYPEOF, + GNU_TYPEOF, + GNU_TYPEOF2, + ]; + for &id in &type_keywords { + assert!( + has_tag(id, TYPE_KEYWORD), + "'{}' should have TYPE_KEYWORD", + KEYWORD_STRINGS[id.0 as usize - 1] + ); + } + } + + #[test] + fn test_tags_decl_start() { + // All 43+ entries from current is_declaration_start() + let decl_start = [ + VOID, + CHAR, + SHORT, + INT, + LONG, + FLOAT, + DOUBLE, + FLOAT16, + FLOAT32, + FLOAT64, + COMPLEX, + ATOMIC, + ALIGNAS, + SIGNED, + UNSIGNED, + CONST, + VOLATILE, + STATIC, + EXTERN, + AUTO, + REGISTER, + TYPEDEF, + INLINE, + GNU_INLINE2, + GNU_INLINE, + NORETURN, + GNU_NORETURN, + STRUCT, + UNION, + ENUM, + BOOL, + GNU_ATTRIBUTE, + GNU_ATTRIBUTE2, + INT128, + INT128_T, + UINT128_T, + BUILTIN_VA_LIST, + TYPEOF, + GNU_TYPEOF, + GNU_TYPEOF2, + THREAD_LOCAL, + GNU_THREAD, + STATIC_ASSERT, + STATIC_ASSERT_C23, + ]; + for &id in &decl_start { + assert!( + has_tag(id, DECL_START), + "'{}' should have DECL_START", + KEYWORD_STRINGS[id.0 as usize - 1] + ); + } + } + + #[test] + fn test_tags_nullability() { + let nullability = [ + NONNULL, + GNU_NONNULL, + NULLABLE, + GNU_NULLABLE, + NULL_UNSPECIFIED, + GNU_NULL_UNSPECIFIED, + ]; + for &id in &nullability { + assert!( + has_tag(id, NULLABILITY), + "'{}' should have NULLABILITY", + KEYWORD_STRINGS[id.0 as usize - 1] + ); + } + } + + #[test] + fn test_tags_builtin() { + // Spot-check some builtins + let builtins = [ + BUILTIN_VA_START, + BUILTIN_VA_END, + BUILTIN_VA_ARG, + BUILTIN_VA_COPY, 
+ BUILTIN_BSWAP16, + BUILTIN_MEMCPY, + BUILTIN_UNREACHABLE, + BUILTIN_EXPECT, + BUILTIN_VA_LIST, + OFFSETOF, + BUILTIN_OBJECT_SIZE, + C11_ATOMIC_LOAD, + C11_ATOMIC_STORE, + C11_ATOMIC_EXCHANGE, + ]; + for &id in &builtins { + assert!( + has_tag(id, BUILTIN), + "'{}' should have BUILTIN", + KEYWORD_STRINGS[id.0 as usize - 1] + ); + } + // Count total builtins + let builtin_count = KEYWORD_TAGS.iter().filter(|&&t| t & BUILTIN != 0).count(); + assert!( + builtin_count >= 68, + "expected at least 68 builtins, got {}", + builtin_count + ); + } + + #[test] + fn test_tags_supported_attr() { + let attrs = [ + ATTR_NORETURN, + GNU_NORETURN, + ATTR_UNUSED, + GNU_ATTR_UNUSED, + ATTR_ALIGNED, + GNU_ATTR_ALIGNED, + ATTR_PACKED, + GNU_ATTR_PACKED, + ATTR_ALWAYS_INLINE, + GNU_ATTR_ALWAYS_INLINE, + ]; + for &id in &attrs { + assert!( + has_tag(id, SUPPORTED_ATTR), + "'{}' should have SUPPORTED_ATTR", + KEYWORD_STRINGS[id.0 as usize - 1] + ); + } + } + + #[test] + fn test_has_tag_returns_false_for_dynamic() { + assert!(!has_tag(StringId(9999), TYPE_SPEC)); + assert!(!has_tag(StringId(9999), BUILTIN)); + assert!(!has_tag(StringId(9999), DECL_START)); + } + + #[test] + fn test_has_tag_returns_false_for_empty() { + assert!(!has_tag(StringId::EMPTY, TYPE_SPEC)); + assert!(!has_tag(StringId::EMPTY, BUILTIN)); + assert!(!has_tag(StringId::EMPTY, DECL_START)); + } +} diff --git a/cc/lib.rs b/cc/lib.rs index f8b663ae..4c382964 100644 --- a/cc/lib.rs +++ b/cc/lib.rs @@ -12,12 +12,15 @@ // for use by other crates (cflow, ctags, cxref). 
// +#![recursion_limit = "512"] + pub mod abi; pub mod arch; pub mod builtin_headers; pub mod builtins; pub mod diag; pub mod ir; +pub mod kw; pub mod opt; pub mod os; pub mod parse; diff --git a/cc/main.rs b/cc/main.rs index a89237da..44635767 100644 --- a/cc/main.rs +++ b/cc/main.rs @@ -9,12 +9,15 @@ // pcc - A POSIX C99 compiler // +#![recursion_limit = "512"] + mod abi; mod arch; mod builtin_headers; mod builtins; mod diag; mod ir; +mod kw; mod opt; mod os; mod parse; diff --git a/cc/parse/expression.rs b/cc/parse/expression.rs index 561305f4..fd97274b 100644 --- a/cc/parse/expression.rs +++ b/cc/parse/expression.rs @@ -687,16 +687,18 @@ impl<'a> Parser<'a> { // sizeof and _Alignof if self.peek() == TokenType::Ident { - if let Some(name) = self.get_ident_name(self.current()) { - if name == "sizeof" { + if let Some(name_id) = self.get_ident_id(self.current()) { + if name_id == crate::kw::SIZEOF { self.advance(); return self.parse_sizeof(); } - if name == "_Alignof" - || name == "__alignof__" - || name == "__alignof" - || name == "alignof" - { + if matches!( + name_id, + crate::kw::ALIGNOF + | crate::kw::GNU_ALIGNOF + | crate::kw::GNU_ALIGNOF2 + | crate::kw::ALIGNOF_C23 + ) { self.advance(); return self.parse_alignof(); } @@ -780,37 +782,8 @@ impl<'a> Parser<'a> { } /// Check if identifier is a type-starting keyword (for cast/sizeof disambiguation) - pub(crate) fn is_type_keyword(name: &str) -> bool { - matches!( - name, - "void" - | "_Bool" - | "_Complex" - | "_Atomic" - | "char" - | "short" - | "int" - | "long" - | "float" - | "double" - | "_Float16" - | "_Float32" - | "_Float64" - | "signed" - | "unsigned" - | "const" - | "volatile" - | "struct" - | "union" - | "enum" - | "__int128" - | "__int128_t" - | "__uint128_t" - | "__builtin_va_list" - | "typeof" - | "__typeof__" - | "__typeof" - ) + pub(crate) fn is_type_keyword(id: crate::strings::StringId) -> bool { + crate::kw::has_tag(id, crate::kw::TYPE_KEYWORD) } /// Consume type qualifiers (const, volatile, 
restrict) @@ -823,25 +796,24 @@ impl<'a> Parser<'a> { Some(id) => id, None => break, }; - let name = self.str(name_id); - match name { - "const" | "__const" | "__const__" => { + match name_id { + crate::kw::CONST | crate::kw::GNU_CONST2 | crate::kw::GNU_CONST => { self.advance(); mods |= TypeModifiers::CONST; } - "volatile" | "__volatile" | "__volatile__" => { + crate::kw::VOLATILE | crate::kw::GNU_VOLATILE2 | crate::kw::GNU_VOLATILE => { self.advance(); mods |= TypeModifiers::VOLATILE; } - "restrict" | "__restrict" | "__restrict__" => { + crate::kw::RESTRICT | crate::kw::GNU_RESTRICT2 | crate::kw::GNU_RESTRICT => { self.advance(); mods |= TypeModifiers::RESTRICT; } - "_Atomic" => { + crate::kw::ATOMIC => { self.advance(); mods |= TypeModifiers::ATOMIC; } - n if super::is_nullability_qualifier(n) => { + _ if super::is_nullability_qualifier(name_id) => { self.advance(); } _ => break, @@ -891,8 +863,7 @@ impl<'a> Parser<'a> { // Check if this looks like a type name (keyword or typedef) let name_id = self.get_ident_id(self.current())?; - let name = self.str(name_id); - if !Self::is_type_keyword(name) && self.symbols.lookup_typedef(name_id).is_none() { + if !Self::is_type_keyword(name_id) && self.symbols.lookup_typedef(name_id).is_none() { // Not a type keyword and not a typedef return None; } @@ -914,35 +885,33 @@ impl<'a> Parser<'a> { Some(id) => id, None => break, }; - let name = self.str(name_id); - - match name { - "const" => { + match name_id { + crate::kw::CONST => { self.advance(); modifiers |= TypeModifiers::CONST; parsed_something = true; } - "volatile" => { + crate::kw::VOLATILE => { self.advance(); modifiers |= TypeModifiers::VOLATILE; parsed_something = true; } - "signed" => { + crate::kw::SIGNED => { self.advance(); modifiers |= TypeModifiers::SIGNED; parsed_something = true; } - "unsigned" => { + crate::kw::UNSIGNED => { self.advance(); modifiers |= TypeModifiers::UNSIGNED; parsed_something = true; } - "_Complex" => { + crate::kw::COMPLEX => { 
self.advance(); modifiers |= TypeModifiers::COMPLEX; parsed_something = true; } - "_Atomic" => { + crate::kw::ATOMIC => { self.advance(); // _Atomic can be: // 1. Type specifier: _Atomic(type-name) @@ -971,7 +940,7 @@ impl<'a> Parser<'a> { } parsed_something = true; } - "short" => { + crate::kw::SHORT => { self.advance(); modifiers |= TypeModifiers::SHORT; if base_kind.is_none() { @@ -979,7 +948,7 @@ impl<'a> Parser<'a> { } parsed_something = true; } - "long" => { + crate::kw::LONG => { self.advance(); if modifiers.contains(TypeModifiers::LONG) { modifiers |= TypeModifiers::LONGLONG; @@ -995,17 +964,17 @@ impl<'a> Parser<'a> { } parsed_something = true; } - "void" => { + crate::kw::VOID => { self.advance(); base_kind = Some(TypeKind::Void); parsed_something = true; } - "char" => { + crate::kw::CHAR => { self.advance(); base_kind = Some(TypeKind::Char); parsed_something = true; } - "int" => { + crate::kw::INT => { self.advance(); if base_kind.is_none() || !matches!( @@ -1017,12 +986,12 @@ impl<'a> Parser<'a> { } parsed_something = true; } - "float" => { + crate::kw::FLOAT => { self.advance(); base_kind = Some(TypeKind::Float); parsed_something = true; } - "double" => { + crate::kw::DOUBLE => { self.advance(); // Handle long double if modifiers.contains(TypeModifiers::LONG) { @@ -1032,50 +1001,50 @@ impl<'a> Parser<'a> { } parsed_something = true; } - "_Float16" => { + crate::kw::FLOAT16 => { self.advance(); base_kind = Some(TypeKind::Float16); parsed_something = true; } - "_Float32" => { + crate::kw::FLOAT32 => { // _Float32 is an alias for float (TS 18661-3 / C23) self.advance(); base_kind = Some(TypeKind::Float); parsed_something = true; } - "_Float64" => { + crate::kw::FLOAT64 => { // _Float64 is an alias for double (TS 18661-3 / C23) self.advance(); base_kind = Some(TypeKind::Double); parsed_something = true; } - "_Bool" => { + crate::kw::BOOL => { self.advance(); base_kind = Some(TypeKind::Bool); parsed_something = true; } - "__int128" => { + crate::kw::INT128 
=> { self.advance(); base_kind = Some(TypeKind::Int128); parsed_something = true; } - "__int128_t" => { + crate::kw::INT128_T => { self.advance(); base_kind = Some(TypeKind::Int128); parsed_something = true; } - "__uint128_t" => { + crate::kw::UINT128_T => { self.advance(); modifiers |= TypeModifiers::UNSIGNED; base_kind = Some(TypeKind::Int128); parsed_something = true; } - "__builtin_va_list" => { + crate::kw::BUILTIN_VA_LIST => { self.advance(); base_kind = Some(TypeKind::VaList); parsed_something = true; } - "typeof" | "__typeof__" | "__typeof" => { + crate::kw::TYPEOF | crate::kw::GNU_TYPEOF | crate::kw::GNU_TYPEOF2 => { self.advance(); // consume typeof if !self.is_special(b'(') { return None; @@ -1105,7 +1074,7 @@ impl<'a> Parser<'a> { let expr_type = expr.typ.unwrap_or(self.types.int_id); return Some(self.parse_pointer_chain(expr_type)); } - "struct" => { + crate::kw::STRUCT => { self.advance(); // consume 'struct' // For struct tag reference, look up directly in symbol table if let Some(tag_name) = self.get_ident_id(self.current()) { @@ -1168,7 +1137,7 @@ impl<'a> Parser<'a> { } return None; } - "union" => { + crate::kw::UNION => { self.advance(); // consume 'union' // For union tag reference, look up directly in symbol table if let Some(tag_name) = self.get_ident_id(self.current()) { @@ -1231,7 +1200,7 @@ impl<'a> Parser<'a> { } return None; } - "enum" => { + crate::kw::ENUM => { if let Ok(enum_type) = self.parse_enum_specifier() { let mut typ = enum_type; typ.modifiers |= modifiers | self.consume_type_qualifiers(); @@ -1779,11 +1748,10 @@ impl<'a> Parser<'a> { let token_pos = token.pos; if let TokenValue::Ident(id) = &token.value { let name_id = *id; - let name_str = self.idents.get_opt(name_id).unwrap_or(""); // Check for varargs builtins that need special parsing - match name_str { - "__builtin_va_start" => { + match name_id { + crate::kw::BUILTIN_VA_START => { // __builtin_va_start(ap, last_param) self.expect_special(b'(')?; let ap = 
self.parse_assignment_expr()?; @@ -1800,7 +1768,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_va_arg" => { + crate::kw::BUILTIN_VA_ARG => { // __builtin_va_arg(ap, type) self.expect_special(b'(')?; let ap = self.parse_assignment_expr()?; @@ -1817,7 +1785,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_va_end" => { + crate::kw::BUILTIN_VA_END => { // __builtin_va_end(ap) self.expect_special(b'(')?; let ap = self.parse_assignment_expr()?; @@ -1828,7 +1796,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_va_copy" => { + crate::kw::BUILTIN_VA_COPY => { // __builtin_va_copy(dest, src) self.expect_special(b'(')?; let dest = self.parse_assignment_expr()?; @@ -1844,7 +1812,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_bswap16" => { + crate::kw::BUILTIN_BSWAP16 => { // __builtin_bswap16(x) - returns uint16_t self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; @@ -1855,7 +1823,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_bswap32" => { + crate::kw::BUILTIN_BSWAP32 => { // __builtin_bswap32(x) - returns uint32_t self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; @@ -1866,7 +1834,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_bswap64" => { + crate::kw::BUILTIN_BSWAP64 => { // __builtin_bswap64(x) - returns uint64_t self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; @@ -1877,7 +1845,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_ctz" => { + crate::kw::BUILTIN_CTZ => { // __builtin_ctz(x) - returns int, counts trailing zeros in unsigned int // Result is undefined if x is 0 self.expect_special(b'(')?; @@ -1889,7 +1857,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_ctzl" => { + crate::kw::BUILTIN_CTZL => { // __builtin_ctzl(x) - returns int, counts trailing zeros in unsigned long // Result is undefined if x is 0 self.expect_special(b'(')?; @@ -1901,7 +1869,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_ctzll" => { + crate::kw::BUILTIN_CTZLL 
=> { // __builtin_ctzll(x) - returns int, counts trailing zeros in unsigned long long // Result is undefined if x is 0 self.expect_special(b'(')?; @@ -1913,7 +1881,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_clz" => { + crate::kw::BUILTIN_CLZ => { // __builtin_clz(x) - returns int, counts leading zeros in unsigned int // Result is undefined if x is 0 self.expect_special(b'(')?; @@ -1925,7 +1893,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_clzl" => { + crate::kw::BUILTIN_CLZL => { // __builtin_clzl(x) - returns int, counts leading zeros in unsigned long // Result is undefined if x is 0 self.expect_special(b'(')?; @@ -1937,7 +1905,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_clzll" => { + crate::kw::BUILTIN_CLZLL => { // __builtin_clzll(x) - returns int, counts leading zeros in unsigned long long // Result is undefined if x is 0 self.expect_special(b'(')?; @@ -1949,7 +1917,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_popcount" => { + crate::kw::BUILTIN_POPCOUNT => { // __builtin_popcount(x) - returns int, counts set bits in unsigned int self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; @@ -1960,7 +1928,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_popcountl" => { + crate::kw::BUILTIN_POPCOUNTL => { // __builtin_popcountl(x) - returns int, counts set bits in unsigned long self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; @@ -1971,7 +1939,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_popcountll" => { + crate::kw::BUILTIN_POPCOUNTLL => { // __builtin_popcountll(x) - returns int, counts set bits in unsigned long long self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; @@ -1982,7 +1950,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_alloca" => { + crate::kw::BUILTIN_ALLOCA => { // __builtin_alloca(size) - returns void* self.expect_special(b'(')?; let size = self.parse_assignment_expr()?; @@ -1996,7 +1964,7 @@ impl<'a> Parser<'a> { )); } // 
Memory builtins - generate calls to C library functions - "__builtin_memset" => { + crate::kw::BUILTIN_MEMSET => { // __builtin_memset(dest, c, n) - returns void* self.expect_special(b'(')?; let dest = self.parse_assignment_expr()?; @@ -2015,7 +1983,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_memcpy" => { + crate::kw::BUILTIN_MEMCPY => { // __builtin_memcpy(dest, src, n) - returns void* self.expect_special(b'(')?; let dest = self.parse_assignment_expr()?; @@ -2034,7 +2002,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_memmove" => { + crate::kw::BUILTIN_MEMMOVE => { // __builtin_memmove(dest, src, n) - returns void* self.expect_special(b'(')?; let dest = self.parse_assignment_expr()?; @@ -2054,7 +2022,7 @@ impl<'a> Parser<'a> { )); } // Infinity builtins - return float constants - "__builtin_inf" | "__builtin_huge_val" => { + crate::kw::BUILTIN_INF | crate::kw::BUILTIN_HUGE_VAL => { self.expect_special(b'(')?; self.expect_special(b')')?; return Ok(Self::typed_expr( @@ -2063,7 +2031,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_inff" | "__builtin_huge_valf" => { + crate::kw::BUILTIN_INFF | crate::kw::BUILTIN_HUGE_VALF => { self.expect_special(b'(')?; self.expect_special(b')')?; return Ok(Self::typed_expr( @@ -2072,7 +2040,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_infl" | "__builtin_huge_vall" => { + crate::kw::BUILTIN_INFL | crate::kw::BUILTIN_HUGE_VALL => { self.expect_special(b'(')?; self.expect_special(b')')?; return Ok(Self::typed_expr( @@ -2083,7 +2051,7 @@ impl<'a> Parser<'a> { } // NaN builtins - returns quiet NaN // The string argument is typically empty "" for quiet NaN - "__builtin_nan" | "__builtin_nans" => { + crate::kw::BUILTIN_NAN | crate::kw::BUILTIN_NANS => { self.expect_special(b'(')?; let _arg = self.parse_assignment_expr()?; // string argument (ignored) self.expect_special(b')')?; @@ -2093,7 +2061,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_nanf" | "__builtin_nansf" => { + 
crate::kw::BUILTIN_NANF | crate::kw::BUILTIN_NANSF => { self.expect_special(b'(')?; let _arg = self.parse_assignment_expr()?; // string argument (ignored) self.expect_special(b')')?; @@ -2103,7 +2071,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_nanl" | "__builtin_nansl" => { + crate::kw::BUILTIN_NANL | crate::kw::BUILTIN_NANSL => { self.expect_special(b'(')?; let _arg = self.parse_assignment_expr()?; // string argument (ignored) self.expect_special(b')')?; @@ -2114,7 +2082,7 @@ impl<'a> Parser<'a> { )); } // FLT_ROUNDS - returns current rounding mode (1 = to nearest) - "__builtin_flt_rounds" => { + crate::kw::BUILTIN_FLT_ROUNDS => { self.expect_special(b'(')?; self.expect_special(b')')?; return Ok(Self::typed_expr( @@ -2124,7 +2092,7 @@ impl<'a> Parser<'a> { )); } // Fabs builtins - absolute value for floats - "__builtin_fabs" => { + crate::kw::BUILTIN_FABS => { self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; self.expect_special(b')')?; @@ -2134,7 +2102,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_fabsf" => { + crate::kw::BUILTIN_FABSF => { self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; self.expect_special(b')')?; @@ -2144,7 +2112,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_fabsl" => { + crate::kw::BUILTIN_FABSL => { self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; self.expect_special(b')')?; @@ -2155,7 +2123,7 @@ impl<'a> Parser<'a> { )); } // Signbit builtins - test sign bit of floats - "__builtin_signbit" => { + crate::kw::BUILTIN_SIGNBIT => { self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; self.expect_special(b')')?; @@ -2165,7 +2133,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_signbitf" => { + crate::kw::BUILTIN_SIGNBITF => { self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; self.expect_special(b')')?; @@ -2175,7 +2143,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_signbitl" => { + 
crate::kw::BUILTIN_SIGNBITL => { self.expect_special(b'(')?; let arg = self.parse_assignment_expr()?; self.expect_special(b')')?; @@ -2185,7 +2153,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_unreachable" => { + crate::kw::BUILTIN_UNREACHABLE => { // __builtin_unreachable() - marks code as unreachable // Takes no arguments, returns void // Behavior is undefined if actually reached at runtime @@ -2197,7 +2165,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_constant_p" => { + crate::kw::BUILTIN_CONSTANT_P => { // __builtin_constant_p(expr) - returns 1 if expr is a constant, 0 otherwise // This is evaluated at compile time, not runtime self.expect_special(b'(')?; @@ -2211,7 +2179,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_expect" => { + crate::kw::BUILTIN_EXPECT => { // __builtin_expect(expr, c) - branch prediction hint // Returns expr, the second argument is the expected value (for optimization hints) // We just return expr since we don't do branch prediction optimization @@ -2222,7 +2190,7 @@ impl<'a> Parser<'a> { self.expect_special(b')')?; return Ok(expr); } - "__builtin_assume_aligned" => { + crate::kw::BUILTIN_ASSUME_ALIGNED => { // __builtin_assume_aligned(ptr, align) or // __builtin_assume_aligned(ptr, align, offset) // Returns ptr, hints that ptr is aligned to align bytes @@ -2239,7 +2207,7 @@ impl<'a> Parser<'a> { self.expect_special(b')')?; return Ok(ptr); } - "__builtin_prefetch" => { + crate::kw::BUILTIN_PREFETCH => { // __builtin_prefetch(addr) or // __builtin_prefetch(addr, rw) or // __builtin_prefetch(addr, rw, locality) @@ -2264,7 +2232,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_types_compatible_p" => { + crate::kw::BUILTIN_TYPES_COMPATIBLE_P => { // __builtin_types_compatible_p(type1, type2) - returns 1 if types are compatible // This is evaluated at compile time, ignoring top-level qualifiers self.expect_special(b'(')?; @@ -2280,7 +2248,7 @@ impl<'a> Parser<'a> { token_pos, )); } - 
"__builtin_frame_address" => { + crate::kw::BUILTIN_FRAME_ADDRESS => { // __builtin_frame_address(level) - returns void*, address of frame at level // Level 0 is the current frame, 1 is the caller's frame, etc. // Returns NULL for invalid levels (beyond stack bounds) @@ -2295,7 +2263,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_return_address" => { + crate::kw::BUILTIN_RETURN_ADDRESS => { // __builtin_return_address(level) - returns void*, return address at level // Level 0 is the current function's return address // Returns NULL for invalid levels (beyond stack bounds) @@ -2310,7 +2278,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "setjmp" | "_setjmp" => { + crate::kw::SETJMP | crate::kw::SETJMP2 => { // setjmp(env) - saves execution context, returns int // Returns 0 on direct call, non-zero when returning via longjmp self.expect_special(b'(')?; @@ -2322,7 +2290,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "longjmp" | "_longjmp" => { + crate::kw::LONGJMP | crate::kw::LONGJMP2 => { // longjmp(env, val) - restores execution context (never returns) // Causes corresponding setjmp to return val (or 1 if val == 0) self.expect_special(b'(')?; @@ -2339,7 +2307,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_offsetof" | "offsetof" => { + crate::kw::BUILTIN_OFFSETOF | crate::kw::OFFSETOF => { // __builtin_offsetof(type, member-designator) // Returns the byte offset of a member within a struct/union // member-designator can be .field or [index] chains @@ -2395,7 +2363,7 @@ impl<'a> Parser<'a> { // ================================================================ // Atomic builtins (Clang __c11_atomic_* for C11 stdatomic.h) // ================================================================ - "__c11_atomic_init" => { + crate::kw::C11_ATOMIC_INIT => { // __c11_atomic_init(ptr, val) - initialize atomic (no ordering) self.expect_special(b'(')?; let ptr = self.parse_assignment_expr()?; @@ -2411,7 +2379,7 @@ impl<'a> Parser<'a> { token_pos, )); } - 
"__c11_atomic_load" => { + crate::kw::C11_ATOMIC_LOAD => { // __c11_atomic_load(ptr, order) - returns *ptr atomically self.expect_special(b'(')?; let ptr = self.parse_assignment_expr()?; @@ -2431,7 +2399,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_store" => { + crate::kw::C11_ATOMIC_STORE => { // __c11_atomic_store(ptr, val, order) - *ptr = val atomically self.expect_special(b'(')?; let ptr = self.parse_assignment_expr()?; @@ -2450,7 +2418,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_exchange" => { + crate::kw::C11_ATOMIC_EXCHANGE => { // __c11_atomic_exchange(ptr, val, order) - swap and return old self.expect_special(b'(')?; let ptr = self.parse_assignment_expr()?; @@ -2473,7 +2441,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_compare_exchange_strong" => { + crate::kw::C11_ATOMIC_COMPARE_EXCHANGE_STRONG => { // __c11_atomic_compare_exchange_strong(ptr, expected, desired, succ, fail) // Note: fail_order is parsed but ignored (we use succ_order for both) self.expect_special(b'(')?; @@ -2499,7 +2467,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_compare_exchange_weak" => { + crate::kw::C11_ATOMIC_COMPARE_EXCHANGE_WEAK => { // __c11_atomic_compare_exchange_weak(ptr, expected, desired, succ, fail) // Note: Implemented as strong (no spurious failures) self.expect_special(b'(')?; @@ -2525,7 +2493,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_fetch_add" => { + crate::kw::C11_ATOMIC_FETCH_ADD => { // __c11_atomic_fetch_add(ptr, val, order) - add and return old self.expect_special(b'(')?; let ptr = self.parse_assignment_expr()?; @@ -2548,7 +2516,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_fetch_sub" => { + crate::kw::C11_ATOMIC_FETCH_SUB => { // __c11_atomic_fetch_sub(ptr, val, order) - subtract and return old self.expect_special(b'(')?; let ptr = self.parse_assignment_expr()?; @@ -2571,7 +2539,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_fetch_and" => { + 
crate::kw::C11_ATOMIC_FETCH_AND => { // __c11_atomic_fetch_and(ptr, val, order) - AND and return old self.expect_special(b'(')?; let ptr = self.parse_assignment_expr()?; @@ -2594,7 +2562,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_fetch_or" => { + crate::kw::C11_ATOMIC_FETCH_OR => { // __c11_atomic_fetch_or(ptr, val, order) - OR and return old self.expect_special(b'(')?; let ptr = self.parse_assignment_expr()?; @@ -2617,7 +2585,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_fetch_xor" => { + crate::kw::C11_ATOMIC_FETCH_XOR => { // __c11_atomic_fetch_xor(ptr, val, order) - XOR and return old self.expect_special(b'(')?; let ptr = self.parse_assignment_expr()?; @@ -2640,7 +2608,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_thread_fence" => { + crate::kw::C11_ATOMIC_THREAD_FENCE => { // __c11_atomic_thread_fence(order) - memory fence self.expect_special(b'(')?; let order = self.parse_assignment_expr()?; @@ -2653,7 +2621,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__c11_atomic_signal_fence" => { + crate::kw::C11_ATOMIC_SIGNAL_FENCE => { // __c11_atomic_signal_fence(order) - compiler barrier self.expect_special(b'(')?; let order = self.parse_assignment_expr()?; @@ -2666,7 +2634,7 @@ impl<'a> Parser<'a> { token_pos, )); } - "__builtin_object_size" => { + crate::kw::BUILTIN_OBJECT_SIZE => { // __builtin_object_size(ptr, type) - returns (size_t)-1 // at compile time without optimization (conservative "don't know") self.expect_special(b'(')?; @@ -2680,63 +2648,70 @@ impl<'a> Parser<'a> { token_pos, )); } - name if name.starts_with("__builtin___") => { - // Fortified builtins: __builtin___snprintf_chk etc. - // Strip __builtin_ prefix → __snprintf_chk, which is a - // real libc function (declared by macOS/glibc headers). 
- let real_name = &name["__builtin_".len()..]; - // Parse arguments first (must consume tokens regardless) - self.expect_special(b'(')?; - let mut args = Vec::new(); - if !self.is_special(b')') { - args.push(self.parse_assignment_expr()?); - while self.is_special(b',') { - self.advance(); + _ => { + let name_str = self.idents.get_opt(name_id).unwrap_or(""); + if name_str.starts_with("__builtin___") { + // Fortified builtins: __builtin___snprintf_chk etc. + // Strip __builtin_ prefix → __snprintf_chk, which is a + // real libc function (declared by macOS/glibc headers). + let real_name = &name_str["__builtin_".len()..]; + // Parse arguments first (must consume tokens regardless) + self.expect_special(b'(')?; + let mut args = Vec::new(); + if !self.is_special(b')') { args.push(self.parse_assignment_expr()?); + while self.is_special(b',') { + self.advance(); + args.push(self.parse_assignment_expr()?); + } } - } - self.expect_special(b')')?; - // Look up the real function by its de-prefixed name - let real_name_id = self.idents.lookup(real_name); - let symbol_id = real_name_id.and_then(|id| { - self.symbols - .lookup_id(id, crate::symbol::Namespace::Ordinary) - }); - if let Some(symbol_id) = symbol_id { - let func_type = self.symbols.get(symbol_id).typ; - let ret_type = - self.types.base_type(func_type).unwrap_or(self.types.int_id); - let func_expr = Self::typed_expr( - ExprKind::Ident(symbol_id), - func_type, + self.expect_special(b')')?; + // Look up the real function by its de-prefixed name + let real_name_id = self.idents.lookup(real_name); + let symbol_id = real_name_id.and_then(|id| { + self.symbols + .lookup_id(id, crate::symbol::Namespace::Ordinary) + }); + if let Some(symbol_id) = symbol_id { + let func_type = self.symbols.get(symbol_id).typ; + let ret_type = self + .types + .base_type(func_type) + .unwrap_or(self.types.int_id); + let func_expr = Self::typed_expr( + ExprKind::Ident(symbol_id), + func_type, + token_pos, + ); + return Ok(Self::typed_expr( + 
ExprKind::Call { + func: Box::new(func_expr), + args, + }, + ret_type, + token_pos, + )); + } + // Not declared — return 0 as fallback + diag::error( token_pos, + &format!("undeclared function '{}'", real_name), ); return Ok(Self::typed_expr( - ExprKind::Call { - func: Box::new(func_expr), - args, - }, - ret_type, + ExprKind::IntLit(0), + self.types.int_id, token_pos, )); } - // Not declared — return 0 as fallback - diag::error(token_pos, &format!("undeclared function '{}'", real_name)); - return Ok(Self::typed_expr( - ExprKind::IntLit(0), - self.types.int_id, - token_pos, - )); } - _ => {} } // Look up symbol to get type (during parsing, symbol is in scope) // C99 6.4.2.2: __func__ is a predefined identifier with type const char[] // GCC extensions: __FUNCTION__ and __PRETTY_FUNCTION__ behave similarly - if name_str == "__func__" - || name_str == "__FUNCTION__" - || name_str == "__PRETTY_FUNCTION__" + if name_id == crate::kw::FUNC + || name_id == crate::kw::FUNCTION + || name_id == crate::kw::PRETTY_FUNCTION { // These behave like a string literal (const char[]) // Linearization handles mapping to __func__ behavior @@ -2766,6 +2741,7 @@ impl<'a> Parser<'a> { } else { // C99 6.5.1: Undeclared identifier is an error // (implicit int was removed in C99) + let name_str = self.idents.get_opt(name_id).unwrap_or(""); diag::error(token_pos, &format!("undeclared identifier '{}'", name_str)); // Return a dummy expression to continue parsing Ok(Self::typed_expr( diff --git a/cc/parse/mod.rs b/cc/parse/mod.rs index 86049761..8cc41111 100644 --- a/cc/parse/mod.rs +++ b/cc/parse/mod.rs @@ -19,16 +19,8 @@ mod test_parser; // Re-export parser used by main.rs pub use parser::Parser; -/// Check if a name is a C11 nullability qualifier. +/// Check if a StringId is a C11 nullability qualifier. /// Single source of truth — used by all qualifier-parsing paths. 
-pub(crate) fn is_nullability_qualifier(name: &str) -> bool { - matches!( - name, - "_Nonnull" - | "__nonnull" - | "_Nullable" - | "__nullable" - | "_Null_unspecified" - | "__null_unspecified" - ) +pub(crate) fn is_nullability_qualifier(id: crate::strings::StringId) -> bool { + crate::kw::has_tag(id, crate::kw::NULLABILITY) } diff --git a/cc/parse/parser.rs b/cc/parse/parser.rs index aed2475a..5d2f14e9 100644 --- a/cc/parse/parser.rs +++ b/cc/parse/parser.rs @@ -391,7 +391,7 @@ impl<'a> Parser<'a> { if self.peek() == TokenType::Ident { if let Some(name_id) = self.get_ident_id(self.current()) { let is_type = self.symbols.lookup_typedef(name_id).is_some() - || Self::is_type_keyword(self.str(name_id)); + || crate::kw::has_tag(name_id, crate::kw::TYPE_KEYWORD); // If not a type, this is a grouped declarator return !is_type; } @@ -491,8 +491,8 @@ impl<'a> Parser<'a> { if self.peek() != TokenType::Ident { return false; } - if let Some(name) = self.get_ident_name(self.current()) { - name == "__attribute__" || name == "__attribute" + if let Some(id) = self.get_ident_id(self.current()) { + crate::kw::has_tag(id, crate::kw::ATTR_KW) } else { false } @@ -645,8 +645,8 @@ impl<'a> Parser<'a> { fn is_nullability_qualifier(&self) -> bool { self.peek() == TokenType::Ident && self - .get_ident_name(self.current()) - .is_some_and(|n| super::is_nullability_qualifier(n.as_str())) + .get_ident_id(self.current()) + .is_some_and(|id| super::is_nullability_qualifier(id)) } /// Check if current token is __asm or __asm__ @@ -654,8 +654,8 @@ impl<'a> Parser<'a> { if self.peek() != TokenType::Ident { return false; } - if let Some(name) = self.get_ident_name(self.current()) { - name == "__asm__" || name == "__asm" || name == "asm" + if let Some(id) = self.get_ident_id(self.current()) { + crate::kw::has_tag(id, crate::kw::ASM_KW) } else { false } @@ -695,18 +695,16 @@ impl<'a> Parser<'a> { let mut is_volatile = false; let mut _is_goto = false; while self.peek() == TokenType::Ident { - if let 
Some(name) = self.get_ident_name(self.current()) { - match name.as_str() { - "volatile" | "__volatile__" => { + if let Some(name_id) = self.get_ident_id(self.current()) { + match name_id { + crate::kw::VOLATILE | crate::kw::GNU_VOLATILE => { is_volatile = true; self.advance(); } - "inline" | "__inline__" => { - // inline qualifier - just consume it (affects inlining decisions) + crate::kw::INLINE | crate::kw::GNU_INLINE => { self.advance(); } - "goto" => { - // goto qualifier - indicates asm can jump to C labels + crate::kw::GOTO => { _is_goto = true; self.advance(); } @@ -948,34 +946,34 @@ impl Parser<'_> { pub fn parse_statement(&mut self) -> ParseResult { // Check for keywords if self.peek() == TokenType::Ident { - if let Some(name) = self.get_ident_name(self.current()) { - match name.as_str() { - "if" => return self.parse_if_stmt(), - "while" => return self.parse_while_stmt(), - "do" => return self.parse_do_while_stmt(), - "for" => return self.parse_for_stmt(), - "return" => return self.parse_return_stmt(), - "break" => { + if let Some(name_id) = self.get_ident_id(self.current()) { + match name_id { + crate::kw::IF => return self.parse_if_stmt(), + crate::kw::WHILE => return self.parse_while_stmt(), + crate::kw::DO => return self.parse_do_while_stmt(), + crate::kw::FOR => return self.parse_for_stmt(), + crate::kw::RETURN => return self.parse_return_stmt(), + crate::kw::BREAK => { self.advance(); self.expect_special(b';')?; return Ok(Stmt::Break); } - "continue" => { + crate::kw::CONTINUE => { self.advance(); self.expect_special(b';')?; return Ok(Stmt::Continue); } - "goto" => { + crate::kw::GOTO => { self.advance(); let label = self.expect_identifier()?; self.expect_special(b';')?; return Ok(Stmt::Goto(label)); } - "switch" => return self.parse_switch_stmt(), - "case" => return self.parse_case_label(), - "default" => return self.parse_default_label(), + crate::kw::SWITCH => return self.parse_switch_stmt(), + crate::kw::CASE => return self.parse_case_label(), + 
crate::kw::DEFAULT => return self.parse_default_label(), // GCC extended inline assembly - "__asm__" | "__asm" | "asm" => { + crate::kw::ASM | crate::kw::GNU_ASM | crate::kw::GNU_ASM2 => { return self.parse_asm_statement(); } _ => {} @@ -1026,8 +1024,8 @@ impl Parser<'_> { let then_stmt = self.parse_statement()?; let else_stmt = if self.peek() == TokenType::Ident { - if let Some(name) = self.get_ident_name(self.current()) { - if name == "else" { + if let Some(name_id) = self.get_ident_id(self.current()) { + if name_id == crate::kw::ELSE { self.advance(); Some(Box::new(self.parse_statement()?)) } else { @@ -1303,55 +1301,7 @@ impl Parser<'_> { } if let Some(name_id) = self.get_ident_id(self.current()) { - let name = self.str(name_id); - // Check for type keywords first - if matches!( - name, - "void" - | "char" - | "short" - | "int" - | "long" - | "float" - | "double" - | "_Float16" - | "_Float32" - | "_Float64" - | "_Complex" - | "_Atomic" - | "_Alignas" - | "signed" - | "unsigned" - | "const" - | "volatile" - | "static" - | "extern" - | "auto" - | "register" - | "typedef" - | "inline" - | "__inline" - | "__inline__" - | "_Noreturn" - | "__noreturn__" - | "struct" - | "union" - | "enum" - | "_Bool" - | "__attribute__" - | "__attribute" - | "__int128" - | "__int128_t" - | "__uint128_t" - | "__builtin_va_list" - | "typeof" - | "__typeof__" - | "__typeof" - | "_Thread_local" - | "__thread" - | "_Static_assert" - | "static_assert" - ) { + if crate::kw::has_tag(name_id, crate::kw::DECL_START) { return true; } // Also check for typedef names @@ -1716,67 +1666,65 @@ impl Parser<'_> { Some(id) => id, None => break, }; - let name = self.str(name_id); - - match name { + match name_id { // Skip __attribute__ in the type specifier loop - "__attribute__" | "__attribute" => { + crate::kw::GNU_ATTRIBUTE | crate::kw::GNU_ATTRIBUTE2 => { self.skip_extensions(); continue; } - "const" => { + crate::kw::CONST => { self.advance(); modifiers |= TypeModifiers::CONST; } - "volatile" => { 
+ crate::kw::VOLATILE => { self.advance(); modifiers |= TypeModifiers::VOLATILE; } - "static" => { + crate::kw::STATIC => { self.advance(); modifiers |= TypeModifiers::STATIC; } - "extern" => { + crate::kw::EXTERN => { self.advance(); modifiers |= TypeModifiers::EXTERN; } - "register" => { + crate::kw::REGISTER => { self.advance(); modifiers |= TypeModifiers::REGISTER; } - "auto" => { + crate::kw::AUTO => { self.advance(); modifiers |= TypeModifiers::AUTO; } - "typedef" => { + crate::kw::TYPEDEF => { self.advance(); modifiers |= TypeModifiers::TYPEDEF; } - "_Thread_local" | "__thread" => { + crate::kw::THREAD_LOCAL | crate::kw::GNU_THREAD => { self.advance(); modifiers |= TypeModifiers::THREAD_LOCAL; } - "inline" | "__inline" | "__inline__" => { + crate::kw::INLINE | crate::kw::GNU_INLINE2 | crate::kw::GNU_INLINE => { self.advance(); modifiers |= TypeModifiers::INLINE; } - "_Noreturn" | "__noreturn__" => { + crate::kw::NORETURN | crate::kw::GNU_NORETURN => { self.advance(); modifiers |= TypeModifiers::NORETURN; } - "signed" => { + crate::kw::SIGNED => { self.advance(); modifiers |= TypeModifiers::SIGNED; } - "unsigned" => { + crate::kw::UNSIGNED => { self.advance(); modifiers |= TypeModifiers::UNSIGNED; } - "_Complex" => { + crate::kw::COMPLEX => { self.advance(); modifiers |= TypeModifiers::COMPLEX; } - "_Atomic" => { + crate::kw::ATOMIC => { self.advance(); // _Atomic can be: // 1. 
Type specifier: _Atomic(type-name) @@ -1803,7 +1751,7 @@ impl Parser<'_> { modifiers |= TypeModifiers::ATOMIC; } } - "_Alignas" => { + crate::kw::ALIGNAS => { // C11 alignment specifier: _Alignas(type-name) or _Alignas(constant-expression) let alignas_pos = self.current_pos(); self.advance(); @@ -1837,14 +1785,14 @@ impl Parser<'_> { } } } - "short" => { + crate::kw::SHORT => { self.advance(); modifiers |= TypeModifiers::SHORT; if base_kind.is_none() { base_kind = Some(TypeKind::Short); } } - "long" => { + crate::kw::LONG => { self.advance(); if modifiers.contains(TypeModifiers::LONG) { modifiers |= TypeModifiers::LONGLONG; @@ -1859,15 +1807,15 @@ impl Parser<'_> { } } } - "void" => { + crate::kw::VOID => { self.advance(); base_kind = Some(TypeKind::Void); } - "char" => { + crate::kw::CHAR => { self.advance(); base_kind = Some(TypeKind::Char); } - "int" => { + crate::kw::INT => { self.advance(); if base_kind.is_none() || !matches!( @@ -1878,11 +1826,11 @@ impl Parser<'_> { base_kind = Some(TypeKind::Int); } } - "float" => { + crate::kw::FLOAT => { self.advance(); base_kind = Some(TypeKind::Float); } - "double" => { + crate::kw::DOUBLE => { self.advance(); // Handle long double if modifiers.contains(TypeModifiers::LONG) { @@ -1891,42 +1839,42 @@ impl Parser<'_> { base_kind = Some(TypeKind::Double); } } - "_Float16" => { + crate::kw::FLOAT16 => { self.advance(); base_kind = Some(TypeKind::Float16); } - "_Float32" => { + crate::kw::FLOAT32 => { // _Float32 is an alias for float (TS 18661-3 / C23) self.advance(); base_kind = Some(TypeKind::Float); } - "_Float64" => { + crate::kw::FLOAT64 => { // _Float64 is an alias for double (TS 18661-3 / C23) self.advance(); base_kind = Some(TypeKind::Double); } - "_Bool" => { + crate::kw::BOOL => { self.advance(); base_kind = Some(TypeKind::Bool); } - "__int128" => { + crate::kw::INT128 => { self.advance(); base_kind = Some(TypeKind::Int128); } - "__int128_t" => { + crate::kw::INT128_T => { self.advance(); base_kind = 
Some(TypeKind::Int128); } - "__uint128_t" => { + crate::kw::UINT128_T => { self.advance(); modifiers |= TypeModifiers::UNSIGNED; base_kind = Some(TypeKind::Int128); } - "__builtin_va_list" => { + crate::kw::BUILTIN_VA_LIST => { self.advance(); base_kind = Some(TypeKind::VaList); } - "typeof" | "__typeof__" | "__typeof" => { + crate::kw::TYPEOF | crate::kw::GNU_TYPEOF | crate::kw::GNU_TYPEOF2 => { self.advance(); // consume typeof self.expect_special(b'(')?; @@ -1954,7 +1902,7 @@ impl Parser<'_> { ..result_type }); } - "enum" => { + crate::kw::ENUM => { let mut enum_type = self.parse_enum_specifier()?; // Consume trailing qualifiers (e.g., "enum foo const") let trailing_mods = self.consume_type_qualifiers(); @@ -1962,14 +1910,14 @@ impl Parser<'_> { enum_type.modifiers |= modifiers | trailing_mods; return Ok(enum_type); } - "struct" => { + crate::kw::STRUCT => { let mut struct_type = self.parse_struct_or_union_specifier(false)?; // Consume trailing qualifiers (e.g., "struct foo const") let trailing_mods = self.consume_type_qualifiers(); struct_type.modifiers |= modifiers | trailing_mods; return Ok(struct_type); } - "union" => { + crate::kw::UNION => { let mut union_type = self.parse_struct_or_union_specifier(true)?; // Consume trailing qualifiers (e.g., "union foo const") let trailing_mods = self.consume_type_qualifiers(); @@ -2418,25 +2366,25 @@ impl Parser<'_> { // Parse pointer qualifiers (const, volatile, restrict, _Atomic, nullability) while self.peek() == TokenType::Ident { - if let Some(name) = self.get_ident_name(self.current()) { - match name.as_str() { - "const" => { + if let Some(name_id) = self.get_ident_id(self.current()) { + match name_id { + crate::kw::CONST => { self.advance(); ptr_modifiers |= TypeModifiers::CONST; } - "volatile" => { + crate::kw::VOLATILE => { self.advance(); ptr_modifiers |= TypeModifiers::VOLATILE; } - "restrict" => { + crate::kw::RESTRICT => { self.advance(); ptr_modifiers |= TypeModifiers::RESTRICT; } - "_Atomic" => { + 
crate::kw::ATOMIC => { self.advance(); ptr_modifiers |= TypeModifiers::ATOMIC; } - n if super::is_nullability_qualifier(n) => { + _ if super::is_nullability_qualifier(name_id) => { self.advance(); } _ => break, @@ -2509,9 +2457,12 @@ impl Parser<'_> { // Parse optional qualifiers and static (C99 6.7.5.3) // These are valid in function parameter array declarators while self.peek() == TokenType::Ident { - if let Some(name) = self.get_ident_name(self.current()) { - match name.as_str() { - "static" | "const" | "volatile" | "restrict" => { + if let Some(name_id) = self.get_ident_id(self.current()) { + match name_id { + crate::kw::STATIC + | crate::kw::CONST + | crate::kw::VOLATILE + | crate::kw::RESTRICT => { self.advance(); } _ => break, @@ -2770,21 +2721,21 @@ impl Parser<'_> { // Parse pointer qualifiers while self.peek() == TokenType::Ident { - if let Some(name) = self.get_ident_name(self.current()) { - match name.as_str() { - "const" => { + if let Some(name_id) = self.get_ident_id(self.current()) { + match name_id { + crate::kw::CONST => { self.advance(); ptr_modifiers |= TypeModifiers::CONST; } - "volatile" => { + crate::kw::VOLATILE => { self.advance(); ptr_modifiers |= TypeModifiers::VOLATILE; } - "restrict" => { + crate::kw::RESTRICT => { self.advance(); ptr_modifiers |= TypeModifiers::RESTRICT; } - n if super::is_nullability_qualifier(n) => { + _ if super::is_nullability_qualifier(name_id) => { self.advance(); } _ => break, @@ -2875,8 +2826,8 @@ impl Parser<'_> { // Check for (void) if self.peek() == TokenType::Ident { - if let Some(name) = self.get_ident_name(self.current()) { - if name == "void" { + if let Some(name_id) = self.get_ident_id(self.current()) { + if name_id == crate::kw::VOID { let saved_pos = self.pos; self.advance(); if self.is_special(b')') { @@ -2993,9 +2944,8 @@ impl Parser<'_> { if self.peek() != TokenType::Ident { return false; } - if let Some(name_id) = self.get_ident_id(self.current()) { - let name = self.str(name_id); - matches!(name, 
"_Static_assert" | "static_assert") + if let Some(id) = self.get_ident_id(self.current()) { + crate::kw::has_tag(id, crate::kw::ASSERT_KW) } else { false } @@ -3249,21 +3199,21 @@ impl Parser<'_> { // Parse pointer qualifiers while self.peek() == TokenType::Ident { - if let Some(name) = self.get_ident_name(self.current()) { - match name.as_str() { - "const" => { + if let Some(name_id) = self.get_ident_id(self.current()) { + match name_id { + crate::kw::CONST => { self.advance(); ptr_modifiers |= TypeModifiers::CONST; } - "volatile" => { + crate::kw::VOLATILE => { self.advance(); ptr_modifiers |= TypeModifiers::VOLATILE; } - "restrict" => { + crate::kw::RESTRICT => { self.advance(); ptr_modifiers |= TypeModifiers::RESTRICT; } - n if super::is_nullability_qualifier(n) => { + _ if super::is_nullability_qualifier(name_id) => { self.advance(); } _ => break, diff --git a/cc/strings.rs b/cc/strings.rs index 333c0293..84d10d2d 100644 --- a/cc/strings.rs +++ b/cc/strings.rs @@ -85,6 +85,13 @@ impl StringTable { // Pre-intern empty string as ID 0 let empty_id = table.intern_internal(""); debug_assert_eq!(empty_id, StringId::EMPTY); + + // Pre-intern all keywords at deterministic slots 1..=KEYWORD_COUNT + for (i, &s) in crate::kw::KEYWORD_STRINGS.iter().enumerate() { + let id = table.intern_internal(s); + debug_assert_eq!(id.0, (i + 1) as u32, "keyword '{}' got wrong ID", s); + } + table } diff --git a/cc/token/preprocess.rs b/cc/token/preprocess.rs index 6ec5a839..5e43504a 100644 --- a/cc/token/preprocess.rs +++ b/cc/token/preprocess.rs @@ -895,11 +895,11 @@ impl<'a> Preprocessor<'a> { None => return, // Empty directive, ignore }; - // Get directive name - let directive_name = match &directive_token.typ { + // Get directive StringId + let directive_id = match &directive_token.typ { TokenType::Ident => { if let TokenValue::Ident(id) = &directive_token.value { - idents.get_opt(*id).map(|s| s.to_string()) + Some(*id) } else { None } @@ -907,8 +907,8 @@ impl<'a> Preprocessor<'a> 
{ _ => None, }; - let directive = match directive_name { - Some(name) => name, + let directive_id = match directive_id { + Some(id) => id, None => { // Consume rest of line self.skip_to_eol(iter); @@ -916,27 +916,28 @@ impl<'a> Preprocessor<'a> { } }; - match directive.as_str() { - "define" => self.handle_define(iter, idents), - "undef" => self.handle_undef(iter, idents), - "ifdef" => self.handle_ifdef(iter, idents, hash_token.pos), - "ifndef" => self.handle_ifndef(iter, idents, hash_token.pos), - "if" => self.handle_if(iter, idents, hash_token.pos), - "elif" => self.handle_elif(iter, idents), - "else" => self.handle_else(iter), - "endif" => self.handle_endif(iter), - "include" => self.handle_include(iter, output, idents, hash_token, false), - "include_next" => self.handle_include(iter, output, idents, hash_token, true), - "error" => self.handle_error(iter, &hash_token.pos, idents), - "warning" => self.handle_warning(iter, &hash_token.pos, idents), - "pragma" => self.handle_pragma(iter, idents), - "line" => self.handle_line(iter, idents), + match directive_id { + crate::kw::DEFINE => self.handle_define(iter, idents), + crate::kw::UNDEF => self.handle_undef(iter, idents), + crate::kw::IFDEF => self.handle_ifdef(iter, idents, hash_token.pos), + crate::kw::IFNDEF => self.handle_ifndef(iter, idents, hash_token.pos), + crate::kw::IF => self.handle_if(iter, idents, hash_token.pos), + crate::kw::ELIF => self.handle_elif(iter, idents), + crate::kw::ELSE => self.handle_else(iter), + crate::kw::ENDIF => self.handle_endif(iter), + crate::kw::INCLUDE => self.handle_include(iter, output, idents, hash_token, false), + crate::kw::INCLUDE_NEXT => self.handle_include(iter, output, idents, hash_token, true), + crate::kw::PP_ERROR => self.handle_error(iter, &hash_token.pos, idents), + crate::kw::WARNING => self.handle_warning(iter, &hash_token.pos, idents), + crate::kw::PRAGMA => self.handle_pragma(iter, idents), + crate::kw::LINE => self.handle_line(iter, idents), _ => { // Unknown 
directive if !self.is_skipping() { + let name = idents.get_opt(directive_id).unwrap_or("unknown"); diag::warning( hash_token.pos, - &format!("unknown preprocessor directive #{}", directive), + &format!("unknown preprocessor directive #{}", name), ); } self.skip_to_eol(iter); @@ -3205,20 +3206,38 @@ impl<'a> Preprocessor<'a> { return false; } - // Get the argument name - let name = if let Some(tok) = args[0].first() { - self.token_to_string(tok, idents) - } else { - return false; - }; + // Try to get StringId directly for O(1) tag-based lookup + let arg_id = args[0].first().and_then(|tok| { + if let TokenValue::Ident(id) = &tok.value { + Some(*id) + } else { + None + } + }); match builtin { - BuiltinMacro::HasAttribute => is_supported_attribute(&name), + BuiltinMacro::HasAttribute => { + if let Some(id) = arg_id { + crate::kw::has_tag(id, crate::kw::SUPPORTED_ATTR) + } else { + let name = self.token_to_string(args[0].first().unwrap(), idents); + is_supported_attribute(&name) + } + } BuiltinMacro::HasBuiltin => { - // Use centralized builtin registry - crate::builtins::is_builtin(name.as_str()) + if let Some(id) = arg_id { + crate::builtins::is_builtin_id(id) + } else { + let name = self.token_to_string(args[0].first().unwrap(), idents); + crate::builtins::is_builtin(name.as_str()) + } } BuiltinMacro::HasFeature | BuiltinMacro::HasExtension => { + let name = if let Some(tok) = args[0].first() { + self.token_to_string(tok, idents) + } else { + return false; + }; // Return true for features/extensions we implement matches!( name.as_str(), From 2b596f18af964a96d8028f14b33118a86135d71c Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Mon, 23 Mar 2026 14:23:26 +0000 Subject: [PATCH 14/18] =?UTF-8?q?cc:=20fix=20kw.rs=20warnings=20=E2=80=94?= =?UTF-8?q?=20use=20anonymous=20entries=20for=20tag-only=20keywords?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use `_` as the name for keyword entries that are only needed for interning 
and tag-based membership (nullability qualifiers, attribute names, fortified builtins, etc.). The define_ids! macro skips pub const emission for `_` entries, eliminating 71 dead_code warnings without #[allow(dead_code)]. Also fix redundant_closure clippy lint in is_nullability_qualifier call, and remove unused tags() function. Co-Authored-By: Claude Opus 4.6 (1M context) --- cc/kw.rs | 512 ++++++++++++++++++++++----------------------- cc/parse/parser.rs | 2 +- 2 files changed, 251 insertions(+), 263 deletions(-) diff --git a/cc/kw.rs b/cc/kw.rs index f3db3369..a656f4ff 100644 --- a/cc/kw.rs +++ b/cc/kw.rs @@ -47,11 +47,16 @@ pub const DECL_START: u32 = // ============================================================================ /// Helper macro: recursive counter that assigns sequential StringId values starting from 1. +/// Entries named `_` are anonymous — they get interned and tagged but no `pub const` is emitted. macro_rules! define_ids { // Base case: no more entries ($counter:expr; ) => {}; - // Recursive case: emit one const, recurse with counter+1 - ($counter:expr; ($name:ident, $str:literal, $tags:expr) $(, ($name_rest:ident, $str_rest:literal, $tags_rest:expr))* $(,)? ) => { + // Anonymous entry (name is `_`): skip const, just recurse + ($counter:expr; (_, $str:literal, $tags:expr) $(, ($name_rest:tt, $str_rest:literal, $tags_rest:expr))* $(,)? ) => { + define_ids!($counter + 1; $(($name_rest, $str_rest, $tags_rest)),*); + }; + // Named entry: emit pub const, then recurse + ($counter:expr; ($name:ident, $str:literal, $tags:expr) $(, ($name_rest:tt, $str_rest:literal, $tags_rest:expr))* $(,)? ) => { pub const $name: StringId = StringId($counter); define_ids!($counter + 1; $(($name_rest, $str_rest, $tags_rest)),*); }; @@ -59,11 +64,11 @@ macro_rules! define_ids { /// Main keyword definition macro. 
Generates: /// - KEYWORD_COUNT: total number of keywords -/// - One `pub const NAME: StringId` per keyword -/// - KEYWORD_STRINGS: array of string literals -/// - KEYWORD_TAGS: array of tag bitmasks +/// - One `pub const NAME: StringId` per named keyword (entries with `_` are anonymous) +/// - KEYWORD_STRINGS: array of string literals (all entries) +/// - KEYWORD_TAGS: array of tag bitmasks (all entries) macro_rules! define_keywords { - ( $( ($name:ident, $str:literal, $tags:expr) ),* $(,)? ) => { + ( $( ($name:tt, $str:literal, $tags:expr) ),* $(,)? ) => { pub const KEYWORD_COUNT: usize = [ $( $str ),* ].len(); define_ids!(1u32; $( ($name, $str, $tags) ),* ); pub(crate) const KEYWORD_STRINGS: [&str; KEYWORD_COUNT] = [ $( $str ),* ]; @@ -153,19 +158,19 @@ define_keywords! { (GNU_ASM2, "__asm", ASM_KW), // ---- Static assert (ASSERT_KW) ---- - (STATIC_ASSERT, "_Static_assert", ASSERT_KW), - (STATIC_ASSERT_C23, "static_assert", ASSERT_KW), + (_, "_Static_assert", ASSERT_KW), + (_, "static_assert", ASSERT_KW), // ---- Alignas (ALIGNAS_KW) ---- (ALIGNAS, "_Alignas", ALIGNAS_KW), // ---- Nullability qualifiers (NULLABILITY) ---- - (NONNULL, "_Nonnull", NULLABILITY), - (GNU_NONNULL, "__nonnull", NULLABILITY), - (NULLABLE, "_Nullable", NULLABILITY), - (GNU_NULLABLE, "__nullable", NULLABILITY), - (NULL_UNSPECIFIED, "_Null_unspecified", NULLABILITY), - (GNU_NULL_UNSPECIFIED, "__null_unspecified", NULLABILITY), + (_, "_Nonnull", NULLABILITY), + (_, "__nonnull", NULLABILITY), + (_, "_Nullable", NULLABILITY), + (_, "__nullable", NULLABILITY), + (_, "_Null_unspecified", NULLABILITY), + (_, "__null_unspecified", NULLABILITY), // ---- Statement keywords (STMT_KW) ---- (IF, "if", STMT_KW), @@ -189,7 +194,7 @@ define_keywords! { (ALIGNOF_C23, "alignof", 0), // ---- Wide char prefix ---- - (WIDE_PREFIX, "L", 0), + (_, "L", 0), // ---- Preprocessor directives ---- (DEFINE, "define", 0), @@ -206,9 +211,9 @@ define_keywords! 
{ (LINE, "line", 0), // ---- Preprocessor special names ---- - (DEFINED, "defined", 0), - (VA_ARGS, "__VA_ARGS__", 0), - (ONCE, "once", 0), + (_, "defined", 0), + (_, "__VA_ARGS__", 0), + (_, "once", 0), // ---- Predefined identifiers ---- (FUNC, "__func__", 0), @@ -266,19 +271,19 @@ define_keywords! { (BUILTIN_FRAME_ADDRESS, "__builtin_frame_address", BUILTIN), (BUILTIN_RETURN_ADDRESS, "__builtin_return_address", BUILTIN), (BUILTIN_OBJECT_SIZE, "__builtin_object_size", BUILTIN), - (BUILTIN_SNPRINTF_CHK, "__builtin___snprintf_chk", BUILTIN), - (BUILTIN_VSNPRINTF_CHK, "__builtin___vsnprintf_chk", BUILTIN), - (BUILTIN_SPRINTF_CHK, "__builtin___sprintf_chk", BUILTIN), - (BUILTIN_FPRINTF_CHK, "__builtin___fprintf_chk", BUILTIN), - (BUILTIN_PRINTF_CHK, "__builtin___printf_chk", BUILTIN), - (BUILTIN_MEMCPY_CHK, "__builtin___memcpy_chk", BUILTIN), - (BUILTIN_MEMMOVE_CHK, "__builtin___memmove_chk", BUILTIN), - (BUILTIN_MEMSET_CHK, "__builtin___memset_chk", BUILTIN), - (BUILTIN_STPCPY_CHK, "__builtin___stpcpy_chk", BUILTIN), - (BUILTIN_STRCAT_CHK, "__builtin___strcat_chk", BUILTIN), - (BUILTIN_STRCPY_CHK, "__builtin___strcpy_chk", BUILTIN), - (BUILTIN_STRNCAT_CHK, "__builtin___strncat_chk", BUILTIN), - (BUILTIN_STRNCPY_CHK, "__builtin___strncpy_chk", BUILTIN), + (_, "__builtin___snprintf_chk", BUILTIN), + (_, "__builtin___vsnprintf_chk", BUILTIN), + (_, "__builtin___sprintf_chk", BUILTIN), + (_, "__builtin___fprintf_chk", BUILTIN), + (_, "__builtin___printf_chk", BUILTIN), + (_, "__builtin___memcpy_chk", BUILTIN), + (_, "__builtin___memmove_chk", BUILTIN), + (_, "__builtin___memset_chk", BUILTIN), + (_, "__builtin___stpcpy_chk", BUILTIN), + (_, "__builtin___strcat_chk", BUILTIN), + (_, "__builtin___strcpy_chk", BUILTIN), + (_, "__builtin___strncat_chk", BUILTIN), + (_, "__builtin___strncpy_chk", BUILTIN), // ---- C11 atomic builtins (BUILTIN) ---- (C11_ATOMIC_INIT, "__c11_atomic_init", BUILTIN), @@ -303,54 +308,54 @@ define_keywords! 
{ // ---- Supported attribute names (SUPPORTED_ATTR) ---- // Plain forms - (ATTR_NORETURN, "noreturn", SUPPORTED_ATTR), - (ATTR_UNUSED, "unused", SUPPORTED_ATTR), - (ATTR_ALIGNED, "aligned", SUPPORTED_ATTR), - (ATTR_PACKED, "packed", SUPPORTED_ATTR), - (ATTR_DEPRECATED, "deprecated", SUPPORTED_ATTR), - (ATTR_WEAK, "weak", SUPPORTED_ATTR), - (ATTR_SECTION, "section", SUPPORTED_ATTR), - (ATTR_VISIBILITY, "visibility", SUPPORTED_ATTR), - (ATTR_CONSTRUCTOR, "constructor", SUPPORTED_ATTR), - (ATTR_DESTRUCTOR, "destructor", SUPPORTED_ATTR), - (ATTR_USED, "used", SUPPORTED_ATTR), - (ATTR_NOINLINE, "noinline", SUPPORTED_ATTR), - (ATTR_ALWAYS_INLINE, "always_inline", SUPPORTED_ATTR), - (ATTR_HOT, "hot", SUPPORTED_ATTR), - (ATTR_COLD, "cold", SUPPORTED_ATTR), - (ATTR_WARN_UNUSED_RESULT, "warn_unused_result", SUPPORTED_ATTR), - (ATTR_FORMAT, "format", SUPPORTED_ATTR), - (ATTR_FALLTHROUGH, "fallthrough", SUPPORTED_ATTR), - (ATTR_NONSTRING, "nonstring", SUPPORTED_ATTR), - (ATTR_MALLOC, "malloc", SUPPORTED_ATTR), - (ATTR_PURE, "pure", SUPPORTED_ATTR), - (ATTR_SENTINEL, "sentinel", SUPPORTED_ATTR), - (ATTR_NO_SANITIZE_MEMORY, "no_sanitize_memory", SUPPORTED_ATTR), - (ATTR_NO_SANITIZE_ADDRESS, "no_sanitize_address", SUPPORTED_ATTR), - (ATTR_NO_SANITIZE_THREAD, "no_sanitize_thread", SUPPORTED_ATTR), + (_, "noreturn", SUPPORTED_ATTR), + (_, "unused", SUPPORTED_ATTR), + (_, "aligned", SUPPORTED_ATTR), + (_, "packed", SUPPORTED_ATTR), + (_, "deprecated", SUPPORTED_ATTR), + (_, "weak", SUPPORTED_ATTR), + (_, "section", SUPPORTED_ATTR), + (_, "visibility", SUPPORTED_ATTR), + (_, "constructor", SUPPORTED_ATTR), + (_, "destructor", SUPPORTED_ATTR), + (_, "used", SUPPORTED_ATTR), + (_, "noinline", SUPPORTED_ATTR), + (_, "always_inline", SUPPORTED_ATTR), + (_, "hot", SUPPORTED_ATTR), + (_, "cold", SUPPORTED_ATTR), + (_, "warn_unused_result", SUPPORTED_ATTR), + (_, "format", SUPPORTED_ATTR), + (_, "fallthrough", SUPPORTED_ATTR), + (_, "nonstring", SUPPORTED_ATTR), + (_, "malloc", 
SUPPORTED_ATTR), + (_, "pure", SUPPORTED_ATTR), + (_, "sentinel", SUPPORTED_ATTR), + (_, "no_sanitize_memory", SUPPORTED_ATTR), + (_, "no_sanitize_address", SUPPORTED_ATTR), + (_, "no_sanitize_thread", SUPPORTED_ATTR), // GNU forms (__foo__) // Note: __noreturn__ is already defined above with NORETURN_KW | SUPPORTED_ATTR - (GNU_ATTR_UNUSED, "__unused__", SUPPORTED_ATTR), - (GNU_ATTR_ALIGNED, "__aligned__", SUPPORTED_ATTR), - (GNU_ATTR_PACKED, "__packed__", SUPPORTED_ATTR), - (GNU_ATTR_DEPRECATED, "__deprecated__", SUPPORTED_ATTR), - (GNU_ATTR_WEAK, "__weak__", SUPPORTED_ATTR), - (GNU_ATTR_SECTION, "__section__", SUPPORTED_ATTR), - (GNU_ATTR_VISIBILITY, "__visibility__", SUPPORTED_ATTR), - (GNU_ATTR_CONSTRUCTOR, "__constructor__", SUPPORTED_ATTR), - (GNU_ATTR_DESTRUCTOR, "__destructor__", SUPPORTED_ATTR), - (GNU_ATTR_USED, "__used__", SUPPORTED_ATTR), - (GNU_ATTR_NOINLINE, "__noinline__", SUPPORTED_ATTR), - (GNU_ATTR_ALWAYS_INLINE, "__always_inline__", SUPPORTED_ATTR), - (GNU_ATTR_HOT, "__hot__", SUPPORTED_ATTR), - (GNU_ATTR_COLD, "__cold__", SUPPORTED_ATTR), - (GNU_ATTR_WARN_UNUSED_RESULT, "__warn_unused_result__", SUPPORTED_ATTR), - (GNU_ATTR_FORMAT, "__format__", SUPPORTED_ATTR), - (GNU_ATTR_FALLTHROUGH, "__fallthrough__", SUPPORTED_ATTR), - (GNU_ATTR_NONSTRING, "__nonstring__", SUPPORTED_ATTR), - (GNU_ATTR_MALLOC, "__malloc__", SUPPORTED_ATTR), - (GNU_ATTR_PURE, "__pure__", SUPPORTED_ATTR), - (GNU_ATTR_SENTINEL, "__sentinel__", SUPPORTED_ATTR), + (_, "__unused__", SUPPORTED_ATTR), + (_, "__aligned__", SUPPORTED_ATTR), + (_, "__packed__", SUPPORTED_ATTR), + (_, "__deprecated__", SUPPORTED_ATTR), + (_, "__weak__", SUPPORTED_ATTR), + (_, "__section__", SUPPORTED_ATTR), + (_, "__visibility__", SUPPORTED_ATTR), + (_, "__constructor__", SUPPORTED_ATTR), + (_, "__destructor__", SUPPORTED_ATTR), + (_, "__used__", SUPPORTED_ATTR), + (_, "__noinline__", SUPPORTED_ATTR), + (_, "__always_inline__", SUPPORTED_ATTR), + (_, "__hot__", SUPPORTED_ATTR), + (_, "__cold__", 
SUPPORTED_ATTR), + (_, "__warn_unused_result__", SUPPORTED_ATTR), + (_, "__format__", SUPPORTED_ATTR), + (_, "__fallthrough__", SUPPORTED_ATTR), + (_, "__nonstring__", SUPPORTED_ATTR), + (_, "__malloc__", SUPPORTED_ATTR), + (_, "__pure__", SUPPORTED_ATTR), + (_, "__sentinel__", SUPPORTED_ATTR), } // ============================================================================ @@ -365,18 +370,6 @@ pub fn has_tag(id: StringId, mask: u32) -> bool { idx > 0 && idx <= KEYWORD_COUNT && KEYWORD_TAGS[idx - 1] & mask != 0 } -/// Get the full tag bitmask for a StringId. -/// Returns 0 for non-keyword IDs. -#[inline] -pub fn tags(id: StringId) -> u32 { - let idx = id.0 as usize; - if idx > 0 && idx <= KEYWORD_COUNT { - KEYWORD_TAGS[idx - 1] - } else { - 0 - } -} - // ============================================================================ // Tests // ============================================================================ @@ -387,6 +380,13 @@ mod tests { use crate::strings::StringTable; use std::collections::HashSet; + /// Look up a pre-interned keyword by string, panicking if not found. 
+ fn id(table: &StringTable, s: &str) -> StringId { + table + .lookup(s) + .unwrap_or_else(|| panic!("keyword '{}' not interned", s)) + } + #[test] fn test_keyword_ids_deterministic() { let table = StringTable::new(); @@ -400,8 +400,9 @@ mod tests { assert_eq!(table.get(RETURN), "return"); assert_eq!(table.get(BUILTIN_VA_START), "__builtin_va_start"); assert_eq!(table.get(C11_ATOMIC_LOAD), "__c11_atomic_load"); - assert_eq!(table.get(ATTR_NORETURN), "noreturn"); - assert_eq!(table.get(GNU_ATTR_PACKED), "__packed__"); + // Anonymous entries verified via lookup + assert!(table.lookup("noreturn").is_some()); + assert!(table.lookup("__packed__").is_some()); } #[test] @@ -419,187 +420,173 @@ mod tests { #[test] fn test_tags_type_spec() { - let type_specs = [ - VOID, - CHAR, - SHORT, - INT, - LONG, - FLOAT, - DOUBLE, - SIGNED, - UNSIGNED, - BOOL, - COMPLEX, - FLOAT16, - FLOAT32, - FLOAT64, - INT128, - INT128_T, - UINT128_T, - BUILTIN_VA_LIST, - STRUCT, - UNION, - ENUM, - TYPEOF, - GNU_TYPEOF, - GNU_TYPEOF2, - ATOMIC, - ]; - for &id in &type_specs { - assert!( - has_tag(id, TYPE_SPEC), - "'{}' (id={}) should have TYPE_SPEC", - KEYWORD_STRINGS[id.0 as usize - 1], - id.0 - ); + for &s in &[ + "void", + "char", + "short", + "int", + "long", + "float", + "double", + "signed", + "unsigned", + "_Bool", + "_Complex", + "_Float16", + "_Float32", + "_Float64", + "__int128", + "__int128_t", + "__uint128_t", + "__builtin_va_list", + "struct", + "union", + "enum", + "typeof", + "__typeof__", + "__typeof", + "_Atomic", + ] { + let table = StringTable::new(); + let sid = id(&table, s); + assert!(has_tag(sid, TYPE_SPEC), "'{}' should have TYPE_SPEC", s); } } #[test] fn test_tags_qualifier() { - let qualifiers = [ - CONST, - VOLATILE, - RESTRICT, - ATOMIC, - GNU_CONST, - GNU_CONST2, - GNU_VOLATILE, - GNU_VOLATILE2, - GNU_RESTRICT, - GNU_RESTRICT2, - ]; - for &id in &qualifiers { - assert!( - has_tag(id, QUALIFIER), - "'{}' should have QUALIFIER", - KEYWORD_STRINGS[id.0 as usize - 1] - ); 
+ for &s in &[ + "const", + "volatile", + "restrict", + "_Atomic", + "__const__", + "__const", + "__volatile__", + "__volatile", + "__restrict__", + "__restrict", + ] { + let table = StringTable::new(); + let sid = id(&table, s); + assert!(has_tag(sid, QUALIFIER), "'{}' should have QUALIFIER", s); } } #[test] fn test_tags_type_keyword() { - // Exact 25 members that match is_type_keyword() - let type_keywords = [ - VOID, - BOOL, - COMPLEX, - ATOMIC, - CHAR, - SHORT, - INT, - LONG, - FLOAT, - DOUBLE, - FLOAT16, - FLOAT32, - FLOAT64, - SIGNED, - UNSIGNED, - CONST, - VOLATILE, - STRUCT, - UNION, - ENUM, - INT128, - INT128_T, - UINT128_T, - BUILTIN_VA_LIST, - TYPEOF, - GNU_TYPEOF, - GNU_TYPEOF2, - ]; - for &id in &type_keywords { + for &s in &[ + "void", + "_Bool", + "_Complex", + "_Atomic", + "char", + "short", + "int", + "long", + "float", + "double", + "_Float16", + "_Float32", + "_Float64", + "signed", + "unsigned", + "const", + "volatile", + "struct", + "union", + "enum", + "__int128", + "__int128_t", + "__uint128_t", + "__builtin_va_list", + "typeof", + "__typeof__", + "__typeof", + ] { + let table = StringTable::new(); + let sid = id(&table, s); assert!( - has_tag(id, TYPE_KEYWORD), + has_tag(sid, TYPE_KEYWORD), "'{}' should have TYPE_KEYWORD", - KEYWORD_STRINGS[id.0 as usize - 1] + s ); } } #[test] fn test_tags_decl_start() { - // All 43+ entries from current is_declaration_start() - let decl_start = [ - VOID, - CHAR, - SHORT, - INT, - LONG, - FLOAT, - DOUBLE, - FLOAT16, - FLOAT32, - FLOAT64, - COMPLEX, - ATOMIC, - ALIGNAS, - SIGNED, - UNSIGNED, - CONST, - VOLATILE, - STATIC, - EXTERN, - AUTO, - REGISTER, - TYPEDEF, - INLINE, - GNU_INLINE2, - GNU_INLINE, - NORETURN, - GNU_NORETURN, - STRUCT, - UNION, - ENUM, - BOOL, - GNU_ATTRIBUTE, - GNU_ATTRIBUTE2, - INT128, - INT128_T, - UINT128_T, - BUILTIN_VA_LIST, - TYPEOF, - GNU_TYPEOF, - GNU_TYPEOF2, - THREAD_LOCAL, - GNU_THREAD, - STATIC_ASSERT, - STATIC_ASSERT_C23, - ]; - for &id in &decl_start { - assert!( - 
has_tag(id, DECL_START), - "'{}' should have DECL_START", - KEYWORD_STRINGS[id.0 as usize - 1] - ); + for &s in &[ + "void", + "char", + "short", + "int", + "long", + "float", + "double", + "_Float16", + "_Float32", + "_Float64", + "_Complex", + "_Atomic", + "_Alignas", + "signed", + "unsigned", + "const", + "volatile", + "static", + "extern", + "auto", + "register", + "typedef", + "inline", + "__inline", + "__inline__", + "_Noreturn", + "__noreturn__", + "struct", + "union", + "enum", + "_Bool", + "__attribute__", + "__attribute", + "__int128", + "__int128_t", + "__uint128_t", + "__builtin_va_list", + "typeof", + "__typeof__", + "__typeof", + "_Thread_local", + "__thread", + "_Static_assert", + "static_assert", + ] { + let table = StringTable::new(); + let sid = id(&table, s); + assert!(has_tag(sid, DECL_START), "'{}' should have DECL_START", s); } } #[test] fn test_tags_nullability() { - let nullability = [ - NONNULL, - GNU_NONNULL, - NULLABLE, - GNU_NULLABLE, - NULL_UNSPECIFIED, - GNU_NULL_UNSPECIFIED, - ]; - for &id in &nullability { - assert!( - has_tag(id, NULLABILITY), - "'{}' should have NULLABILITY", - KEYWORD_STRINGS[id.0 as usize - 1] - ); + for &s in &[ + "_Nonnull", + "__nonnull", + "_Nullable", + "__nullable", + "_Null_unspecified", + "__null_unspecified", + ] { + let table = StringTable::new(); + let sid = id(&table, s); + assert!(has_tag(sid, NULLABILITY), "'{}' should have NULLABILITY", s); } } #[test] fn test_tags_builtin() { - // Spot-check some builtins + // Spot-check some builtins (named constants) let builtins = [ BUILTIN_VA_START, BUILTIN_VA_END, @@ -616,11 +603,11 @@ mod tests { C11_ATOMIC_STORE, C11_ATOMIC_EXCHANGE, ]; - for &id in &builtins { + for &bid in &builtins { assert!( - has_tag(id, BUILTIN), + has_tag(bid, BUILTIN), "'{}' should have BUILTIN", - KEYWORD_STRINGS[id.0 as usize - 1] + KEYWORD_STRINGS[bid.0 as usize - 1] ); } // Count total builtins @@ -634,23 +621,24 @@ mod tests { #[test] fn test_tags_supported_attr() { - let attrs 
= [ - ATTR_NORETURN, - GNU_NORETURN, - ATTR_UNUSED, - GNU_ATTR_UNUSED, - ATTR_ALIGNED, - GNU_ATTR_ALIGNED, - ATTR_PACKED, - GNU_ATTR_PACKED, - ATTR_ALWAYS_INLINE, - GNU_ATTR_ALWAYS_INLINE, - ]; - for &id in &attrs { + for &s in &[ + "noreturn", + "__noreturn__", + "unused", + "__unused__", + "aligned", + "__aligned__", + "packed", + "__packed__", + "always_inline", + "__always_inline__", + ] { + let table = StringTable::new(); + let sid = id(&table, s); assert!( - has_tag(id, SUPPORTED_ATTR), + has_tag(sid, SUPPORTED_ATTR), "'{}' should have SUPPORTED_ATTR", - KEYWORD_STRINGS[id.0 as usize - 1] + s ); } } diff --git a/cc/parse/parser.rs b/cc/parse/parser.rs index 5d2f14e9..39621595 100644 --- a/cc/parse/parser.rs +++ b/cc/parse/parser.rs @@ -646,7 +646,7 @@ impl<'a> Parser<'a> { self.peek() == TokenType::Ident && self .get_ident_id(self.current()) - .is_some_and(|id| super::is_nullability_qualifier(id)) + .is_some_and(super::is_nullability_qualifier) } /// Check if current token is __asm or __asm__ From 2b6a81d06f3dcf80686f65b43b6450b3495541bb Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Mon, 23 Mar 2026 14:25:38 +0000 Subject: [PATCH 15/18] cc: promote keyword ID assertions from debug_assert to assert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keyword ID determinism is a correctness requirement — wrong IDs cause silent misparsing in release builds. Use unconditional assert_eq!. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cc/strings.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cc/strings.rs b/cc/strings.rs index 84d10d2d..fdbb4416 100644 --- a/cc/strings.rs +++ b/cc/strings.rs @@ -84,12 +84,12 @@ impl StringTable { }; // Pre-intern empty string as ID 0 let empty_id = table.intern_internal(""); - debug_assert_eq!(empty_id, StringId::EMPTY); + assert_eq!(empty_id, StringId::EMPTY); // Pre-intern all keywords at deterministic slots 1..=KEYWORD_COUNT for (i, &s) in crate::kw::KEYWORD_STRINGS.iter().enumerate() { let id = table.intern_internal(s); - debug_assert_eq!(id.0, (i + 1) as u32, "keyword '{}' got wrong ID", s); + assert_eq!(id.0, (i + 1) as u32, "keyword '{}' got wrong ID", s); } table From 4f4f46d5acf2aadf06f4e54bacb48ae302f91d7f Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Mon, 23 Mar 2026 14:28:39 +0000 Subject: [PATCH 16/18] cc: fix potential panic in eval_has_builtin on empty macro args Extract first_tok early and return false if args[0] is empty, avoiding unwrap() panic on __has_builtin()/__has_attribute() with no tokens. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cc/token/preprocess.rs | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/cc/token/preprocess.rs b/cc/token/preprocess.rs index 5e43504a..19d46836 100644 --- a/cc/token/preprocess.rs +++ b/cc/token/preprocess.rs @@ -3206,21 +3206,25 @@ impl<'a> Preprocessor<'a> { return false; } + // Try to get the first token from the argument list + let first_tok = match args[0].first() { + Some(tok) => tok, + None => return false, + }; + // Try to get StringId directly for O(1) tag-based lookup - let arg_id = args[0].first().and_then(|tok| { - if let TokenValue::Ident(id) = &tok.value { - Some(*id) - } else { - None - } - }); + let arg_id = if let TokenValue::Ident(id) = &first_tok.value { + Some(*id) + } else { + None + }; match builtin { BuiltinMacro::HasAttribute => { if let Some(id) = arg_id { crate::kw::has_tag(id, crate::kw::SUPPORTED_ATTR) } else { - let name = self.token_to_string(args[0].first().unwrap(), idents); + let name = self.token_to_string(first_tok, idents); is_supported_attribute(&name) } } @@ -3228,16 +3232,12 @@ impl<'a> Preprocessor<'a> { if let Some(id) = arg_id { crate::builtins::is_builtin_id(id) } else { - let name = self.token_to_string(args[0].first().unwrap(), idents); + let name = self.token_to_string(first_tok, idents); crate::builtins::is_builtin(name.as_str()) } } BuiltinMacro::HasFeature | BuiltinMacro::HasExtension => { - let name = if let Some(tok) = args[0].first() { - self.token_to_string(tok, idents) - } else { - return false; - }; + let name = self.token_to_string(first_tok, idents); // Return true for features/extensions we implement matches!( name.as_str(), From 45ae13c633bdf4d6593235168c01a6440ad367c5 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Mon, 23 Mar 2026 14:29:46 +0000 Subject: [PATCH 17/18] cc: add test ensuring SUPPORTED_BUILTINS and kw BUILTIN tags stay in sync Cross-checks that every entry in the SUPPORTED_BUILTINS string list 
is pre-interned in kw.rs with the BUILTIN tag, preventing the two sources from silently diverging. Co-Authored-By: Claude Opus 4.6 (1M context) --- cc/builtins.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/cc/builtins.rs b/cc/builtins.rs index b4a7d27c..21de2862 100644 --- a/cc/builtins.rs +++ b/cc/builtins.rs @@ -128,6 +128,7 @@ pub fn is_builtin_id(id: crate::strings::StringId) -> bool { #[cfg(test)] mod tests { use super::*; + use crate::strings::StringTable; #[test] fn test_is_builtin() { @@ -137,4 +138,21 @@ mod tests { assert!(!is_builtin("__builtin_nonexistent")); assert!(!is_builtin("printf")); } + + /// Verify every SUPPORTED_BUILTINS entry has the BUILTIN tag in kw.rs, + /// ensuring the string list and tag-based lookup can never diverge. + #[test] + fn test_supported_builtins_match_kw_tags() { + let table = StringTable::new(); + for &name in SUPPORTED_BUILTINS { + let id = table + .lookup(name) + .unwrap_or_else(|| panic!("builtin '{}' not pre-interned in kw.rs", name)); + assert!( + is_builtin_id(id), + "builtin '{}' is in SUPPORTED_BUILTINS but missing BUILTIN tag in kw.rs", + name + ); + } + } } From 3e16b3db119ef259525d09df17deefe5d5c40c1a Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Mon, 23 Mar 2026 14:43:54 +0000 Subject: [PATCH 18/18] cc: fix aarch64 float16 conversion using fmov instead of fcvt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit emit_float_to_float() only recognized Float↔Double conversions in its needs_convert check, causing Float16↔Float/Double conversions to emit fmov (bit-copy) instead of fcvt (type conversion). The float16 bit pattern was zero-extended to 32 bits rather than properly converted, producing wrong values for all float16 arithmetic on aarch64. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cc/arch/aarch64/float.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cc/arch/aarch64/float.rs b/cc/arch/aarch64/float.rs index a220884d..05e255be 100644 --- a/cc/arch/aarch64/float.rs +++ b/cc/arch/aarch64/float.rs @@ -587,6 +587,14 @@ impl Aarch64CodeGen { let src_kind = insn.src_typ.map(|t| types.kind(t)); let dst_kind = insn.typ.map(|t| types.kind(t)); let needs_convert = match (src_kind, dst_kind) { + ( + Some(TypeKind::Float16), + Some(TypeKind::Float | TypeKind::Double | TypeKind::LongDouble), + ) => true, + ( + Some(TypeKind::Float | TypeKind::Double | TypeKind::LongDouble), + Some(TypeKind::Float16), + ) => true, (Some(TypeKind::Float), Some(TypeKind::Double | TypeKind::LongDouble)) => true, (Some(TypeKind::Double | TypeKind::LongDouble), Some(TypeKind::Float)) => true, // On aarch64, Double and LongDouble are the same, no conversion needed