From 1f08a097d4bc5087475ca8f8f98f58e773589fad Mon Sep 17 00:00:00 2001 From: Changyuan Lyu Date: Sat, 7 Feb 2026 10:45:55 -0800 Subject: [PATCH 1/4] fix(kvm): preserve lower bits of addr_hi in MSI address translation Ensure the lower 8 bits of the high 32 bits of the MSI message address are preserved during translation. Signed-off-by: Changyuan Lyu --- alioth/src/arch/x86_64/intr.rs | 6 +++--- alioth/src/hv/kvm/vm/vm_x86_64.rs | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/alioth/src/arch/x86_64/intr.rs b/alioth/src/arch/x86_64/intr.rs index 6b798072..37b68b82 100644 --- a/alioth/src/arch/x86_64/intr.rs +++ b/alioth/src/arch/x86_64/intr.rs @@ -21,8 +21,8 @@ bitfield! { pub mode, set_mode : 2; pub redirection, set_redirection : 3; pub remappable, set_remappable : 4; - pub reserved, set_reserved : 11, 5; - pub dest_id, set_dest_id : 19, 12; + pub u8, virt_dest_id_hi, set_virt_dest_id_hi : 11, 5; + pub u8, dest_id, set_dest_id : 19, 12; pub identifier, _: 31, 20; } @@ -30,5 +30,5 @@ bitfield! 
{ #[derive(Copy, Clone, Default, PartialEq, Eq, Hash)] pub struct MsiAddrHi(u32); impl Debug; - pub dest_id, set_dest_id : 31, 8; + pub dest_id_hi, set_dest_id_hi : 31, 8; } diff --git a/alioth/src/hv/kvm/vm/vm_x86_64.rs b/alioth/src/hv/kvm/vm/vm_x86_64.rs index 5b1aabac..1552b063 100644 --- a/alioth/src/hv/kvm/vm/vm_x86_64.rs +++ b/alioth/src/hv/kvm/vm/vm_x86_64.rs @@ -32,13 +32,13 @@ use crate::sys::sev::{ pub fn translate_msi_addr(addr_lo: u32, addr_hi: u32) -> (u32, u32) { let mut addr_lo = MsiAddrLo(addr_lo); - if addr_lo.reserved() == 0 || addr_lo.remappable() || addr_hi != 0 { - return (addr_lo.0, addr_hi); + let mut addr_hi = MsiAddrHi(addr_hi); + if addr_lo.virt_dest_id_hi() == 0 || addr_lo.remappable() || addr_hi.dest_id_hi() != 0 { + return (addr_lo.0, addr_hi.0); } - let mut addr_hi = MsiAddrHi(0); - addr_hi.set_dest_id(addr_lo.reserved()); - addr_lo.set_reserved(0); + addr_hi.set_dest_id_hi(addr_lo.virt_dest_id_hi() as u32); + addr_lo.set_virt_dest_id_hi(0); (addr_lo.0, addr_hi.0) } From 0ef4a224c7f2ac68284eecfe3dc74e9c9cbceecc Mon Sep 17 00:00:00 2001 From: Changyuan Lyu Date: Sat, 7 Feb 2026 10:55:27 -0800 Subject: [PATCH 2/4] feat(ioapic): implement userspace emulated IOAPIC Implement a userspace emulated IOAPIC as an MMIO device. This is required for TDX support, which needs KVM_CAP_SPLIT_IRQCHIP, disabling the in-kernel IOAPIC. The primary use for the IOAPIC is for the serial console. 
Signed-off-by: Changyuan Lyu --- alioth/src/arch/x86_64/intr.rs | 39 +++++++ alioth/src/arch/x86_64/ioapic.rs | 63 +++++++++++ alioth/src/arch/x86_64/layout.rs | 2 + alioth/src/arch/x86_64/x86_64.rs | 1 + alioth/src/device/device.rs | 2 + alioth/src/device/ioapic.rs | 185 +++++++++++++++++++++++++++++++ alioth/src/device/ioapic_test.rs | 100 +++++++++++++++++ alioth/src/utils/utils.rs | 2 + 8 files changed, 394 insertions(+) create mode 100644 alioth/src/arch/x86_64/ioapic.rs create mode 100644 alioth/src/device/ioapic.rs create mode 100644 alioth/src/device/ioapic_test.rs diff --git a/alioth/src/arch/x86_64/intr.rs b/alioth/src/arch/x86_64/intr.rs index 37b68b82..f0eb68d1 100644 --- a/alioth/src/arch/x86_64/intr.rs +++ b/alioth/src/arch/x86_64/intr.rs @@ -14,6 +14,34 @@ use bitfield::bitfield; +use crate::consts; + +consts! { + pub struct DeliveryMode(u8) { + FIXED = 0b000; + LOW_PRIORITY = 0b001; + SMI = 0b010; + NMI = 0b100; + INIT = 0b101; + STARTUP_IPI = 0b110; + EXTINT = 0b111; + } +} + +consts! { + pub struct TriggerMode(bool) { + EDGE = false; + LEVEL = true; + } +} + +consts! { + pub struct DestinationMode(bool) { + PHYSICAL = false; + LOGICAL = true; + } +} + bitfield! { #[derive(Copy, Clone, Default, PartialEq, Eq, Hash)] pub struct MsiAddrLo(u32); @@ -32,3 +60,14 @@ bitfield! { impl Debug; pub dest_id_hi, set_dest_id_hi : 31, 8; } + +bitfield! 
{ + #[derive(Copy, Clone, Default, PartialEq, Eq, Hash)] + pub struct MsiData(u32); + impl Debug; + impl new; + pub u8, vector, set_vector : 7, 0; + pub u8, from into DeliveryMode, delivery_mode, set_delivery_mode : 11, 8; + pub u8, level, set_level : 14; + pub trigger_mode, set_trigger_mode : 15; +} diff --git a/alioth/src/arch/x86_64/ioapic.rs b/alioth/src/arch/x86_64/ioapic.rs new file mode 100644 index 00000000..9e15bc19 --- /dev/null +++ b/alioth/src/arch/x86_64/ioapic.rs @@ -0,0 +1,63 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use bitfield::bitfield; + +use crate::arch::intr::DeliveryMode; + +pub const IOREGSEL: u64 = 0x00; +pub const IOWIN: u64 = 0x10; + +pub const IOAPICID: u8 = 0x00; +pub const IOAPICVER: u8 = 0x01; +pub const IOAPICARB: u8 = 0x02; +pub const IOREDTBL_BASE: u8 = 0x10; +pub const IOREDTBL_MAX: u8 = 0x3f; + +pub const NUM_PINS: u8 = 24; + +pub const IOAPIC_VER: u8 = 0x11; + +bitfield! { + #[derive(Copy, Clone, Default, PartialEq, Eq, Hash)] + pub struct RegId(u32); + impl Debug; + impl new; + pub u8, id, set_id : 27, 24; +} + +bitfield! { + #[derive(Copy, Clone, Default, PartialEq, Eq, Hash)] + pub struct RegVer(u32); + impl Debug; + impl new; + pub u8, version, set_version : 7, 0; + pub u8, max_entry, set_max_entry : 23, 16; +} + +bitfield! 
{ + #[derive(Copy, Clone, Default, PartialEq, Eq, Hash)] + pub struct RedirectEntry(u64); + impl Debug; + pub u8, vector, set_vector : 7, 0; + pub u8, from into DeliveryMode, delivery_mode, set_delivery_mode : 10, 8; + pub dest_mode, set_dest_mode : 11; + pub delivery_status, set_delivery_status : 12; + pub priority, set_priority : 13; + pub irr, set_irr : 14; + pub trigger_mode, set_trigger_mode : 15; + pub masked, set_masked : 16; + pub u8, virt_dest_id_hi, set_virt_dest_id_hi : 55, 49; + pub u8, dest_id, set_dest_id : 63, 56; +} diff --git a/alioth/src/arch/x86_64/layout.rs b/alioth/src/arch/x86_64/layout.rs index d6a0ada3..3cc685cc 100644 --- a/alioth/src/arch/x86_64/layout.rs +++ b/alioth/src/arch/x86_64/layout.rs @@ -52,6 +52,8 @@ pub const PCIE_CONFIG_START: u64 = 0xe000_0000; // 3.5 GiB pub const PCIE_CONFIG_END: u64 = 0xf000_0000; // 3.75 GiB, size = 256 MiB pub const IOAPIC_START: u64 = 0xfec0_0000; +pub const IOAPIC_END: u64 = IOAPIC_START + 0x100; + pub const APIC_START: u64 = 0xfee0_0000; pub const MEM_64_START: u64 = 0x1_0000_0000; // 4GiB diff --git a/alioth/src/arch/x86_64/x86_64.rs b/alioth/src/arch/x86_64/x86_64.rs index a88a72ed..d5727553 100644 --- a/alioth/src/arch/x86_64/x86_64.rs +++ b/alioth/src/arch/x86_64/x86_64.rs @@ -14,6 +14,7 @@ pub mod cpuid; pub mod intr; +pub mod ioapic; pub mod layout; pub mod msr; pub mod paging; diff --git a/alioth/src/device/device.rs b/alioth/src/device/device.rs index e69f4cea..048193a9 100644 --- a/alioth/src/device/device.rs +++ b/alioth/src/device/device.rs @@ -22,6 +22,8 @@ pub mod console; #[cfg(target_arch = "x86_64")] #[path = "fw_cfg/fw_cfg.rs"] pub mod fw_cfg; +#[cfg(target_arch = "x86_64")] +pub mod ioapic; pub mod net; #[cfg(target_arch = "aarch64")] pub mod pl011; diff --git a/alioth/src/device/ioapic.rs b/alioth/src/device/ioapic.rs new file mode 100644 index 00000000..a7791511 --- /dev/null +++ b/alioth/src/device/ioapic.rs @@ -0,0 +1,185 @@ +// Copyright 2026 Google LLC +// +// Licensed under the 
Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Emulated x86 IO APIC device. +//! See: https://download.intel.com/design/chipsets/datashts/29056601.pdf chapter 3.2 + +use parking_lot::Mutex; + +use crate::arch::intr::{DestinationMode, MsiAddrLo, MsiData, TriggerMode}; +use crate::arch::ioapic::{ + IOAPIC_VER, IOAPICARB, IOAPICID, IOAPICVER, IOREDTBL_BASE, IOREDTBL_MAX, IOREGSEL, IOWIN, + NUM_PINS, RedirectEntry, RegId, RegVer, +}; +use crate::arch::layout::{APIC_START, IOAPIC_END, IOAPIC_START}; +use crate::device::{self, MmioDev, Pause}; +use crate::hv::MsiSender; +use crate::mem; +use crate::mem::emulated::{Action, Mmio}; + +#[derive(Debug, Default)] +struct IoApicRegs { + id: RegId, + redirtbl: [RedirectEntry; NUM_PINS as usize], + select: u8, +} + +#[derive(Debug)] +pub struct IoApic { + regs: Mutex, + msi_sender: M, +} + +impl IoApic { + pub fn new(msi_sender: M) -> Self { + Self { + regs: Mutex::new(IoApicRegs::default()), + msi_sender, + } + } + + pub fn service_pin(&self, pin: u8) -> crate::hv::Result<()> { + let regs = self.regs.lock(); + let Some(entry) = regs.redirtbl.get(pin as usize) else { + log::warn!("IOAPIC: invalid pin {pin}"); + return Ok(()); + }; + + if entry.masked() { + return Ok(()); + } + + if entry.dest_mode() == DestinationMode::LOGICAL.raw() { + log::warn!("IOAPIC: logical destination is not supported"); + return Ok(()); + } + if entry.trigger_mode() == TriggerMode::LEVEL.raw() { + log::warn!("IOAPIC: level-triggered interrupts are not supported"); 
+ return Ok(()); + } + + let mut addr_lo = MsiAddrLo(APIC_START as u32); + addr_lo.set_dest_id(entry.dest_id()); + addr_lo.set_virt_dest_id_hi(entry.virt_dest_id_hi()); + + let data = MsiData::new( + entry.vector(), + entry.delivery_mode(), + false, + entry.trigger_mode(), + ); + + self.msi_sender.send(addr_lo.0 as u64, data.0) + } + + fn read_reg(&self, regs: &IoApicRegs) -> u32 { + match regs.select { + IOAPICID | IOAPICARB => regs.id.0, + IOAPICVER => RegVer::new(IOAPIC_VER, NUM_PINS - 1).0, + select @ IOREDTBL_BASE..=IOREDTBL_MAX => { + let pin = ((select - IOREDTBL_BASE) >> 1) as usize; + let Some(entry) = regs.redirtbl.get(pin) else { + log::warn!("IOAPIC: read from unknown pin {pin:#x}"); + return 0; + }; + if select % 2 == 0 { + entry.0 as u32 + } else { + (entry.0 >> 32) as u32 + } + } + unknown => { + log::warn!("IOAPIC: read from unknown register {unknown:#x}"); + 0 + } + } + } + + fn write_reg(&self, regs: &mut IoApicRegs, val: u32) { + match regs.select { + IOAPICID => regs.id.set_id(RegId(val).id()), + IOAPICVER | IOAPICARB => log::warn!("IOAPIC: IOAPICVER and IOAPICARB are read-only"), + select @ IOREDTBL_BASE..=IOREDTBL_MAX => { + let pin = ((select - IOREDTBL_BASE) >> 1) as usize; + let Some(entry) = regs.redirtbl.get_mut(pin) else { + log::warn!("IOAPIC: write to unknown pin {pin:#x}"); + return; + }; + entry.0 = if select % 2 == 0 { + (entry.0 & 0xffffffff00000000) | (val as u64) + } else { + (entry.0 & 0x00000000ffffffff) | ((val as u64) << 32) + }; + } + unknown => { + log::warn!("IOAPIC: write to unknown register {unknown:#x} with value {val:#x}"); + } + } + } +} + +impl Mmio for IoApic { + fn size(&self) -> u64 { + IOAPIC_END - IOAPIC_START + } + + fn read(&self, offset: u64, size: u8) -> mem::Result { + if size != 4 { + log::warn!("IOAPIC: unaligned read: offset={offset:#x} size={size}"); + return Ok(0); + } + let regs = self.regs.lock(); + let val = match offset { + IOREGSEL => regs.select as u32, + IOWIN => self.read_reg(&regs), + _ => { + 
log::warn!("IOAPIC: read from unknown offset {offset:#x}"); + 0 + } + }; + Ok(val as u64) + } + + fn write(&self, offset: u64, size: u8, val: u64) -> mem::Result { + if size != 4 { + log::warn!("IOAPIC: unaligned write: offset={offset:#x} size={size}"); + return Ok(Action::None); + } + let mut regs = self.regs.lock(); + match offset { + IOREGSEL => regs.select = val as u8, + IOWIN => self.write_reg(&mut regs, val as u32), + _ => { + log::warn!("IOAPIC: write to unknown offset {offset:#x} with value {val:#x}"); + } + } + Ok(Action::None) + } +} + +impl Pause for IoApic { + fn pause(&self) -> device::Result<()> { + Ok(()) + } + + fn resume(&self) -> device::Result<()> { + Ok(()) + } +} + +impl MmioDev for IoApic {} + +#[cfg(test)] +#[path = "ioapic_test.rs"] +mod tests; diff --git a/alioth/src/device/ioapic_test.rs b/alioth/src/device/ioapic_test.rs new file mode 100644 index 00000000..20fd3d9e --- /dev/null +++ b/alioth/src/device/ioapic_test.rs @@ -0,0 +1,100 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::sync::Arc; + +use assert_matches::assert_matches; +use parking_lot::Mutex; + +use crate::hv::tests::TestIrqFd; +use crate::hv::{Error as HvError, MsiSender}; +use crate::mem::emulated::Mmio; + +use super::{IOREGSEL, IOWIN, IoApic}; + +#[derive(Debug, Default)] +struct TestMsiSender { + messages: Arc>>, +} + +impl MsiSender for TestMsiSender { + type IrqFd = TestIrqFd; + + fn send(&self, addr: u64, data: u32) -> Result<(), HvError> { + self.messages.lock().push((addr, data)); + Ok(()) + } + + fn create_irqfd(&self) -> Result { + Ok(TestIrqFd::default()) + } +} + +#[test] +fn test_ioapic_read_write() { + let io_apic = IoApic::new(TestMsiSender::default()); + + // Write to select register + io_apic.write(IOREGSEL, 4, 0x10).unwrap(); + assert_eq!(io_apic.read(IOREGSEL, 4).unwrap(), 0x10); + + // Write to window register + io_apic.write(IOWIN, 4, 0x12345678).unwrap(); + + // Read back from window register + assert_eq!(io_apic.read(IOWIN, 4).unwrap(), 0x12345678); + + // Select upper part of redirection table entry + io_apic.write(IOREGSEL, 4, 0x11).unwrap(); + + // Write to window register + io_apic.write(IOWIN, 4, 0xabcdef00).unwrap(); + + // Read back from window register + assert_eq!(io_apic.read(IOWIN, 4).unwrap(), 0xabcdef00); + + // Check redirection table entry + let regs = io_apic.regs.lock(); + assert_eq!(regs.redirtbl[0].0, 0xabcdef0012345678); +} + +#[test] +fn test_ioapic_service_pin() { + let msi_sender = TestMsiSender::default(); + let messages = msi_sender.messages.clone(); + let io_apic = IoApic::new(msi_sender); + + // Configure redirection table entry for pin 4 + // Vector 0x24, destination 2, physical, edge triggered + let redirtbl_entry = (2u64 << 56) | 0x24; + + // IOREDTBL for pin 4 is at registers 0x10 + 4*2 = 0x18 and 0x19 + io_apic.write(IOREGSEL, 4, 0x18).unwrap(); + io_apic + .write(IOWIN, 4, (redirtbl_entry & 0xFFFFFFFF) as u64) + .unwrap(); + io_apic.write(IOREGSEL, 4, 0x19).unwrap(); + io_apic + .write(IOWIN, 4, (redirtbl_entry 
>> 32) as u64) + .unwrap(); + + // Service pin 4 + io_apic.service_pin(4).unwrap(); + + // Check that an MSI was sent + let messages = messages.lock(); + // Expected addr: 0xfee00000 | (dest << 12) = 0xFEE02000 + // Expected data: (trigger_mode=0 << 15) | (delivery_mode=0 << 8) | vector=0x24 = 0x24 + assert_matches!(messages.as_slice(), [(0xfee02000, 0x24)]); +} diff --git a/alioth/src/utils/utils.rs b/alioth/src/utils/utils.rs index 3d1a9730..61d00d21 100644 --- a/alioth/src/utils/utils.rs +++ b/alioth/src/utils/utils.rs @@ -144,6 +144,7 @@ macro_rules! consts { pub const fn name(self) -> &'static str { match self { $($EnumName::$VARIANT => stringify!($VARIANT),)* + #[allow(unreachable_patterns)] _ => "Unknown" } } @@ -157,6 +158,7 @@ macro_rules! consts { f.write_str("::")?; f.write_str(stringify!($VARIANT)) })* + #[allow(unreachable_patterns)] _ => { ::core::fmt::Write::write_char(f, '(')?; ::core::fmt::Debug::fmt(&self.0, f)?; From ea0f27bc40677cf0f4b37103dbbd97e7906d49b5 Mon Sep 17 00:00:00 2001 From: Changyuan Lyu Date: Sat, 7 Feb 2026 11:03:09 -0800 Subject: [PATCH 3/4] refactor(kvm): simplify capability enabling Consolidate KVM capability checks and enabling into a single helper function to reduce boilerplate. 
Signed-off-by: Changyuan Lyu --- alioth/src/hv/kvm/vm/vm.rs | 10 ++++++++-- alioth/src/hv/kvm/vm/vm_x86_64.rs | 18 +++--------------- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/alioth/src/hv/kvm/vm/vm.rs b/alioth/src/hv/kvm/vm/vm.rs index ebdb1d1f..9c27d30d 100644 --- a/alioth/src/hv/kvm/vm/vm.rs +++ b/alioth/src/hv/kvm/vm/vm.rs @@ -128,8 +128,14 @@ impl VmInner { check_extension(&self.fd, id) } - pub fn enable_cap(&self, cap: &KvmEnableCap) -> Result<(), KvmError> { - unsafe { kvm_enable_cap(&self.fd, cap) }.context(kvm_error::EnableCap { cap: cap.cap })?; + pub fn enable_cap(&self, cap: KvmCap, arg0: u64) -> Result<(), KvmError> { + let request = KvmEnableCap { + cap, + args: [arg0, 0, 0, 0], + flags: 0, + pad: [0; 64], + }; + unsafe { kvm_enable_cap(&self.fd, &request) }.context(kvm_error::EnableCap { cap })?; Ok(()) } } diff --git a/alioth/src/hv/kvm/vm/vm_x86_64.rs b/alioth/src/hv/kvm/vm/vm_x86_64.rs index 1552b063..0dbdf45c 100644 --- a/alioth/src/hv/kvm/vm/vm_x86_64.rs +++ b/alioth/src/hv/kvm/vm/vm_x86_64.rs @@ -22,7 +22,7 @@ use crate::hv::kvm::sev::SevFd; use crate::hv::kvm::{KvmError, KvmVm, kvm_error}; use crate::hv::{Coco, Kvm, Result, VmConfig, error}; use crate::sys::kvm::{ - KvmCap, KvmCreateGuestMemfd, KvmEnableCap, KvmVmType, KvmX2apicApiFlag, kvm_create_guest_memfd, + KvmCap, KvmCreateGuestMemfd, KvmVmType, KvmX2apicApiFlag, kvm_create_guest_memfd, kvm_create_irqchip, kvm_memory_encrypt_op, kvm_set_identity_map_addr, kvm_set_tss_addr, }; use crate::sys::sev::{ @@ -98,13 +98,7 @@ impl KvmVm { } Some(Coco::AmdSnp { .. 
}) => { let bitmap = self.vm.check_extension(KvmCap::EXIT_HYPERCALL)?.get(); - let request = KvmEnableCap { - cap: KvmCap::EXIT_HYPERCALL, - args: [bitmap as _, 0, 0, 0], - flags: 0, - pad: [0; 64], - }; - self.vm.enable_cap(&request)?; + self.vm.enable_cap(KvmCap::EXIT_HYPERCALL, bitmap as u64)?; let mut init = KvmSevInit::default(); self.sev_op(KvmSevCmdId::INIT2, Some(&mut init))?; log::debug!("{}: snp init: {init:#x?}", self.vm); @@ -115,13 +109,7 @@ impl KvmVm { let x2apic_caps = KvmX2apicApiFlag::USE_32BIT_IDS | KvmX2apicApiFlag::DISABLE_BROADCAST_QUIRK; - let request = KvmEnableCap { - cap: KvmCap::X2APIC_API, - args: [x2apic_caps.bits(), 0, 0, 0], - flags: 0, - pad: [0; 64], - }; - if let Err(e) = self.vm.enable_cap(&request) { + if let Err(e) = self.vm.enable_cap(KvmCap::X2APIC_API, x2apic_caps.bits()) { log::error!("Failed to enable KVM_CAP_X2APIC_API: {e:?}"); } unsafe { kvm_create_irqchip(&self.vm.fd) }.context(error::CreateDevice)?; From 9f815d5a08d36e386816315a28475358fb773e54 Mon Sep 17 00:00:00 2001 From: Changyuan Lyu Date: Sat, 7 Feb 2026 11:05:04 -0800 Subject: [PATCH 4/4] feat(x86_64): use userspace emulated IOAPIC Remove the `IrqSender` trait on x86_64 as it is no longer needed. 
Signed-off-by: Changyuan Lyu --- alioth/src/board/board.rs | 3 --- alioth/src/board/board_x86_64.rs | 20 +++++++++------ alioth/src/device/serial.rs | 42 +++++++++++++++++-------------- alioth/src/hv/hv.rs | 5 ++++ alioth/src/hv/kvm/vm/vm.rs | 16 +++++++++--- alioth/src/hv/kvm/vm/vm_x86_64.rs | 5 ++-- alioth/src/sys/linux/kvm.rs | 1 + alioth/src/vm/vm.rs | 4 +-- 8 files changed, 59 insertions(+), 37 deletions(-) diff --git a/alioth/src/board/board.rs b/alioth/src/board/board.rs index ede0651c..7ffcc751 100644 --- a/alioth/src/board/board.rs +++ b/alioth/src/board/board.rs @@ -211,7 +211,6 @@ where pub config: BoardConfig, pub payload: RwLock>, pub io_devs: RwLock)>>, - #[cfg(target_arch = "aarch64")] pub mmio_devs: RwLock)>>, pub pci_bus: PciBus, #[cfg(target_arch = "x86_64")] @@ -238,7 +237,6 @@ where payload: RwLock::new(None), vcpus: Arc::new(RwLock::new(Vec::new())), io_devs: RwLock::new(Vec::new()), - #[cfg(target_arch = "aarch64")] mmio_devs: RwLock::new(Vec::new()), pci_bus: PciBus::new(), #[cfg(target_arch = "x86_64")] @@ -417,7 +415,6 @@ where for (port, dev) in self.io_devs.read().iter() { self.memory.add_io_dev(*port, dev.clone())?; } - #[cfg(target_arch = "aarch64")] for (addr, dev) in self.mmio_devs.read().iter() { self.memory.add_mmio_dev(*addr, dev.clone())?; } diff --git a/alioth/src/board/board_x86_64.rs b/alioth/src/board/board_x86_64.rs index ef80d1ce..d4f1a83d 100644 --- a/alioth/src/board/board_x86_64.rs +++ b/alioth/src/board/board_x86_64.rs @@ -15,7 +15,6 @@ use std::arch::x86_64::{__cpuid, CpuidResult}; use std::collections::HashMap; use std::iter::zip; -use std::marker::PhantomData; use std::mem::{offset_of, size_of, size_of_val}; use std::path::Path; use std::sync::Arc; @@ -27,13 +26,14 @@ use zerocopy::{FromBytes, FromZeros, Immutable, IntoBytes}; use crate::arch::cpuid::CpuidIn; use crate::arch::layout::{ - BIOS_DATA_END, EBDA_END, EBDA_START, MEM_64_START, PORT_ACPI_RESET, PORT_ACPI_SLEEP_CONTROL, - RAM_32_SIZE, + BIOS_DATA_END, 
EBDA_END, EBDA_START, IOAPIC_START, MEM_64_START, PORT_ACPI_RESET, + PORT_ACPI_SLEEP_CONTROL, RAM_32_SIZE, }; use crate::arch::msr::{IA32_MISC_ENABLE, MiscEnable}; use crate::arch::reg::{Reg, SegAccess, SegReg, SegRegVal}; use crate::arch::sev::SnpPageType; use crate::board::{Board, BoardConfig, CpuTopology, PCIE_MMIO_64_SIZE, Result, VcpuGuard, error}; +use crate::device::ioapic::IoApic; use crate::firmware::acpi::bindings::{ AcpiTableFadt, AcpiTableHeader, AcpiTableRsdp, AcpiTableXsdt3, }; @@ -47,10 +47,13 @@ use crate::mem::mapped::ArcMemPages; use crate::mem::{MemRange, MemRegion, MemRegionEntry, MemRegionType}; use crate::utils::wrapping_sum; -pub struct ArchBoard { +pub struct ArchBoard +where + V: Vm, +{ cpuids: HashMap, sev_ap_eip: AtomicU32, - _phantom: PhantomData, + pub(crate) io_apic: Arc>, } fn add_topology(cpuids: &mut HashMap, func: u32, levels: &[(u8, u16)]) { @@ -70,7 +73,7 @@ fn add_topology(cpuids: &mut HashMap, func: u32, levels: & } impl ArchBoard { - pub fn new(hv: &H, _vm: &V, config: &BoardConfig) -> Result + pub fn new(hv: &H, vm: &V, config: &BoardConfig) -> Result where H: Hypervisor, { @@ -147,10 +150,11 @@ impl ArchBoard { let host_cpuid = unsafe { __cpuid(func) }; cpuids.insert(CpuidIn { func, index: None }, host_cpuid); } + Ok(Self { cpuids, sev_ap_eip: AtomicU32::new(0), - _phantom: PhantomData, + io_apic: Arc::new(IoApic::new(vm.create_msi_sender()?)), }) } } @@ -538,6 +542,8 @@ where } pub fn arch_init(&self) -> Result<()> { + let io_apic = self.arch.io_apic.clone(); + self.mmio_devs.write().push((IOAPIC_START, io_apic)); Ok(()) } } diff --git a/alioth/src/device/serial.rs b/alioth/src/device/serial.rs index 78e30ab4..c46008ee 100644 --- a/alioth/src/device/serial.rs +++ b/alioth/src/device/serial.rs @@ -21,8 +21,9 @@ use bitflags::bitflags; use parking_lot::Mutex; use crate::device::console::{Console, UartRecv}; +use crate::device::ioapic::IoApic; use crate::device::{self, MmioDev, Pause}; -use crate::hv::IrqSender; +use 
crate::hv::MsiSender; use crate::mem; use crate::mem::emulated::{Action, Mmio}; @@ -183,16 +184,17 @@ struct SerialReg { } #[derive(Debug)] -pub struct Serial { +pub struct Serial { name: Arc, - irq_sender: Arc, + io_apci: Arc>, + pin: u8, reg: Arc>, console: Console, } -impl Mmio for Serial +impl Mmio for Serial where - I: IrqSender, + M: MsiSender, { fn size(&self) -> u64 { 8 @@ -283,9 +285,9 @@ where } } -impl Pause for Serial +impl Pause for Serial where - I: IrqSender, + M: MsiSender, { fn pause(&self) -> device::Result<()> { Ok(()) @@ -296,15 +298,16 @@ where } } -impl MmioDev for Serial where I: IrqSender {} +impl MmioDev for Serial where M: MsiSender {} -struct SerialRecv { +struct SerialRecv { pub name: Arc, - pub irq_sender: Arc, + pub io_apci: Arc>, + pub pin: u8, pub reg: Arc>, } -impl UartRecv for SerialRecv { +impl UartRecv for SerialRecv { fn receive(&self, bytes: &[u8]) { let mut reg = self.reg.lock(); reg.data.extend(bytes); @@ -313,7 +316,7 @@ impl UartRecv for SerialRecv { .contains(InterruptEnable::RECEIVED_DATA_AVAILABLE) { reg.interrupt_identification.set_rx_data_available(); - if let Err(e) = self.irq_sender.send() { + if let Err(e) = self.io_apci.service_pin(self.pin) { log::error!("{}: sending interrupt: {e:?}", self.name); } } @@ -321,16 +324,16 @@ impl UartRecv for SerialRecv { } } -impl Serial +impl Serial where - I: IrqSender + Sync + Send + 'static, + M: MsiSender, { - pub fn new(base_port: u16, irq_sender: I) -> io::Result { - let irq_sender = Arc::new(irq_sender); + pub fn new(base_port: u16, io_apci: Arc>, pin: u8) -> io::Result { let reg = Arc::new(Mutex::new(SerialReg::default())); let name: Arc = Arc::from(format!("serial_{base_port:#x}")); let uart_recv = SerialRecv { - irq_sender: irq_sender.clone(), + io_apci: io_apci.clone(), + pin, name: name.clone(), reg: reg.clone(), }; @@ -338,14 +341,15 @@ where let serial = Serial { name, reg, - irq_sender, + pin, + io_apci, console, }; Ok(serial) } fn send_irq(&self) { - if let Err(e) 
= self.irq_sender.send() { + if let Err(e) = self.io_apci.service_pin(self.pin) { log::error!("{}: sending interrupt: {e:?}", self.name); } } diff --git a/alioth/src/hv/hv.rs b/alioth/src/hv/hv.rs index a0d79a4a..651c0c9c 100644 --- a/alioth/src/hv/hv.rs +++ b/alioth/src/hv/hv.rs @@ -25,6 +25,7 @@ use std::arch::x86_64::CpuidResult; use std::collections::HashMap; use std::fmt::Debug; use std::os::fd::AsFd; +#[cfg(not(target_arch = "x86_64"))] use std::sync::Arc; use std::thread::JoinHandle; @@ -210,10 +211,12 @@ pub trait Vcpu { } } +#[cfg(not(target_arch = "x86_64"))] pub trait IrqSender: Debug + Send + Sync + 'static { fn send(&self) -> Result<(), Error>; } +#[cfg(not(target_arch = "x86_64"))] impl IrqSender for Arc where T: IrqSender, @@ -328,10 +331,12 @@ pub struct VmConfig { pub trait Vm { type Vcpu: Vcpu; type Memory: VmMemory; + #[cfg(not(target_arch = "x86_64"))] type IrqSender: IrqSender + Send + Sync; type MsiSender: MsiSender; type IoeventFdRegistry: IoeventFdRegistry; fn create_vcpu(&self, index: u16, identity: u64) -> Result; + #[cfg(not(target_arch = "x86_64"))] fn create_irq_sender(&self, pin: u8) -> Result; fn create_msi_sender( &self, diff --git a/alioth/src/hv/kvm/vm/vm.rs b/alioth/src/hv/kvm/vm/vm.rs index 9c27d30d..7cb44470 100644 --- a/alioth/src/hv/kvm/vm/vm.rs +++ b/alioth/src/hv/kvm/vm/vm.rs @@ -29,18 +29,22 @@ use std::sync::Arc; use std::sync::atomic::{AtomicU32, Ordering}; use std::thread::JoinHandle; -use libc::{EFD_CLOEXEC, EFD_NONBLOCK, SIGRTMIN, eventfd, write}; +#[cfg(not(target_arch = "x86_64"))] +use libc::write; +use libc::{EFD_CLOEXEC, EFD_NONBLOCK, SIGRTMIN, eventfd}; use parking_lot::{Mutex, RwLock}; use snafu::ResultExt; #[cfg(target_arch = "x86_64")] use crate::arch::sev::{SevPolicy, SnpPageType, SnpPolicy}; use crate::ffi; +#[cfg(not(target_arch = "x86_64"))] +use crate::hv::IrqSender; use crate::hv::kvm::vcpu::KvmVcpu; use crate::hv::kvm::{KvmError, check_extension, kvm_error}; use crate::hv::{ - Error, IoeventFd, 
IoeventFdRegistry, IrqFd, IrqSender, Kvm, MemMapOption, MsiSender, Result, - Vm, VmConfig, VmMemory, error, + Error, IoeventFd, IoeventFdRegistry, IrqFd, Kvm, MemMapOption, MsiSender, Result, Vm, VmConfig, + VmMemory, error, }; #[cfg(target_arch = "x86_64")] use crate::sys::kvm::KVM_IRQCHIP_IOAPIC; @@ -279,7 +283,7 @@ impl VmMemory for KvmMemory { Ok(()) } } - +#[cfg(not(target_arch = "x86_64"))] #[derive(Debug)] pub struct KvmIrqSender { pin: u8, @@ -287,6 +291,7 @@ pub struct KvmIrqSender { event_fd: OwnedFd, } +#[cfg(not(target_arch = "x86_64"))] impl Drop for KvmIrqSender { fn drop(&mut self) { let pin_flag = 1 << (self.pin as u32); @@ -307,6 +312,7 @@ impl Drop for KvmIrqSender { } } +#[cfg(not(target_arch = "x86_64"))] impl IrqSender for KvmIrqSender { fn send(&self) -> Result<(), Error> { ffi!(unsafe { write(self.event_fd.as_raw_fd(), &1u64 as *const _ as _, 8) }) @@ -640,6 +646,7 @@ impl Vm for KvmVm { #[cfg(target_arch = "aarch64")] type GicV3 = aarch64::KvmGicV3; type IoeventFdRegistry = KvmIoeventFdRegistry; + #[cfg(not(target_arch = "x86_64"))] type IrqSender = KvmIrqSender; #[cfg(target_arch = "aarch64")] type Its = aarch64::KvmIts; @@ -667,6 +674,7 @@ impl Vm for KvmVm { } } + #[cfg(not(target_arch = "x86_64"))] fn create_irq_sender(&self, pin: u8) -> Result { let pin_flag = 1 << pin; if self.vm.pin_map.fetch_or(pin_flag, Ordering::AcqRel) & pin_flag == pin_flag { diff --git a/alioth/src/hv/kvm/vm/vm_x86_64.rs b/alioth/src/hv/kvm/vm/vm_x86_64.rs index 0dbdf45c..34e6d573 100644 --- a/alioth/src/hv/kvm/vm/vm_x86_64.rs +++ b/alioth/src/hv/kvm/vm/vm_x86_64.rs @@ -17,13 +17,14 @@ use std::os::fd::{AsFd, AsRawFd, FromRawFd, OwnedFd}; use snafu::ResultExt; use crate::arch::intr::{MsiAddrHi, MsiAddrLo}; +use crate::arch::ioapic::NUM_PINS; use crate::arch::sev::{SevPolicy, SevStatus, SnpPageType, SnpPolicy}; use crate::hv::kvm::sev::SevFd; use crate::hv::kvm::{KvmError, KvmVm, kvm_error}; use crate::hv::{Coco, Kvm, Result, VmConfig, error}; use 
crate::sys::kvm::{ KvmCap, KvmCreateGuestMemfd, KvmVmType, KvmX2apicApiFlag, kvm_create_guest_memfd, - kvm_create_irqchip, kvm_memory_encrypt_op, kvm_set_identity_map_addr, kvm_set_tss_addr, + kvm_memory_encrypt_op, kvm_set_identity_map_addr, kvm_set_tss_addr, }; use crate::sys::sev::{ KvmSevCmd, KvmSevCmdId, KvmSevInit, KvmSevLaunchMeasure, KvmSevLaunchStart, @@ -112,7 +113,7 @@ impl KvmVm { if let Err(e) = self.vm.enable_cap(KvmCap::X2APIC_API, x2apic_caps.bits()) { log::error!("Failed to enable KVM_CAP_X2APIC_API: {e:?}"); } - unsafe { kvm_create_irqchip(&self.vm.fd) }.context(error::CreateDevice)?; + self.vm.enable_cap(KvmCap::SPLIT_IRQCHIP, NUM_PINS as u64)?; // TODO should be in parameters unsafe { kvm_set_tss_addr(&self.vm.fd, 0xf000_0000) }.context(error::SetVmParam)?; unsafe { kvm_set_identity_map_addr(&self.vm.fd, &0xf000_3000) } diff --git a/alioth/src/sys/linux/kvm.rs b/alioth/src/sys/linux/kvm.rs index 0bfd2bdd..ce24e235 100644 --- a/alioth/src/sys/linux/kvm.rs +++ b/alioth/src/sys/linux/kvm.rs @@ -504,6 +504,7 @@ consts! { KVMCLOCK_CTRL = 76; SIGNAL_MSI = 77; ARM_PSCI_0_2 = 102; + SPLIT_IRQCHIP = 121; X2APIC_API = 129; EXIT_HYPERCALL = 201; // GUEST_MEMFD = 234; diff --git a/alioth/src/vm/vm.rs b/alioth/src/vm/vm.rs index fb687e1b..ba652394 100644 --- a/alioth/src/vm/vm.rs +++ b/alioth/src/vm/vm.rs @@ -163,8 +163,8 @@ where #[cfg(target_arch = "x86_64")] pub fn add_com1(&self) -> Result<(), Error> { - let irq_sender = self.board.vm.create_irq_sender(4)?; - let com1 = Serial::new(PORT_COM1, irq_sender).context(error::CreateConsole)?; + let io_apic = self.board.arch.io_apic.clone(); + let com1 = Serial::new(PORT_COM1, io_apic, 4).context(error::CreateConsole)?; self.board.io_devs.write().push((PORT_COM1, Arc::new(com1))); Ok(()) }