From 30cbe4d36c996fd912d5adf3153cf00c7e6f4f6f Mon Sep 17 00:00:00 2001 From: Lucy Menon <168595099+syntactically@users.noreply.github.com> Date: Wed, 11 Feb 2026 18:06:19 +0000 Subject: [PATCH 01/10] Update guest Cargo.lock files Previously, these were inconsistent. Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com> --- src/tests/rust_guests/simpleguest/Cargo.lock | 20 ++++++++++---------- src/tests/rust_guests/witguest/Cargo.lock | 8 ++++---- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/tests/rust_guests/simpleguest/Cargo.lock b/src/tests/rust_guests/simpleguest/Cargo.lock index 0faead807..8effa6589 100644 --- a/src/tests/rust_guests/simpleguest/Cargo.lock +++ b/src/tests/rust_guests/simpleguest/Cargo.lock @@ -25,9 +25,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.54" +version = "1.2.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583" +checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" dependencies = [ "find-msvc-tools", "shlex", @@ -47,9 +47,9 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "find-msvc-tools" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "flatbuffers" @@ -137,9 +137,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.13.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" dependencies = [ "equivalent", "hashbrown", @@ -147,9 +147,9 @@ dependencies = [ [[package]] name = "itoa" 
-version = "1.0.17" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "linkme" @@ -422,6 +422,6 @@ dependencies = [ [[package]] name = "zmij" -version = "1.0.17" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02aae0f83f69aafc94776e879363e9771d7ecbffe2c7fbb6c14c5e00dfe88439" +checksum = "0f4a4e8e9dc5c62d159f04fcdbe07f4c3fb710415aab4754bf11505501e3251d" diff --git a/src/tests/rust_guests/witguest/Cargo.lock b/src/tests/rust_guests/witguest/Cargo.lock index 3715fc832..bedfdb2da 100644 --- a/src/tests/rust_guests/witguest/Cargo.lock +++ b/src/tests/rust_guests/witguest/Cargo.lock @@ -84,9 +84,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.54" +version = "1.2.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583" +checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" dependencies = [ "find-msvc-tools", "shlex", @@ -141,9 +141,9 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "find-msvc-tools" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "flatbuffers" From 41dd4383ada9377fd60e6c51a1f0f347f8326248 Mon Sep 17 00:00:00 2001 From: Lucy Menon <168595099+syntactically@users.noreply.github.com> Date: Wed, 4 Feb 2026 12:43:09 +0000 Subject: [PATCH 02/10] SandboxMemoryLayout: do not pass heap and scratch size explicitly These values could conflict with those in the SandboxConfiguration also passed in. 
Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com> --- src/hyperlight_host/src/mem/layout.rs | 18 ++++++++++++++---- src/hyperlight_host/src/sandbox/snapshot.rs | 5 +---- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/hyperlight_host/src/mem/layout.rs b/src/hyperlight_host/src/mem/layout.rs index 925067c21..0e81f50d1 100644 --- a/src/hyperlight_host/src/mem/layout.rs +++ b/src/hyperlight_host/src/mem/layout.rs @@ -200,11 +200,22 @@ impl SandboxMemoryLayout { pub(crate) fn new( cfg: SandboxConfiguration, code_size: usize, - heap_size: usize, - scratch_size: usize, init_data_size: usize, init_data_permissions: Option, ) -> Result { + let heap_size = usize::try_from(cfg.get_heap_size())?; + let scratch_size = cfg.get_scratch_size(); + if scratch_size > Self::MAX_MEMORY_SIZE { + return Err(MemoryRequestTooBig(scratch_size, Self::MAX_MEMORY_SIZE)); + } + let min_scratch_size = hyperlight_common::layout::min_scratch_size( + cfg.get_input_data_size(), + cfg.get_output_data_size(), + ); + if scratch_size < min_scratch_size { + return Err(MemoryRequestTooSmall(scratch_size, min_scratch_size)); + } + let guest_code_offset = 0; // The following offsets are to the fields of the PEB struct itself! 
let peb_offset = code_size.next_multiple_of(PAGE_SIZE_USIZE); @@ -655,8 +666,7 @@ mod tests { #[test] fn test_get_memory_size() { let sbox_cfg = SandboxConfiguration::default(); - let sbox_mem_layout = - SandboxMemoryLayout::new(sbox_cfg, 4096, 4096, 0x3000, 0, None).unwrap(); + let sbox_mem_layout = SandboxMemoryLayout::new(sbox_cfg, 4096, 0, None).unwrap(); assert_eq!( sbox_mem_layout.get_memory_size().unwrap(), get_expected_memory_size(&sbox_mem_layout) diff --git a/src/hyperlight_host/src/sandbox/snapshot.rs b/src/hyperlight_host/src/sandbox/snapshot.rs index 20cd046ad..dfe048569 100644 --- a/src/hyperlight_host/src/sandbox/snapshot.rs +++ b/src/hyperlight_host/src/sandbox/snapshot.rs @@ -359,8 +359,6 @@ impl Snapshot { let mut layout = crate::mem::layout::SandboxMemoryLayout::new( cfg, exe_info.loaded_size(), - usize::try_from(cfg.get_heap_size())?, - cfg.get_scratch_size(), guest_blob_size, guest_blob_mem_flags, )?; @@ -612,9 +610,8 @@ mod tests { let mut snapshot_mem = ExclusiveSharedMemory::new(PAGE_SIZE + pt_bytes.len()).unwrap(); snapshot_mem.copy_from_slice(&pt_bytes, PAGE_SIZE).unwrap(); - let cfg = crate::sandbox::SandboxConfiguration::default(); let mgr = SandboxMemoryManager::new( - SandboxMemoryLayout::new(cfg, 4096, 2048, 4096, 0x3000, None).unwrap(), + SandboxMemoryLayout::new(crate::sandbox::SandboxConfiguration::default(), 4096, 0x3000, None).unwrap(), snapshot_mem, scratch_mem, 0.into(), From 9ad121fd978eefbed725905d9f7db03265f0968e Mon Sep 17 00:00:00 2001 From: Lucy Menon <168595099+syntactically@users.noreply.github.com> Date: Wed, 4 Feb 2026 12:49:51 +0000 Subject: [PATCH 03/10] Move I/O buffers to scratch region These are, for now, still set up at certain distinguished addresses (at the beginning of the region) that the host and the guest can both compute. In the future, hopefully all of this will be replaced with a more flexible set up based on the ring-buffer I/O design doc.
Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com> --- .../src/arch/amd64/layout.rs | 6 +- src/hyperlight_host/benches/benchmarks.rs | 1 + src/hyperlight_host/src/error.rs | 6 + src/hyperlight_host/src/mem/layout.rs | 176 ++++++++---------- src/hyperlight_host/src/mem/memory_region.rs | 6 - src/hyperlight_host/src/mem/mgr.rs | 76 +++++--- src/hyperlight_host/src/sandbox/config.rs | 2 +- .../src/sandbox/initialized_multi_use.rs | 7 +- src/hyperlight_host/src/sandbox/outb.rs | 12 +- src/hyperlight_host/src/sandbox/snapshot.rs | 3 +- .../src/sandbox/uninitialized.rs | 2 +- .../tests/sandbox_host_tests.rs | 18 ++ 12 files changed, 168 insertions(+), 147 deletions(-) diff --git a/src/hyperlight_common/src/arch/amd64/layout.rs b/src/hyperlight_common/src/arch/amd64/layout.rs index b1471b8ce..14a9cd62a 100644 --- a/src/hyperlight_common/src/arch/amd64/layout.rs +++ b/src/hyperlight_common/src/arch/amd64/layout.rs @@ -37,6 +37,8 @@ pub const MAX_GPA: usize = 0x0000_000f_ffff_ffff; /// - A page for the smallest possible non-exception stack /// - (up to) 3 pages for mapping that /// - Two pages for the exception stack and metadata -pub fn min_scratch_size() -> usize { - 12 * crate::vmem::PAGE_SIZE +/// - A page-aligned amount of memory for I/O buffers (for now) +pub fn min_scratch_size(input_data_size: usize, output_data_size: usize) -> usize { + (input_data_size + output_data_size).next_multiple_of(crate::vmem::PAGE_SIZE) + + 12 * crate::vmem::PAGE_SIZE } diff --git a/src/hyperlight_host/benches/benchmarks.rs b/src/hyperlight_host/benches/benchmarks.rs index c9ee5cdfc..ca104f9d3 100644 --- a/src/hyperlight_host/benches/benchmarks.rs +++ b/src/hyperlight_host/benches/benchmarks.rs @@ -385,6 +385,7 @@ fn guest_call_benchmark_large_param(c: &mut Criterion) { let mut config = SandboxConfiguration::default(); config.set_input_data_size(2 * SIZE + (1024 * 1024)); // 2 * SIZE + 1 MB, to allow 1MB for the rest of the serialized function call + 
config.set_scratch_size(2 * SIZE + 2 * (1024 * 1024)); // 2 * SIZE + 2 MB, to allow 1MB for data outside of the I/O regions config.set_heap_size(SIZE as u64 * 15); let sandbox = UninitializedSandbox::new( diff --git a/src/hyperlight_host/src/error.rs b/src/hyperlight_host/src/error.rs index 1bca9e944..bb37c31db 100644 --- a/src/hyperlight_host/src/error.rs +++ b/src/hyperlight_host/src/error.rs @@ -160,6 +160,11 @@ pub enum HyperlightError { #[error("Memory requested {0} exceeds maximum size allowed {1}")] MemoryRequestTooBig(usize, usize), + /// The memory request is too small to contain everything that is + /// required + #[error("Memory requested {0} is smaller than minimum size required {1}")] + MemoryRequestTooSmall(usize, usize), + /// Metric Not Found. #[error("Metric Not Found {0:?}.")] MetricNotFound(&'static str), @@ -368,6 +373,7 @@ impl HyperlightError { | HyperlightError::MemoryAllocationFailed(_) | HyperlightError::MemoryProtectionFailed(_) | HyperlightError::MemoryRequestTooBig(_, _) + | HyperlightError::MemoryRequestTooSmall(_, _) | HyperlightError::MetricNotFound(_) | HyperlightError::MmapFailed(_) | HyperlightError::MprotectFailed(_) diff --git a/src/hyperlight_host/src/mem/layout.rs b/src/hyperlight_host/src/mem/layout.rs index 0e81f50d1..92df50cd8 100644 --- a/src/hyperlight_host/src/mem/layout.rs +++ b/src/hyperlight_host/src/mem/layout.rs @@ -27,10 +27,6 @@ limitations under the License. //! +-------------------------------------------+ //! | Guest Heap | //! +-------------------------------------------+ -//! | Output Data | -//! +-------------------------------------------+ -//! | Input Data | -//! +-------------------------------------------+ //! | PEB Struct | (HyperlightPEB size) //! +-------------------------------------------+ //! | Guest Code | @@ -46,17 +42,23 @@ limitations under the License. //! - `InitData` - some extra data that can be loaded onto the sandbox during //! initialization. //! -//!
- `HostDefinitions` - the length of this is the `HostFunctionDefinitionSize` -//! field from `SandboxConfiguration` -//! -//! - `InputData` - this is a buffer that is used for input data to the host program. -//! the length of this field is `InputDataSize` from `SandboxConfiguration` -//! -//! - `OutputData` - this is a buffer that is used for output data from host program. -//! the length of this field is `OutputDataSize` from `SandboxConfiguration` -//! //! - `GuestHeap` - this is a buffer that is used for heap data in the guest. the length //! of this field is returned by the `heap_size()` method of this struct +//! +//! There is also a scratch region at the top of physical memory, +//! which is mostly laid out as a large undifferentiated blob of +//! memory, although at present the snapshot process specially +//! privileges the statically allocated input and output data regions: +//! +//! +-------------------------------------------+ (top of physical memory) +//! | Exception Stack, Metadata | +//! +-------------------------------------------+ (1 page below) +//! | Scratch Memory | +//! +-------------------------------------------+ +//! | Output Data | +//! +-------------------------------------------+ +//! | Input Data | +//! 
+-------------------------------------------+ (scratch size) use std::fmt::Debug; use std::mem::{offset_of, size_of}; @@ -66,13 +68,15 @@ use tracing::{Span, instrument}; #[cfg(feature = "init-paging")] use super::memory_region::MemoryRegionType::PageTables; -use super::memory_region::MemoryRegionType::{Code, Heap, InitData, InputData, OutputData, Peb}; +use super::memory_region::MemoryRegionType::{Code, Heap, InitData, Peb}; use super::memory_region::{ DEFAULT_GUEST_BLOB_MEM_FLAGS, MemoryRegion_, MemoryRegionFlags, MemoryRegionKind, MemoryRegionVecBuilder, }; use super::shared_mem::{ExclusiveSharedMemory, SharedMemory}; -use crate::error::HyperlightError::{GuestOffsetIsInvalid, MemoryRequestTooBig}; +use crate::error::HyperlightError::{ + GuestOffsetIsInvalid, MemoryRequestTooBig, MemoryRequestTooSmall, +}; use crate::sandbox::SandboxConfiguration; use crate::{Result, new_error}; @@ -92,10 +96,6 @@ pub(crate) struct SandboxMemoryLayout { peb_init_data_offset: usize, peb_heap_data_offset: usize, - // The following are the actual values - // that are written to the PEB struct - pub(super) input_data_buffer_offset: usize, - pub(super) output_data_buffer_offset: usize, guest_heap_buffer_offset: usize, init_data_offset: usize, pt_offset: usize, @@ -149,14 +149,6 @@ impl Debug for SandboxMemoryLayout { "Guest Heap Offset", &format_args!("{:#x}", self.peb_heap_data_offset), ) - .field( - "Input Data Buffer Offset", - &format_args!("{:#x}", self.input_data_buffer_offset), - ) - .field( - "Output Data Buffer Offset", - &format_args!("{:#x}", self.output_data_buffer_offset), - ) .field( "Guest Heap Buffer Offset", &format_args!("{:#x}", self.guest_heap_buffer_offset), @@ -181,9 +173,14 @@ impl Debug for SandboxMemoryLayout { impl SandboxMemoryLayout { /// The maximum amount of memory a single sandbox will be allowed. 
- /// The addressable virtual memory with current paging setup is virtual address 0x0 - 0x40000000 (excl.), - /// However, the memory up to Self::BASE_ADDRESS is not used. - const MAX_MEMORY_SIZE: usize = 0x40000000 - Self::BASE_ADDRESS; + /// + /// Currently, both the scratch region and the snapshot region are + /// bounded by this size. The current value is essentially + /// arbitrary and chosen for historical reasons; the modern + /// sandbox virtual memory layout can support much more, so this + /// could be increased should use cases for larger sandboxes + /// arise. + const MAX_MEMORY_SIZE: usize = 0x4000_0000 - Self::BASE_ADDRESS; /// The base address of the sandbox's memory. #[cfg(feature = "init-paging")] @@ -192,7 +189,7 @@ impl SandboxMemoryLayout { pub(crate) const BASE_ADDRESS: usize = 0x0; // the offset into a sandbox's input/output buffer where the stack starts - const STACK_POINTER_SIZE_BYTES: u64 = 8; + pub(crate) const STACK_POINTER_SIZE_BYTES: u64 = 8; /// Create a new `SandboxMemoryLayout` with the given /// `SandboxConfiguration`, code size and stack/heap size. 
@@ -229,14 +226,10 @@ impl SandboxMemoryLayout { // The following offsets are the actual values that relate to memory layout, // which are written to PEB struct let peb_address = Self::BASE_ADDRESS + peb_offset; - // make sure input data buffer starts at 4K boundary - let input_data_buffer_offset = (peb_heap_data_offset + size_of::()) - .next_multiple_of(PAGE_SIZE_USIZE); - let output_data_buffer_offset = (input_data_buffer_offset + cfg.get_input_data_size()) - .next_multiple_of(PAGE_SIZE_USIZE); // make sure heap buffer starts at 4K boundary - let guest_heap_buffer_offset = (output_data_buffer_offset + cfg.get_output_data_size()) + let guest_heap_buffer_offset = (peb_heap_data_offset + size_of::()) .next_multiple_of(PAGE_SIZE_USIZE); + // make sure init data starts at 4K boundary let init_data_offset = (guest_heap_buffer_offset + heap_size).next_multiple_of(PAGE_SIZE_USIZE); @@ -252,8 +245,6 @@ impl SandboxMemoryLayout { peb_heap_data_offset, sandbox_memory_config: cfg, code_size, - input_data_buffer_offset, - output_data_buffer_offset, guest_heap_buffer_offset, peb_address, guest_code_offset, @@ -301,11 +292,18 @@ impl SandboxMemoryLayout { self.get_init_data_size_offset() + size_of::() } - /// Get the offset in guest memory to the start of output data. + /// Get the guest virtual address of the start of output data. #[instrument(skip_all, parent = Span::current(), level= "Trace")] - #[cfg(test)] - pub(crate) fn get_output_data_offset(&self) -> usize { - self.output_data_buffer_offset + pub(crate) fn get_output_data_buffer_gva(&self) -> u64 { + hyperlight_common::layout::scratch_base_gva(self.scratch_size) + + self.sandbox_memory_config.get_input_data_size() as u64 + } + + /// Get the offset into the host scratch buffer of the start of + /// the output data. 
+ #[instrument(skip_all, parent = Span::current(), level= "Trace")] + pub(crate) fn get_output_data_buffer_scratch_host_offset(&self) -> usize { + self.sandbox_memory_config.get_input_data_size() } /// Get the offset in guest memory to the input data size. @@ -323,6 +321,29 @@ self.get_input_data_size_offset() + size_of::() } + /// Get the guest virtual address of the start of input data + #[instrument(skip_all, parent = Span::current(), level= "Trace")] + fn get_input_data_buffer_gva(&self) -> u64 { + hyperlight_common::layout::scratch_base_gva(self.scratch_size) + } + + /// Get the offset into the host scratch buffer of the start of + /// the input data + #[instrument(skip_all, parent = Span::current(), level= "Trace")] + pub(crate) fn get_input_data_buffer_scratch_host_offset(&self) -> usize { + 0 + } + + /// Get the first guest physical address in the scratch region + /// that is free after the statically allocated I/O buffers + #[instrument(skip_all, parent = Span::current(), level= "Trace")] + pub(crate) fn get_first_free_scratch_gpa(&self) -> u64 { + (hyperlight_common::layout::scratch_base_gpa(self.scratch_size) as usize + + self.sandbox_memory_config.get_input_data_size() + + self.sandbox_memory_config.get_output_data_size()) + .next_multiple_of(hyperlight_common::vmem::PAGE_SIZE) as u64 + } + /// Get the offset in guest memory to where the guest dispatch function /// pointer is written #[instrument(skip_all, parent = Span::current(), level= "Trace")] @@ -424,47 +445,12 @@ } // PEB - let input_data_offset = builder.push_page_aligned( + let heap_offset = builder.push_page_aligned( size_of::(), MemoryRegionFlags::READ | MemoryRegionFlags::WRITE, Peb, ); - let expected_input_data_offset = TryInto::::try_into(self.input_data_buffer_offset)?; - - if input_data_offset != expected_input_data_offset { - return Err(new_error!( - "Input Data offset does not match expected Input Data offset expected: {}, actual: {}", - expected_input_data_offset, -
input_data_offset - )); - } - - // guest input data - let output_data_offset = builder.push_page_aligned( - self.sandbox_memory_config.get_input_data_size(), - MemoryRegionFlags::READ | MemoryRegionFlags::WRITE, - InputData, - ); - - let expected_output_data_offset = - TryInto::::try_into(self.output_data_buffer_offset)?; - - if output_data_offset != expected_output_data_offset { - return Err(new_error!( - "Output Data offset does not match expected Output Data offset expected: {}, actual: {}", - expected_output_data_offset, - output_data_offset - )); - } - - // guest output data - let heap_offset = builder.push_page_aligned( - self.sandbox_memory_config.get_output_data_size(), - MemoryRegionFlags::READ | MemoryRegionFlags::WRITE, - OutputData, - ); - let expected_heap_offset = TryInto::::try_into(self.guest_heap_buffer_offset)?; if heap_offset != expected_heap_offset { @@ -595,8 +581,10 @@ impl SandboxMemoryLayout { .get_input_data_size() .try_into()?, )?; - let addr = get_address!(input_data_buffer_offset); - shared_mem.write_u64(self.get_input_data_pointer_offset(), addr)?; + shared_mem.write_u64( + self.get_input_data_pointer_offset(), + self.get_input_data_buffer_gva(), + )?; // Set up output buffer pointer shared_mem.write_u64( @@ -605,8 +593,10 @@ impl SandboxMemoryLayout { .get_output_data_size() .try_into()?, )?; - let addr = get_address!(output_data_buffer_offset); - shared_mem.write_u64(self.get_output_data_pointer_offset(), addr)?; + shared_mem.write_u64( + self.get_output_data_pointer_offset(), + self.get_output_data_buffer_gva(), + )?; // Set up init data pointer shared_mem.write_u64( @@ -623,17 +613,10 @@ impl SandboxMemoryLayout { // End of setting up the PEB - // Initialize the stack pointers of input data and output data - // to point to the ninth (index 8) byte, which is the first free address - // of the each respective stack. The first 8 bytes are the stack pointer itself. 
- shared_mem.write_u64( - self.input_data_buffer_offset, - Self::STACK_POINTER_SIZE_BYTES, - )?; - shared_mem.write_u64( - self.output_data_buffer_offset, - Self::STACK_POINTER_SIZE_BYTES, - )?; + // The input and output data regions do not have their layout + // initialised here, because they are in the scratch + // region---they are instead set in + // [`SandboxMemoryManager::update_scratch_bookkeeping`]. Ok(()) } @@ -647,17 +630,12 @@ mod tests { // helper func for testing fn get_expected_memory_size(layout: &SandboxMemoryLayout) -> usize { - let cfg = layout.sandbox_memory_config; let mut expected_size = 0; // in order of layout expected_size += layout.code_size; expected_size += size_of::().next_multiple_of(PAGE_SIZE_USIZE); - expected_size += cfg.get_input_data_size().next_multiple_of(PAGE_SIZE_USIZE); - - expected_size += cfg.get_output_data_size().next_multiple_of(PAGE_SIZE_USIZE); - expected_size += layout.heap_size.next_multiple_of(PAGE_SIZE_USIZE); expected_size diff --git a/src/hyperlight_host/src/mem/memory_region.rs b/src/hyperlight_host/src/mem/memory_region.rs index dbe83f1b0..9ddaa894a 100644 --- a/src/hyperlight_host/src/mem/memory_region.rs +++ b/src/hyperlight_host/src/mem/memory_region.rs @@ -126,12 +126,6 @@ pub enum MemoryRegionType { InitData, /// The region contains the PEB Peb, - /// The region contains the Host Function Definitions - HostFunctionDefinitions, - /// The region contains the Input Data - InputData, - /// The region contains the Output Data - OutputData, /// The region contains the Heap Heap, /// The region contains the Guard Page diff --git a/src/hyperlight_host/src/mem/mgr.rs b/src/hyperlight_host/src/mem/mgr.rs index ea390e8b2..204043448 100644 --- a/src/hyperlight_host/src/mem/mgr.rs +++ b/src/hyperlight_host/src/mem/mgr.rs @@ -176,6 +176,12 @@ where &mut self.shared_mem } + /// Get `SharedMemory` in `self` as a mutable reference + #[cfg(any(gdb, test))] + pub(crate) fn get_scratch_mem_mut(&mut self) -> &mut S { + &mut 
self.scratch_mem + } + /// Create a snapshot with the given mapped regions pub(crate) fn snapshot( &mut self, @@ -289,8 +295,8 @@ impl SandboxMemoryManager { /// Reads a host function call from memory #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn get_host_function_call(&mut self) -> Result { - self.shared_mem.try_pop_buffer_into::( - self.layout.output_data_buffer_offset, + self.scratch_mem.try_pop_buffer_into::( + self.layout.get_output_data_buffer_scratch_host_offset(), self.layout.sandbox_memory_config.get_output_data_size(), ) } @@ -304,8 +310,8 @@ impl SandboxMemoryManager { let mut builder = FlatBufferBuilder::new(); let data = res.encode(&mut builder); - self.shared_mem.push_buffer( - self.layout.input_data_buffer_offset, + self.scratch_mem.push_buffer( + self.layout.get_input_data_buffer_scratch_host_offset(), self.layout.sandbox_memory_config.get_input_data_size(), data, ) @@ -321,19 +327,20 @@ impl SandboxMemoryManager { ) })?; - self.shared_mem.push_buffer( - self.layout.input_data_buffer_offset, + self.scratch_mem.push_buffer( + self.layout.get_input_data_buffer_scratch_host_offset(), self.layout.sandbox_memory_config.get_input_data_size(), buffer, - ) + )?; + Ok(()) } /// Reads a function call result from memory. /// A function call result can be either an error or a successful return value. 
#[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn get_guest_function_call_result(&mut self) -> Result { - self.shared_mem.try_pop_buffer_into::( - self.layout.output_data_buffer_offset, + self.scratch_mem.try_pop_buffer_into::( + self.layout.get_output_data_buffer_scratch_host_offset(), self.layout.sandbox_memory_config.get_output_data_size(), ) } @@ -341,8 +348,8 @@ impl SandboxMemoryManager { /// Read guest log data from the `SharedMemory` contained within `self` #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn read_guest_log_data(&mut self) -> Result { - self.shared_mem.try_pop_buffer_into::( - self.layout.output_data_buffer_offset, + self.scratch_mem.try_pop_buffer_into::( + self.layout.get_output_data_buffer_scratch_host_offset(), self.layout.sandbox_memory_config.get_output_data_size(), ) } @@ -350,8 +357,8 @@ impl SandboxMemoryManager { pub(crate) fn clear_io_buffers(&mut self) { // Clear the output data buffer loop { - let Ok(_) = self.shared_mem.try_pop_buffer_into::>( - self.layout.output_data_buffer_offset, + let Ok(_) = self.scratch_mem.try_pop_buffer_into::>( + self.layout.get_output_data_buffer_scratch_host_offset(), self.layout.sandbox_memory_config.get_output_data_size(), ) else { break; @@ -359,8 +366,8 @@ impl SandboxMemoryManager { } // Clear the input data buffer loop { - let Ok(_) = self.shared_mem.try_pop_buffer_into::>( - self.layout.input_data_buffer_offset, + let Ok(_) = self.scratch_mem.try_pop_buffer_into::>( + self.layout.get_input_data_buffer_scratch_host_offset(), self.layout.sandbox_memory_config.get_input_data_size(), ) else { break; @@ -402,25 +409,36 @@ impl SandboxMemoryManager { Ok((gsnapshot, gscratch)) } - fn update_scratch_bookkeeping(&mut self, snapshot_pt_base_gpa: u64) -> Result<()> { + #[inline] + fn update_scratch_bookkeeping_item(&mut self, offset: u64, value: u64) -> Result<()> { let scratch_size = self.scratch_mem.mem_size(); + let 
base_offset = scratch_size - offset as usize; + self.scratch_mem.write::(base_offset, value) + } - let size_offset = - scratch_size - hyperlight_common::layout::SCRATCH_TOP_SIZE_OFFSET as usize; - self.scratch_mem - .write::(size_offset, scratch_size as u64)?; + fn update_scratch_bookkeeping(&mut self, snapshot_pt_base_gpa: u64) -> Result<()> { + use hyperlight_common::layout::*; + let scratch_size = self.scratch_mem.mem_size(); + self.update_scratch_bookkeeping_item(SCRATCH_TOP_SIZE_OFFSET, scratch_size as u64)?; + self.update_scratch_bookkeeping_item( + SCRATCH_TOP_ALLOCATOR_OFFSET, + self.layout.get_first_free_scratch_gpa(), + )?; + self.update_scratch_bookkeeping_item( + SCRATCH_TOP_SNAPSHOT_PT_GPA_BASE_OFFSET, + snapshot_pt_base_gpa, + )?; - let alloc_offset = - scratch_size - hyperlight_common::layout::SCRATCH_TOP_ALLOCATOR_OFFSET as usize; + // Initialise the guest input and output data buffers in + // scratch memory. TODO: remove the need for this. self.scratch_mem.write::( - alloc_offset, - hyperlight_common::layout::scratch_base_gpa(scratch_size), + self.layout.get_input_data_buffer_scratch_host_offset(), + SandboxMemoryLayout::STACK_POINTER_SIZE_BYTES, + )?; + self.scratch_mem.write::( + self.layout.get_output_data_buffer_scratch_host_offset(), + SandboxMemoryLayout::STACK_POINTER_SIZE_BYTES, )?; - - let snapshot_pt_base_gpa_offset = scratch_size - - hyperlight_common::layout::SCRATCH_TOP_SNAPSHOT_PT_GPA_BASE_OFFSET as usize; - self.scratch_mem - .write::(snapshot_pt_base_gpa_offset, snapshot_pt_base_gpa)?; Ok(()) } diff --git a/src/hyperlight_host/src/sandbox/config.rs b/src/hyperlight_host/src/sandbox/config.rs index 044c5f2fb..f12387a0b 100644 --- a/src/hyperlight_host/src/sandbox/config.rs +++ b/src/hyperlight_host/src/sandbox/config.rs @@ -92,7 +92,7 @@ impl SandboxConfiguration { /// The default heap size of a hyperlight sandbox pub const DEFAULT_HEAP_SIZE: u64 = 131072; /// The default size of the scratch region - pub const DEFAULT_SCRATCH_SIZE: 
usize = 0x40000; + pub const DEFAULT_SCRATCH_SIZE: usize = 0x48000; #[allow(clippy::too_many_arguments)] /// Create a new configuration for a sandbox with the given sizes. diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 138ebdaf6..8092e52db 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -1030,7 +1030,7 @@ mod tests { assert_eq!(res, 0); } - // Tests to ensure that many (1000) function calls can be made in a call context with a small stack (1K) and heap(14K). + // Tests to ensure that many (1000) function calls can be made in a call context with a small stack (24K) and heap(20K). // This test effectively ensures that the stack is being properly reset after each call and we are not leaking memory in the Guest. #[test] fn test_with_small_stack_and_heap() { @@ -1038,7 +1038,10 @@ mod tests { cfg.set_heap_size(20 * 1024); // min_scratch_size already includes 1 page (4k on most // platforms) of guest stack, so add 20k more to get 24k total - let min_scratch = hyperlight_common::layout::min_scratch_size(); + let min_scratch = hyperlight_common::layout::min_scratch_size( + cfg.get_input_data_size(), + cfg.get_output_data_size(), + ); cfg.set_scratch_size(min_scratch + 0x5000); let mut sbox1: MultiUseSandbox = { diff --git a/src/hyperlight_host/src/sandbox/outb.rs b/src/hyperlight_host/src/sandbox/outb.rs index 688f37a8d..d79bd23bd 100644 --- a/src/hyperlight_host/src/sandbox/outb.rs +++ b/src/hyperlight_host/src/sandbox/outb.rs @@ -297,8 +297,8 @@ mod tests { let log_msg = new_guest_log_data(LogLevel::Information); let guest_log_data_buffer: Vec = log_msg.try_into().unwrap(); - let offset = mgr.layout.get_output_data_offset(); - mgr.get_shared_mem_mut() + let offset = mgr.layout.get_output_data_buffer_scratch_host_offset(); + mgr.get_scratch_mem_mut() .push_buffer( offset, 
sandbox_cfg.get_output_data_size(), @@ -335,9 +335,9 @@ mod tests { let log_data = new_guest_log_data(level); let guest_log_data_buffer: Vec = log_data.clone().try_into().unwrap(); - mgr.get_shared_mem_mut() + mgr.get_scratch_mem_mut() .push_buffer( - layout.get_output_data_offset(), + layout.get_output_data_buffer_scratch_host_offset(), sandbox_cfg.get_output_data_size(), guest_log_data_buffer.as_slice(), ) @@ -417,9 +417,9 @@ mod tests { subscriber.clear(); let guest_log_data_buffer: Vec = log_data.try_into().unwrap(); - mgr.get_shared_mem_mut() + mgr.get_scratch_mem_mut() .push_buffer( - layout.get_output_data_offset(), + layout.get_output_data_buffer_scratch_host_offset(), sandbox_cfg.get_output_data_size(), guest_log_data_buffer.as_slice(), ) diff --git a/src/hyperlight_host/src/sandbox/snapshot.rs b/src/hyperlight_host/src/sandbox/snapshot.rs index dfe048569..9e6622199 100644 --- a/src/hyperlight_host/src/sandbox/snapshot.rs +++ b/src/hyperlight_host/src/sandbox/snapshot.rs @@ -590,7 +590,8 @@ mod tests { } fn make_simple_pt_mems() -> (SandboxMemoryManager, u64) { - let scratch_mem = ExclusiveSharedMemory::new(PAGE_SIZE).unwrap(); + let cfg = crate::sandbox::SandboxConfiguration::default(); + let scratch_mem = ExclusiveSharedMemory::new(cfg.get_scratch_size()).unwrap(); let pt_base = PAGE_SIZE + SandboxMemoryLayout::BASE_ADDRESS; let pt_buf = GuestPageTableBuffer::new(pt_base); let mapping = Mapping { diff --git a/src/hyperlight_host/src/sandbox/uninitialized.rs b/src/hyperlight_host/src/sandbox/uninitialized.rs index 4e13fa0d6..803891bef 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized.rs @@ -1142,7 +1142,7 @@ mod tests { { let mut cfg = SandboxConfiguration::default(); cfg.set_heap_size(32 * 1024 * 1024); // 32MB heap - cfg.set_scratch_size(256 * 1024); // 256KB scratch + cfg.set_scratch_size(256 * 1024 * 2); // 512KB scratch (256KB will be input/output) cfg.set_input_data_size(128 * 1024); // 
128KB input cfg.set_output_data_size(128 * 1024); // 128KB output diff --git a/src/hyperlight_host/tests/sandbox_host_tests.rs b/src/hyperlight_host/tests/sandbox_host_tests.rs index 6d040f061..e98faac88 100644 --- a/src/hyperlight_host/tests/sandbox_host_tests.rs +++ b/src/hyperlight_host/tests/sandbox_host_tests.rs @@ -212,6 +212,7 @@ fn incorrect_parameter_num() { #[test] fn max_memory_sandbox() { let mut cfg = SandboxConfiguration::default(); + cfg.set_scratch_size(0x40004000); cfg.set_input_data_size(0x40000000); let a = UninitializedSandbox::new( GuestBinary::FilePath(simple_guest_as_string().unwrap()), @@ -224,6 +225,23 @@ fn max_memory_sandbox() { )); } +#[test] +fn small_scratch_sandbox() { + let mut cfg = SandboxConfiguration::default(); + cfg.set_scratch_size(0x48000); + cfg.set_input_data_size(0x24000); + cfg.set_output_data_size(0x24000); + let a = UninitializedSandbox::new( + GuestBinary::FilePath(simple_guest_as_string().unwrap()), + Some(cfg), + ); + + assert!(matches!( + a.unwrap_err(), + HyperlightError::MemoryRequestTooSmall(..) + )); +} + #[test] fn iostack_is_working() { for mut sandbox in get_simpleguest_sandboxes(None).into_iter() { From 15db0fecbd701b5f744e1dbc3a4afb8f7770d3f0 Mon Sep 17 00:00:00 2001 From: Lucy Menon <168595099+syntactically@users.noreply.github.com> Date: Wed, 4 Feb 2026 14:28:29 +0000 Subject: [PATCH 04/10] Disable MADV_DONTNEED zeroing optimisation on mshv In 1f94fb30, some odd behaviour with MSHV and MAP_PRIVATE mappings was noticed. It turns out that this was actually due to the hypervisor pinning the old page structures, which meant that `madvise(MADV_DONTNEED)` resulted in the userspace view of the memory in question becoming completely divorced from the hypervisor view. 
Switching (back) to MAP_SHARED "fixed" this only because it meant that zeroing was actually ineffective for both the host and the guest (so at least host writes were reflected in the guest): `madvise(2)` notes that shared anonymous mappings will have their contents repopulated on access after an `MADV_DONTNEED`. This commit switches back to `MAP_PRIVATE` (so that the optimisation is correct on KVM, where it works) and disables the optimisation on MSHV, where the scratch region will always be zeroed by writing zeroes to it. The original intent of lazily zeroing/populating the memory will likely only be possible on MSHV with kernel/hypervisor changes for support. Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com> --- src/hyperlight_host/src/mem/shared_mem.rs | 32 ++++++++++++++++++----- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/hyperlight_host/src/mem/shared_mem.rs b/src/hyperlight_host/src/mem/shared_mem.rs index cae355aae..799a41b08 100644 --- a/src/hyperlight_host/src/mem/shared_mem.rs +++ b/src/hyperlight_host/src/mem/shared_mem.rs @@ -169,6 +169,8 @@ unsafe impl Send for GuestSharedMemory {} /// communication buffers, allowing it to be used concurrently with a /// GuestSharedMemory. /// +/// # Concurrency model +/// /// Given future requirements for asynchronous I/O with a minimum /// amount of copying (e.g. WASIp3 streams), we would like it to be /// possible to safely access these buffers concurrently with the @@ -299,6 +301,24 @@ unsafe impl Send for GuestSharedMemory {} /// \[4\] P1152R0: Deprecating `volatile`. JF Bastien. /// \[5\] P1382R1: `volatile_load` and `volatile_store`. JF Bastien, Paul McKenney, Jeffrey Yasskin, and the indefatigable TBD. /// \[6\] Documentation for std::sync::atomic::fence. 
+/// +/// # Note \[Keeping mappings in sync between userspace and the guest\] +/// +/// When using this structure with mshv on Linux, it is necessary to +/// be a little bit careful: since the hypervisor is not directly +/// integrated with the host kernel virtual memory subsystem, it is +/// easy for the memory region in userspace to get out of sync with +/// the memory region mapped into the guest. Generally speaking, when +/// the [`SharedMemory`] is mapped into a partition, the MSHV kernel +/// module will call `pin_user_pages(FOLL_PIN|FOLL_WRITE)` on it, +/// which will eagerly do any CoW, etc needing to obtain backing pages +/// pinned in memory, and then map precisely those backing pages into +/// the virtual machine. After that, the backing pages mapped into the +/// VM will not change until the region is unmapped or remapped. This +/// means that code in this module needs to be very careful to avoid +/// changing the backing pages of the region in the host userspace, +/// since that would result in hyperlight-host's view of the memory +/// becoming completely divorced from the view of the VM. 
#[derive(Clone, Debug)] pub struct HostSharedMemory { region: Arc, @@ -314,11 +334,9 @@ impl ExclusiveSharedMemory { #[cfg(target_os = "linux")] #[instrument(skip_all, parent = Span::current(), level= "Trace")] pub fn new(min_size_bytes: usize) -> Result { - #[cfg(miri)] - use libc::MAP_PRIVATE; - use libc::{MAP_ANONYMOUS, MAP_FAILED, PROT_READ, PROT_WRITE, c_int, mmap, off_t, size_t}; + use libc::{MAP_ANONYMOUS, MAP_PRIVATE, MAP_FAILED, PROT_READ, PROT_WRITE, c_int, mmap, off_t, size_t}; #[cfg(not(miri))] - use libc::{MAP_NORESERVE, MAP_SHARED, PROT_NONE, mprotect}; + use libc::{MAP_NORESERVE, PROT_NONE, mprotect}; if min_size_bytes == 0 { return Err(new_error!("Cannot create shared memory with size 0")); @@ -346,7 +364,7 @@ impl ExclusiveSharedMemory { // allocate the memory #[cfg(not(miri))] - let flags = MAP_ANONYMOUS | MAP_SHARED | MAP_NORESERVE; + let flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE; #[cfg(miri)] let flags = MAP_ANONYMOUS | MAP_PRIVATE; @@ -764,7 +782,9 @@ pub trait SharedMemory { #[allow(unused_mut)] // unused on some platforms, although not others let mut do_copy = true; // TODO: Compare & add heuristic thresholds: mmap, MADV_DONTNEED, MADV_REMOVE, MADV_FREE (?) - #[cfg(target_os = "linux")] + // TODO: Find a similar lazy zeroing approach that works on MSHV. + // (See Note [Keeping mappings in sync between userspace and the guest]) + #[cfg(all(target_os = "linux", feature = "kvm", not(any(feature = "mshv3"))))] unsafe { let ret = libc::madvise( e.region.ptr as *mut libc::c_void, From 6ab5ef70df2d1ab6e49433eb24fa062919079764 Mon Sep 17 00:00:00 2001 From: Lucy Menon <168595099+syntactically@users.noreply.github.com> Date: Thu, 5 Feb 2026 12:57:29 +0000 Subject: [PATCH 05/10] Avoid passing the guest dispatch pointer in memory In preparation for the snapshot becoming immutable, this gets rid of one of the last places that assume identity mapping & require early memory writes in the sandbox. 
Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com> --- src/hyperlight_common/src/mem.rs | 1 - src/hyperlight_guest_bin/src/lib.rs | 13 +++-- .../src/hypervisor/hyperlight_vm.rs | 53 ++++++++++++------ src/hyperlight_host/src/mem/layout.rs | 15 ------ src/hyperlight_host/src/mem/mgr.rs | 42 ++++----------- src/hyperlight_host/src/mem/shared_mem.rs | 5 +- .../src/sandbox/initialized_multi_use.rs | 8 ++- src/hyperlight_host/src/sandbox/outb.rs | 6 +-- src/hyperlight_host/src/sandbox/snapshot.rs | 54 ++++++++++++------- .../src/sandbox/uninitialized_evolve.rs | 19 +------ 10 files changed, 104 insertions(+), 112 deletions(-) diff --git a/src/hyperlight_common/src/mem.rs b/src/hyperlight_common/src/mem.rs index 35fa9c183..3c86f8ecf 100644 --- a/src/hyperlight_common/src/mem.rs +++ b/src/hyperlight_common/src/mem.rs @@ -31,7 +31,6 @@ pub struct GuestMemoryRegion { #[derive(Debug, Clone, Copy)] #[repr(C)] pub struct HyperlightPEB { - pub guest_function_dispatch_ptr: u64, pub input_stack: GuestMemoryRegion, pub output_stack: GuestMemoryRegion, pub init_data: GuestMemoryRegion, diff --git a/src/hyperlight_guest_bin/src/lib.rs b/src/hyperlight_guest_bin/src/lib.rs index 69ffc37af..cc1665abd 100644 --- a/src/hyperlight_guest_bin/src/lib.rs +++ b/src/hyperlight_guest_bin/src/lib.rs @@ -178,8 +178,13 @@ unsafe extern "C" { /// Architecture-nonspecific initialisation: set up the heap, /// coordinate some addresses and configuration with the host, and run /// user initialisation -pub(crate) extern "C" fn generic_init(peb_address: u64, seed: u64, ops: u64, max_log_level: u64) { - let peb_ptr = unsafe { +pub(crate) extern "C" fn generic_init( + peb_address: u64, + seed: u64, + ops: u64, + max_log_level: u64, +) -> u64 { + unsafe { GUEST_HANDLE = GuestHandle::init(peb_address as *mut HyperlightPEB); #[allow(static_mut_refs)] let peb_ptr = GUEST_HANDLE.peb().unwrap(); @@ -202,8 +207,6 @@ pub(crate) extern "C" fn generic_init(peb_address: u64, seed: u64, ops: 
u64, max let guest_start_tsc = hyperlight_guest_tracing::invariant_tsc::read_tsc(); unsafe { - (*peb_ptr).guest_function_dispatch_ptr = dispatch_function as usize as u64; - let srand_seed = (((peb_address << 8) ^ (seed >> 4)) >> 32) as u32; // Set the seed for the random number generator for C code using rand; srand(srand_seed); @@ -231,6 +234,8 @@ pub(crate) extern "C" fn generic_init(peb_address: u64, seed: u64, ops: u64, max unsafe { hyperlight_main(); } + + dispatch_function as usize as u64 } #[cfg(feature = "macros")] diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs index f8285d425..ba2cb8eb6 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs @@ -68,6 +68,7 @@ use crate::metrics::{METRIC_ERRONEOUS_VCPU_KICKS, METRIC_GUEST_CANCELLATION}; use crate::sandbox::SandboxConfiguration; use crate::sandbox::host_funcs::FunctionRegistry; use crate::sandbox::outb::{HandleOutbError, handle_outb}; +use crate::sandbox::snapshot::NextAction; #[cfg(feature = "mem_profile")] use crate::sandbox::trace::MemTraceInfo; #[cfg(crashdump)] @@ -85,7 +86,7 @@ pub(crate) struct HyperlightVm { #[cfg(not(gdb))] vm: Box, page_size: usize, - entrypoint: Option, // only present if this vm has not yet been initialised + entrypoint: NextAction, // Initialise(..) until the vm is initialised, Call(..) afterwards rsp_gva: u64, interrupt_handle: Arc, @@ -120,6 +121,8 @@ pub enum DispatchGuestCallError { Run(#[from] RunVmError), #[error("Failed to setup registers: {0}")] SetupRegs(RegisterError), + #[error("VM was uninitialized")] + Uninitialized, } impl DispatchGuestCallError { @@ -129,7 +132,7 @@ impl DispatchGuestCallError { // These errors poison the sandbox because they can leave it in an inconsistent state // by returning before the guest can unwind properly DispatchGuestCallError::Run(_) => true, - DispatchGuestCallError::SetupRegs(_) => false, + 
DispatchGuestCallError::SetupRegs(_) | DispatchGuestCallError::Uninitialized => false, } } @@ -338,7 +341,7 @@ impl HyperlightVm { snapshot_mem: GuestSharedMemory, scratch_mem: GuestSharedMemory, _pml4_addr: u64, - entrypoint: Option, + entrypoint: NextAction, rsp_gva: u64, #[cfg_attr(target_os = "windows", allow(unused_variables))] config: &SandboxConfiguration, #[cfg(gdb)] gdb_conn: Option>, @@ -438,9 +441,9 @@ impl HyperlightVm { ret.send_dbg_msg(DebugResponse::InterruptHandle(ret.interrupt_handle.clone()))?; // Add breakpoint to the entry point address, if we are going to initialise ret.vm.set_debug(true).map_err(VmError::Debug)?; - if let Some(entrypoint) = entrypoint { + if let NextAction::Initialise(initialise) = entrypoint { ret.vm - .add_hw_breakpoint(entrypoint) + .add_hw_breakpoint(initialise) .map_err(CreateHyperlightVmError::AddHwBreakpoint)?; } } @@ -462,7 +465,7 @@ impl HyperlightVm { guest_max_log_level: Option, #[cfg(gdb)] dbg_mem_access_fn: Arc>>, ) -> std::result::Result<(), InitializeError> { - let Some(entrypoint) = self.entrypoint else { + let NextAction::Initialise(initialise) = self.entrypoint else { return Ok(()); }; @@ -474,7 +477,7 @@ impl HyperlightVm { }; let regs = CommonRegisters { - rip: entrypoint, + rip: initialise, // We usually keep the top of the stack 16-byte // aligned. 
However, the ABI requirement is that the stack // be aligned _before a call instruction_, which means @@ -508,6 +511,7 @@ impl HyperlightVm { return Err(InitializeError::InvalidStackPointer(regs.rsp)); } self.rsp_gva = regs.rsp; + self.entrypoint = NextAction::Call(regs.rax); Ok(()) } @@ -631,6 +635,16 @@ impl HyperlightVm { self.rsp_gva = gva; } + /// Get the current entrypoint action + pub(crate) fn get_entrypoint(&mut self) -> NextAction { + self.entrypoint + } + + /// Set the current entrypoint action + pub(crate) fn set_entrypoint(&mut self, entrypoint: NextAction) { + self.entrypoint = entrypoint + } + /// Dispatch a call from the host to the guest using the given pointer /// to the dispatch function _in the guest's address space_. /// @@ -641,14 +655,16 @@ impl HyperlightVm { #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] pub(crate) fn dispatch_call_from_host( &mut self, - dispatch_func_addr: RawPtr, mem_mgr: &mut SandboxMemoryManager, host_funcs: &Arc>, #[cfg(gdb)] dbg_mem_access_fn: Arc>>, ) -> std::result::Result<(), DispatchGuestCallError> { + let NextAction::Call(dispatch_func_addr) = self.entrypoint else { + return Err(DispatchGuestCallError::Uninitialized); + }; // set RIP and RSP, reset others let regs = CommonRegisters { - rip: dispatch_func_addr.into(), + rip: dispatch_func_addr, // We usually keep the top of the stack 16-byte // aligned. 
However, the ABI requirement is that the stack // be aligned _before a call instruction_, which means @@ -755,13 +771,13 @@ impl HyperlightVm { match exit_reason { #[cfg(gdb)] Ok(VmExit::Debug { dr6, exception }) => { + let initialise = match self.entrypoint { + NextAction::Initialise(initialise) => initialise, + _ => 0, + }; // Handle debug event (breakpoints) - let stop_reason = arch::vcpu_stop_reason( - self.vm.as_mut(), - dr6, - self.entrypoint.unwrap_or(0), - exception, - )?; + let stop_reason = + arch::vcpu_stop_reason(self.vm.as_mut(), dr6, initialise, exception)?; if let Err(e) = self.handle_debug(dbg_mem_access_fn.clone(), stop_reason) { break Err(e.into()); } @@ -1133,6 +1149,11 @@ impl HyperlightVm { .and_then(|name| name.to_os_string().into_string().ok()) }); + let initialise = match self.entrypoint { + NextAction::Initialise(initialise) => initialise, + _ => 0, + }; + // Include dynamically mapped regions // TODO: include the snapshot and scratch regions let regions: Vec = self.get_mapped_regions().cloned().collect(); @@ -1140,7 +1161,7 @@ impl HyperlightVm { regions, regs, xsave.to_vec(), - self.entrypoint.unwrap_or(0), + initialise, self.rt_cfg.binary_path.clone(), filename, ))) diff --git a/src/hyperlight_host/src/mem/layout.rs b/src/hyperlight_host/src/mem/layout.rs index 92df50cd8..4a9a5e244 100644 --- a/src/hyperlight_host/src/mem/layout.rs +++ b/src/hyperlight_host/src/mem/layout.rs @@ -90,7 +90,6 @@ pub(crate) struct SandboxMemoryLayout { /// The following fields are offsets to the actual PEB struct fields. 
/// They are used when writing the PEB struct itself peb_offset: usize, - peb_guest_dispatch_function_ptr_offset: usize, // set by guest in guest entrypoint peb_input_data_offset: usize, peb_output_data_offset: usize, peb_init_data_offset: usize, @@ -129,10 +128,6 @@ impl Debug for SandboxMemoryLayout { .field("PEB Address", &format_args!("{:#x}", self.peb_address)) .field("PEB Offset", &format_args!("{:#x}", self.peb_offset)) .field("Code Size", &format_args!("{:#x}", self.code_size)) - .field( - "Guest Dispatch Function Pointer Offset", - &format_args!("{:#x}", self.peb_guest_dispatch_function_ptr_offset), - ) .field( "Input Data Offset", &format_args!("{:#x}", self.peb_input_data_offset), @@ -216,8 +211,6 @@ impl SandboxMemoryLayout { let guest_code_offset = 0; // The following offsets are to the fields of the PEB struct itself! let peb_offset = code_size.next_multiple_of(PAGE_SIZE_USIZE); - let peb_guest_dispatch_function_ptr_offset = - peb_offset + offset_of!(HyperlightPEB, guest_function_dispatch_ptr); let peb_input_data_offset = peb_offset + offset_of!(HyperlightPEB, input_stack); let peb_output_data_offset = peb_offset + offset_of!(HyperlightPEB, output_stack); let peb_init_data_offset = peb_offset + offset_of!(HyperlightPEB, init_data); @@ -238,7 +231,6 @@ impl SandboxMemoryLayout { Ok(Self { peb_offset, heap_size, - peb_guest_dispatch_function_ptr_offset, peb_input_data_offset, peb_output_data_offset, peb_init_data_offset, @@ -344,13 +336,6 @@ impl SandboxMemoryLayout { .next_multiple_of(hyperlight_common::vmem::PAGE_SIZE) as u64 } - /// Get the offset in guest memory to where the guest dispatch function - /// pointer is written - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn get_dispatch_function_pointer_offset(&self) -> usize { - self.peb_guest_dispatch_function_ptr_offset - } - /// Get the offset in guest memory to the heap size #[instrument(skip_all, parent = Span::current(), level= "Trace")] fn 
get_heap_size_offset(&self) -> usize { diff --git a/src/hyperlight_host/src/mem/mgr.rs b/src/hyperlight_host/src/mem/mgr.rs index 204043448..d0cb775ad 100644 --- a/src/hyperlight_host/src/mem/mgr.rs +++ b/src/hyperlight_host/src/mem/mgr.rs @@ -24,11 +24,10 @@ use tracing::{Span, instrument}; use super::layout::SandboxMemoryLayout; use super::memory_region::MemoryRegion; -use super::ptr::{GuestPtr, RawPtr}; -use super::ptr_offset::Offset; +use super::ptr::RawPtr; use super::shared_mem::{ExclusiveSharedMemory, GuestSharedMemory, HostSharedMemory, SharedMemory}; use crate::hypervisor::regs::CommonSpecialRegisters; -use crate::sandbox::snapshot::Snapshot; +use crate::sandbox::snapshot::{NextAction, Snapshot}; use crate::{Result, new_error}; /// A struct that is responsible for laying out and managing the memory @@ -44,7 +43,7 @@ pub(crate) struct SandboxMemoryManager { /// Pointer to where to load memory from pub(crate) load_addr: RawPtr, /// Offset for the execution entrypoint from `load_addr` - pub(crate) entrypoint_offset: Option, + pub(crate) entrypoint: NextAction, /// How many memory regions were mapped after sandbox creation pub(crate) mapped_rgns: u64, /// Buffer for accumulating guest abort messages @@ -152,14 +151,14 @@ where shared_mem: S, scratch_mem: S, load_addr: RawPtr, - entrypoint_offset: Option, + entrypoint: NextAction, ) -> Self { Self { layout, shared_mem, scratch_mem, load_addr, - entrypoint_offset, + entrypoint, mapped_rgns: 0, abort_buffer: Vec::new(), } @@ -176,12 +175,6 @@ where &mut self.shared_mem } - /// Get `SharedMemory` in `self` as a mutable reference - #[cfg(any(gdb, test))] - pub(crate) fn get_scratch_mem_mut(&mut self) -> &mut S { - &mut self.scratch_mem - } - /// Create a snapshot with the given mapped regions pub(crate) fn snapshot( &mut self, @@ -190,6 +183,7 @@ where root_pt_gpa: u64, rsp_gva: u64, sregs: CommonSpecialRegisters, + entrypoint: NextAction, ) -> Result { Snapshot::new( &mut self.shared_mem, @@ -201,6 +195,7 @@ where 
root_pt_gpa, rsp_gva, sregs, + entrypoint, ) } } @@ -212,14 +207,13 @@ impl SandboxMemoryManager { shared_mem.copy_from_slice(s.memory(), 0)?; let scratch_mem = ExclusiveSharedMemory::new(s.layout().get_scratch_size())?; let load_addr: RawPtr = RawPtr::try_from(layout.get_guest_code_address())?; - let entrypoint_gva = s.preinitialise(); - let entrypoint_offset = entrypoint_gva.map(|x| (x - u64::from(&load_addr)).into()); + let entrypoint = s.entrypoint(); Ok(Self::new( layout, shared_mem, scratch_mem, load_addr, - entrypoint_offset, + entrypoint, )) } @@ -257,7 +251,7 @@ impl SandboxMemoryManager { scratch_mem: hscratch, layout: self.layout, load_addr: self.load_addr.clone(), - entrypoint_offset: self.entrypoint_offset, + entrypoint: self.entrypoint, mapped_rgns: self.mapped_rgns, abort_buffer: self.abort_buffer, }; @@ -266,7 +260,7 @@ impl SandboxMemoryManager { scratch_mem: gscratch, layout: self.layout, load_addr: self.load_addr.clone(), - entrypoint_offset: self.entrypoint_offset, + entrypoint: self.entrypoint, mapped_rgns: self.mapped_rgns, abort_buffer: Vec::new(), // Guest doesn't need abort buffer }; @@ -278,20 +272,6 @@ impl SandboxMemoryManager { } impl SandboxMemoryManager { - /// Get the address of the dispatch function in memory - #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - pub(crate) fn get_pointer_to_dispatch_function(&self) -> Result { - let guest_dispatch_function_ptr = self - .shared_mem - .read::(self.layout.get_dispatch_function_pointer_offset())?; - - // This pointer is written by the guest library but is accessible to - // the guest engine so we should bounds check it before we return it. 
- - let guest_ptr = GuestPtr::try_from(RawPtr::from(guest_dispatch_function_ptr))?; - guest_ptr.absolute() - } - /// Reads a host function call from memory #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn get_host_function_call(&mut self) -> Result { diff --git a/src/hyperlight_host/src/mem/shared_mem.rs b/src/hyperlight_host/src/mem/shared_mem.rs index 799a41b08..ecf0f66a6 100644 --- a/src/hyperlight_host/src/mem/shared_mem.rs +++ b/src/hyperlight_host/src/mem/shared_mem.rs @@ -334,7 +334,10 @@ impl ExclusiveSharedMemory { #[cfg(target_os = "linux")] #[instrument(skip_all, parent = Span::current(), level= "Trace")] pub fn new(min_size_bytes: usize) -> Result { - use libc::{MAP_ANONYMOUS, MAP_PRIVATE, MAP_FAILED, PROT_READ, PROT_WRITE, c_int, mmap, off_t, size_t}; + use libc::{ + MAP_ANONYMOUS, MAP_FAILED, MAP_PRIVATE, PROT_READ, PROT_WRITE, c_int, mmap, off_t, + size_t, + }; #[cfg(not(miri))] use libc::{MAP_NORESERVE, PROT_NONE, mprotect}; diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 8092e52db..c31fcda45 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -42,7 +42,6 @@ use crate::mem::memory_region::MemoryRegion; #[cfg(unix)] use crate::mem::memory_region::{MemoryRegionFlags, MemoryRegionType}; use crate::mem::mgr::SandboxMemoryManager; -use crate::mem::ptr::RawPtr; use crate::mem::shared_mem::HostSharedMemory; use crate::metrics::{ METRIC_GUEST_ERROR, METRIC_GUEST_ERROR_LABEL_CODE, maybe_time_and_emit_guest_call, @@ -95,7 +94,6 @@ pub struct MultiUseSandbox { pub(super) host_funcs: Arc>, pub(crate) mem_mgr: SandboxMemoryManager, vm: HyperlightVm, - dispatch_ptr: RawPtr, #[cfg(gdb)] dbg_mem_access_fn: Arc>>, /// If the current state of the sandbox has been captured in a snapshot, @@ -114,7 +112,6 @@ impl MultiUseSandbox { host_funcs: Arc>, mgr: 
SandboxMemoryManager, vm: HyperlightVm, - dispatch_ptr: RawPtr, #[cfg(gdb)] dbg_mem_access_fn: Arc>>, ) -> MultiUseSandbox { Self { @@ -123,7 +120,6 @@ impl MultiUseSandbox { host_funcs, mem_mgr: mgr, vm, - dispatch_ptr, #[cfg(gdb)] dbg_mem_access_fn, snapshot: None, @@ -178,12 +174,14 @@ impl MultiUseSandbox { .vm .get_snapshot_sregs() .map_err(|e| HyperlightError::HyperlightVmError(e.into()))?; + let entrypoint = self.vm.get_entrypoint(); let memory_snapshot = self.mem_mgr.snapshot( self.id, mapped_regions_vec, root_pt_gpa, stack_top_gpa, sregs, + entrypoint, )?; let snapshot = Arc::new(memory_snapshot); self.snapshot = Some(snapshot.clone()); @@ -319,6 +317,7 @@ impl MultiUseSandbox { })?; self.vm.set_stack_top(snapshot.stack_top_gva()); + self.vm.set_entrypoint(snapshot.entrypoint()); let current_regions: HashSet<_> = self.vm.get_mapped_regions().cloned().collect(); let snapshot_regions: HashSet<_> = snapshot.regions().iter().cloned().collect(); @@ -653,7 +652,6 @@ impl MultiUseSandbox { self.mem_mgr.write_guest_function_call(buffer)?; let dispatch_res = self.vm.dispatch_call_from_host( - self.dispatch_ptr.clone(), &mut self.mem_mgr, &self.host_funcs, #[cfg(gdb)] diff --git a/src/hyperlight_host/src/sandbox/outb.rs b/src/hyperlight_host/src/sandbox/outb.rs index d79bd23bd..76ec53cee 100644 --- a/src/hyperlight_host/src/sandbox/outb.rs +++ b/src/hyperlight_host/src/sandbox/outb.rs @@ -298,7 +298,7 @@ mod tests { let guest_log_data_buffer: Vec = log_msg.try_into().unwrap(); let offset = mgr.layout.get_output_data_buffer_scratch_host_offset(); - mgr.get_scratch_mem_mut() + mgr.scratch_mem .push_buffer( offset, sandbox_cfg.get_output_data_size(), @@ -335,7 +335,7 @@ mod tests { let log_data = new_guest_log_data(level); let guest_log_data_buffer: Vec = log_data.clone().try_into().unwrap(); - mgr.get_scratch_mem_mut() + mgr.scratch_mem .push_buffer( layout.get_output_data_buffer_scratch_host_offset(), sandbox_cfg.get_output_data_size(), @@ -417,7 +417,7 @@ mod tests 
{ subscriber.clear(); let guest_log_data_buffer: Vec = log_data.try_into().unwrap(); - mgr.get_scratch_mem_mut() + mgr.scratch_mem .push_buffer( layout.get_output_data_buffer_scratch_host_offset(), sandbox_cfg.get_output_data_size(), diff --git a/src/hyperlight_host/src/sandbox/snapshot.rs b/src/hyperlight_host/src/sandbox/snapshot.rs index 9e6622199..ff33ab26b 100644 --- a/src/hyperlight_host/src/sandbox/snapshot.rs +++ b/src/hyperlight_host/src/sandbox/snapshot.rs @@ -33,6 +33,29 @@ use crate::sandbox::uninitialized::{GuestBinary, GuestEnvironment}; pub(super) static SANDBOX_CONFIGURATION_COUNTER: AtomicU64 = AtomicU64::new(0); +/// Presently, a snapshot can be of a preinitialised sandbox, which +/// still needs an initialise function called in order to determine +/// how to call into it, or of an already-properly-initialised sandbox +/// which can be immediately called into. This keeps track of the +/// difference. +/// +/// TODO: this should not necessarily be around in the long term: +/// ideally we would just preinitialise earlier in the snapshot +/// creation process and never need this. +#[derive(Copy, Clone, PartialEq, Eq)] +pub enum NextAction { + /// A sandbox in the preinitialise state still needs to be + /// initialised by calling the initialise function + Initialise(u64), + /// A sandbox in the ready state can immediately be called into, + /// using the dispatch function pointer. + Call(u64), + /// Only when compiling for tests: a sandbox that cannot actually + /// be used + #[cfg(test)] + None, +} + /// A wrapper around a `SharedMemory` reference and a snapshot /// of the memory therein pub struct Snapshot { @@ -79,20 +102,8 @@ pub struct Snapshot { /// root_pt_gpa field is used since page tables are relocated during snapshot. sregs: Option, - /// Preinitialisation entry point for snapshots created directly from a - /// guest binary. 
- /// - /// When creating a snapshot directly from a guest binary, this tracks - /// the address that we need to call into before actually using a - /// sandbox from this snapshot in order to perform guest-side - /// preinitialisation. - /// - /// Long-term, the intention is to run this preinitialisation eagerly as - /// part of the snapshot creation process so that restored sandboxes can - /// begin executing from their normal entry point without requiring this - /// field. Until that refactoring happens, this remains part of the - /// snapshot format and must be preserved. - preinitialise: Option, + /// The next action that should be performed on this snapshot + entrypoint: NextAction, } impl core::convert::AsRef for Snapshot { fn as_ref(&self) -> &Self { @@ -438,7 +449,7 @@ impl Snapshot { root_pt_gpa: pt_base_gpa as u64, stack_top_gva: exn_stack_top_gva, sregs: None, - preinitialise: Some(load_addr + entrypoint_offset), + entrypoint: NextAction::Initialise(load_addr + entrypoint_offset), }) } @@ -461,6 +472,7 @@ impl Snapshot { root_pt_gpa: u64, stack_top_gva: u64, sregs: CommonSpecialRegisters, + entrypoint: NextAction, ) -> Result { let (new_root_pt_gpa, memory) = shared_mem.with_exclusivity(|snap_e| { scratch_mem.with_exclusivity(|scratch_e| { @@ -512,7 +524,7 @@ impl Snapshot { stack_top_gva, sregs: Some(sregs), root_pt_gpa: new_root_pt_gpa as u64, - preinitialise: None, + entrypoint, }) } @@ -564,8 +576,8 @@ impl Snapshot { self.sregs.as_ref() } - pub(crate) fn preinitialise(&self) -> Option { - self.preinitialise + pub(crate) fn entrypoint(&self) -> NextAction { + self.entrypoint } } @@ -616,7 +628,7 @@ mod tests { snapshot_mem, scratch_mem, 0.into(), - None, + super::NextAction::None, ); let (mgr, _) = mgr.build().unwrap(); (mgr, pt_base as u64) @@ -642,6 +654,7 @@ mod tests { pt_base, 0, default_sregs(), + super::NextAction::None, ) .unwrap(); @@ -673,6 +686,7 @@ mod tests { pt_base, 0, default_sregs(), + super::NextAction::None, ) .unwrap(); 
assert_eq!(snapshot.mem_size(), size); @@ -695,6 +709,7 @@ mod tests { pt_base, 0, default_sregs(), + super::NextAction::None, ) .unwrap(); @@ -711,6 +726,7 @@ mod tests { pt_base, 0, default_sregs(), + super::NextAction::None, ) .unwrap(); diff --git a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs index d9f73bfc8..31e3975c9 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs @@ -34,7 +34,7 @@ use crate::sandbox::config::DebugInfo; use crate::sandbox::trace::MemTraceInfo; #[cfg(target_os = "linux")] use crate::signal_handlers::setup_signal_handlers; -use crate::{MultiUseSandbox, Result, UninitializedSandbox, new_error}; +use crate::{MultiUseSandbox, Result, UninitializedSandbox}; #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] pub(super) fn evolve_impl_multi_use(u_sbox: UninitializedSandbox) -> Result { @@ -77,13 +77,6 @@ pub(super) fn evolve_impl_multi_use(u_sbox: UninitializedSandbox) -> Result Result Date: Fri, 6 Feb 2026 06:07:43 +0000 Subject: [PATCH 06/10] Make the snapshot region readonly and enable CoW There are still a few changes needed to be able to ensure that the host doesn't try to write to the snapshot region before it can be mmap'd readonly into the guest. However, the guest no longer tries to write to the snapshot region. 
Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com> --- fuzz/fuzz_targets/host_call.rs | 1 + src/hyperlight_common/src/arch/amd64/vmem.rs | 135 +++++++++++++++--- src/hyperlight_common/src/arch/i686/vmem.rs | 7 +- src/hyperlight_common/src/vmem.rs | 13 +- .../src/arch/amd64/exception/handle.rs | 123 +++++++++++++--- .../src/arch/amd64/init.rs | 8 +- .../src/arch/amd64/machine.rs | 7 +- src/hyperlight_guest_bin/src/paging.rs | 28 ++-- src/hyperlight_host/benches/benchmarks.rs | 4 +- .../src/hypervisor/virtual_machine/mshv.rs | 7 +- .../src/hypervisor/virtual_machine/whp.rs | 1 - src/hyperlight_host/src/mem/memory_region.rs | 2 - src/hyperlight_host/src/mem/mgr.rs | 21 ++- src/hyperlight_host/src/mem/shared_mem.rs | 3 +- .../src/sandbox/initialized_multi_use.rs | 7 +- src/hyperlight_host/src/sandbox/snapshot.rs | 74 ++++++---- .../src/sandbox/uninitialized.rs | 2 +- src/hyperlight_host/tests/common/mod.rs | 23 ++- .../tests/sandbox_host_tests.rs | 6 +- src/tests/rust_guests/simpleguest/src/main.rs | 40 +++++- 20 files changed, 393 insertions(+), 119 deletions(-) diff --git a/fuzz/fuzz_targets/host_call.rs b/fuzz/fuzz_targets/host_call.rs index befd95ff2..b0d37cf1a 100644 --- a/fuzz/fuzz_targets/host_call.rs +++ b/fuzz/fuzz_targets/host_call.rs @@ -35,6 +35,7 @@ fuzz_target!( let mut cfg = SandboxConfiguration::default(); cfg.set_output_data_size(64 * 1024); // 64 KB output buffer cfg.set_input_data_size(64 * 1024); // 64 KB input buffer + cfg.set_scratch_size(512 * 1024); // large scratch region to contain those buffers, any data copies, etc. 
let u_sbox = UninitializedSandbox::new( GuestBinary::FilePath(simple_guest_for_fuzzing_as_string().expect("Guest Binary Missing")), Some(cfg) diff --git a/src/hyperlight_common/src/arch/amd64/vmem.rs b/src/hyperlight_common/src/arch/amd64/vmem.rs index 110598a9a..631617c1c 100644 --- a/src/hyperlight_common/src/arch/amd64/vmem.rs +++ b/src/hyperlight_common/src/arch/amd64/vmem.rs @@ -26,7 +26,8 @@ limitations under the License. //! allocating intermediate tables as needed and setting appropriate flags on leaf PTEs use crate::vmem::{ - BasicMapping, Mapping, MappingKind, TableMovabilityBase, TableOps, TableReadOps, Void, + BasicMapping, CowMapping, Mapping, MappingKind, TableMovabilityBase, TableOps, TableReadOps, + Void, }; // Paging Flags @@ -64,6 +65,11 @@ const PAGE_CACHE_ENABLED: u64 = 0 << 4; // PCD - page cache disable bit not set const PAGE_WRITE_BACK: u64 = 0 << 3; // PWT - page write-through bit not set (write-back caching) const PAGE_PAT_WB: u64 = 0 << 7; // PAT - page attribute table index bit (0 for write-back memory when PCD=0, PWT=0) +// We use various patterns of the available-for-software-use bits to +// represent certain special mappings. +const PTE_AVL_MASK: u64 = 0x0000_0000_0000_0E00; +const PAGE_AVL_COW: u64 = 1 << 9; + /// Returns PAGE_RW if writable is true, 0 otherwise #[inline(always)] const fn page_rw_flag(writable: bool) -> u64 { @@ -417,7 +423,7 @@ unsafe fn map_page< r: MapResponse, ) { let pte = match &mapping.kind { - MappingKind::BasicMapping(bm) => + MappingKind::Basic(bm) => // TODO: Support not readable // NOTE: On x86-64, there is no separate "readable" bit in the page table entry. // This means that pages cannot be made write-only or execute-only without also being readable. 
@@ -437,6 +443,19 @@ unsafe fn map_page< page_rw_flag(bm.writable) | // R/W - set if writable PAGE_PRESENT // P - this entry is present } + MappingKind::Cow(cm) => { + (mapping.phys_base + (r.vmin - mapping.virt_base)) | + page_nx_flag(cm.executable) | // NX - no execute unless allowed + PAGE_AVL_COW | + PAGE_PAT_WB | // PAT index bit for write-back memory + PAGE_DIRTY_CLEAR | // dirty bit (set by CPU when written) + PAGE_ACCESSED_CLEAR | // accessed bit cleared (will be set by CPU when page is accessed - but we dont use the access bit for anything at present) + PAGE_CACHE_ENABLED | // leave caching enabled + PAGE_WRITE_BACK | // use write-back caching + PAGE_USER_ACCESS_DISABLED | // dont allow user access (no code runs in user mode for now) + 0 | // R/W - Cow page is never writable + PAGE_PRESENT // P - this entry is present + } }; unsafe { write_entry_updating(op, r.update_parent, r.entry_ptr, pte); @@ -517,7 +536,7 @@ pub unsafe fn virt_to_phys<'a, Op: TableReadOps + 'a>( op: impl core::convert::AsRef + Copy + 'a, address: u64, len: u64, -) -> impl Iterator + 'a { +) -> impl Iterator + 'a { // Undo sign-extension, and mask off any sub-page bits let vmin = (address & ((1u64 << VA_BITS) - 1)) & !(PAGE_SIZE as u64 - 1); let vmax = core::cmp::min(vmin + len, 1u64 << VA_BITS); @@ -540,12 +559,27 @@ pub unsafe fn virt_to_phys<'a, Op: TableReadOps + 'a>( let sgn_bit = r.vmin >> (VA_BITS - 1); let sgn_bits = 0u64.wrapping_sub(sgn_bit) << VA_BITS; let virt_addr = sgn_bits | r.vmin; - let perms = BasicMapping { - readable: true, - writable: (pte & PAGE_RW) != 0, - executable: (pte & PAGE_NX) == 0, + + let executable = (pte & PAGE_NX) == 0; + let avl = pte & PTE_AVL_MASK; + let kind = if avl == PAGE_AVL_COW { + MappingKind::Cow(CowMapping { + readable: true, + executable, + }) + } else { + MappingKind::Basic(BasicMapping { + readable: true, + writable: (pte & PAGE_RW) != 0, + executable, + }) }; - Some((virt_addr, phys_addr, perms)) + Some(Mapping { + phys_base: 
phys_addr, + virt_base: virt_addr, + len: PAGE_SIZE as u64, + kind, + }) }) } @@ -721,7 +755,7 @@ mod tests { phys_base: 0x1000, virt_base: 0x1000, len: PAGE_SIZE as u64, - kind: MappingKind::BasicMapping(BasicMapping { + kind: MappingKind::Basic(BasicMapping { readable: true, writable: true, executable: false, @@ -754,7 +788,7 @@ mod tests { phys_base: 0x2000, virt_base: 0x2000, len: PAGE_SIZE as u64, - kind: MappingKind::BasicMapping(BasicMapping { + kind: MappingKind::Basic(BasicMapping { readable: true, writable: false, executable: true, @@ -777,7 +811,7 @@ mod tests { phys_base: 0x10000, virt_base: 0x10000, len: 4 * PAGE_SIZE as u64, // 4 pages = 16KB - kind: MappingKind::BasicMapping(BasicMapping { + kind: MappingKind::Basic(BasicMapping { readable: true, writable: true, executable: false, @@ -810,7 +844,7 @@ mod tests { phys_base: 0x1000, virt_base: 0x1000, len: PAGE_SIZE as u64, - kind: MappingKind::BasicMapping(BasicMapping { + kind: MappingKind::Basic(BasicMapping { readable: true, writable: true, executable: false, @@ -824,7 +858,7 @@ mod tests { phys_base: 0x5000, virt_base: 0x5000, len: PAGE_SIZE as u64, - kind: MappingKind::BasicMapping(BasicMapping { + kind: MappingKind::Basic(BasicMapping { readable: true, writable: true, executable: false, @@ -849,7 +883,7 @@ mod tests { phys_base: 0x1000, virt_base: 0x1000, len: PAGE_SIZE as u64, - kind: MappingKind::BasicMapping(BasicMapping { + kind: MappingKind::Basic(BasicMapping { readable: true, writable: true, executable: false, @@ -860,8 +894,75 @@ mod tests { let result = unsafe { virt_to_phys(&ops, 0x1000, 1).next() }; assert!(result.is_some(), "Should find mapped address"); - let pte = result.unwrap(); - assert_eq!(pte.1, 0x1000); + let mapping = result.unwrap(); + assert_eq!(mapping.phys_base, 0x1000); + } + + #[test] + fn test_virt_to_phys_unaligned_virt() { + let ops = MockTableOps::new(); + let mapping = Mapping { + phys_base: 0x1000, + virt_base: 0x1000, + len: PAGE_SIZE as u64, + kind: 
MappingKind::Basic(BasicMapping { + readable: true, + writable: true, + executable: false, + }), + }; + + unsafe { map(&ops, mapping) }; + + let result = unsafe { virt_to_phys(&ops, 0x1234, 1).next() }; + assert!(result.is_some(), "Should find mapped address"); + let mapping = result.unwrap(); + assert_eq!(mapping.phys_base, 0x1000); + } + + #[test] + fn test_virt_to_phys_perms() { + let test = |kind| { + let ops = MockTableOps::new(); + let mapping = Mapping { + phys_base: 0x1000, + virt_base: 0x1000, + len: PAGE_SIZE as u64, + kind, + }; + unsafe { map(&ops, mapping) }; + let result = unsafe { virt_to_phys(&ops, 0x1000, 1).next() }; + let mapping = result.unwrap(); + assert_eq!(mapping.kind, kind); + }; + test(MappingKind::Basic(BasicMapping { + readable: true, + writable: false, + executable: false, + })); + test(MappingKind::Basic(BasicMapping { + readable: true, + writable: false, + executable: true, + })); + test(MappingKind::Basic(BasicMapping { + readable: true, + writable: true, + executable: false, + })); + test(MappingKind::Basic(BasicMapping { + readable: true, + writable: true, + executable: true, + })); + test(MappingKind::Cow(CowMapping { + readable: true, + executable: false, + })); + test(MappingKind::Cow(CowMapping { + readable: true, + executable: true, + })); } #[test] @@ -880,7 +981,7 @@ mod tests { phys_base: 0x1000, virt_base: 0x1000, len: PAGE_SIZE as u64, - kind: MappingKind::BasicMapping(BasicMapping { + kind: MappingKind::Basic(BasicMapping { readable: true, writable: true, executable: false, diff --git a/src/hyperlight_common/src/arch/i686/vmem.rs b/src/hyperlight_common/src/arch/i686/vmem.rs index 83c749975..778b557a5 100644 --- a/src/hyperlight_common/src/arch/i686/vmem.rs +++ b/src/hyperlight_common/src/arch/i686/vmem.rs @@ -17,7 +17,7 @@ limitations under the License. // This file is just dummy definitions at the moment, in order to // allow compiling the guest for real mode boot scenarios. 
-use crate::vmem::{BasicMapping, Mapping, TableOps, TableReadOps, Void}; +use crate::vmem::{Mapping, TableOps, TableReadOps, Void}; pub const PAGE_SIZE: usize = 4096; pub const PAGE_TABLE_SIZE: usize = 4096; @@ -31,10 +31,7 @@ pub unsafe fn map(_op: &Op, _mapping: Mapping) { } #[allow(clippy::missing_safety_doc)] -pub unsafe fn virt_to_phys( - _op: &Op, - _address: u64, -) -> impl Iterator { +pub unsafe fn virt_to_phys(_op: &Op, _address: u64) -> impl Iterator { panic!( "vmem::virt_to_phys: i686 guests do not support booting the full hyperlight guest kernel" ); diff --git a/src/hyperlight_common/src/vmem.rs b/src/hyperlight_common/src/vmem.rs index 6ddf172dd..c206e50f1 100644 --- a/src/hyperlight_common/src/vmem.rs +++ b/src/hyperlight_common/src/vmem.rs @@ -180,16 +180,23 @@ pub trait TableOps: TableReadOps { ); } -#[derive(Debug)] +#[derive(Debug, PartialEq, Clone, Copy)] pub struct BasicMapping { pub readable: bool, pub writable: bool, pub executable: bool, } -#[derive(Debug)] +#[derive(Debug, PartialEq, Clone, Copy)] +pub struct CowMapping { + pub readable: bool, + pub executable: bool, +} + +#[derive(Debug, PartialEq, Clone, Copy)] pub enum MappingKind { - BasicMapping(BasicMapping), + Basic(BasicMapping), + Cow(CowMapping), /* TODO: What useful things other than basic mappings actually * require touching the tables? */ } diff --git a/src/hyperlight_guest_bin/src/arch/amd64/exception/handle.rs b/src/hyperlight_guest_bin/src/arch/amd64/exception/handle.rs index 91fad866a..8db2ee18a 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/exception/handle.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/exception/handle.rs @@ -17,6 +17,9 @@ limitations under the License. 
use core::fmt::Write; use hyperlight_common::outb::Exception; +use hyperlight_common::vmem::{ + BasicMapping, CowMapping, MappingKind, PAGE_SIZE, PhysAddr, VirtAddr, +}; use hyperlight_guest::exit::write_abort; use hyperlight_guest::layout::{MAIN_STACK_LIMIT_GVA, MAIN_STACK_TOP_GVA}; @@ -57,6 +60,99 @@ pub type ExceptionHandler = fn( page_fault_address: u64, ) -> bool; +fn handle_stack_pagefault(gva: u64) { + // TODO: perhaps we should have a sanity check that the + // stack grows only one page at a time, which should be + // ensured by our stack probing discipline? + unsafe { + let new_page = hyperlight_guest::prim_alloc::alloc_phys_pages(1); + crate::paging::map_region( + new_page, + (gva & !0xfff) as *mut u8, + PAGE_SIZE as u64, + MappingKind::Basic(BasicMapping { + readable: true, + writable: true, + executable: false, + }), + ); + } +} + +fn handle_cow_pagefault(_phys: PhysAddr, virt: VirtAddr, perms: CowMapping) { + unsafe { + let new_page = hyperlight_guest::prim_alloc::alloc_phys_pages(1); + let target_virt = virt as *mut u8; + core::ptr::copy( + target_virt, + crate::paging::phys_to_virt(new_page).unwrap(), + PAGE_SIZE, + ); + // todo(multithreading): when we have multiple threads, we + // will likely need to (at least in some situations) do a + // break-before-make sequence here to avoid any possible + // issues with incoherent TLBs. 
+ crate::paging::map_region( + new_page, + target_virt, + PAGE_SIZE as u64, + MappingKind::Basic(BasicMapping { + // Inherit R bit from the original mapping (always 1 at the moment) + readable: perms.readable, + // If we got here, the original marking was marked + // CoW, so the copied mapping should always be + // writable + writable: true, + executable: perms.executable, + }), + ); + core::arch::asm!("invlpg [{}]", in(reg) target_virt, options(readonly, nostack, preserves_flags)); + } +} + +fn try_handle_internal_pagefault( + exn_info: *mut ExceptionInfo, + _ctx: *mut Context, + gva: u64, +) -> bool { + let error_code = unsafe { (&raw const (*exn_info).error_code).read_volatile() }; + let present = (error_code & (1 << 0)) != 0; // bit 0 is P + if !present { + // If the fault was caused by a not-present page, check if we + // should populate it with a stack page + if (MAIN_STACK_LIMIT_GVA..MAIN_STACK_TOP_GVA).contains(&gva) { + handle_stack_pagefault(gva); + return true; + } + return false; + } + let mut orig_mappings = crate::paging::virt_to_phys(gva); + + let fault_was_rsvd_entry = (error_code & (1 << 3)) != 0; + if fault_was_rsvd_entry { + // We don't expect this to ever happen + return false; + } + let access_was_write = (error_code & (1 << 1)) != 0; + let access_was_user = (error_code & (1 << 2)) != 0; + let access_was_insn = (error_code & (1 << 4)) != 0; + if access_was_write && !access_was_user && !access_was_insn { + // The fault was probably caused by a lack of write + // permission. Check if that's because the page needs to be + // CoW'd + if let Some(mapping) = orig_mappings.next() + && let None = orig_mappings.next() + && let MappingKind::Cow(cm) = mapping.kind + { + handle_cow_pagefault(mapping.phys_base, mapping.virt_base, cm); + return true; + } + + return false; + }; + false +} + /// Internal exception handler invoked by the low-level exception entry code. /// /// This function is called from assembly when an exception occurs. 
It checks for @@ -67,32 +163,14 @@ pub(crate) extern "C" fn hl_exception_handler( exception_number: u64, page_fault_address: u64, ) { - // TODO: is this always needed? surely only needed if CoW - crate::paging::flush_tlb(); - let ctx = stack_pointer as *mut Context; let exn_info = (stack_pointer + size_of::() as u64) as *mut ExceptionInfo; let exception = Exception::try_from(exception_number as u8).expect("Invalid exception number"); - let saved_rip = unsafe { (&raw const (*exn_info).rip).read_volatile() }; - let error_code = unsafe { (&raw const (*exn_info).error_code).read_volatile() }; - - if exception_number == 14 - && (MAIN_STACK_LIMIT_GVA..MAIN_STACK_TOP_GVA).contains(&page_fault_address) - { - // TODO: perhaps we should have a sanity check that the - // stack grows only one page at a time, which should be - // ensured by our stack probing discipline? - unsafe { - let new_page = hyperlight_guest::prim_alloc::alloc_phys_pages(1); - crate::paging::map_region( - new_page, - (page_fault_address & !0xfff) as *mut u8, - hyperlight_common::vmem::PAGE_SIZE as u64, - ); - return; - } + // Check if it is a page fault that needs to be handled for normal Hyperlight operation + if exception_number == 14 && try_handle_internal_pagefault(exn_info, ctx, page_fault_address) { + return; } // Check for registered user handlers (only for architecture-defined vectors 0-30) @@ -110,6 +188,9 @@ pub(crate) extern "C" fn hl_exception_handler( } } + // Otherwise, abort due to unexpected exception + let saved_rip = unsafe { (&raw const (*exn_info).rip).read_volatile() }; + let error_code = unsafe { (&raw const (*exn_info).error_code).read_volatile() }; let bytes_at_rip = unsafe { (saved_rip as *const [u8; 8]).read_volatile() }; // begin abort sequence by writing the error code diff --git a/src/hyperlight_guest_bin/src/arch/amd64/init.rs b/src/hyperlight_guest_bin/src/arch/amd64/init.rs index 61e4da900..8c20eb655 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/init.rs +++ 
b/src/hyperlight_guest_bin/src/arch/amd64/init.rs @@ -110,10 +110,16 @@ unsafe fn init_stack() -> u64 { use hyperlight_guest::layout::MAIN_STACK_TOP_GVA; let stack_top_page_base = (MAIN_STACK_TOP_GVA - 1) & !0xfff; unsafe { + use hyperlight_common::vmem::{BasicMapping, MappingKind, PAGE_SIZE}; crate::paging::map_region( hyperlight_guest::prim_alloc::alloc_phys_pages(1), stack_top_page_base as *mut u8, - hyperlight_common::vmem::PAGE_SIZE as u64, + PAGE_SIZE as u64, + MappingKind::Basic(BasicMapping { + readable: true, + writable: true, + executable: false, + }), ); } MAIN_STACK_TOP_GVA diff --git a/src/hyperlight_guest_bin/src/arch/amd64/machine.rs b/src/hyperlight_guest_bin/src/arch/amd64/machine.rs index 79bc7cab8..e9a164807 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/machine.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/machine.rs @@ -16,7 +16,7 @@ limitations under the License. use core::mem; -use hyperlight_common::vmem::PAGE_SIZE; +use hyperlight_common::vmem::{BasicMapping, MappingKind, PAGE_SIZE}; use super::layout::PROC_CONTROL_GVA; @@ -223,6 +223,11 @@ impl ProcCtrl { hyperlight_guest::prim_alloc::alloc_phys_pages(2), ptr, PAGE_SIZE as u64 * 2, + MappingKind::Basic(BasicMapping { + readable: true, + writable: true, + executable: false, + }), ); let ptr = ptr as *mut Self; (&raw mut (*ptr).gdt).write_bytes(0u8, 1); diff --git a/src/hyperlight_guest_bin/src/paging.rs b/src/hyperlight_guest_bin/src/paging.rs index 15721a825..a13bb1832 100644 --- a/src/hyperlight_guest_bin/src/paging.rs +++ b/src/hyperlight_guest_bin/src/paging.rs @@ -45,15 +45,19 @@ impl GuestMappingOperations { scratch_base_gva: hyperlight_guest::layout::scratch_base_gva(), } } - fn phys_to_virt(&self, addr: u64) -> *mut u8 { + fn fallible_phys_to_virt(&self, addr: u64) -> Option<*mut u8> { if addr >= self.scratch_base_gpa { - (self.scratch_base_gva + (addr - self.scratch_base_gpa)) as *mut u8 + Some((self.scratch_base_gva + (addr - self.scratch_base_gpa)) as *mut u8) } else if 
addr >= self.snapshot_pt_base_gpa { - (self.snapshot_pt_base_gva + (addr - self.snapshot_pt_base_gpa)) as *mut u8 + Some((self.snapshot_pt_base_gva + (addr - self.snapshot_pt_base_gpa)) as *mut u8) } else { - panic!("phys_to_virt encountered snapshot non-PT page") + None } } + fn phys_to_virt(&self, addr: u64) -> *mut u8 { + self.fallible_phys_to_virt(addr) + .expect("phys_to_virt encountered snapshot non-PT page") + } } // for virt_to_phys impl core::convert::AsRef for GuestMappingOperations { @@ -137,7 +141,7 @@ impl vmem::TableOps for GuestMappingOperations { /// as such do not use concurrently with any other page table operations /// - TLB invalidation is not performed, /// if previously-unmapped ranges are not being mapped, TLB invalidation may need to be performed afterwards. -pub unsafe fn map_region(phys_base: u64, virt_base: *mut u8, len: u64) { +pub unsafe fn map_region(phys_base: u64, virt_base: *mut u8, len: u64, kind: vmem::MappingKind) { unsafe { vmem::map( &GuestMappingOperations::new(), @@ -145,22 +149,20 @@ pub unsafe fn map_region(phys_base: u64, virt_base: *mut u8, len: u64) { phys_base, virt_base: virt_base as u64, len, - kind: vmem::MappingKind::BasicMapping(vmem::BasicMapping { - readable: true, - writable: true, - executable: true, - }), + kind, }, ); } } -pub fn virt_to_phys( - gva: u64, -) -> impl Iterator { +pub fn virt_to_phys(gva: vmem::VirtAddr) -> impl Iterator { unsafe { vmem::virt_to_phys::<_>(GuestMappingOperations::new(), gva, 1) } } +pub fn phys_to_virt(gpa: vmem::PhysAddr) -> Option<*mut u8> { + GuestMappingOperations::new().fallible_phys_to_virt(gpa) +} + pub fn flush_tlb() { // Currently this just always flips CR4.PGE back and forth to // trigger a tlb flush. 
We should use a faster approach where diff --git a/src/hyperlight_host/benches/benchmarks.rs b/src/hyperlight_host/benches/benchmarks.rs index ca104f9d3..55aed01a6 100644 --- a/src/hyperlight_host/benches/benchmarks.rs +++ b/src/hyperlight_host/benches/benchmarks.rs @@ -385,8 +385,8 @@ fn guest_call_benchmark_large_param(c: &mut Criterion) { let mut config = SandboxConfiguration::default(); config.set_input_data_size(2 * SIZE + (1024 * 1024)); // 2 * SIZE + 1 MB, to allow 1MB for the rest of the serialized function call - config.set_scratch_size(2 * SIZE + 2 * (1024 * 1024)); // 2 * SIZE + 2 MB, to allow 1MB for data outside of the I/O regions config.set_heap_size(SIZE as u64 * 15); + config.set_scratch_size(6 * SIZE + 4 * (1024 * 1024)); // Big enough for the IO data regions and enough of the heap to be used let sandbox = UninitializedSandbox::new( GuestBinary::FilePath(simple_guest_as_string().unwrap()), @@ -398,7 +398,7 @@ fn guest_call_benchmark_large_param(c: &mut Criterion) { b.iter(|| { sandbox .call::<()>("LargeParameters", (large_vec.clone(), large_string.clone())) - .unwrap() + .unwrap(); }); }); diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs index 74d7834ee..b681acae3 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs @@ -359,9 +359,12 @@ impl VirtualMachine for MshvVm { #[cfg(gdb)] impl DebuggableVm for MshvVm { fn translate_gva(&self, gva: u64) -> std::result::Result { - use mshv_bindings::{HV_TRANSLATE_GVA_VALIDATE_READ, HV_TRANSLATE_GVA_VALIDATE_WRITE}; + use mshv_bindings::HV_TRANSLATE_GVA_VALIDATE_READ; - let flags = (HV_TRANSLATE_GVA_VALIDATE_READ | HV_TRANSLATE_GVA_VALIDATE_WRITE) as u64; + // Do not use HV_TRANSLATE_GVA_VALIDATE_WRITE, since many + // things that are interesting to debug are not in fact + // writable from the guest's point of view. 
+ let flags = HV_TRANSLATE_GVA_VALIDATE_READ as u64; let (addr, _) = self .vcpu_fd .translate_gva(gva, flags) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index 94f415b7e..c22e97ba3 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -156,7 +156,6 @@ impl VirtualMachine for WhpVm { MemoryRegionFlags::READ => Ok(WHvMapGpaRangeFlagRead), MemoryRegionFlags::WRITE => Ok(WHvMapGpaRangeFlagWrite), MemoryRegionFlags::EXECUTE => Ok(WHvMapGpaRangeFlagExecute), - MemoryRegionFlags::STACK_GUARD => Ok(WHvMapGpaRangeFlagNone), _ => Err(MapMemoryError::InvalidFlags(format!( "Invalid memory region flag: {:?}", flag diff --git a/src/hyperlight_host/src/mem/memory_region.rs b/src/hyperlight_host/src/mem/memory_region.rs index 9ddaa894a..71237e4ac 100644 --- a/src/hyperlight_host/src/mem/memory_region.rs +++ b/src/hyperlight_host/src/mem/memory_region.rs @@ -48,8 +48,6 @@ bitflags! 
{ const WRITE = 2; /// allow guest to execute const EXECUTE = 4; - /// identifier that this is a stack guard page - const STACK_GUARD = 8; } } diff --git a/src/hyperlight_host/src/mem/mgr.rs b/src/hyperlight_host/src/mem/mgr.rs index d0cb775ad..674a47cee 100644 --- a/src/hyperlight_host/src/mem/mgr.rs +++ b/src/hyperlight_host/src/mem/mgr.rs @@ -427,7 +427,7 @@ impl SandboxMemoryManager { #[cfg(test)] #[cfg(all(feature = "init-paging", target_arch = "x86_64"))] mod tests { - use hyperlight_common::vmem::PAGE_TABLE_SIZE; + use hyperlight_common::vmem::{MappingKind, PAGE_TABLE_SIZE}; use hyperlight_testing::sandbox_sizes::{LARGE_HEAP_SIZE, MEDIUM_HEAP_SIZE, SMALL_HEAP_SIZE}; use hyperlight_testing::simple_guest_as_string; @@ -470,15 +470,22 @@ mod tests { // Verify identity mapping (phys == virt for low memory) assert_eq!( - mapping.1, addr, + mapping.phys_base, addr, "{}: {:?} region: address 0x{:x} should identity map, got phys 0x{:x}", - name, region.region_type, addr, mapping.1 + name, region.region_type, addr, mapping.phys_base ); + // Verify kind is Basic + let MappingKind::Basic(bm) = mapping.kind else { + panic!( + "{}: {:?} region: address 0x{:x} should be kind basic, got {:?}", + name, region.region_type, addr, mapping.kind + ); + }; + // Verify writable - let actual = mapping.2.writable; - let expected = region.flags.contains(MemoryRegionFlags::WRITE) - || region.flags.contains(MemoryRegionFlags::STACK_GUARD); + let actual = bm.writable; + let expected = region.flags.contains(MemoryRegionFlags::WRITE); assert_eq!( actual, expected, "{}: {:?} region: address 0x{:x} has writable {}, expected {} (region flags: {:?})", @@ -486,7 +493,7 @@ mod tests { ); // Verify executable - let actual = mapping.2.executable; + let actual = bm.executable; let expected = region.flags.contains(MemoryRegionFlags::EXECUTE); assert_eq!( actual, expected, diff --git a/src/hyperlight_host/src/mem/shared_mem.rs b/src/hyperlight_host/src/mem/shared_mem.rs index ecf0f66a6..ae9545e59 
100644 --- a/src/hyperlight_host/src/mem/shared_mem.rs +++ b/src/hyperlight_host/src/mem/shared_mem.rs @@ -676,9 +676,10 @@ impl GuestSharedMemory { region_type: MemoryRegionType, ) -> MemoryRegion { let flags = match region_type { - MemoryRegionType::Scratch | MemoryRegionType::Snapshot => { + MemoryRegionType::Scratch => { MemoryRegionFlags::READ | MemoryRegionFlags::WRITE | MemoryRegionFlags::EXECUTE } + MemoryRegionType::Snapshot => MemoryRegionFlags::READ | MemoryRegionFlags::EXECUTE, #[allow(clippy::panic)] // In the future, all the host side knowledge about memory // region types should collapse down to Snapshot vs diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index c31fcda45..e05f70937 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -531,11 +531,6 @@ impl MultiUseSandbox { if self.poisoned { return Err(crate::HyperlightError::PoisonedSandbox); } - if rgn.flags.contains(MemoryRegionFlags::STACK_GUARD) { - // Stack guard pages are an internal implementation detail - // (which really should be moved into the guest) - log_then_return!("Cannot map host memory as a stack guard page"); - } if rgn.flags.contains(MemoryRegionFlags::WRITE) { // TODO: Implement support for writable mappings, which // need to be registered with the memory manager so that @@ -1040,7 +1035,7 @@ mod tests { cfg.get_input_data_size(), cfg.get_output_data_size(), ); - cfg.set_scratch_size(min_scratch + 0x5000); + cfg.set_scratch_size(min_scratch + 0x10000); let mut sbox1: MultiUseSandbox = { let path = simple_guest_as_string().unwrap(); diff --git a/src/hyperlight_host/src/sandbox/snapshot.rs b/src/hyperlight_host/src/sandbox/snapshot.rs index ff33ab26b..af6b42ef1 100644 --- a/src/hyperlight_host/src/sandbox/snapshot.rs +++ b/src/hyperlight_host/src/sandbox/snapshot.rs @@ -17,7 +17,7 @@ limitations under the License. 
use std::sync::atomic::{AtomicU64, Ordering}; use hyperlight_common::layout::{scratch_base_gpa, scratch_base_gva}; -use hyperlight_common::vmem::{self, BasicMapping, Mapping, MappingKind, PAGE_SIZE}; +use hyperlight_common::vmem::{self, BasicMapping, CowMapping, Mapping, MappingKind, PAGE_SIZE}; use tracing::{Span, instrument}; use crate::HyperlightError::MemoryRegionSizeMismatch; @@ -254,25 +254,26 @@ fn filtered_mappings<'a>( regions: &[MemoryRegion], scratch_size: usize, root_pt: u64, -) -> Vec<(u64, u64, BasicMapping, &'a [u8])> { +) -> Vec<(Mapping, &'a [u8])> { let op = SharedMemoryPageTableBuffer::new(snap, scratch, scratch_size, root_pt); unsafe { hyperlight_common::vmem::virt_to_phys(&op, 0, hyperlight_common::layout::MAX_GVA as u64) } - .filter_map(move |(gva, gpa, bm)| { + .filter_map(move |mapping| { // the scratch map doesn't count - if gva >= scratch_base_gva(scratch_size) { + if mapping.virt_base >= scratch_base_gva(scratch_size) { return None; } // neither does the mapping of the snapshot's own page tables - if gva >= hyperlight_common::layout::SNAPSHOT_PT_GVA_MIN as u64 - && gva <= hyperlight_common::layout::SNAPSHOT_PT_GVA_MAX as u64 + if mapping.virt_base >= hyperlight_common::layout::SNAPSHOT_PT_GVA_MIN as u64 + && mapping.virt_base <= hyperlight_common::layout::SNAPSHOT_PT_GVA_MAX as u64 { return None; } // todo: is it useful to warn if we can't resolve this? 
- let contents = unsafe { guest_page(snap, scratch, regions, scratch_size, gpa) }?; - Some((gva, gpa, bm, contents)) + let contents = + unsafe { guest_page(snap, scratch, regions, scratch_size, mapping.phys_base) }?; + Some((mapping, contents)) }) .collect() } @@ -319,10 +320,12 @@ fn map_specials(pt_buf: &GuestPageTableBuffer, scratch_size: usize) { phys_base: scratch_base_gpa(scratch_size), virt_base: scratch_base_gva(scratch_size), len: scratch_size as u64, - kind: MappingKind::BasicMapping(BasicMapping { + kind: MappingKind::Basic(BasicMapping { readable: true, writable: true, - executable: true, + // assume that the guest will map these pages elsewhere if + // it actually needs to execute from them + executable: false, }), }; unsafe { vmem::map(pt_buf, mapping) }; @@ -334,9 +337,9 @@ fn map_specials(pt_buf: &GuestPageTableBuffer, scratch_size: usize) { phys_base: (pt_buf.phys_base() + pt_size_mapped) as u64, virt_base: (hyperlight_common::layout::SNAPSHOT_PT_GVA_MIN + pt_size_mapped) as u64, len: (pt_buf.size() - pt_size_mapped) as u64, - kind: MappingKind::BasicMapping(BasicMapping { + kind: MappingKind::Basic(BasicMapping { readable: true, - writable: true, + writable: false, executable: false, }), }; @@ -399,24 +402,25 @@ impl Snapshot { // 1. 
Map the (ideally readonly) pages of snapshot data for rgn in layout.get_memory_regions_::(())?.iter() { let readable = rgn.flags.contains(MemoryRegionFlags::READ); - let writable = rgn.flags.contains(MemoryRegionFlags::WRITE) - // Temporary hack: the stack guard page is - // currently checked for in the host, rather than - // the guest, so we need to mark it writable in - // the Stage 1 translation so that the fault - // exception on a write is taken to the - // hypervisor, rather than the guest kernel - || rgn.flags.contains(MemoryRegionFlags::STACK_GUARD); let executable = rgn.flags.contains(MemoryRegionFlags::EXECUTE); + let writable = rgn.flags.contains(MemoryRegionFlags::WRITE); + let kind = if writable { + MappingKind::Cow(CowMapping { + readable, + executable, + }) + } else { + MappingKind::Basic(BasicMapping { + readable, + writable: false, + executable, + }) + }; let mapping = Mapping { phys_base: rgn.guest_region.start as u64, virt_base: rgn.guest_region.start as u64, len: rgn.guest_region.len() as u64, - kind: MappingKind::BasicMapping(BasicMapping { - readable, - writable, - executable, - }), + kind, }; unsafe { vmem::map(&pt_buf, mapping) }; } @@ -487,15 +491,27 @@ impl Snapshot { let pt_base_gpa = SandboxMemoryLayout::BASE_ADDRESS + live_pages.len() * PAGE_SIZE; let pt_buf = GuestPageTableBuffer::new(pt_base_gpa); let mut snapshot_memory: Vec = Vec::new(); - for (gva, _, bm, contents) in live_pages { + for (mapping, contents) in live_pages { let new_offset = snapshot_memory.len(); snapshot_memory.extend(contents); let new_gpa = new_offset + SandboxMemoryLayout::BASE_ADDRESS; + let kind = match mapping.kind { + MappingKind::Cow(cm) => MappingKind::Cow(cm), + MappingKind::Basic(bm) if bm.writable => MappingKind::Cow(CowMapping { + readable: bm.readable, + executable: bm.executable, + }), + MappingKind::Basic(bm) => MappingKind::Basic(BasicMapping { + readable: bm.readable, + writable: false, + executable: bm.executable, + }), + }; let mapping = Mapping 
{ phys_base: new_gpa as u64, - virt_base: gva, + virt_base: mapping.virt_base, len: PAGE_SIZE as u64, - kind: MappingKind::BasicMapping(bm), + kind, }; unsafe { vmem::map(&pt_buf, mapping) }; } @@ -610,7 +626,7 @@ mod tests { phys_base: SandboxMemoryLayout::BASE_ADDRESS as u64, virt_base: SandboxMemoryLayout::BASE_ADDRESS as u64, len: PAGE_SIZE as u64, - kind: MappingKind::BasicMapping(BasicMapping { + kind: MappingKind::Basic(BasicMapping { readable: true, writable: true, executable: true, diff --git a/src/hyperlight_host/src/sandbox/uninitialized.rs b/src/hyperlight_host/src/sandbox/uninitialized.rs index 803891bef..723f8f8f4 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized.rs @@ -1094,7 +1094,7 @@ mod tests { // Test 3: Create snapshot with custom scratch size { let mut cfg = SandboxConfiguration::default(); - cfg.set_scratch_size(128 * 1024); // 128KB scratch + cfg.set_scratch_size(144 * 1024); // 144KB scratch let env = GuestEnvironment::new(GuestBinary::FilePath(binary_path.clone()), None); diff --git a/src/hyperlight_host/tests/common/mod.rs b/src/hyperlight_host/tests/common/mod.rs index ab58237b8..aeee8c658 100644 --- a/src/hyperlight_host/tests/common/mod.rs +++ b/src/hyperlight_host/tests/common/mod.rs @@ -14,6 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ use hyperlight_host::func::HostFunction; +use hyperlight_host::sandbox::SandboxConfiguration; #[cfg(gdb)] use hyperlight_host::sandbox::config::DebugInfo; use hyperlight_host::{GuestBinary, MultiUseSandbox, Result, UninitializedSandbox}; @@ -33,7 +34,6 @@ pub fn new_uninit() -> Result { pub fn new_uninit_rust() -> Result { #[cfg(gdb)] { - use hyperlight_host::sandbox::SandboxConfiguration; let mut cfg = SandboxConfiguration::default(); let debug_info = DebugInfo { port: 8080 }; cfg.set_guest_debug_info(debug_info); @@ -59,6 +59,27 @@ pub fn new_uninit_c() -> Result { ) } +pub fn get_simpleguest_sandboxes_with_cfg( + writer: Option>, // An optional writer to make sure correct info is passed to the host printer + cfg: SandboxConfiguration, +) -> Vec { + let elf_path = get_c_or_rust_simpleguest_path(); + + let sandboxes = [ + // in hypervisor elf + UninitializedSandbox::new(GuestBinary::FilePath(elf_path.clone()), Some(cfg)).unwrap(), + ]; + + sandboxes + .into_iter() + .map(|mut sandbox| { + if let Some(writer) = writer.clone() { + sandbox.register_print(writer).unwrap(); + } + sandbox.evolve().unwrap() + }) + .collect() +} pub fn get_simpleguest_sandboxes( writer: Option>, // An optional writer to make sure correct info is passed to the host printer ) -> Vec { diff --git a/src/hyperlight_host/tests/sandbox_host_tests.rs b/src/hyperlight_host/tests/sandbox_host_tests.rs index e98faac88..f392fba14 100644 --- a/src/hyperlight_host/tests/sandbox_host_tests.rs +++ b/src/hyperlight_host/tests/sandbox_host_tests.rs @@ -26,7 +26,7 @@ use hyperlight_host::{ use hyperlight_testing::simple_guest_as_string; pub mod common; // pub to disable dead_code warning -use crate::common::get_simpleguest_sandboxes; +use crate::common::{get_simpleguest_sandboxes, get_simpleguest_sandboxes_with_cfg}; #[test] fn pass_byte_array() { @@ -121,7 +121,9 @@ fn invalid_guest_function_name() { #[test] fn set_static() { - for mut sandbox in get_simpleguest_sandboxes(None).into_iter() { + let mut 
cfg: SandboxConfiguration = Default::default(); + cfg.set_scratch_size(0x100A000); + for mut sandbox in get_simpleguest_sandboxes_with_cfg(None, cfg).into_iter() { let fn_name = "SetStatic"; let res = sandbox.call::(fn_name, ()); println!("{:?}", res); diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index 09f55212a..296ace27c 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -41,6 +41,7 @@ use hyperlight_common::flatbuffer_wrappers::function_types::{ use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode; use hyperlight_common::flatbuffer_wrappers::guest_log_level::LogLevel; use hyperlight_common::flatbuffer_wrappers::util::get_flatbuffer_result; +use hyperlight_common::vmem::{BasicMapping, MappingKind}; use hyperlight_guest::error::{HyperlightGuestError, Result}; use hyperlight_guest::exit::{abort_with_code, abort_with_code_and_message}; use hyperlight_guest_bin::exception::arch::{Context, ExceptionInfo}; @@ -582,8 +583,17 @@ fn read_mapped_buffer(base: u64, len: u64, do_map: bool) -> Vec { if do_map { unsafe { - hyperlight_guest_bin::paging::map_region(base as _, base as _, len as u64 + 4096) - }; + hyperlight_guest_bin::paging::map_region( + base as _, + base as _, + len as u64 + 4096, + MappingKind::Basic(BasicMapping { + readable: true, + writable: true, + executable: true, + }), + ); + } } let data = unsafe { core::slice::from_raw_parts(base, len) }; @@ -603,7 +613,18 @@ fn write_mapped_buffer(base: u64, len: u64) -> bool { let base = base as usize as *mut u8; let len = len as usize; - unsafe { hyperlight_guest_bin::paging::map_region(base as _, base as _, len as u64 + 4096) }; + unsafe { + hyperlight_guest_bin::paging::map_region( + base as _, + base as _, + len as u64 + 4096, + MappingKind::Basic(BasicMapping { + readable: true, + writable: true, + executable: true, + }), + ) + }; let data = unsafe { 
core::slice::from_raw_parts_mut(base, len) }; @@ -619,7 +640,18 @@ fn exec_mapped_buffer(base: u64, len: u64) -> bool { let base = base as usize as *mut u8; let len = len as usize; - unsafe { hyperlight_guest_bin::paging::map_region(base as _, base as _, len as u64 + 4096) }; + unsafe { + hyperlight_guest_bin::paging::map_region( + base as _, + base as _, + len as u64 + 4096, + MappingKind::Basic(BasicMapping { + readable: true, + writable: true, + executable: true, + }), + ) + }; let data = unsafe { core::slice::from_raw_parts(base, len) }; From 8d0da513df621d890c8c4ec637bb47d0d80f6eb6 Mon Sep 17 00:00:00 2001 From: Lucy Menon <168595099+syntactically@users.noreply.github.com> Date: Tue, 10 Feb 2026 19:08:55 +0000 Subject: [PATCH 07/10] amd64: Remove unused access/dirty flag updates Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com> --- src/hyperlight_common/src/arch/amd64/vmem.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/hyperlight_common/src/arch/amd64/vmem.rs b/src/hyperlight_common/src/arch/amd64/vmem.rs index 631617c1c..1c64e8ca9 100644 --- a/src/hyperlight_common/src/arch/amd64/vmem.rs +++ b/src/hyperlight_common/src/arch/amd64/vmem.rs @@ -59,8 +59,8 @@ const PAGE_NX: u64 = 1 << 63; /// This masks out the lower 12 flag bits AND the upper bits including NX (bit 63) const PTE_ADDR_MASK: u64 = 0x000F_FFFF_FFFF_F000; const PAGE_USER_ACCESS_DISABLED: u64 = 0 << 2; // U/S bit not set - supervisor mode only (no code runs in user mode for now) -const PAGE_DIRTY_CLEAR: u64 = 0 << 6; // D - dirty bit cleared (set by CPU when written) -const PAGE_ACCESSED_CLEAR: u64 = 0 << 5; // A - accessed bit cleared (set by CPU when accessed) +const PAGE_DIRTY_SET: u64 = 1 << 6; // D - dirty bit +const PAGE_ACCESSED_SET: u64 = 1 << 5; // A - accessed bit const PAGE_CACHE_ENABLED: u64 = 0 << 4; // PCD - page cache disable bit not set (caching enabled) const PAGE_WRITE_BACK: u64 = 0 << 3; // PWT - page write-through 
bit not set (write-back caching) const PAGE_PAT_WB: u64 = 0 << 7; // PAT - page attribute table index bit (0 for write-back memory when PCD=0, PWT=0) @@ -107,7 +107,7 @@ fn bits(x: u64) -> u64 { #[allow(clippy::precedence)] fn pte_for_table(table_addr: Op::TableAddr) -> u64 { Op::to_phys(table_addr) | - PAGE_ACCESSED_CLEAR | // accessed bit cleared (will be set by CPU when page is accessed - but we dont use the access bit for anything at present) + PAGE_ACCESSED_SET | // prevent the CPU writing to the access flag PAGE_CACHE_ENABLED | // leave caching enabled PAGE_WRITE_BACK | // use write-back caching PAGE_USER_ACCESS_DISABLED |// dont allow user access (no code runs in user mode for now) @@ -435,8 +435,8 @@ unsafe fn map_page< (mapping.phys_base + (r.vmin - mapping.virt_base)) | page_nx_flag(bm.executable) | // NX - no execute unless allowed PAGE_PAT_WB | // PAT index bit for write-back memory - PAGE_DIRTY_CLEAR | // dirty bit (set by CPU when written) - PAGE_ACCESSED_CLEAR | // accessed bit cleared (will be set by CPU when page is accessed - but we dont use the access bit for anything at present) + PAGE_DIRTY_SET | // prevent the CPU writing to the dirty bit + PAGE_ACCESSED_SET | // prevent the CPU writing to the access flag PAGE_CACHE_ENABLED | // leave caching enabled PAGE_WRITE_BACK | // use write-back caching PAGE_USER_ACCESS_DISABLED | // dont allow user access (no code runs in user mode for now) @@ -448,8 +448,8 @@ unsafe fn map_page< page_nx_flag(cm.executable) | // NX - no execute unless allowed PAGE_AVL_COW | PAGE_PAT_WB | // PAT index bit for write-back memory - PAGE_DIRTY_CLEAR | // dirty bit (set by CPU when written) - PAGE_ACCESSED_CLEAR | // accessed bit cleared (will be set by CPU when page is accessed - but we dont use the access bit for anything at present) + PAGE_DIRTY_SET | // prevent the CPU writing to the dirty bit + PAGE_ACCESSED_SET | // prevent the CPU writing to the access flag PAGE_CACHE_ENABLED | // leave caching enabled PAGE_WRITE_BACK 
| // use write-back caching PAGE_USER_ACCESS_DISABLED | // dont allow user access (no code runs in user mode for now) From 68deb0d9b6c53cd9cc35d427cfd09f3657086705 Mon Sep 17 00:00:00 2001 From: Lucy Menon <168595099+syntactically@users.noreply.github.com> Date: Tue, 10 Feb 2026 19:12:19 +0000 Subject: [PATCH 08/10] Eagerly copy page tables to the snapshot region This is necessary for now on amd64, where processor page table walks of the guest page tables are treated as writes w.r.t. the second-level page tables, meaning that no guest page table can ever be in the (mapped readonly at stage 2) snapshot region. Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com> --- src/hyperlight_guest_bin/src/paging.rs | 30 ++----- src/hyperlight_host/benches/benchmarks.rs | 2 + .../src/hypervisor/hyperlight_vm.rs | 85 ++++++------------- src/hyperlight_host/src/mem/layout.rs | 80 ++++++++--------- src/hyperlight_host/src/mem/memory_region.rs | 2 - src/hyperlight_host/src/mem/mgr.rs | 46 ++++------ .../src/sandbox/initialized_multi_use.rs | 7 +- src/hyperlight_host/src/sandbox/snapshot.rs | 56 +++--------- .../src/sandbox/uninitialized.rs | 2 +- .../src/sandbox/uninitialized_evolve.rs | 12 +-- 10 files changed, 111 insertions(+), 211 deletions(-) diff --git a/src/hyperlight_guest_bin/src/paging.rs b/src/hyperlight_guest_bin/src/paging.rs index a13bb1832..5d3a38a00 100644 --- a/src/hyperlight_guest_bin/src/paging.rs +++ b/src/hyperlight_guest_bin/src/paging.rs @@ -29,18 +29,12 @@ use hyperlight_guest::prim_alloc::alloc_phys_pages; #[derive(Copy, Clone)] struct GuestMappingOperations { - snapshot_pt_base_gpa: u64, - snapshot_pt_base_gva: u64, scratch_base_gpa: u64, scratch_base_gva: u64, } impl GuestMappingOperations { fn new() -> Self { Self { - snapshot_pt_base_gpa: unsafe { - hyperlight_guest::layout::snapshot_pt_gpa_base_gva().read_volatile() - }, - snapshot_pt_base_gva: hyperlight_common::layout::SNAPSHOT_PT_GVA_MIN as u64, scratch_base_gpa: 
hyperlight_guest::layout::scratch_base_gpa(), scratch_base_gva: hyperlight_guest::layout::scratch_base_gva(), } @@ -48,8 +42,6 @@ impl GuestMappingOperations { fn fallible_phys_to_virt(&self, addr: u64) -> Option<*mut u8> { if addr >= self.scratch_base_gpa { Some((self.scratch_base_gva + (addr - self.scratch_base_gpa)) as *mut u8) - } else if addr >= self.snapshot_pt_base_gpa { - Some((self.snapshot_pt_base_gva + (addr - self.snapshot_pt_base_gpa)) as *mut u8) } else { None } @@ -94,6 +86,11 @@ impl vmem::TableReadOps for GuestMappingOperations { } impl vmem::TableOps for GuestMappingOperations { + // Currently, we don't actually move tables anywhere on amd64 + // because of issues with guest PTs in IPAs that are mapped + // readonly in Stage 2 translation. However, this code all works + // and will be re-enabled as soon as there is improved + // architecture/hypervisor support. type TableMovability = vmem::MayMoveTable; unsafe fn alloc_table(&self) -> u64 { let page_addr = unsafe { alloc_phys_pages(1) }; @@ -104,26 +101,11 @@ impl vmem::TableOps for GuestMappingOperations { page_addr } unsafe fn write_entry(&self, addr: u64, entry: u64) -> Option { - let mut addr = addr; - let mut ret = None; - if addr >= self.snapshot_pt_base_gpa && addr < self.scratch_base_gpa { - // This needs to be CoW'd over to the scratch region - unsafe { - let new_table = alloc_phys_pages(1); - core::ptr::copy( - self.phys_to_virt(addr & !0xfff), - self.phys_to_virt(new_table), - vmem::PAGE_TABLE_SIZE, - ); - addr = new_table | (addr & 0xfff); - ret = Some(new_table); - } - } let addr = self.phys_to_virt(addr); unsafe { asm!("mov qword ptr [{}], {}", in(reg) addr, in(reg) entry); } - ret + None } unsafe fn update_root(&self, new_root: u64) { unsafe { diff --git a/src/hyperlight_host/benches/benchmarks.rs b/src/hyperlight_host/benches/benchmarks.rs index 55aed01a6..592d6042d 100644 --- a/src/hyperlight_host/benches/benchmarks.rs +++ b/src/hyperlight_host/benches/benchmarks.rs @@ -62,11 +62,13 
@@ impl SandboxSize { Self::Medium => { let mut cfg = SandboxConfiguration::default(); cfg.set_heap_size(MEDIUM_HEAP_SIZE); + cfg.set_scratch_size(0x50000); Some(cfg) } Self::Large => { let mut cfg = SandboxConfiguration::default(); cfg.set_heap_size(LARGE_HEAP_SIZE); + cfg.set_scratch_size(0x100000); Some(cfg) } } diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs index ba2cb8eb6..7206cc847 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs @@ -928,7 +928,7 @@ impl HyperlightVm { /// - Special registers (restored from snapshot, with CR3 updated to new page table location) // TODO: check if other state needs to be reset pub(crate) fn reset_vcpu( - &self, + &mut self, cr3: u64, sregs: &CommonSpecialRegisters, ) -> std::result::Result<(), RegisterError> { @@ -1492,7 +1492,6 @@ mod tests { use crate::mem::memory_region::{GuestMemoryRegion, MemoryRegionFlags}; use crate::mem::mgr::{GuestPageTableBuffer, SandboxMemoryManager}; use crate::mem::ptr::RawPtr; - use crate::mem::ptr_offset::Offset; use crate::mem::shared_mem::ExclusiveSharedMemory; use crate::sandbox::SandboxConfiguration; use crate::sandbox::host_funcs::FunctionRegistry; @@ -2041,11 +2040,10 @@ mod tests { #[cfg(any(crashdump, gdb))] let rt_cfg: SandboxRuntimeConfig = Default::default(); - let mut layout = - SandboxMemoryLayout::new(config, code.len(), 4096, 4096, 0x3000, None).unwrap(); + let mut layout = SandboxMemoryLayout::new(config, code.len(), 4096, None).unwrap(); - let pt_base_gpa = SandboxMemoryLayout::BASE_ADDRESS + layout.get_pt_offset(); - let pt_buf = GuestPageTableBuffer::new(pt_base_gpa); + let pt_base_gpa = layout.get_pt_base_gpa(); + let pt_buf = GuestPageTableBuffer::new(pt_base_gpa as usize); for rgn in layout .get_memory_regions_::(()) @@ -2053,14 +2051,13 @@ mod tests { .iter() { let readable = rgn.flags.contains(MemoryRegionFlags::READ); - let 
writable = rgn.flags.contains(MemoryRegionFlags::WRITE) - || rgn.flags.contains(MemoryRegionFlags::STACK_GUARD); + let writable = rgn.flags.contains(MemoryRegionFlags::WRITE); let executable = rgn.flags.contains(MemoryRegionFlags::EXECUTE); let mapping = Mapping { phys_base: rgn.guest_region.start as u64, virt_base: rgn.guest_region.start as u64, len: rgn.guest_region.len() as u64, - kind: MappingKind::BasicMapping(BasicMapping { + kind: MappingKind::Basic(BasicMapping { readable, writable, executable, @@ -2069,22 +2066,6 @@ mod tests { unsafe { vmem::map(&pt_buf, mapping) }; } - let mut pt_size_mapped = 0; - while pt_buf.size() > pt_size_mapped { - let mapping = Mapping { - phys_base: (pt_base_gpa + pt_size_mapped) as u64, - virt_base: (hyperlight_common::layout::SNAPSHOT_PT_GVA_MIN + pt_size_mapped) as u64, - len: (pt_buf.size() - pt_size_mapped) as u64, - kind: MappingKind::BasicMapping(BasicMapping { - readable: true, - writable: true, - executable: false, - }), - }; - unsafe { vmem::map(&pt_buf, mapping) }; - pt_size_mapped = pt_buf.size(); - } - // Map the scratch region at the top of the address space let scratch_size = config.get_scratch_size(); let scratch_gpa = hyperlight_common::layout::scratch_base_gpa(scratch_size); @@ -2093,7 +2074,7 @@ mod tests { phys_base: scratch_gpa, virt_base: scratch_gva, len: scratch_size as u64, - kind: MappingKind::BasicMapping(BasicMapping { + kind: MappingKind::Basic(BasicMapping { readable: true, writable: true, executable: true, // Match regular codepath (map_specials) @@ -2101,40 +2082,22 @@ mod tests { }; unsafe { vmem::map(&pt_buf, scratch_mapping) }; - // Re-map page tables if they grew from scratch mapping - while pt_buf.size() > pt_size_mapped { - let mapping = Mapping { - phys_base: (pt_base_gpa + pt_size_mapped) as u64, - virt_base: (hyperlight_common::layout::SNAPSHOT_PT_GVA_MIN + pt_size_mapped) as u64, - len: (pt_buf.size() - pt_size_mapped) as u64, - kind: MappingKind::BasicMapping(BasicMapping { - readable: 
true, - writable: true, - executable: false, - }), - }; - unsafe { vmem::map(&pt_buf, mapping) }; - pt_size_mapped = pt_buf.size(); - } - let pt_bytes = pt_buf.into_bytes(); - layout.set_pt_size(pt_bytes.len()); + layout.set_pt_size(pt_bytes.len()).unwrap(); let mem_size = layout.get_memory_size().unwrap(); let mut eshm = ExclusiveSharedMemory::new(mem_size).unwrap(); - eshm.copy_from_slice(&pt_bytes, layout.get_pt_offset()) - .unwrap(); + let snapshot_pt_start = mem_size - layout.get_pt_size(); + eshm.copy_from_slice(&pt_bytes, snapshot_pt_start).unwrap(); eshm.copy_from_slice(code, layout.get_guest_code_offset()) .unwrap(); - let load_addr = RawPtr::from(layout.get_guest_code_address() as u64); let scratch_mem = ExclusiveSharedMemory::new(config.get_scratch_size()).unwrap(); let mut mem_mgr = SandboxMemoryManager::new( layout, eshm, scratch_mem, - load_addr, - Some(Offset::from(0u64)), + NextAction::Initialise(layout.get_guest_code_address() as u64), ); mem_mgr.write_memory_layout().unwrap(); @@ -2197,7 +2160,7 @@ mod tests { fn reset_vcpu_simple() { // push rax; hlt - aligns stack to 16 bytes const CODE: [u8; 2] = [0x50, 0xf4]; - let hyperlight_vm = hyperlight_vm(&CODE); + let mut hyperlight_vm = hyperlight_vm(&CODE); let available_hv = *get_available_hypervisor().as_ref().unwrap(); // Get the initial CR3 value before dirtying sregs @@ -2360,7 +2323,7 @@ mod tests { a.hlt().unwrap(); let code = a.assemble(0).unwrap(); - let hyperlight_vm = hyperlight_vm(&code); + let mut hyperlight_vm = hyperlight_vm(&code); // After run, check registers match expected dirty state let regs = hyperlight_vm.vm.regs().unwrap(); @@ -2462,7 +2425,7 @@ mod tests { a.hlt().unwrap(); let code = a.assemble(0).unwrap(); - let hyperlight_vm = hyperlight_vm(&code); + let mut hyperlight_vm = hyperlight_vm(&code); // After run, check FPU state matches expected dirty values let fpu = hyperlight_vm.vm.fpu().unwrap(); @@ -2547,7 +2510,7 @@ mod tests { a.hlt().unwrap(); let code = 
a.assemble(0).unwrap(); - let hyperlight_vm = hyperlight_vm(&code); + let mut hyperlight_vm = hyperlight_vm(&code); // Verify debug registers are dirty let debug_regs = hyperlight_vm.vm.debug_regs().unwrap(); @@ -2593,7 +2556,7 @@ mod tests { a.hlt().unwrap(); let code = a.assemble(0).unwrap(); - let hyperlight_vm = hyperlight_vm(&code); + let mut hyperlight_vm = hyperlight_vm(&code); // Get the initial CR3 value and expected defaults let initial_cr3 = hyperlight_vm.vm.sregs().unwrap().cr3; @@ -2653,8 +2616,11 @@ mod tests { // Re-run from entrypoint (flag=1 means guest skips dirty phase, just does FXSAVE) // Use stack_top - 8 to match initialise()'s behavior (simulates call pushing return addr) + let NextAction::Call(rip) = ctx.ctx.vm.entrypoint else { + panic!("entrypoint should be call"); + }; let regs = CommonRegisters { - rip: ctx.ctx.vm.entrypoint.expect("entrypoint should be set"), + rip, rsp: ctx.stack_top_gva() - 8, rflags: 1 << 1, ..Default::default() @@ -2736,7 +2702,7 @@ mod tests { let mut fxsave = [0u8; 512]; self.ctx .hshm - .shared_mem + .scratch_mem .copy_to_slice(&mut fxsave, self.fxsave_offset) .unwrap(); fxsave @@ -2759,9 +2725,9 @@ mod tests { // These are in the output_data region which starts at a known offset. // We use a default SandboxConfiguration to get the same layout as create_test_vm_context. 
let config: SandboxConfiguration = Default::default(); - let layout = SandboxMemoryLayout::new(config, 512, 4096, 4096, 0x3000, None).unwrap(); - let fxsave_offset = layout.get_output_data_offset(); - let fxsave_gva = (SandboxMemoryLayout::BASE_ADDRESS + fxsave_offset) as u64; + let layout = SandboxMemoryLayout::new(config, 512, 4096, None).unwrap(); + let fxsave_offset = layout.get_output_data_buffer_scratch_host_offset(); + let fxsave_gva = layout.get_output_data_buffer_gva(); let flag_gva = fxsave_gva + 512; let mut a = CodeAssembler::new(64).unwrap(); @@ -2812,6 +2778,9 @@ mod tests { a.mov(rax, fxsave_gva).unwrap(); a.fxsave(ptr(rax)).unwrap(); + // Return dispatch ptr + a.mov(rax, layout.get_guest_code_address() as u64).unwrap(); + a.hlt().unwrap(); let code = a.assemble(0).unwrap(); diff --git a/src/hyperlight_host/src/mem/layout.rs b/src/hyperlight_host/src/mem/layout.rs index 4a9a5e244..cceb23f2f 100644 --- a/src/hyperlight_host/src/mem/layout.rs +++ b/src/hyperlight_host/src/mem/layout.rs @@ -66,8 +66,6 @@ use std::mem::{offset_of, size_of}; use hyperlight_common::mem::{GuestMemoryRegion, HyperlightPEB, PAGE_SIZE_USIZE}; use tracing::{Span, instrument}; -#[cfg(feature = "init-paging")] -use super::memory_region::MemoryRegionType::PageTables; use super::memory_region::MemoryRegionType::{Code, Heap, InitData, Peb}; use super::memory_region::{ DEFAULT_GUEST_BLOB_MEM_FLAGS, MemoryRegion_, MemoryRegionFlags, MemoryRegionKind, @@ -97,7 +95,6 @@ pub(crate) struct SandboxMemoryLayout { guest_heap_buffer_offset: usize, init_data_offset: usize, - pt_offset: usize, pt_size: Option, // other @@ -152,7 +149,6 @@ impl Debug for SandboxMemoryLayout { "Init Data Offset", &format_args!("{:#x}", self.init_data_offset), ) - .field("PT Offset", &format_args!("{:#x}", self.pt_offset)) .field("PT Size", &format_args!("{:#x}", self.pt_size.unwrap_or(0))) .field( "Guest Code Offset", @@ -226,7 +222,6 @@ impl SandboxMemoryLayout { // make sure init data starts at 4K boundary let 
init_data_offset = (guest_heap_buffer_offset + heap_size).next_multiple_of(PAGE_SIZE_USIZE); - let pt_offset = (init_data_offset + init_data_size).next_multiple_of(PAGE_SIZE_USIZE); Ok(Self { peb_offset, @@ -243,7 +238,6 @@ impl SandboxMemoryLayout { init_data_offset, init_data_size, init_data_permissions, - pt_offset, pt_size: None, scratch_size, }) @@ -326,14 +320,28 @@ impl SandboxMemoryLayout { 0 } - /// Get the offset into the host scratch buffer of the start of - /// the input data + /// Get the offset from the beginning of the scratch region to the + /// location where page tables will be eagerly copied on restore #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(crate) fn get_first_free_scratch_gpa(&self) -> u64 { - (hyperlight_common::layout::scratch_base_gpa(self.scratch_size) as usize - + self.sandbox_memory_config.get_input_data_size() + pub(crate) fn get_pt_base_scratch_offset(&self) -> usize { + (self.sandbox_memory_config.get_input_data_size() + self.sandbox_memory_config.get_output_data_size()) - .next_multiple_of(hyperlight_common::vmem::PAGE_SIZE) as u64 + .next_multiple_of(hyperlight_common::vmem::PAGE_SIZE) + } + + /// Get the base GPA to which the page tables will be eagerly + /// copied on restore + #[instrument(skip_all, parent = Span::current(), level= "Trace")] + pub(crate) fn get_pt_base_gpa(&self) -> u64 { + hyperlight_common::layout::scratch_base_gpa(self.scratch_size) + + self.get_pt_base_scratch_offset() as u64 + } + + /// Get the first GPA of the scratch region that the host hasn't + /// used for something else + #[instrument(skip_all, parent = Span::current(), level= "Trace")] + pub(crate) fn get_first_free_scratch_gpa(&self) -> u64 { + self.get_pt_base_gpa() + self.pt_size.unwrap_or(0) as u64 } /// Get the offset in guest memory to the heap size @@ -354,7 +362,7 @@ impl SandboxMemoryLayout { /// layout. 
#[instrument(skip_all, parent = Span::current(), level= "Trace")] fn get_unaligned_memory_size(&self) -> usize { - self.pt_offset + self.pt_size.unwrap_or(0) + self.init_data_offset + self.init_data_size } /// get the code offset @@ -391,16 +399,25 @@ impl SandboxMemoryLayout { } } - /// Get the offset into the snapshot region of the page tables + /// Sets the size of the memory region used for page tables #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(crate) fn get_pt_offset(&self) -> usize { - self.pt_offset + pub(crate) fn set_pt_size(&mut self, size: usize) -> Result<()> { + let min_fixed_scratch = hyperlight_common::layout::min_scratch_size( + self.sandbox_memory_config.get_input_data_size(), + self.sandbox_memory_config.get_output_data_size(), + ); + let min_scratch = min_fixed_scratch + size; + if self.scratch_size < min_scratch { + return Err(MemoryRequestTooSmall(self.scratch_size, min_scratch)); + } + self.pt_size = Some(size); + Ok(()) } - /// Sets the size of the memory region used for page tables + /// Get the size of the memory region used for page tables #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(crate) fn set_pt_size(&mut self, size: usize) { - self.pt_size = Some(size); + pub(crate) fn get_pt_size(&mut self) -> usize { + self.pt_size.unwrap_or(0) } /// Returns the memory regions associated with this memory layout, @@ -480,31 +497,6 @@ impl SandboxMemoryLayout { init_data_offset }; - #[cfg(feature = "init-paging")] - // page tables - let final_offset = { - let expected_pt_offset = TryInto::::try_into(self.pt_offset)?; - - if after_init_offset != expected_pt_offset { - return Err(new_error!( - "Page table offset does not match expected: {}, actual: {}", - expected_pt_offset, - after_init_offset - )); - } - - if let Some(pt_size) = self.pt_size { - builder.push_page_aligned( - pt_size, - MemoryRegionFlags::READ | MemoryRegionFlags::WRITE, - PageTables, - ) - } else { - after_init_offset - } - }; - 
- #[cfg(not(feature = "init-paging"))] let final_offset = after_init_offset; let expected_final_offset = TryInto::::try_into(self.get_memory_size()?)?; diff --git a/src/hyperlight_host/src/mem/memory_region.rs b/src/hyperlight_host/src/mem/memory_region.rs index 71237e4ac..a2b37e982 100644 --- a/src/hyperlight_host/src/mem/memory_region.rs +++ b/src/hyperlight_host/src/mem/memory_region.rs @@ -116,8 +116,6 @@ impl TryFrom for MemoryRegionFlags { #[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)] /// The type of memory region pub enum MemoryRegionType { - /// The region contains the guest's page tables - PageTables, /// The region contains the guest's code Code, /// The region contains the guest's init data diff --git a/src/hyperlight_host/src/mem/mgr.rs b/src/hyperlight_host/src/mem/mgr.rs index 674a47cee..8691df380 100644 --- a/src/hyperlight_host/src/mem/mgr.rs +++ b/src/hyperlight_host/src/mem/mgr.rs @@ -24,7 +24,6 @@ use tracing::{Span, instrument}; use super::layout::SandboxMemoryLayout; use super::memory_region::MemoryRegion; -use super::ptr::RawPtr; use super::shared_mem::{ExclusiveSharedMemory, GuestSharedMemory, HostSharedMemory, SharedMemory}; use crate::hypervisor::regs::CommonSpecialRegisters; use crate::sandbox::snapshot::{NextAction, Snapshot}; @@ -40,8 +39,6 @@ pub(crate) struct SandboxMemoryManager { pub(crate) scratch_mem: S, /// The memory layout of the underlying shared memory pub(crate) layout: SandboxMemoryLayout, - /// Pointer to where to load memory from - pub(crate) load_addr: RawPtr, /// Offset for the execution entrypoint from `load_addr` pub(crate) entrypoint: NextAction, /// How many memory regions were mapped after sandbox creation @@ -127,10 +124,8 @@ impl GuestPageTableBuffer { } } - pub(crate) fn phys_base(&self) -> usize { - self.phys_base - } - + #[cfg(test)] + #[allow(dead_code)] pub(crate) fn size(&self) -> usize { self.buffer.borrow().len() } @@ -150,14 +145,12 @@ where layout: SandboxMemoryLayout, shared_mem: S, scratch_mem: S, 
- load_addr: RawPtr, entrypoint: NextAction, ) -> Self { Self { layout, shared_mem, scratch_mem, - load_addr, entrypoint, mapped_rgns: 0, abort_buffer: Vec::new(), @@ -206,15 +199,8 @@ impl SandboxMemoryManager { let mut shared_mem = ExclusiveSharedMemory::new(s.mem_size())?; shared_mem.copy_from_slice(s.memory(), 0)?; let scratch_mem = ExclusiveSharedMemory::new(s.layout().get_scratch_size())?; - let load_addr: RawPtr = RawPtr::try_from(layout.get_guest_code_address())?; let entrypoint = s.entrypoint(); - Ok(Self::new( - layout, - shared_mem, - scratch_mem, - load_addr, - entrypoint, - )) + Ok(Self::new(layout, shared_mem, scratch_mem, entrypoint)) } /// Write memory layout @@ -250,7 +236,6 @@ impl SandboxMemoryManager { shared_mem: hshm, scratch_mem: hscratch, layout: self.layout, - load_addr: self.load_addr.clone(), entrypoint: self.entrypoint, mapped_rgns: self.mapped_rgns, abort_buffer: self.abort_buffer, @@ -259,14 +244,11 @@ impl SandboxMemoryManager { shared_mem: gshm, scratch_mem: gscratch, layout: self.layout, - load_addr: self.load_addr.clone(), entrypoint: self.entrypoint, mapped_rgns: self.mapped_rgns, abort_buffer: Vec::new(), // Guest doesn't need abort buffer }; - host_mgr.update_scratch_bookkeeping( - (SandboxMemoryLayout::BASE_ADDRESS + self.layout.get_pt_offset()) as u64, - )?; + host_mgr.update_scratch_bookkeeping()?; Ok((host_mgr, guest_mgr)) } } @@ -385,7 +367,8 @@ impl SandboxMemoryManager { Some(gscratch) }; - self.update_scratch_bookkeeping(snapshot.root_pt_gpa())?; + self.layout = *snapshot.layout(); + self.update_scratch_bookkeeping()?; Ok((gsnapshot, gscratch)) } @@ -396,7 +379,7 @@ impl SandboxMemoryManager { self.scratch_mem.write::(base_offset, value) } - fn update_scratch_bookkeeping(&mut self, snapshot_pt_base_gpa: u64) -> Result<()> { + fn update_scratch_bookkeeping(&mut self) -> Result<()> { use hyperlight_common::layout::*; let scratch_size = self.scratch_mem.mem_size(); 
self.update_scratch_bookkeeping_item(SCRATCH_TOP_SIZE_OFFSET, scratch_size as u64)?; @@ -404,10 +387,6 @@ impl SandboxMemoryManager { SCRATCH_TOP_ALLOCATOR_OFFSET, self.layout.get_first_free_scratch_gpa(), )?; - self.update_scratch_bookkeeping_item( - SCRATCH_TOP_SNAPSHOT_PT_GPA_BASE_OFFSET, - snapshot_pt_base_gpa, - )?; // Initialise the guest input and output data buffers in // scratch memory. TODO: remove the need for this. @@ -420,6 +399,16 @@ impl SandboxMemoryManager { SandboxMemoryLayout::STACK_POINTER_SIZE_BYTES, )?; + // Copy the page tables into the scratch region + let snapshot_pt_end = self.shared_mem.mem_size(); + let snapshot_pt_start = snapshot_pt_end - self.layout.get_pt_size(); + self.shared_mem.with_exclusivity(|snap| { + self.scratch_mem.with_exclusivity(|scratch| { + let bytes = &snap.as_slice()[snapshot_pt_start..snapshot_pt_end]; + scratch.copy_from_slice(bytes, self.layout.get_pt_base_scratch_offset()) + }) + })???; + Ok(()) } } @@ -523,6 +512,7 @@ mod tests { ("large (256MB heap)", { let mut cfg = SandboxConfiguration::default(); cfg.set_heap_size(LARGE_HEAP_SIZE); + cfg.set_scratch_size(0x100000); cfg }), ]; diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index e05f70937..666ba4f98 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -1030,12 +1030,14 @@ mod tests { let mut cfg = SandboxConfiguration::default(); cfg.set_heap_size(20 * 1024); // min_scratch_size already includes 1 page (4k on most - // platforms) of guest stack, so add 20k more to get 24k total + // platforms) of guest stack, so add 20k more to get 24k + // total, and then add some more for the eagerly-copied page + // tables on amd64 let min_scratch = hyperlight_common::layout::min_scratch_size( cfg.get_input_data_size(), cfg.get_output_data_size(), ); - cfg.set_scratch_size(min_scratch + 0x10000); + 
cfg.set_scratch_size(min_scratch + 0x10000 + 0x10000); let mut sbox1: MultiUseSandbox = { let path = simple_guest_as_string().unwrap(); @@ -1419,6 +1421,7 @@ mod tests { for (name, heap_size) in test_cases { let mut cfg = SandboxConfiguration::default(); cfg.set_heap_size(heap_size); + cfg.set_scratch_size(0x100000); let path = simple_guest_as_string().unwrap(); let sbox = UninitializedSandbox::new(GuestBinary::FilePath(path), Some(cfg)) diff --git a/src/hyperlight_host/src/sandbox/snapshot.rs b/src/hyperlight_host/src/sandbox/snapshot.rs index af6b42ef1..53c45ef21 100644 --- a/src/hyperlight_host/src/sandbox/snapshot.rs +++ b/src/hyperlight_host/src/sandbox/snapshot.rs @@ -90,16 +90,14 @@ pub struct Snapshot { /// It is not a [`blake3::Hash`] because we do not presently /// require constant-time equality checking hash: [u8; 32], - /// The address of the root page table - root_pt_gpa: u64, /// The address of the top of the guest stack stack_top_gva: u64, /// Special register state captured from the vCPU during snapshot. - /// None for snapshots created directly from a binary (before guest runs). - /// Some for snapshots taken from a running sandbox. - /// Note: CR3 in this struct is NOT used on restore - instead, the new - /// root_pt_gpa field is used since page tables are relocated during snapshot. + /// None for snapshots created directly from a binary (before + /// guest runs). Some for snapshots taken from a running sandbox. + /// Note: CR3 in this struct is NOT used on restore, since page + /// tables are relocated during snapshot. 
sregs: Option, /// The next action that should be performed on this snapshot @@ -137,7 +135,7 @@ impl hyperlight_common::vmem::TableReadOps for Snapshot { addr } fn root_table(&self) -> u64 { - self.root_pt_gpa + self.root_pt_gpa() } } @@ -329,23 +327,6 @@ fn map_specials(pt_buf: &GuestPageTableBuffer, scratch_size: usize) { }), }; unsafe { vmem::map(pt_buf, mapping) }; - // Map the page tables themselves, in order to allow the - // guest to update them easily - let mut pt_size_mapped = 0; - while pt_buf.size() > pt_size_mapped { - let mapping = Mapping { - phys_base: (pt_buf.phys_base() + pt_size_mapped) as u64, - virt_base: (hyperlight_common::layout::SNAPSHOT_PT_GVA_MIN + pt_size_mapped) as u64, - len: (pt_buf.size() - pt_size_mapped) as u64, - kind: MappingKind::Basic(BasicMapping { - readable: true, - writable: false, - executable: false, - }), - }; - pt_size_mapped = pt_buf.size(); - unsafe { vmem::map(pt_buf, mapping) }; - } } impl Snapshot { @@ -391,11 +372,9 @@ impl Snapshot { .transpose()?; #[cfg(feature = "init-paging")] - let pt_base_gpa = { + { // Set up page table entries for the snapshot - let pt_base_gpa = - crate::mem::layout::SandboxMemoryLayout::BASE_ADDRESS + layout.get_pt_offset(); - let pt_buf = GuestPageTableBuffer::new(pt_base_gpa); + let pt_buf = GuestPageTableBuffer::new(layout.get_pt_base_gpa() as usize); use crate::mem::memory_region::{GuestMemoryRegion, MemoryRegionFlags}; @@ -429,12 +408,9 @@ impl Snapshot { map_specials(&pt_buf, layout.get_scratch_size()); let pt_bytes = pt_buf.into_bytes(); - layout.set_pt_size(pt_bytes.len()); + layout.set_pt_size(pt_bytes.len())?; memory.extend(&pt_bytes); - pt_base_gpa }; - #[cfg(not(feature = "init-paging"))] - let pt_base_gpa = 0usize; let exn_stack_top_gva = hyperlight_common::layout::MAX_GVA as u64 - hyperlight_common::layout::SCRATCH_TOP_EXN_STACK_OFFSET @@ -450,7 +426,6 @@ impl Snapshot { regions: extra_regions, load_info, hash, - root_pt_gpa: pt_base_gpa as u64, stack_top_gva: 
exn_stack_top_gva, sregs: None, entrypoint: NextAction::Initialise(load_addr + entrypoint_offset), @@ -478,7 +453,7 @@ impl Snapshot { sregs: CommonSpecialRegisters, entrypoint: NextAction, ) -> Result { - let (new_root_pt_gpa, memory) = shared_mem.with_exclusivity(|snap_e| { + let memory = shared_mem.with_exclusivity(|snap_e| { scratch_mem.with_exclusivity(|scratch_e| { let scratch_size = layout.get_scratch_size(); @@ -488,8 +463,7 @@ impl Snapshot { // Pass 2: copy them, and map them // TODO: Look for opportunities to hugepage map - let pt_base_gpa = SandboxMemoryLayout::BASE_ADDRESS + live_pages.len() * PAGE_SIZE; - let pt_buf = GuestPageTableBuffer::new(pt_base_gpa); + let pt_buf = GuestPageTableBuffer::new(layout.get_pt_base_gpa() as usize); let mut snapshot_memory: Vec = Vec::new(); for (mapping, contents) in live_pages { let new_offset = snapshot_memory.len(); @@ -518,11 +492,11 @@ impl Snapshot { // Phase 3: Map the special mappings map_specials(&pt_buf, layout.get_scratch_size()); let pt_bytes = pt_buf.into_bytes(); - layout.set_pt_size(pt_bytes.len()); + layout.set_pt_size(pt_bytes.len())?; snapshot_memory.extend(&pt_bytes); - (pt_base_gpa, snapshot_memory) + Ok::, crate::HyperlightError>(snapshot_memory) }) - })??; + })???; // We do not need the original regions anymore, as any uses of // them in the guest have been incorporated into the snapshot @@ -539,7 +513,6 @@ impl Snapshot { hash, stack_top_gva, sregs: Some(sregs), - root_pt_gpa: new_root_pt_gpa as u64, entrypoint, }) } @@ -576,7 +549,7 @@ impl Snapshot { } pub(crate) fn root_pt_gpa(&self) -> u64 { - self.root_pt_gpa + self.layout.get_pt_base_gpa() } pub(crate) fn stack_top_gva(&self) -> u64 { @@ -643,7 +616,6 @@ mod tests { SandboxMemoryLayout::new(cfg, 4096, 0x3000, None).unwrap(), snapshot_mem, scratch_mem, - 0.into(), super::NextAction::None, ); let (mgr, _) = mgr.build().unwrap(); diff --git a/src/hyperlight_host/src/sandbox/uninitialized.rs b/src/hyperlight_host/src/sandbox/uninitialized.rs 
index 723f8f8f4..9d602cf70 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized.rs @@ -1094,7 +1094,7 @@ mod tests { // Test 3: Create snapshot with custom scratch size { let mut cfg = SandboxConfiguration::default(); - cfg.set_scratch_size(144 * 1024); // 144KB scratch + cfg.set_scratch_size(256 * 1024); // 256KB scratch let env = GuestEnvironment::new(GuestBinary::FilePath(binary_path.clone()), None); diff --git a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs index 31e3975c9..f042c3f57 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs @@ -25,8 +25,7 @@ use super::uninitialized::SandboxRuntimeConfig; use crate::hypervisor::hyperlight_vm::{HyperlightVm, HyperlightVmError}; use crate::mem::exe::LoadInfo; use crate::mem::mgr::SandboxMemoryManager; -use crate::mem::ptr::{GuestPtr, RawPtr}; -use crate::mem::ptr_offset::Offset; +use crate::mem::ptr::RawPtr; use crate::mem::shared_mem::GuestSharedMemory; #[cfg(gdb)] use crate::sandbox::config::DebugInfo; @@ -96,13 +95,6 @@ pub(crate) fn set_up_hypervisor_partition( #[cfg(any(crashdump, gdb))] rt_cfg: &SandboxRuntimeConfig, _load_info: LoadInfo, ) -> Result { - let base_ptr = GuestPtr::try_from(Offset::from(0))?; - - let pml4_ptr = { - let pml4_offset_u64 = mgr.layout.get_pt_offset() as u64; - base_ptr + Offset::from(pml4_offset_u64) - }; - // Create gdb thread if gdb is enabled and the configuration is provided #[cfg(gdb)] let gdb_conn = if let Some(DebugInfo { port }) = rt_cfg.debug_info { @@ -130,7 +122,7 @@ pub(crate) fn set_up_hypervisor_partition( Ok(HyperlightVm::new( mgr.shared_mem, mgr.scratch_mem, - pml4_ptr.absolute()?, + mgr.layout.get_pt_base_gpa(), mgr.entrypoint, stack_top_gva, config, From 002afe497758c992e9eeceab6e7e3c56ab529534 Mon Sep 17 00:00:00 2001 From: Lucy Menon 
<168595099+syntactically@users.noreply.github.com> Date: Tue, 10 Feb 2026 19:14:49 +0000 Subject: [PATCH 09/10] amd64: slightly improve TLB flushing This is still somewhat broken (see https://github.com/hyperlight-dev/hyperlight/issues/721), since it relies on the flush_tlb function ending up in the big area around the code that is still identity mapped. So, this code should probably move somewhere else. However, this fixes a very significant bug in the prior code, where the flush_tlb() function called on guest function dispatch could return to the wrong address, due to the correct return address having been pushed onto the wrong page due to a stale TLB entry. Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com> --- .../src/arch/amd64/dispatch.rs | 70 +++++++++++++++++++ .../src/arch/amd64/mod.rs | 1 + .../src/guest_function/call.rs | 40 +---------- src/hyperlight_guest_bin/src/lib.rs | 2 +- src/hyperlight_guest_bin/src/paging.rs | 19 ----- .../src/hypervisor/hyperlight_vm.rs | 27 +++++-- src/hyperlight_host/tests/integration_test.rs | 6 +- 7 files changed, 96 insertions(+), 69 deletions(-) create mode 100644 src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs diff --git a/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs b/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs new file mode 100644 index 000000000..972ecb715 --- /dev/null +++ b/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs @@ -0,0 +1,70 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. + */ + +//! This module contains the architecture-specific dispatch function +//! entry sequence + +// The extern "C" here is a lie until #![feature(abi_custom)] is stabilised +unsafe extern "C" { +    /// There are two reasons that we need an architecture-specific +    /// assembly stub currently: (1) to ensure that the fact that the +    /// dispatch function never returns but can be executed again is +    /// OK (i.e. we must ensure that there is no stack frame +    /// teardown); (2) at least presently, to ensure that the TLB is +    /// flushed in certain cases. +    /// +    /// # TLB flushing +    /// +    /// The hyperlight host likes to use one partition and reset it in +    /// various ways; if that has happened, there might be stale TLB +    /// entries hanging around from the former user of the +    /// partition. Flushing the TLB here is not quite the right thing +    /// to do, since incorrectly cached entries could make even this +    /// code not exist, but regrettably there is not a simple way for +    /// the host to trigger flushing when it ought to happen, so for +    /// now this works in practice, since the text segment is always +    /// part of the big identity-mapped region at the base of the +    /// guest. The stack, however, is not part of the snapshot region +    /// which is (in practice) idmapped for the relevant area, so this +    /// cannot touch the stack until the flush is done. +    /// +    /// Currently this just always flips CR4.PGE back and forth to +    /// trigger a tlb flush. We should use a faster approach where +    /// available +    /// +    /// # ABI +    /// +    /// The ZF should be set if a TLB flush is required on this call +    /// (e.g. the first call after a snapshot restore) +    /// +    /// The stack pointer should, unusually for amd64, be 16-byte +    /// aligned at the beginning of the function---no return address +    /// should be pushed.
+ pub(crate) unsafe fn dispatch_function(must_flush_tlb: u64); +} +core::arch::global_asm!(" + .global dispatch_function + dispatch_function: + jnz flush_done + mov rdi, cr4 + xor rdi, 0x80 + mov cr4, rdi + xor rdi, 0x80 + mov cr4, rdi + flush_done: + call {internal_dispatch_function}\n + hlt\n +", internal_dispatch_function = sym crate::guest_function::call::internal_dispatch_function); diff --git a/src/hyperlight_guest_bin/src/arch/amd64/mod.rs b/src/hyperlight_guest_bin/src/arch/amd64/mod.rs index 9ba50286d..acaf7e40d 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/mod.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/mod.rs @@ -15,6 +15,7 @@ limitations under the License. */ pub(crate) mod context; +pub(crate) mod dispatch; pub mod exception; mod init; mod layout; diff --git a/src/hyperlight_guest_bin/src/guest_function/call.rs b/src/hyperlight_guest_bin/src/guest_function/call.rs index c460b06c6..bfeec104e 100644 --- a/src/hyperlight_guest_bin/src/guest_function/call.rs +++ b/src/hyperlight_guest_bin/src/guest_function/call.rs @@ -22,7 +22,6 @@ use hyperlight_common::flatbuffer_wrappers::function_call::{FunctionCall, Functi use hyperlight_common::flatbuffer_wrappers::function_types::{FunctionCallResult, ParameterType}; use hyperlight_common::flatbuffer_wrappers::guest_error::{ErrorCode, GuestError}; use hyperlight_guest::error::{HyperlightGuestError, Result}; -use hyperlight_guest::exit::halt; use tracing::{Span, instrument}; use crate::{GUEST_HANDLE, REGISTERED_GUEST_FUNCTIONS}; @@ -79,14 +78,8 @@ pub(crate) fn call_guest_function(function_call: FunctionCall) -> Result } } -// This function is marked as no_mangle/inline to prevent the compiler from inlining it , if its inlined the epilogue will not be called -// and we will leak memory as the epilogue will not be called as halt() is not going to return. 
-// -// This function may panic, as we have no other ways of dealing with errors at this level -#[unsafe(no_mangle)] -#[inline(never)] #[instrument(skip_all, parent = Span::current(), level= "Trace")] -fn internal_dispatch_function() { +pub(crate) fn internal_dispatch_function() { let handle = unsafe { GUEST_HANDLE }; #[cfg(debug_assertions)] @@ -115,34 +108,3 @@ fn internal_dispatch_function() { } } } - -// This is implemented as a separate function to make sure that epilogue in the internal_dispatch_function is called before the halt() -// which if it were included in the internal_dispatch_function cause the epilogue to not be called because the halt() would not return -// when running in the hypervisor. -#[instrument(skip_all, parent = Span::current(), level= "Trace")] -pub(crate) extern "C" fn dispatch_function() { - // The hyperlight host likes to use one partition and reset it in - // various ways; if that has happened, there might stale TLB - // entries hanging around from the former user of the - // partition. Flushing the TLB here is not quite the right thing - // to do, since incorrectly cached entries could make even this - // code not exist, but regrettably there is not a simple way for - // the host to trigger flushing when it ought to happen, so for - // now this works in practice, since the text segment is always - // part of the big identity-mapped region at the base of the - // guest. 
- crate::paging::flush_tlb(); - - // Read the current TSC to report it to the host with the spans/events - // This helps calculating the timestamps relative to the guest call - #[cfg(feature = "trace_guest")] - { - let guest_start_tsc = hyperlight_guest_tracing::invariant_tsc::read_tsc(); - // Reset the trace state for the new guest function call with the new start TSC - // This clears any existing spans/events from previous calls ensuring a clean state - hyperlight_guest_tracing::new_call(guest_start_tsc); - } - - internal_dispatch_function(); - halt(); -} diff --git a/src/hyperlight_guest_bin/src/lib.rs b/src/hyperlight_guest_bin/src/lib.rs index cc1665abd..3bcce40db 100644 --- a/src/hyperlight_guest_bin/src/lib.rs +++ b/src/hyperlight_guest_bin/src/lib.rs @@ -20,8 +20,8 @@ extern crate alloc; use core::fmt::Write; +use arch::dispatch::dispatch_function; use buddy_system_allocator::LockedHeap; -use guest_function::call::dispatch_function; use guest_function::register::GuestFunctionRegister; use guest_logger::init_logger; use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode; diff --git a/src/hyperlight_guest_bin/src/paging.rs b/src/hyperlight_guest_bin/src/paging.rs index 5d3a38a00..a3df745f1 100644 --- a/src/hyperlight_guest_bin/src/paging.rs +++ b/src/hyperlight_guest_bin/src/paging.rs @@ -144,22 +144,3 @@ pub fn virt_to_phys(gva: vmem::VirtAddr) -> impl Iterator pub fn phys_to_virt(gpa: vmem::PhysAddr) -> Option<*mut u8> { GuestMappingOperations::new().fallible_phys_to_virt(gpa) } - -pub fn flush_tlb() { - // Currently this just always flips CR4.PGE back and forth to - // trigger a tlb flush. 
We should use a faster approach where - // available - let mut orig_cr4: u64; - unsafe { - asm!("mov {}, cr4", out(reg) orig_cr4); - } - let tmp_cr4: u64 = orig_cr4 ^ (1 << 7); // CR4.PGE - unsafe { - asm!( - "mov cr4, {}", - "mov cr4, {}", - in(reg) tmp_cr4, - in(reg) orig_cr4 - ); - } -} diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs index 7206cc847..91a91acdf 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs @@ -104,6 +104,8 @@ pub(crate) struct HyperlightVm { mmap_regions: Vec<(u32, MemoryRegion)>, // Later mapped regions (slot number, region) + pending_tlb_flush: bool, + #[cfg(gdb)] gdb_conn: Option>, #[cfg(gdb)] @@ -421,6 +423,8 @@ impl HyperlightVm { mmap_regions: Vec::new(), + pending_tlb_flush: false, + #[cfg(gdb)] gdb_conn, #[cfg(gdb)] @@ -662,17 +666,25 @@ impl HyperlightVm { let NextAction::Call(dispatch_func_addr) = self.entrypoint else { return Err(DispatchGuestCallError::Uninitialized); }; + let mut rflags = 1 << 1; // RFLAGS.1 is RES1 + if self.pending_tlb_flush { + rflags |= 1 << 6; // set ZF if we need a tlb flush done before anything else executes + self.pending_tlb_flush = false; + } // set RIP and RSP, reset others let regs = CommonRegisters { rip: dispatch_func_addr, // We usually keep the top of the stack 16-byte - // aligned. However, the ABI requirement is that the stack - // be aligned _before a call instruction_, which means - // that the stack needs to actually be ≡ 8 mod 16 at the - // first instruction (since, on x64, a call instruction - // automatically pushes a return address). - rsp: self.rsp_gva - 8, - rflags: 1 << 1, + // aligned. 
Since the usual ABI requirement is that the + // stack be aligned _before a call instruction_, one might + // expect that the stack pointer here needs to actually be + // ≡ 8 mod 16 at the first instruction (since, on x64, a + // call instruction automatically pushes a return + // address). However, the x64 entry stub in + // hyperlight_guest::arch::dispatch handles this itself, + // so we do use the aligned address here. + rsp: self.rsp_gva, + rflags, ..Default::default() }; self.vm @@ -945,6 +957,7 @@ impl HyperlightVm { // to point to the new (relocated) page tables let mut sregs = *sregs; sregs.cr3 = cr3; + self.pending_tlb_flush = true; self.vm.set_sregs(&sregs)?; } #[cfg(not(feature = "init-paging"))] diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index 083e690e3..4b2426cf2 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -704,13 +704,13 @@ fn log_message() { // - internal_dispatch_function does a log::trace! 
in debug mode // - logs from trace level tracing spans created as logs because of the tracing `log` feature // - 4 from evolve call (hyperlight_main + halt) - // - 16 from guest call (internal_dispatch_function + others) + // - 12 from guest call (internal_dispatch_function + others) // and are multiplied because we make 6 calls to `log_test_messages` // NOTE: These numbers need to be updated if log messages or spans are added/removed let num_fixed_trace_log = if cfg!(debug_assertions) { - (1 + 20) * 6 + (1 + 16) * 6 } else { - 20 * 6 + 16 * 6 }; let tests = vec![ From c2b03eac904da47d61e092349b325cf8e7e5cd79 Mon Sep 17 00:00:00 2001 From: Lucy Menon <168595099+syntactically@users.noreply.github.com> Date: Thu, 12 Feb 2026 01:55:07 +0000 Subject: [PATCH 10/10] amd64: add missing barrier for certain page-table updates Certain page-table updates that do not require TLB invalidation were previously missing any kind of fence at all to keep table walks coherent. This commit introduces a new barrier for this scenario. Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com> --- .../src/arch/amd64/exception/handle.rs | 8 +++ .../src/arch/amd64/init.rs | 1 + .../src/arch/amd64/machine.rs | 1 + src/hyperlight_guest_bin/src/paging.rs | 55 +++++++++++++++++++ src/tests/rust_guests/simpleguest/src/main.rs | 7 ++- 5 files changed, 70 insertions(+), 2 deletions(-) diff --git a/src/hyperlight_guest_bin/src/arch/amd64/exception/handle.rs b/src/hyperlight_guest_bin/src/arch/amd64/exception/handle.rs index 8db2ee18a..f14d37ea8 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/exception/handle.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/exception/handle.rs @@ -76,6 +76,12 @@ fn handle_stack_pagefault(gva: u64) { executable: false, }), ); + // This is mapping an entry for the first time, so we don't + // need to worry about TLB maintenance. 
We do (probably) need + // to worry about coherence between these stores and the page + // table walker, but this page won't be accessed again until + // after [`iret`], which is a serializing instruction, so + // that's already handled as well. } } @@ -106,6 +112,8 @@ fn handle_cow_pagefault(_phys: PhysAddr, virt: VirtAddr, perms: CowMapping) { executable: perms.executable, }), ); + // This is updating an entry that was already valid, changing + // its OA, so we need to actually invalidate the TLB for it. core::arch::asm!("invlpg [{}]", in(reg) target_virt, options(readonly, nostack, preserves_flags)); } } diff --git a/src/hyperlight_guest_bin/src/arch/amd64/init.rs b/src/hyperlight_guest_bin/src/arch/amd64/init.rs index 8c20eb655..bc44d8e96 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/init.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/init.rs @@ -121,6 +121,7 @@ unsafe fn init_stack() -> u64 { executable: false, }), ); + crate::paging::barrier::first_valid_same_ctx(); } MAIN_STACK_TOP_GVA } diff --git a/src/hyperlight_guest_bin/src/arch/amd64/machine.rs b/src/hyperlight_guest_bin/src/arch/amd64/machine.rs index e9a164807..cde8118e3 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/machine.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/machine.rs @@ -229,6 +229,7 @@ impl ProcCtrl { executable: false, }), ); + crate::paging::barrier::first_valid_same_ctx(); let ptr = ptr as *mut Self; (&raw mut (*ptr).gdt).write_bytes(0u8, 1); (&raw mut (*ptr).tss).write_bytes(0u8, 1); diff --git a/src/hyperlight_guest_bin/src/paging.rs b/src/hyperlight_guest_bin/src/paging.rs index a3df745f1..f9a785ffa 100644 --- a/src/hyperlight_guest_bin/src/paging.rs +++ b/src/hyperlight_guest_bin/src/paging.rs @@ -144,3 +144,58 @@ pub fn virt_to_phys(gva: vmem::VirtAddr) -> impl Iterator pub fn phys_to_virt(gpa: vmem::PhysAddr) -> Option<*mut u8> { GuestMappingOperations::new().fallible_phys_to_virt(gpa) } + +/// Barriers that other code may need to use when updating page 
tables +pub mod barrier { +    /// Call this function when a virtual address has just been made +    /// valid for the first time after the last tlb invalidate that +    /// affected it, and it will be used for the first time in the +    /// same execution context as has made the modification. +    /// +    /// On most architectures, TLBs will not cache invalid entries, so +    /// this does not need to issue a TLB invalidation. However, it does need to +    /// ensure coherency between the previous writes and any future +    /// uses by a page table walker. +    /// +    /// # Architecture-specific (amd64) notes +    /// +    /// The exact details around page walk coherency on amd64 seem a +    /// bit fuzzy. The Intel manual notes that a serialising +    /// instruction is necessary specifically to synchronise table +    /// walks performed during instruction fetch [1], but is +    /// relatively quiet about other page walks. The AMD manual notes +    /// [2] that "a table entry is allowed to be upgraded (by marking +    /// it as present, or by removing its write, execute or supervisor +    /// restrictions) without explicitly maintaining TLB coherency", +    /// but only states that any upper-level TLB cache entries +    /// will be flushed before re-walking to confirm the fault, which +    /// does not clearly seem strong enough. +    /// +    /// In some limited testing, `mfence` typically seems to be +    /// enough, but as it is not a serializing instruction on Intel +    /// platforms, we assume it may not be quite good enough. `cpuid` +    /// is likely to be very slow, since we are definitely running +    /// under a hypervisor (and often even nested). Currently, for +    /// simplicity's sake, this just copies cr0 to itself, but other +    /// options (including the `serialize` instruction where +    /// available) could be worth exploring.
+ /// + /// [1] Intel 64 and IA-32 Architectures Software Developer's Manual, Volume 3: System Programming Guide + /// Chapter 5: Paging + /// §5.10: Caching Translation Information + /// §5.10.4: Invalidation of TLBs and Paging-Structure Caches + /// §5.10.4.3: Optional Invalidation + /// [2] AMD64 Architecture Programmer's Manual, Volume 2: System Programming + /// Section 5: Page Translation and Protection + /// §5.5: Translation-Lookaside Buffer + /// §5.5.3: TLB Management + #[inline(always)] + pub fn first_valid_same_ctx() { + unsafe { + core::arch::asm!(" + mov rax, cr0 + mov cr0, rax + ", out("rax") _, out("rcx") _); + } + } +} diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index 296ace27c..e958d7dcb 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -593,6 +593,7 @@ fn read_mapped_buffer(base: u64, len: u64, do_map: bool) -> Vec { executable: true, }), ); + hyperlight_guest_bin::paging::barrier::first_valid_same_ctx(); } } @@ -623,7 +624,8 @@ fn write_mapped_buffer(base: u64, len: u64) -> bool { writable: true, executable: true, }), - ) + ); + hyperlight_guest_bin::paging::barrier::first_valid_same_ctx(); }; let data = unsafe { core::slice::from_raw_parts_mut(base, len) }; @@ -650,7 +652,8 @@ fn exec_mapped_buffer(base: u64, len: u64) -> bool { writable: true, executable: true, }), - ) + ); + hyperlight_guest_bin::paging::barrier::first_valid_same_ctx(); }; let data = unsafe { core::slice::from_raw_parts(base, len) };