diff --git a/fuzz/fuzz_targets/host_call.rs b/fuzz/fuzz_targets/host_call.rs
index befd95ff2..b0d37cf1a 100644
--- a/fuzz/fuzz_targets/host_call.rs
+++ b/fuzz/fuzz_targets/host_call.rs
@@ -35,6 +35,7 @@ fuzz_target!(
     let mut cfg = SandboxConfiguration::default();
     cfg.set_output_data_size(64 * 1024); // 64 KB output buffer
     cfg.set_input_data_size(64 * 1024); // 64 KB input buffer
+    cfg.set_scratch_size(512 * 1024); // large scratch region to contain those buffers, any data copies, etc.
     let u_sbox = UninitializedSandbox::new(
         GuestBinary::FilePath(simple_guest_for_fuzzing_as_string().expect("Guest Binary Missing")),
         Some(cfg)
diff --git a/src/hyperlight_common/src/arch/amd64/layout.rs b/src/hyperlight_common/src/arch/amd64/layout.rs
index b1471b8ce..14a9cd62a 100644
--- a/src/hyperlight_common/src/arch/amd64/layout.rs
+++ b/src/hyperlight_common/src/arch/amd64/layout.rs
@@ -37,6 +37,8 @@ pub const MAX_GPA: usize = 0x0000_000f_ffff_ffff;
 /// - A page for the smallest possible non-exception stack
 /// - (up to) 3 pages for mapping that
 /// - Two pages for the exception stack and metadata
-pub fn min_scratch_size() -> usize {
-    12 * crate::vmem::PAGE_SIZE
+/// - A page-aligned amount of memory for I/O buffers (for now)
+pub fn min_scratch_size(input_data_size: usize, output_data_size: usize) -> usize {
+    (input_data_size + output_data_size).next_multiple_of(crate::vmem::PAGE_SIZE)
+        + 12 * crate::vmem::PAGE_SIZE
 }
diff --git a/src/hyperlight_common/src/arch/amd64/vmem.rs b/src/hyperlight_common/src/arch/amd64/vmem.rs
index 110598a9a..1c64e8ca9 100644
--- a/src/hyperlight_common/src/arch/amd64/vmem.rs
+++ b/src/hyperlight_common/src/arch/amd64/vmem.rs
@@ -26,7 +26,8 @@ limitations under the License.
 //! allocating intermediate tables as needed and setting appropriate flags on leaf PTEs
 
 use crate::vmem::{
-    BasicMapping, Mapping, MappingKind, TableMovabilityBase, TableOps, TableReadOps, Void,
+    BasicMapping, CowMapping, Mapping, MappingKind, TableMovabilityBase, TableOps, TableReadOps,
+    Void,
 };
 
 // Paging Flags
@@ -58,12 +59,17 @@ const PAGE_NX: u64 = 1 << 63;
 /// This masks out the lower 12 flag bits AND the upper bits including NX (bit 63)
 const PTE_ADDR_MASK: u64 = 0x000F_FFFF_FFFF_F000;
 const PAGE_USER_ACCESS_DISABLED: u64 = 0 << 2; // U/S bit not set - supervisor mode only (no code runs in user mode for now)
-const PAGE_DIRTY_CLEAR: u64 = 0 << 6; // D - dirty bit cleared (set by CPU when written)
-const PAGE_ACCESSED_CLEAR: u64 = 0 << 5; // A - accessed bit cleared (set by CPU when accessed)
+const PAGE_DIRTY_SET: u64 = 1 << 6; // D - dirty bit
+const PAGE_ACCESSED_SET: u64 = 1 << 5; // A - accessed bit
 const PAGE_CACHE_ENABLED: u64 = 0 << 4; // PCD - page cache disable bit not set (caching enabled)
 const PAGE_WRITE_BACK: u64 = 0 << 3; // PWT - page write-through bit not set (write-back caching)
 const PAGE_PAT_WB: u64 = 0 << 7; // PAT - page attribute table index bit (0 for write-back memory when PCD=0, PWT=0)
 
+// We use various patterns of the available-for-software-use bits to
+// represent certain special mappings.
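As a reviewer's aside (not part of the patch): a minimal standalone sketch of the AVL-bit tagging scheme the constants below introduce. The constants mirror `PTE_AVL_MASK` and `PAGE_AVL_COW` from this hunk; `is_cow_pte` is a hypothetical helper, not code from the PR.

```rust
// Sketch only: mirrors the AVL-bit CoW encoding added in this hunk.
const PTE_AVL_MASK: u64 = 0x0000_0000_0000_0E00; // bits 9-11, ignored by the MMU
const PAGE_AVL_COW: u64 = 1 << 9; // AVL pattern 001 marks a copy-on-write page

/// Hypothetical helper: true if a PTE carries the CoW tag.
fn is_cow_pte(pte: u64) -> bool {
    (pte & PTE_AVL_MASK) == PAGE_AVL_COW
}

fn main() {
    let pte = 0x0000_0000_0004_2000u64 | PAGE_AVL_COW | 1; // present + CoW-tagged
    assert!(is_cow_pte(pte));
    assert!(!is_cow_pte(pte & !PTE_AVL_MASK)); // clearing the AVL bits removes the tag
}
```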
+const PTE_AVL_MASK: u64 = 0x0000_0000_0000_0E00;
+const PAGE_AVL_COW: u64 = 1 << 9;
+
 /// Returns PAGE_RW if writable is true, 0 otherwise
 #[inline(always)]
 const fn page_rw_flag(writable: bool) -> u64 {
@@ -101,7 +107,7 @@ fn bits(x: u64) -> u64 {
 #[allow(clippy::precedence)]
 fn pte_for_table(table_addr: Op::TableAddr) -> u64 {
     Op::to_phys(table_addr) |
-    PAGE_ACCESSED_CLEAR | // accessed bit cleared (will be set by CPU when page is accessed - but we dont use the access bit for anything at present)
+    PAGE_ACCESSED_SET | // prevent the CPU writing to the accessed flag
     PAGE_CACHE_ENABLED | // leave caching enabled
     PAGE_WRITE_BACK | // use write-back caching
     PAGE_USER_ACCESS_DISABLED | // dont allow user access (no code runs in user mode for now)
@@ -417,7 +423,7 @@ unsafe fn map_page<
     r: MapResponse,
 ) {
     let pte = match &mapping.kind {
-        MappingKind::BasicMapping(bm) =>
+        MappingKind::Basic(bm) =>
             // TODO: Support not readable
             // NOTE: On x86-64, there is no separate "readable" bit in the page table entry.
             // This means that pages cannot be made write-only or execute-only without also being readable.
@@ -429,14 +435,27 @@
            (mapping.phys_base + (r.vmin - mapping.virt_base)) |
                page_nx_flag(bm.executable) | // NX - no execute unless allowed
                PAGE_PAT_WB | // PAT index bit for write-back memory
-               PAGE_DIRTY_CLEAR | // dirty bit (set by CPU when written)
-               PAGE_ACCESSED_CLEAR | // accessed bit cleared (will be set by CPU when page is accessed - but we dont use the access bit for anything at present)
+               PAGE_DIRTY_SET | // prevent the CPU writing to the dirty bit
+               PAGE_ACCESSED_SET | // prevent the CPU writing to the accessed flag
                PAGE_CACHE_ENABLED | // leave caching enabled
                PAGE_WRITE_BACK | // use write-back caching
                PAGE_USER_ACCESS_DISABLED | // dont allow user access (no code runs in user mode for now)
                page_rw_flag(bm.writable) | // R/W - set if writable
                PAGE_PRESENT // P - this entry is present
         }
+        MappingKind::Cow(cm) => {
+           (mapping.phys_base + (r.vmin - mapping.virt_base)) |
+               page_nx_flag(cm.executable) | // NX - no execute unless allowed
+               PAGE_AVL_COW |
+               PAGE_PAT_WB | // PAT index bit for write-back memory
+               PAGE_DIRTY_SET | // prevent the CPU writing to the dirty bit
+               PAGE_ACCESSED_SET | // prevent the CPU writing to the accessed flag
+               PAGE_CACHE_ENABLED | // leave caching enabled
+               PAGE_WRITE_BACK | // use write-back caching
+               PAGE_USER_ACCESS_DISABLED | // dont allow user access (no code runs in user mode for now)
+               0 | // R/W - Cow page is never writable
+               PAGE_PRESENT // P - this entry is present
+        }
     };
     unsafe {
         write_entry_updating(op, r.update_parent, r.entry_ptr, pte);
@@ -517,7 +536,7 @@ pub unsafe fn virt_to_phys<'a, Op: TableReadOps + 'a>(
     op: impl core::convert::AsRef + Copy + 'a,
     address: u64,
     len: u64,
-) -> impl Iterator + 'a {
+) -> impl Iterator + 'a {
     // Undo sign-extension, and mask off any sub-page bits
     let vmin = (address & ((1u64 << VA_BITS) - 1)) & !(PAGE_SIZE as u64 - 1);
     let vmax = core::cmp::min(vmin + len, 1u64 << VA_BITS);
@@ -540,12 +559,27 @@
         let sgn_bit = r.vmin >> (VA_BITS - 1);
         let sgn_bits = 0u64.wrapping_sub(sgn_bit) << VA_BITS;
         let virt_addr = sgn_bits | r.vmin;
-        let perms = BasicMapping {
-            readable: true,
-            writable: (pte & PAGE_RW) != 0,
-            executable: (pte & PAGE_NX) == 0,
+
+        let executable = (pte & PAGE_NX) == 0;
+        let avl = pte & PTE_AVL_MASK;
+        let kind = if avl == PAGE_AVL_COW {
+            MappingKind::Cow(CowMapping {
+                readable: true,
+                executable,
+            })
+        } else {
+            MappingKind::Basic(BasicMapping {
+                readable: true,
+                writable: (pte & PAGE_RW) != 0,
+                executable,
+            })
         };
-        Some((virt_addr, phys_addr, perms))
+        Some(Mapping {
+            phys_base: phys_addr,
+            virt_base: virt_addr,
+            len: PAGE_SIZE as u64,
+            kind,
+        })
     })
 }
@@ -721,7 +755,7 @@ mod tests {
             phys_base: 0x1000,
             virt_base: 0x1000,
             len: PAGE_SIZE as u64,
-            kind: MappingKind::BasicMapping(BasicMapping {
+            kind: MappingKind::Basic(BasicMapping {
                 readable: true,
                 writable: true,
                 executable: false,
@@ -754,7 +788,7 @@ mod tests {
             phys_base: 0x2000,
             virt_base: 0x2000,
             len: PAGE_SIZE as u64,
-            kind: MappingKind::BasicMapping(BasicMapping {
+            kind: MappingKind::Basic(BasicMapping {
                 readable: true,
                 writable: false,
                 executable: true,
@@ -777,7 +811,7 @@ mod tests {
             phys_base: 0x10000,
             virt_base: 0x10000,
             len: 4 * PAGE_SIZE as u64, // 4 pages = 16KB
-            kind: MappingKind::BasicMapping(BasicMapping {
+            kind: MappingKind::Basic(BasicMapping {
                 readable: true,
                 writable: true,
                 executable: false,
@@ -810,7 +844,7 @@ mod tests {
             phys_base: 0x1000,
             virt_base: 0x1000,
             len: PAGE_SIZE as u64,
-            kind: MappingKind::BasicMapping(BasicMapping {
+            kind: MappingKind::Basic(BasicMapping {
                 readable: true,
                 writable: true,
                 executable: false,
@@ -824,7 +858,7 @@ mod tests {
             phys_base: 0x5000,
             virt_base: 0x5000,
             len: PAGE_SIZE as u64,
-            kind: MappingKind::BasicMapping(BasicMapping {
+            kind: MappingKind::Basic(BasicMapping {
                 readable: true,
                 writable: true,
                 executable: false,
@@ -849,7 +883,7 @@ mod tests {
             phys_base: 0x1000,
             virt_base: 0x1000,
             len: PAGE_SIZE as u64,
-            kind: MappingKind::BasicMapping(BasicMapping {
+            kind: MappingKind::Basic(BasicMapping {
                 readable: true,
                 writable: true,
                 executable: false,
@@ -860,8 +894,75 @@ mod tests {
         let result = unsafe { virt_to_phys(&ops, 0x1000, 1).next() };
         assert!(result.is_some(), "Should find mapped address");
-        let pte = result.unwrap();
-        assert_eq!(pte.1, 0x1000);
+        let mapping = result.unwrap();
+        assert_eq!(mapping.phys_base, 0x1000);
+    }
+
+    #[test]
+    fn test_virt_to_phys_unaligned_virt() {
+        let ops = MockTableOps::new();
+        let mapping = Mapping {
+            phys_base: 0x1000,
+            virt_base: 0x1000,
+            len: PAGE_SIZE as u64,
+            kind: MappingKind::Basic(BasicMapping {
+                readable: true,
+                writable: true,
+                executable: false,
+            }),
+        };
+
+        unsafe { map(&ops, mapping) };
+
+        let result = unsafe { virt_to_phys(&ops, 0x1234, 1).next() };
+        assert!(result.is_some(), "Should find mapped address");
+        let mapping = result.unwrap();
+        assert_eq!(mapping.phys_base, 0x1000);
+    }
+
+    #[test]
+    fn test_virt_to_phys_perms() {
+        let test = |kind| {
+            let ops = MockTableOps::new();
+            let mapping = Mapping {
+                phys_base: 0x1000,
+                virt_base: 0x1000,
+                len: PAGE_SIZE as u64,
+                kind,
+            };
+            unsafe { map(&ops, mapping) };
+            let result = unsafe { virt_to_phys(&ops, 0x1000, 1).next() };
+            let mapping = result.unwrap();
+            assert_eq!(mapping.kind, kind);
+        };
+        test(MappingKind::Basic(BasicMapping {
+            readable: true,
+            writable: false,
+            executable: false,
+        }));
+        test(MappingKind::Basic(BasicMapping {
+            readable: true,
+            writable: false,
+            executable: true,
+        }));
+        test(MappingKind::Basic(BasicMapping {
+            readable: true,
+            writable: true,
+            executable: false,
+        }));
+        test(MappingKind::Basic(BasicMapping {
+            readable: true,
+            writable: true,
+            executable: true,
+        }));
+        test(MappingKind::Cow(CowMapping {
+            readable: true,
+            executable: false,
+        }));
+        test(MappingKind::Cow(CowMapping {
+            readable: true,
+            executable: true,
+        }));
     }
 
     #[test]
@@ -880,7 +981,7 @@ mod tests {
             phys_base: 0x1000,
             virt_base: 0x1000,
             len: PAGE_SIZE as u64,
-            kind: MappingKind::BasicMapping(BasicMapping {
+            kind: MappingKind::Basic(BasicMapping {
                 readable: true,
                 writable: true,
                 executable: false,
diff --git a/src/hyperlight_common/src/arch/i686/vmem.rs b/src/hyperlight_common/src/arch/i686/vmem.rs
index 83c749975..778b557a5 100644
--- a/src/hyperlight_common/src/arch/i686/vmem.rs
+++ b/src/hyperlight_common/src/arch/i686/vmem.rs
@@ -17,7 +17,7 @@ limitations under the License.
 // This file is just dummy definitions at the moment, in order to
 // allow compiling the guest for real mode boot scenarios.
 
-use crate::vmem::{BasicMapping, Mapping, TableOps, TableReadOps, Void};
+use crate::vmem::{Mapping, TableOps, TableReadOps, Void};
 
 pub const PAGE_SIZE: usize = 4096;
 pub const PAGE_TABLE_SIZE: usize = 4096;
@@ -31,10 +31,7 @@ pub unsafe fn map(_op: &Op, _mapping: Mapping) {
 }
 
 #[allow(clippy::missing_safety_doc)]
-pub unsafe fn virt_to_phys(
-    _op: &Op,
-    _address: u64,
-) -> impl Iterator {
+pub unsafe fn virt_to_phys(_op: &Op, _address: u64) -> impl Iterator {
     panic!(
         "vmem::virt_to_phys: i686 guests do not support booting the full hyperlight guest kernel"
     );
diff --git a/src/hyperlight_common/src/mem.rs b/src/hyperlight_common/src/mem.rs
index 35fa9c183..3c86f8ecf 100644
--- a/src/hyperlight_common/src/mem.rs
+++ b/src/hyperlight_common/src/mem.rs
@@ -31,7 +31,6 @@ pub struct GuestMemoryRegion {
 #[derive(Debug, Clone, Copy)]
 #[repr(C)]
 pub struct HyperlightPEB {
-    pub guest_function_dispatch_ptr: u64,
     pub input_stack: GuestMemoryRegion,
     pub output_stack: GuestMemoryRegion,
     pub init_data: GuestMemoryRegion,
diff --git a/src/hyperlight_common/src/vmem.rs b/src/hyperlight_common/src/vmem.rs
index 6ddf172dd..c206e50f1 100644
--- a/src/hyperlight_common/src/vmem.rs
+++ b/src/hyperlight_common/src/vmem.rs
@@ -180,16 +180,23 @@ pub trait TableOps: TableReadOps {
     );
 }
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Clone, Copy)]
 pub struct BasicMapping {
     pub readable: bool,
     pub writable: bool,
     pub executable: bool,
 }
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Clone, Copy)]
+pub struct CowMapping {
+    pub readable: bool,
+    pub executable: bool,
+}
+
+#[derive(Debug, PartialEq, Clone, Copy)]
 pub enum MappingKind {
-    BasicMapping(BasicMapping),
+    Basic(BasicMapping),
+    Cow(CowMapping),
     /* TODO: What useful things other than basic mappings actually
      * require touching the tables? */
 }
diff --git a/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs b/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs
new file mode 100644
index 000000000..972ecb715
--- /dev/null
+++ b/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs
@@ -0,0 +1,70 @@
+/*
+Copyright 2025 The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+ */
+
+//! This module contains the architecture-specific dispatch function
+//! entry sequence
+
+// The extern "C" here is a lie until #![feature(abi_custom)] is stabilised
+unsafe extern "C" {
+    /// There are two reasons that we need an architecture-specific
+    /// assembly stub currently: (1) to ensure that it is safe for the
+    /// dispatch function to never return and yet be executed again
+    /// (i.e. we must ensure that there is no stack frame teardown);
+    /// (2) at least presently, to ensure that the TLB is flushed in
+    /// certain cases.
+    ///
+    /// # TLB flushing
+    ///
+    /// The hyperlight host likes to use one partition and reset it in
+    /// various ways; if that has happened, there might be stale TLB
+    /// entries hanging around from the former user of the
+    /// partition. Flushing the TLB here is not quite the right thing
+    /// to do, since incorrectly cached entries could make even this
+    /// code not exist, but regrettably there is not a simple way for
+    /// the host to trigger flushing when it ought to happen, so for
+    /// now this works in practice, since the text segment is always
+    /// part of the big identity-mapped region at the base of the
+    /// guest. The stack, however, is not part of the snapshot region
+    /// which is (in practice) idmapped for the relevant area, so this
+    /// cannot touch the stack until the flush is done.
+    ///
+    /// Currently this just always flips CR4.PGE back and forth to
+    /// trigger a tlb flush. We should use a faster approach where
+    /// available.
+    ///
+    /// # ABI
+    ///
+    /// The ZF should be set if a TLB flush is required on this call
+    /// (e.g. the first call after a snapshot restore).
+    ///
+    /// The stack pointer should, unusually for amd64, be 16-byte
+    /// aligned at the beginning of the function---no return address
+    /// should be pushed.
+    pub(crate) unsafe fn dispatch_function(must_flush_tlb: u64);
+}
+core::arch::global_asm!("
+    .global dispatch_function
+    dispatch_function:
+    jnz flush_done
+    mov rdi, cr4
+    xor rdi, 0x80
+    mov cr4, rdi
+    xor rdi, 0x80
+    mov cr4, rdi
+    flush_done:
+    call {internal_dispatch_function}\n
+    hlt\n
+", internal_dispatch_function = sym crate::guest_function::call::internal_dispatch_function);
diff --git a/src/hyperlight_guest_bin/src/arch/amd64/exception/handle.rs b/src/hyperlight_guest_bin/src/arch/amd64/exception/handle.rs
index 91fad866a..f14d37ea8 100644
--- a/src/hyperlight_guest_bin/src/arch/amd64/exception/handle.rs
+++ b/src/hyperlight_guest_bin/src/arch/amd64/exception/handle.rs
@@ -17,6 +17,9 @@ limitations under the License.
 use core::fmt::Write;
 
 use hyperlight_common::outb::Exception;
+use hyperlight_common::vmem::{
+    BasicMapping, CowMapping, MappingKind, PAGE_SIZE, PhysAddr, VirtAddr,
+};
 use hyperlight_guest::exit::write_abort;
 use hyperlight_guest::layout::{MAIN_STACK_LIMIT_GVA, MAIN_STACK_TOP_GVA};
 
@@ -57,6 +60,107 @@ pub type ExceptionHandler = fn(
     page_fault_address: u64,
 ) -> bool;
 
+fn handle_stack_pagefault(gva: u64) {
+    // TODO: perhaps we should have a sanity check that the
+    // stack grows only one page at a time, which should be
+    // ensured by our stack probing discipline?
+    unsafe {
+        let new_page = hyperlight_guest::prim_alloc::alloc_phys_pages(1);
+        crate::paging::map_region(
+            new_page,
+            (gva & !0xfff) as *mut u8,
+            PAGE_SIZE as u64,
+            MappingKind::Basic(BasicMapping {
+                readable: true,
+                writable: true,
+                executable: false,
+            }),
+        );
+        // This is mapping an entry for the first time, so we don't
+        // need to worry about TLB maintenance. We do (probably) need
+        // to worry about coherence between these stores and the page
+        // table walker, but this page won't be accessed again until
+        // after [`iret`], which is a serializing instruction, so
+        // that's already handled as well.
+    }
+}
+
+fn handle_cow_pagefault(_phys: PhysAddr, virt: VirtAddr, perms: CowMapping) {
+    unsafe {
+        let new_page = hyperlight_guest::prim_alloc::alloc_phys_pages(1);
+        let target_virt = virt as *mut u8;
+        core::ptr::copy(
+            target_virt,
+            crate::paging::phys_to_virt(new_page).unwrap(),
+            PAGE_SIZE,
+        );
+        // todo(multithreading): when we have multiple threads, we
+        // will likely need to (at least in some situations) do a
+        // break-before-make sequence here to avoid any possible
+        // issues with incoherent TLBs.
+        crate::paging::map_region(
+            new_page,
+            target_virt,
+            PAGE_SIZE as u64,
+            MappingKind::Basic(BasicMapping {
+                // Inherit R bit from the original mapping (always 1 at the moment)
+                readable: perms.readable,
+                // If we got here, the original mapping was marked
+                // CoW, so the copied mapping should always be
+                // writable
+                writable: true,
+                executable: perms.executable,
+            }),
+        );
+        // This is updating an entry that was already valid, changing
+        // its OA, so we need to actually invalidate the TLB for it.
+        core::arch::asm!("invlpg [{}]", in(reg) target_virt, options(readonly, nostack, preserves_flags));
+    }
+}
+
+fn try_handle_internal_pagefault(
+    exn_info: *mut ExceptionInfo,
+    _ctx: *mut Context,
+    gva: u64,
+) -> bool {
+    let error_code = unsafe { (&raw const (*exn_info).error_code).read_volatile() };
+    let present = (error_code & (1 << 0)) != 0; // bit 0 is P
+    if !present {
+        // If the fault was caused by a not-present page, check if we
+        // should populate it with a stack page
+        if (MAIN_STACK_LIMIT_GVA..MAIN_STACK_TOP_GVA).contains(&gva) {
+            handle_stack_pagefault(gva);
+            return true;
+        }
+        return false;
+    }
+    let mut orig_mappings = crate::paging::virt_to_phys(gva);
+
+    let fault_was_rsvd_entry = (error_code & (1 << 3)) != 0;
+    if fault_was_rsvd_entry {
+        // We don't expect this to ever happen
+        return false;
+    }
+    let access_was_write = (error_code & (1 << 1)) != 0;
+    let access_was_user = (error_code & (1 << 2)) != 0;
+    let access_was_insn = (error_code & (1 << 4)) != 0;
+    if access_was_write && !access_was_user && !access_was_insn {
+        // The fault was probably caused by a lack of write
+        // permission. Check if that's because the page needs to be
+        // CoW'd
+        if let Some(mapping) = orig_mappings.next()
+            && let None = orig_mappings.next()
+            && let MappingKind::Cow(cm) = mapping.kind
+        {
+            handle_cow_pagefault(mapping.phys_base, mapping.virt_base, cm);
+            return true;
+        }
+
+        return false;
+    };
+    false
+}
+
 /// Internal exception handler invoked by the low-level exception entry code.
 ///
 /// This function is called from assembly when an exception occurs. It checks for
@@ -67,32 +171,14 @@ pub(crate) extern "C" fn hl_exception_handler(
     exception_number: u64,
     page_fault_address: u64,
 ) {
-    // TODO: is this always needed? surely only needed if CoW
-    crate::paging::flush_tlb();
-
     let ctx = stack_pointer as *mut Context;
     let exn_info = (stack_pointer + size_of::<Context>() as u64) as *mut ExceptionInfo;
 
     let exception = Exception::try_from(exception_number as u8).expect("Invalid exception number");
 
-    let saved_rip = unsafe { (&raw const (*exn_info).rip).read_volatile() };
-    let error_code = unsafe { (&raw const (*exn_info).error_code).read_volatile() };
-
-    if exception_number == 14
-        && (MAIN_STACK_LIMIT_GVA..MAIN_STACK_TOP_GVA).contains(&page_fault_address)
-    {
-        // TODO: perhaps we should have a sanity check that the
-        // stack grows only one page at a time, which should be
-        // ensured by our stack probing discipline?
-        unsafe {
-            let new_page = hyperlight_guest::prim_alloc::alloc_phys_pages(1);
-            crate::paging::map_region(
-                new_page,
-                (page_fault_address & !0xfff) as *mut u8,
-                hyperlight_common::vmem::PAGE_SIZE as u64,
-            );
-            return;
-        }
+    // Check if it is a page fault that needs to be handled for normal Hyperlight operation
+    if exception_number == 14 && try_handle_internal_pagefault(exn_info, ctx, page_fault_address) {
+        return;
     }
 
     // Check for registered user handlers (only for architecture-defined vectors 0-30)
@@ -110,6 +196,9 @@ pub(crate) extern "C" fn hl_exception_handler(
         }
     }
 
+    // Otherwise, abort due to unexpected exception
+    let saved_rip = unsafe { (&raw const (*exn_info).rip).read_volatile() };
+    let error_code = unsafe { (&raw const (*exn_info).error_code).read_volatile() };
     let bytes_at_rip = unsafe { (saved_rip as *const [u8; 8]).read_volatile() };
 
     // begin abort sequence by writing the error code
diff --git a/src/hyperlight_guest_bin/src/arch/amd64/init.rs b/src/hyperlight_guest_bin/src/arch/amd64/init.rs
index 61e4da900..bc44d8e96 100644
--- a/src/hyperlight_guest_bin/src/arch/amd64/init.rs
+++ b/src/hyperlight_guest_bin/src/arch/amd64/init.rs
@@ -110,11 +110,18 @@ unsafe fn init_stack() -> u64 {
     use hyperlight_guest::layout::MAIN_STACK_TOP_GVA;
     let stack_top_page_base = (MAIN_STACK_TOP_GVA - 1) & !0xfff;
     unsafe {
+        use hyperlight_common::vmem::{BasicMapping, MappingKind, PAGE_SIZE};
         crate::paging::map_region(
             hyperlight_guest::prim_alloc::alloc_phys_pages(1),
             stack_top_page_base as *mut u8,
-            hyperlight_common::vmem::PAGE_SIZE as u64,
+            PAGE_SIZE as u64,
+            MappingKind::Basic(BasicMapping {
+                readable: true,
+                writable: true,
+                executable: false,
+            }),
         );
+        crate::paging::barrier::first_valid_same_ctx();
     }
     MAIN_STACK_TOP_GVA
 }
diff --git a/src/hyperlight_guest_bin/src/arch/amd64/machine.rs b/src/hyperlight_guest_bin/src/arch/amd64/machine.rs
index 79bc7cab8..cde8118e3 100644
--- a/src/hyperlight_guest_bin/src/arch/amd64/machine.rs
+++ b/src/hyperlight_guest_bin/src/arch/amd64/machine.rs
@@ -16,7 +16,7 @@ limitations under the License.
 use core::mem;
 
-use hyperlight_common::vmem::PAGE_SIZE;
+use hyperlight_common::vmem::{BasicMapping, MappingKind, PAGE_SIZE};
 
 use super::layout::PROC_CONTROL_GVA;
 
@@ -223,7 +223,13 @@ impl ProcCtrl {
             hyperlight_guest::prim_alloc::alloc_phys_pages(2),
             ptr,
             PAGE_SIZE as u64 * 2,
+            MappingKind::Basic(BasicMapping {
+                readable: true,
+                writable: true,
+                executable: false,
+            }),
         );
+        crate::paging::barrier::first_valid_same_ctx();
         let ptr = ptr as *mut Self;
         (&raw mut (*ptr).gdt).write_bytes(0u8, 1);
         (&raw mut (*ptr).tss).write_bytes(0u8, 1);
diff --git a/src/hyperlight_guest_bin/src/arch/amd64/mod.rs b/src/hyperlight_guest_bin/src/arch/amd64/mod.rs
index 9ba50286d..acaf7e40d 100644
--- a/src/hyperlight_guest_bin/src/arch/amd64/mod.rs
+++ b/src/hyperlight_guest_bin/src/arch/amd64/mod.rs
@@ -15,6 +15,7 @@ limitations under the License.
  */
 
 pub(crate) mod context;
+pub(crate) mod dispatch;
 pub mod exception;
 mod init;
 mod layout;
diff --git a/src/hyperlight_guest_bin/src/guest_function/call.rs b/src/hyperlight_guest_bin/src/guest_function/call.rs
index c460b06c6..bfeec104e 100644
--- a/src/hyperlight_guest_bin/src/guest_function/call.rs
+++ b/src/hyperlight_guest_bin/src/guest_function/call.rs
@@ -22,7 +22,6 @@ use hyperlight_common::flatbuffer_wrappers::function_call::{FunctionCall, FunctionCallType};
 use hyperlight_common::flatbuffer_wrappers::function_types::{FunctionCallResult, ParameterType};
 use hyperlight_common::flatbuffer_wrappers::guest_error::{ErrorCode, GuestError};
 use hyperlight_guest::error::{HyperlightGuestError, Result};
-use hyperlight_guest::exit::halt;
 use tracing::{Span, instrument};
 
 use crate::{GUEST_HANDLE, REGISTERED_GUEST_FUNCTIONS};
@@ -79,14 +78,8 @@ pub(crate) fn call_guest_function(function_call: FunctionCall) -> Result
     }
 }
 
-// This function is marked as no_mangle/inline to prevent the compiler from inlining it , if its inlined the epilogue will not be called
-// and we will leak memory as the epilogue will not be called as halt() is not going to return.
-//
-// This function may panic, as we have no other ways of dealing with errors at this level
-#[unsafe(no_mangle)]
-#[inline(never)]
 #[instrument(skip_all, parent = Span::current(), level= "Trace")]
-fn internal_dispatch_function() {
+pub(crate) fn internal_dispatch_function() {
     let handle = unsafe { GUEST_HANDLE };
 
     #[cfg(debug_assertions)]
@@ -115,34 +108,3 @@
         }
     }
 }
-
-// This is implemented as a separate function to make sure that epilogue in the internal_dispatch_function is called before the halt()
-// which if it were included in the internal_dispatch_function cause the epilogue to not be called because the halt() would not return
-// when running in the hypervisor.
-#[instrument(skip_all, parent = Span::current(), level= "Trace")]
-pub(crate) extern "C" fn dispatch_function() {
-    // The hyperlight host likes to use one partition and reset it in
-    // various ways; if that has happened, there might stale TLB
-    // entries hanging around from the former user of the
-    // partition. Flushing the TLB here is not quite the right thing
-    // to do, since incorrectly cached entries could make even this
-    // code not exist, but regrettably there is not a simple way for
-    // the host to trigger flushing when it ought to happen, so for
-    // now this works in practice, since the text segment is always
-    // part of the big identity-mapped region at the base of the
-    // guest.
-    crate::paging::flush_tlb();
-
-    // Read the current TSC to report it to the host with the spans/events
-    // This helps calculating the timestamps relative to the guest call
-    #[cfg(feature = "trace_guest")]
-    {
-        let guest_start_tsc = hyperlight_guest_tracing::invariant_tsc::read_tsc();
-        // Reset the trace state for the new guest function call with the new start TSC
-        // This clears any existing spans/events from previous calls ensuring a clean state
-        hyperlight_guest_tracing::new_call(guest_start_tsc);
-    }
-
-    internal_dispatch_function();
-    halt();
-}
diff --git a/src/hyperlight_guest_bin/src/lib.rs b/src/hyperlight_guest_bin/src/lib.rs
index 69ffc37af..3bcce40db 100644
--- a/src/hyperlight_guest_bin/src/lib.rs
+++ b/src/hyperlight_guest_bin/src/lib.rs
@@ -20,8 +20,8 @@ extern crate alloc;
 
 use core::fmt::Write;
 
+use arch::dispatch::dispatch_function;
 use buddy_system_allocator::LockedHeap;
-use guest_function::call::dispatch_function;
 use guest_function::register::GuestFunctionRegister;
 use guest_logger::init_logger;
 use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode;
@@ -178,8 +178,13 @@ unsafe extern "C" {
 /// Architecture-nonspecific initialisation: set up the heap,
 /// coordinate some addresses and configuration with the host, and run
 /// user initialisation
-pub(crate) extern "C" fn generic_init(peb_address: u64, seed: u64, ops: u64, max_log_level: u64) {
-    let peb_ptr = unsafe {
+pub(crate) extern "C" fn generic_init(
+    peb_address: u64,
+    seed: u64,
+    ops: u64,
+    max_log_level: u64,
+) -> u64 {
+    unsafe {
         GUEST_HANDLE = GuestHandle::init(peb_address as *mut HyperlightPEB);
         #[allow(static_mut_refs)]
         let peb_ptr = GUEST_HANDLE.peb().unwrap();
@@ -202,8 +207,6 @@
     let guest_start_tsc = hyperlight_guest_tracing::invariant_tsc::read_tsc();
 
     unsafe {
-        (*peb_ptr).guest_function_dispatch_ptr = dispatch_function as usize as u64;
-
         let srand_seed = (((peb_address << 8) ^ (seed >> 4)) >> 32) as u32;
 
         // Set the seed for the random number generator for C code using rand;
         srand(srand_seed);
@@ -231,6 +234,8 @@
     unsafe {
         hyperlight_main();
     }
+
+    dispatch_function as usize as u64
 }
 
 #[cfg(feature = "macros")]
diff --git a/src/hyperlight_guest_bin/src/paging.rs b/src/hyperlight_guest_bin/src/paging.rs
index 15721a825..f9a785ffa 100644
--- a/src/hyperlight_guest_bin/src/paging.rs
+++ b/src/hyperlight_guest_bin/src/paging.rs
@@ -29,31 +29,27 @@ use hyperlight_guest::prim_alloc::alloc_phys_pages;
 
 #[derive(Copy, Clone)]
 struct GuestMappingOperations {
-    snapshot_pt_base_gpa: u64,
-    snapshot_pt_base_gva: u64,
     scratch_base_gpa: u64,
     scratch_base_gva: u64,
 }
 impl GuestMappingOperations {
     fn new() -> Self {
         Self {
-            snapshot_pt_base_gpa: unsafe {
-                hyperlight_guest::layout::snapshot_pt_gpa_base_gva().read_volatile()
-            },
-            snapshot_pt_base_gva: hyperlight_common::layout::SNAPSHOT_PT_GVA_MIN as u64,
             scratch_base_gpa: hyperlight_guest::layout::scratch_base_gpa(),
             scratch_base_gva: hyperlight_guest::layout::scratch_base_gva(),
         }
     }
-    fn phys_to_virt(&self, addr: u64) -> *mut u8 {
+    fn fallible_phys_to_virt(&self, addr: u64) -> Option<*mut u8> {
         if addr >= self.scratch_base_gpa {
-            (self.scratch_base_gva + (addr - self.scratch_base_gpa)) as *mut u8
-        } else if addr >= self.snapshot_pt_base_gpa {
-            (self.snapshot_pt_base_gva + (addr - self.snapshot_pt_base_gpa)) as *mut u8
+            Some((self.scratch_base_gva + (addr - self.scratch_base_gpa)) as *mut u8)
         } else {
-            panic!("phys_to_virt encountered snapshot non-PT page")
+            None
         }
     }
+    fn phys_to_virt(&self, addr: u64) -> *mut u8 {
+        self.fallible_phys_to_virt(addr)
+            .expect("phys_to_virt encountered snapshot non-PT page")
+    }
 }
 // for virt_to_phys
 impl core::convert::AsRef for GuestMappingOperations {
@@ -90,6 +86,11 @@ impl vmem::TableReadOps for GuestMappingOperations {
 }
 
 impl vmem::TableOps for GuestMappingOperations {
+    // Currently, we don't actually move tables anywhere on amd64
+    // because of issues with guest PTs in IPAs that are mapped
+    // readonly in Stage 2 translation. However, this code all works
+    // and will be re-enabled as soon as there is improved
+    // architecture/hypervisor support.
     type TableMovability = vmem::MayMoveTable;
     unsafe fn alloc_table(&self) -> u64 {
         let page_addr = unsafe { alloc_phys_pages(1) };
@@ -100,26 +101,11 @@
         page_addr
     }
     unsafe fn write_entry(&self, addr: u64, entry: u64) -> Option<u64> {
-        let mut addr = addr;
-        let mut ret = None;
-        if addr >= self.snapshot_pt_base_gpa && addr < self.scratch_base_gpa {
-            // This needs to be CoW'd over to the scratch region
-            unsafe {
-                let new_table = alloc_phys_pages(1);
-                core::ptr::copy(
-                    self.phys_to_virt(addr & !0xfff),
-                    self.phys_to_virt(new_table),
-                    vmem::PAGE_TABLE_SIZE,
-                );
-                addr = new_table | (addr & 0xfff);
-                ret = Some(new_table);
-            }
-        }
         let addr = self.phys_to_virt(addr);
         unsafe {
             asm!("mov qword ptr [{}], {}", in(reg) addr, in(reg) entry);
         }
-        ret
+        None
     }
     unsafe fn update_root(&self, new_root: u64) {
         unsafe {
@@ -137,7 +123,7 @@
/// as such do not use concurrently with any other page table operations
/// - TLB invalidation is not performed,
///   if previously-unmapped ranges are not being mapped, TLB invalidation may need to be performed afterwards.
-pub unsafe fn map_region(phys_base: u64, virt_base: *mut u8, len: u64) {
+pub unsafe fn map_region(phys_base: u64, virt_base: *mut u8, len: u64, kind: vmem::MappingKind) {
     unsafe {
         vmem::map(
             &GuestMappingOperations::new(),
@@ -145,37 +131,71 @@
                 phys_base,
                 virt_base: virt_base as u64,
                 len,
-                kind: vmem::MappingKind::BasicMapping(vmem::BasicMapping {
-                    readable: true,
-                    writable: true,
-                    executable: true,
-                }),
+                kind,
             },
         );
     }
 }
 
-pub fn virt_to_phys(
-    gva: u64,
-) -> impl Iterator {
+pub fn virt_to_phys(gva: vmem::VirtAddr) -> impl Iterator {
     unsafe { vmem::virt_to_phys::<_>(GuestMappingOperations::new(), gva, 1) }
 }
 
-pub fn flush_tlb() {
-    // Currently this just always flips CR4.PGE back and forth to
-    // trigger a tlb flush. We should use a faster approach where
-    // available
-    let mut orig_cr4: u64;
-    unsafe {
-        asm!("mov {}, cr4", out(reg) orig_cr4);
-    }
-    let tmp_cr4: u64 = orig_cr4 ^ (1 << 7); // CR4.PGE
-    unsafe {
-        asm!(
-            "mov cr4, {}",
-            "mov cr4, {}",
-            in(reg) tmp_cr4,
-            in(reg) orig_cr4
-        );
+pub fn phys_to_virt(gpa: vmem::PhysAddr) -> Option<*mut u8> {
+    GuestMappingOperations::new().fallible_phys_to_virt(gpa)
+}
+
+/// Barriers that other code may need to use when updating page tables
+pub mod barrier {
+    /// Call this function when a virtual address has just been made
+    /// valid for the first time after the last tlb invalidate that
+    /// affected it, and it will be used for the first time in the
+    /// same execution context that made the modification.
+    ///
+    /// On most architectures, TLBs will not cache invalid entries, so
+    /// this does not need to issue a TLB invalidation. However, it
+    /// does need to ensure coherency between the previous writes and
+    /// any future uses by a page table walker.
+    ///
+    /// # Architecture-specific (amd64) notes
+    ///
+    /// The exact details around page walk coherency on amd64 seem a
+    /// bit fuzzy. The Intel manual notes that a serialising
+    /// instruction is necessary specifically to synchronise table
+    /// walks performed during instruction fetch [1], but is
+    /// relatively quiet about other page walks. The AMD manual notes
+    /// [2] that "a table entry is allowed to be upgraded (by marking
+    /// it as present, or by removing its write, execute or supervisor
+    /// restrictions) without explicitly maintaining TLB coherency",
+    /// but only states that any upper-level TLB cache entries
+    /// will be flushed before re-walking to confirm the fault, which
+    /// does not clearly seem strong enough.
+    ///
+    /// In some limited testing, `mfence` typically seems to be
+    /// enough, but as it is not a serializing instruction on Intel
+    /// platforms, we assume it may not be quite good enough. `cpuid`
+    /// is likely to be very slow, since we are definitely running
+    /// under a hypervisor (and often even nested). Currently, for
+    /// simplicity's sake, this just copies cr0 to itself, but other
+    /// options (including the `serialize` instruction where
+    /// available) could be worth exploring.
+    ///
+    /// [1] Intel 64 and IA-32 Architectures Software Developer's Manual, Volume 3: System Programming Guide
+    ///     Chapter 5: Paging
+    ///     §5.10: Caching Translation Information
+    ///     §5.10.4: Invalidation of TLBs and Paging-Structure Caches
+    ///     §5.10.4.3: Optional Invalidation
+    /// [2] AMD64 Architecture Programmer's Manual, Volume 2: System Programming
+    ///     Section 5: Page Translation and Protection
+    ///     §5.5: Translation-Lookaside Buffer
+    ///     §5.5.3: TLB Management
+    #[inline(always)]
+    pub fn first_valid_same_ctx() {
+        unsafe {
+            core::arch::asm!("
+                mov rax, cr0
+                mov cr0, rax
+            ", out("rax") _, out("rcx") _);
+        }
     }
 }
diff --git a/src/hyperlight_host/benches/benchmarks.rs b/src/hyperlight_host/benches/benchmarks.rs
index c9ee5cdfc..592d6042d 100644
--- a/src/hyperlight_host/benches/benchmarks.rs
+++ b/src/hyperlight_host/benches/benchmarks.rs
@@ -62,11 +62,13 @@ impl SandboxSize {
             Self::Medium => {
                 let mut cfg = SandboxConfiguration::default();
                 cfg.set_heap_size(MEDIUM_HEAP_SIZE);
+                cfg.set_scratch_size(0x50000);
                 Some(cfg)
             }
             Self::Large => {
                 let mut cfg = SandboxConfiguration::default();
                 cfg.set_heap_size(LARGE_HEAP_SIZE);
+                cfg.set_scratch_size(0x100000);
                 Some(cfg)
             }
         }
@@ -386,6 +388,7 @@ fn guest_call_benchmark_large_param(c: &mut Criterion) {
     let mut config = SandboxConfiguration::default();
     config.set_input_data_size(2 * SIZE + (1024 * 1024)); // 2 * SIZE + 1 MB, to allow 1MB for the rest of the serialized function call
     config.set_heap_size(SIZE as u64 * 15);
+    config.set_scratch_size(6 * SIZE + 4 * (1024 * 1024)); // Big enough for the IO data regions and enough of the heap to be used
 
     let sandbox = UninitializedSandbox::new(
         GuestBinary::FilePath(simple_guest_as_string().unwrap()),
@@ -397,7 +400,7 @@
         b.iter(|| {
             sandbox
                 .call::<()>("LargeParameters", (large_vec.clone(), large_string.clone()))
-                .unwrap()
+                .unwrap();
         });
     });
 
diff --git a/src/hyperlight_host/src/error.rs b/src/hyperlight_host/src/error.rs
index 1bca9e944..bb37c31db 100644
--- a/src/hyperlight_host/src/error.rs
+++ b/src/hyperlight_host/src/error.rs
@@ -160,6 +160,11 @@ pub enum HyperlightError {
     #[error("Memory requested {0} exceeds maximum size allowed {1}")]
     MemoryRequestTooBig(usize, usize),
 
+    /// The memory request is too small to contain everything that is
+    /// required
+    #[error("Memory requested {0} is smaller than the minimum size required {1}")]
+    MemoryRequestTooSmall(usize, usize),
+
     /// Metric Not Found.
     #[error("Metric Not Found {0:?}.")]
     MetricNotFound(&'static str),
@@ -368,6 +373,7 @@ impl HyperlightError {
             | HyperlightError::MemoryAllocationFailed(_)
             | HyperlightError::MemoryProtectionFailed(_)
             | HyperlightError::MemoryRequestTooBig(_, _)
+            | HyperlightError::MemoryRequestTooSmall(_, _)
             | HyperlightError::MetricNotFound(_)
             | HyperlightError::MmapFailed(_)
             | HyperlightError::MprotectFailed(_)
diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs
index f8285d425..91a91acdf 100644
--- a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs
+++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs
@@ -68,6 +68,7 @@ use crate::metrics::{METRIC_ERRONEOUS_VCPU_KICKS, METRIC_GUEST_CANCELLATION};
 use crate::sandbox::SandboxConfiguration;
 use crate::sandbox::host_funcs::FunctionRegistry;
 use crate::sandbox::outb::{HandleOutbError, handle_outb};
+use crate::sandbox::snapshot::NextAction;
 #[cfg(feature = "mem_profile")]
 use crate::sandbox::trace::MemTraceInfo;
 #[cfg(crashdump)]
@@ -85,7 +86,7 @@ pub(crate) struct HyperlightVm {
     #[cfg(not(gdb))]
     vm: Box,
     page_size: usize,
-    entrypoint: Option<u64>, // only present if this vm has not yet been initialised
+    entrypoint: NextAction, // Initialise(entrypoint) until this vm is initialised, then Call(dispatch fn)
     rsp_gva: u64,
 
     interrupt_handle: Arc,
@@ -103,6 +104,8 @@ pub(crate) struct HyperlightVm {
 
     mmap_regions: Vec<(u32, MemoryRegion)>, // Later mapped regions (slot number, region)
 
+    pending_tlb_flush: bool,
+
     #[cfg(gdb)]
     gdb_conn: Option>,
     #[cfg(gdb)]
@@ -120,6 +123,8 @@ pub enum DispatchGuestCallError {
     Run(#[from] RunVmError),
     #[error("Failed to setup registers: {0}")]
     SetupRegs(RegisterError),
+    #[error("VM was uninitialized")]
+    Uninitialized,
 }
 
 impl DispatchGuestCallError {
@@ -129,7 +134,7 @@ impl DispatchGuestCallError {
             // These errors poison the sandbox because they can leave it in an inconsistent state
             // by returning before the guest can unwind properly
             DispatchGuestCallError::Run(_) => true,
-            DispatchGuestCallError::SetupRegs(_) => false,
+            DispatchGuestCallError::SetupRegs(_) | DispatchGuestCallError::Uninitialized => false,
         }
     }
 
@@ -338,7 +343,7 @@ impl HyperlightVm {
         snapshot_mem: GuestSharedMemory,
         scratch_mem: GuestSharedMemory,
         _pml4_addr: u64,
-        entrypoint: Option<u64>,
+        entrypoint: NextAction,
         rsp_gva: u64,
         #[cfg_attr(target_os = "windows", allow(unused_variables))] config: &SandboxConfiguration,
         #[cfg(gdb)] gdb_conn: Option>,
@@ -418,6 +423,8 @@ impl HyperlightVm {
 
             mmap_regions: Vec::new(),
 
+            pending_tlb_flush: false,
+
             #[cfg(gdb)]
             gdb_conn,
             #[cfg(gdb)]
@@ -438,9 +445,9 @@ impl HyperlightVm {
             ret.send_dbg_msg(DebugResponse::InterruptHandle(ret.interrupt_handle.clone()))?;
             // Add breakpoint to the entry point address, if we are going to initialise
             ret.vm.set_debug(true).map_err(VmError::Debug)?;
-            if let Some(entrypoint) = entrypoint {
+            if let NextAction::Initialise(initialise) = entrypoint {
                 ret.vm
-                    .add_hw_breakpoint(entrypoint)
+                    .add_hw_breakpoint(initialise)
                     .map_err(CreateHyperlightVmError::AddHwBreakpoint)?;
             }
         }
@@ -462,7 +469,7 @@ impl HyperlightVm {
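Reviewer's aside, not part of the patch: a minimal standalone sketch of the state machine that `NextAction` appears to implement here, replacing the old `Option<u64>` entrypoint. The enum shape is an assumption (the real definition lives in `crate::sandbox::snapshot`, which is not shown in this diff); the transition mirrors what `initialise()` does in the next hunks.

```rust
/// Assumed shape of snapshot::NextAction; hypothetical, for illustration only.
#[derive(Debug, Clone, Copy, PartialEq)]
enum NextAction {
    /// VM not yet initialised: run the guest entrypoint at this GVA.
    Initialise(u64),
    /// VM initialised: dispatch guest calls via this function GVA.
    Call(u64),
}

fn main() {
    let mut entrypoint = NextAction::Initialise(0x1000);
    // After initialise() runs the guest, the guest returns the
    // dispatch pointer in rax and the host transitions the state:
    let rax = 0x2000;
    if let NextAction::Initialise(_) = entrypoint {
        entrypoint = NextAction::Call(rax);
    }
    assert_eq!(entrypoint, NextAction::Call(0x2000));
}
```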
         guest_max_log_level: Option,
         #[cfg(gdb)] dbg_mem_access_fn: Arc>>,
     ) -> std::result::Result<(), InitializeError> {
-        let Some(entrypoint) = self.entrypoint else {
+        let NextAction::Initialise(initialise) = self.entrypoint else {
             return Ok(());
         };
 
@@ -474,7 +481,7 @@ impl HyperlightVm {
         };
 
         let regs = CommonRegisters {
-            rip: entrypoint,
+            rip: initialise,
             // We usually keep the top of the stack 16-byte
             // aligned. However, the ABI requirement is that the stack
             // be aligned _before a call instruction_, which means
@@ -508,6 +515,7 @@ impl HyperlightVm {
             return Err(InitializeError::InvalidStackPointer(regs.rsp));
         }
         self.rsp_gva = regs.rsp;
+        self.entrypoint = NextAction::Call(regs.rax);
 
         Ok(())
     }
@@ -631,6 +639,16 @@ impl HyperlightVm {
         self.rsp_gva = gva;
     }
 
+    /// Get the current entrypoint action
+    pub(crate) fn get_entrypoint(&self) -> NextAction {
+        self.entrypoint
+    }
+
+    /// Set the current entrypoint action
+    pub(crate) fn set_entrypoint(&mut self, entrypoint: NextAction) {
+        self.entrypoint = entrypoint
+    }
+
     /// Dispatch a call from the host to the guest using the given pointer
     /// to the dispatch function _in the guest's address space_.
     ///
@@ -641,22 +659,32 @@ impl HyperlightVm {
     #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
     pub(crate) fn dispatch_call_from_host(
         &mut self,
-        dispatch_func_addr: RawPtr,
         mem_mgr: &mut SandboxMemoryManager,
         host_funcs: &Arc>,
         #[cfg(gdb)] dbg_mem_access_fn: Arc>>,
     ) -> std::result::Result<(), DispatchGuestCallError> {
+        let NextAction::Call(dispatch_func_addr) = self.entrypoint else {
+            return Err(DispatchGuestCallError::Uninitialized);
+        };
+        let mut rflags = 1 << 1; // RFLAGS.1 is RES1
+        if self.pending_tlb_flush {
+            rflags |= 1 << 6; // set ZF so the guest performs a TLB flush before anything else executes
+            self.pending_tlb_flush = false;
+        }
         // set RIP and RSP, reset others
         let regs = CommonRegisters {
-            rip: dispatch_func_addr.into(),
+            rip: dispatch_func_addr,
             // We usually keep the top of the stack 16-byte
-            // aligned. However, the ABI requirement is that the stack
-            // be aligned _before a call instruction_, which means
-            // that the stack needs to actually be ≡ 8 mod 16 at the
-            // first instruction (since, on x64, a call instruction
-            // automatically pushes a return address).
-            rsp: self.rsp_gva - 8,
-            rflags: 1 << 1,
+            // aligned. Since the usual ABI requirement is that the
+            // stack be aligned _before a call instruction_, one might
+            // expect that the stack pointer here needs to actually be
+            // ≡ 8 mod 16 at the first instruction (since, on x64, a
+            // call instruction automatically pushes a return
+            // address). However, the x64 entry stub in
+            // hyperlight_guest_bin::arch::amd64::dispatch handles
+            // this itself, so we do use the aligned address here.
+            rsp: self.rsp_gva,
+            rflags,
             ..Default::default()
         };
         self.vm
@@ -755,13 +783,13 @@ impl HyperlightVm {
             match exit_reason {
                 #[cfg(gdb)]
                 Ok(VmExit::Debug { dr6, exception }) => {
+                    let initialise = match self.entrypoint {
+                        NextAction::Initialise(initialise) => initialise,
+                        _ => 0,
+                    };
                     // Handle debug event (breakpoints)
-                    let stop_reason = arch::vcpu_stop_reason(
-                        self.vm.as_mut(),
-                        dr6,
-                        self.entrypoint.unwrap_or(0),
-                        exception,
-                    )?;
+                    let stop_reason =
+                        arch::vcpu_stop_reason(self.vm.as_mut(), dr6, initialise, exception)?;
                     if let Err(e) = self.handle_debug(dbg_mem_access_fn.clone(), stop_reason) {
                         break Err(e.into());
                     }
@@ -912,7 +940,7 @@ impl HyperlightVm {
     /// - Special registers (restored from snapshot, with CR3 updated to new page table location)
     // TODO: check if other state needs to be reset
     pub(crate) fn reset_vcpu(
-        &self,
+        &mut self,
         cr3: u64,
         sregs: &CommonSpecialRegisters,
     ) -> std::result::Result<(), RegisterError> {
@@ -929,6 +957,7 @@ impl HyperlightVm {
             // to point to the new (relocated) page tables
             let mut sregs = *sregs;
             sregs.cr3 = cr3;
+            self.pending_tlb_flush = true;
             self.vm.set_sregs(&sregs)?;
         }
         #[cfg(not(feature = "init-paging"))]
@@ -1133,6 +1162,11 @@ impl HyperlightVm {
                 .and_then(|name| name.to_os_string().into_string().ok())
         });
 
+        let initialise = match self.entrypoint {
+            NextAction::Initialise(initialise) => initialise,
+            _ => 0,
+        };
+
         // Include dynamically mapped regions
         // TODO: include the snapshot and scratch regions
         let regions: Vec = self.get_mapped_regions().cloned().collect();
@@ -1140,7 +1174,7 @@ impl HyperlightVm {
             regions,
             regs,
             xsave.to_vec(),
-            self.entrypoint.unwrap_or(0),
+            initialise,
             self.rt_cfg.binary_path.clone(),
             filename,
         )))
@@ -1471,7 +1505,6 @@ mod tests {
     use crate::mem::memory_region::{GuestMemoryRegion, MemoryRegionFlags};
     use crate::mem::mgr::{GuestPageTableBuffer, SandboxMemoryManager};
     use crate::mem::ptr::RawPtr;
-    use crate::mem::ptr_offset::Offset;
     use crate::mem::shared_mem::ExclusiveSharedMemory;
     use crate::sandbox::SandboxConfiguration;
     use crate::sandbox::host_funcs::FunctionRegistry;
@@ -2020,11 +2053,10 @@ mod tests {
        #[cfg(any(crashdump, gdb))]
        let rt_cfg: SandboxRuntimeConfig = Default::default();
 
-        let mut layout =
-            SandboxMemoryLayout::new(config, code.len(), 4096, 4096, 0x3000, None).unwrap();
+        let mut layout = SandboxMemoryLayout::new(config, code.len(), 4096, None).unwrap();
 
-        let pt_base_gpa = SandboxMemoryLayout::BASE_ADDRESS + layout.get_pt_offset();
-        let pt_buf = GuestPageTableBuffer::new(pt_base_gpa);
+        let pt_base_gpa = layout.get_pt_base_gpa();
+        let pt_buf = GuestPageTableBuffer::new(pt_base_gpa as usize);
 
        for rgn in layout
            .get_memory_regions_::(())
            .unwrap()
            .iter()
        {
            let readable = rgn.flags.contains(MemoryRegionFlags::READ);
-           let writable = rgn.flags.contains(MemoryRegionFlags::WRITE)
-               || rgn.flags.contains(MemoryRegionFlags::STACK_GUARD);
+           let writable = rgn.flags.contains(MemoryRegionFlags::WRITE);
            let executable = rgn.flags.contains(MemoryRegionFlags::EXECUTE);
            let mapping = Mapping {
                phys_base: rgn.guest_region.start as u64,
                virt_base: rgn.guest_region.start as u64,
                len: rgn.guest_region.len() as u64,
-               kind: MappingKind::BasicMapping(BasicMapping {
+               kind: MappingKind::Basic(BasicMapping {
                    readable,
                    writable,
                    executable,
                }),
            };
            unsafe { vmem::map(&pt_buf, mapping) };
        }
 
-        let mut pt_size_mapped = 0;
-        while pt_buf.size() > pt_size_mapped {
-            let mapping = Mapping {
-                phys_base: (pt_base_gpa + pt_size_mapped) as u64,
-                virt_base: (hyperlight_common::layout::SNAPSHOT_PT_GVA_MIN + pt_size_mapped) as u64,
-                len: (pt_buf.size() - pt_size_mapped) as u64,
-                kind: MappingKind::BasicMapping(BasicMapping {
-                    readable: true,
-                    writable: true,
-                    executable: false,
-                }),
-            };
-            unsafe { vmem::map(&pt_buf, mapping) };
-            pt_size_mapped = pt_buf.size();
-        }
-
         // Map the scratch region at the top of the address space
         let scratch_size = config.get_scratch_size();
         let scratch_gpa = hyperlight_common::layout::scratch_base_gpa(scratch_size);
@@ -2072,7 +2087,7 @@
             phys_base: scratch_gpa,
             virt_base: scratch_gva,
             len: scratch_size as u64,
-            kind: MappingKind::BasicMapping(BasicMapping {
+            kind: MappingKind::Basic(BasicMapping {
                 readable: true,
                 writable: true,
                 executable: true, // Match regular codepath (map_specials)
             }),
         };
         unsafe { vmem::map(&pt_buf, scratch_mapping) };
 
-        // Re-map page tables if they grew from scratch mapping
-        while pt_buf.size() > pt_size_mapped {
-            let mapping = Mapping {
-                phys_base: (pt_base_gpa + pt_size_mapped) as u64,
-                virt_base: (hyperlight_common::layout::SNAPSHOT_PT_GVA_MIN + pt_size_mapped) as u64,
-                len: (pt_buf.size() - pt_size_mapped) as u64,
-                kind: MappingKind::BasicMapping(BasicMapping {
-                    readable: true,
-                    writable: true,
-                    executable: false,
-                }),
-            };
-            unsafe { vmem::map(&pt_buf, mapping) };
-            pt_size_mapped = pt_buf.size();
-        }
-
         let pt_bytes = pt_buf.into_bytes();
-        layout.set_pt_size(pt_bytes.len());
+        layout.set_pt_size(pt_bytes.len()).unwrap();
 
         let mem_size = layout.get_memory_size().unwrap();
         let mut eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
-        eshm.copy_from_slice(&pt_bytes, layout.get_pt_offset())
-            .unwrap();
+        let snapshot_pt_start = mem_size - layout.get_pt_size();
+        eshm.copy_from_slice(&pt_bytes, snapshot_pt_start).unwrap();
         eshm.copy_from_slice(code, layout.get_guest_code_offset())
             .unwrap();
 
-        let load_addr = RawPtr::from(layout.get_guest_code_address() as u64);
         let scratch_mem = ExclusiveSharedMemory::new(config.get_scratch_size()).unwrap();
         let mut mem_mgr = SandboxMemoryManager::new(
             layout,
             eshm,
             scratch_mem,
-            load_addr,
-            Some(Offset::from(0u64)),
+            NextAction::Initialise(layout.get_guest_code_address() as u64),
         );
 
         mem_mgr.write_memory_layout().unwrap();
@@ -2176,7 +2173,7 @@
     fn reset_vcpu_simple() {
         // push rax; hlt - aligns stack to 16 bytes
         const CODE: [u8; 2] = [0x50, 0xf4];
-        let hyperlight_vm = hyperlight_vm(&CODE);
+        let mut hyperlight_vm = hyperlight_vm(&CODE);
         let available_hv = *get_available_hypervisor().as_ref().unwrap();
 
         // Get the initial CR3 value before dirtying sregs
@@ -2339,7 +2336,7 @@
         a.hlt().unwrap();
         let code = a.assemble(0).unwrap();
 
-        let hyperlight_vm = hyperlight_vm(&code);
+        let mut hyperlight_vm = hyperlight_vm(&code);
 
         // After run, check registers match expected dirty state
         let regs = hyperlight_vm.vm.regs().unwrap();
@@ -2441,7 +2438,7 @@
         a.hlt().unwrap();
         let code = a.assemble(0).unwrap();
 
-        let hyperlight_vm = hyperlight_vm(&code);
+        let mut hyperlight_vm = hyperlight_vm(&code);
 
         // After run, check FPU state matches expected dirty values
         let fpu = hyperlight_vm.vm.fpu().unwrap();
@@ -2526,7 +2523,7 @@
         a.hlt().unwrap();
         let code = a.assemble(0).unwrap();
 
-        let hyperlight_vm = hyperlight_vm(&code);
+        let mut hyperlight_vm = hyperlight_vm(&code);
 
         // Verify debug registers are dirty
         let debug_regs = hyperlight_vm.vm.debug_regs().unwrap();
@@ -2572,7 +2569,7 @@
         a.hlt().unwrap();
         let code = a.assemble(0).unwrap();
 
-        let hyperlight_vm = hyperlight_vm(&code);
+        let mut hyperlight_vm = hyperlight_vm(&code);
 
         // Get the initial CR3 value and expected defaults
         let initial_cr3 = hyperlight_vm.vm.sregs().unwrap().cr3;
@@ -2632,8 +2629,11 @@
         // Re-run from entrypoint (flag=1 means guest skips dirty phase, just does FXSAVE)
         // Use stack_top - 8 to match initialise()'s behavior (simulates call pushing return addr)
+        let NextAction::Call(rip) = ctx.ctx.vm.entrypoint else {
+            panic!("entrypoint should be call");
+        };
         let regs = CommonRegisters {
-            rip: ctx.ctx.vm.entrypoint.expect("entrypoint should be set"),
+            rip,
             rsp: ctx.stack_top_gva() - 8,
             rflags: 1 << 1,
             ..Default::default()
         };
@@ -2715,7 +2715,7 @@
             let mut fxsave = [0u8; 512];
             self.ctx
                 .hshm
-                .shared_mem
+                .scratch_mem
                 .copy_to_slice(&mut fxsave, self.fxsave_offset)
                 .unwrap();
             fxsave
@@ -2738,9 +2738,9 @@
         // These are in the output_data region which starts at a known offset.
         // We use a default SandboxConfiguration to get the same layout as create_test_vm_context.
         let config: SandboxConfiguration = Default::default();
-        let layout = SandboxMemoryLayout::new(config, 512, 4096, 4096, 0x3000, None).unwrap();
-        let fxsave_offset = layout.get_output_data_offset();
-        let fxsave_gva = (SandboxMemoryLayout::BASE_ADDRESS + fxsave_offset) as u64;
+        let layout = SandboxMemoryLayout::new(config, 512, 4096, None).unwrap();
+        let fxsave_offset = layout.get_output_data_buffer_scratch_host_offset();
+        let fxsave_gva = layout.get_output_data_buffer_gva();
         let flag_gva = fxsave_gva + 512;
 
         let mut a = CodeAssembler::new(64).unwrap();
@@ -2791,6 +2791,9 @@
         a.mov(rax, fxsave_gva).unwrap();
         a.fxsave(ptr(rax)).unwrap();
 
+        // Return dispatch ptr
+        a.mov(rax, layout.get_guest_code_address() as u64).unwrap();
+
         a.hlt().unwrap();
 
         let code = a.assemble(0).unwrap();
diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs
index 74d7834ee..b681acae3 100644
--- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs
+++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs
@@ -359,9 +359,12 @@ impl VirtualMachine for MshvVm {
 #[cfg(gdb)]
 impl DebuggableVm for MshvVm {
     fn translate_gva(&self, gva: u64) -> std::result::Result {
-        use mshv_bindings::{HV_TRANSLATE_GVA_VALIDATE_READ, HV_TRANSLATE_GVA_VALIDATE_WRITE};
+        use mshv_bindings::HV_TRANSLATE_GVA_VALIDATE_READ;
 
-        let flags = (HV_TRANSLATE_GVA_VALIDATE_READ | HV_TRANSLATE_GVA_VALIDATE_WRITE) as u64;
+        // Do not use HV_TRANSLATE_GVA_VALIDATE_WRITE, since many
+        // things that are interesting to debug are not in fact
+        // writable from the guest's point of view.
+ let flags = HV_TRANSLATE_GVA_VALIDATE_READ as u64; let (addr, _) = self .vcpu_fd .translate_gva(gva, flags) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index 94f415b7e..c22e97ba3 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -156,7 +156,6 @@ impl VirtualMachine for WhpVm { MemoryRegionFlags::READ => Ok(WHvMapGpaRangeFlagRead), MemoryRegionFlags::WRITE => Ok(WHvMapGpaRangeFlagWrite), MemoryRegionFlags::EXECUTE => Ok(WHvMapGpaRangeFlagExecute), - MemoryRegionFlags::STACK_GUARD => Ok(WHvMapGpaRangeFlagNone), _ => Err(MapMemoryError::InvalidFlags(format!( "Invalid memory region flag: {:?}", flag diff --git a/src/hyperlight_host/src/mem/layout.rs b/src/hyperlight_host/src/mem/layout.rs index 925067c21..cceb23f2f 100644 --- a/src/hyperlight_host/src/mem/layout.rs +++ b/src/hyperlight_host/src/mem/layout.rs @@ -27,10 +27,6 @@ limitations under the License. //! +-------------------------------------------+ //! | Guest Heap | //! +-------------------------------------------+ -//! | Output Data | -//! +-------------------------------------------+ -//! | Input Data | -//! +-------------------------------------------+ //! | PEB Struct | (HyperlightPEB size) //! +-------------------------------------------+ //! | Guest Code | @@ -46,17 +42,23 @@ limitations under the License. //! - `InitData` - some extra data that can be loaded onto the sandbox during //! initialization. //! -//! - `HostDefinitions` - the length of this is the `HostFunctionDefinitionSize` -//! field from `SandboxConfiguration` -//! -//! - `InputData` - this is a buffer that is used for input data to the host program. -//! the length of this field is `InputDataSize` from `SandboxConfiguration` -//! -//! - `OutputData` - this is a buffer that is used for output data from host program. -//! the length of this field is `OutputDataSize` from `SandboxConfiguration` -//! //! - `GuestHeap` - this is a buffer that is used for heap data in the guest. the length //! of this field is returned by the `heap_size()` method of this struct +//! +//! There is also a scratch region at the top of physical memory, +//! which is mostly laid out as a large undifferentiated blob of +//! memory, although at present the snapshot process specially +//! privileges the statically allocated input and output data regions: +//! +//! +-------------------------------------------+ (top of physical memory) +//! | Exception Stack, Metadata | +//! +-------------------------------------------+ (1 page below) +//! | Scratch Memory | +//! +-------------------------------------------+ +//! | Output Data | +//! +-------------------------------------------+ +//! | Input Data | +//! 
+-------------------------------------------+ (scratch size) use std::fmt::Debug; use std::mem::{offset_of, size_of}; @@ -64,15 +66,15 @@ use std::mem::{offset_of, size_of}; use hyperlight_common::mem::{GuestMemoryRegion, HyperlightPEB, PAGE_SIZE_USIZE}; use tracing::{Span, instrument}; -#[cfg(feature = "init-paging")] -use super::memory_region::MemoryRegionType::PageTables; -use super::memory_region::MemoryRegionType::{Code, Heap, InitData, InputData, OutputData, Peb}; +use super::memory_region::MemoryRegionType::{Code, Heap, InitData, Peb}; use super::memory_region::{ DEFAULT_GUEST_BLOB_MEM_FLAGS, MemoryRegion_, MemoryRegionFlags, MemoryRegionKind, MemoryRegionVecBuilder, }; use super::shared_mem::{ExclusiveSharedMemory, SharedMemory}; -use crate::error::HyperlightError::{GuestOffsetIsInvalid, MemoryRequestTooBig}; +use crate::error::HyperlightError::{ + GuestOffsetIsInvalid, MemoryRequestTooBig, MemoryRequestTooSmall, +}; use crate::sandbox::SandboxConfiguration; use crate::{Result, new_error}; @@ -86,19 +88,13 @@ pub(crate) struct SandboxMemoryLayout { /// The following fields are offsets to the actual PEB struct fields. /// They are used when writing the PEB struct itself peb_offset: usize, - peb_guest_dispatch_function_ptr_offset: usize, // set by guest in guest entrypoint peb_input_data_offset: usize, peb_output_data_offset: usize, peb_init_data_offset: usize, peb_heap_data_offset: usize, - // The following are the actual values - // that are written to the PEB struct - pub(super) input_data_buffer_offset: usize, - pub(super) output_data_buffer_offset: usize, guest_heap_buffer_offset: usize, init_data_offset: usize, - pt_offset: usize, pt_size: Option, // other @@ -129,10 +125,6 @@ impl Debug for SandboxMemoryLayout { .field("PEB Address", &format_args!("{:#x}", self.peb_address)) .field("PEB Offset", &format_args!("{:#x}", self.peb_offset)) .field("Code Size", &format_args!("{:#x}", self.code_size)) - .field( - "Guest Dispatch Function Pointer Offset", - &format_args!("{:#x}", self.peb_guest_dispatch_function_ptr_offset), - ) .field( "Input Data Offset", &format_args!("{:#x}", self.peb_input_data_offset), @@ -149,14 +141,6 @@ impl Debug for SandboxMemoryLayout { "Guest Heap Offset", &format_args!("{:#x}", self.peb_heap_data_offset), ) - .field( - "Input Data Buffer Offset", - &format_args!("{:#x}", self.input_data_buffer_offset), - ) - .field( - "Output Data Buffer Offset", - &format_args!("{:#x}", self.output_data_buffer_offset), - ) .field( "Guest Heap Buffer Offset", &format_args!("{:#x}", self.guest_heap_buffer_offset), @@ -165,7 +149,6 @@ impl Debug for SandboxMemoryLayout { "Init Data Offset", &format_args!("{:#x}", self.init_data_offset), ) - .field("PT Offset", &format_args!("{:#x}", self.pt_offset)) .field("PT Size", &format_args!("{:#x}", self.pt_size.unwrap_or(0))) .field( "Guest Code Offset", @@ -181,9 +164,14 @@ impl Debug for SandboxMemoryLayout { impl SandboxMemoryLayout { /// The maximum amount of memory a single sandbox will be allowed. - /// The addressable virtual memory with current paging setup is virtual address 0x0 - 0x40000000 (excl.), - /// However, the memory up to Self::BASE_ADDRESS is not used. - const MAX_MEMORY_SIZE: usize = 0x40000000 - Self::BASE_ADDRESS; + /// + /// Currently, both the scratch region and the snapshot region are + /// bounded by this size. 
The current value is essentially + /// arbitrary and chosen for historical reasons; the modern + /// sandbox virtual memory layout can support much more, so this + /// could be increased should use cases for larger sandboxes + /// arise. + const MAX_MEMORY_SIZE: usize = 0x4000_0000 - Self::BASE_ADDRESS; /// The base address of the sandbox's memory. #[cfg(feature = "init-paging")] @@ -192,7 +180,7 @@ impl SandboxMemoryLayout { pub(crate) const BASE_ADDRESS: usize = 0x0; // the offset into a sandbox's input/output buffer where the stack starts - const STACK_POINTER_SIZE_BYTES: u64 = 8; + pub(crate) const STACK_POINTER_SIZE_BYTES: u64 = 8; /// Create a new `SandboxMemoryLayout` with the given /// `SandboxConfiguration`, code size and stack/heap size. @@ -200,16 +188,25 @@ pub(crate) fn new( cfg: SandboxConfiguration, code_size: usize, - heap_size: usize, - scratch_size: usize, init_data_size: usize, init_data_permissions: Option<MemoryRegionFlags>, ) -> Result<Self> { + let heap_size = usize::try_from(cfg.get_heap_size())?; + let scratch_size = cfg.get_scratch_size(); + if scratch_size > Self::MAX_MEMORY_SIZE { + return Err(MemoryRequestTooBig(scratch_size, Self::MAX_MEMORY_SIZE)); + } + let min_scratch_size = hyperlight_common::layout::min_scratch_size( + cfg.get_input_data_size(), + cfg.get_output_data_size(), + ); + if scratch_size < min_scratch_size { + return Err(MemoryRequestTooSmall(scratch_size, min_scratch_size)); + } + let guest_code_offset = 0; // The following offsets are to the fields of the PEB struct itself! let peb_offset = code_size.next_multiple_of(PAGE_SIZE_USIZE); - let peb_guest_dispatch_function_ptr_offset = - peb_offset + offset_of!(HyperlightPEB, guest_function_dispatch_ptr); let peb_input_data_offset = peb_offset + offset_of!(HyperlightPEB, input_stack); let peb_output_data_offset = peb_offset + offset_of!(HyperlightPEB, output_stack); let peb_init_data_offset = peb_offset + offset_of!(HyperlightPEB, init_data); @@ -218,38 +215,29 @@ // The following offsets are the actual values that relate to memory layout, // which are written to PEB struct let peb_address = Self::BASE_ADDRESS + peb_offset; - // make sure input data buffer starts at 4K boundary - let input_data_buffer_offset = (peb_heap_data_offset + size_of::<GuestMemoryRegion>()) .next_multiple_of(PAGE_SIZE_USIZE); - let output_data_buffer_offset = (input_data_buffer_offset + cfg.get_input_data_size()) .next_multiple_of(PAGE_SIZE_USIZE); // make sure heap buffer starts at 4K boundary - let guest_heap_buffer_offset = (output_data_buffer_offset + cfg.get_output_data_size()) + let guest_heap_buffer_offset = (peb_heap_data_offset + size_of::<GuestMemoryRegion>()) .next_multiple_of(PAGE_SIZE_USIZE); + // make sure init data starts at 4K boundary let init_data_offset = (guest_heap_buffer_offset + heap_size).next_multiple_of(PAGE_SIZE_USIZE); - let pt_offset = (init_data_offset + init_data_size).next_multiple_of(PAGE_SIZE_USIZE); Ok(Self { peb_offset, heap_size, - peb_guest_dispatch_function_ptr_offset, peb_input_data_offset, peb_output_data_offset, peb_init_data_offset, peb_heap_data_offset, sandbox_memory_config: cfg, code_size, - input_data_buffer_offset, - output_data_buffer_offset, guest_heap_buffer_offset, peb_address, guest_code_offset, init_data_offset, init_data_size, init_data_permissions, - pt_offset, pt_size: None, scratch_size, }) @@ -290,11 +278,18 @@ impl SandboxMemoryLayout { self.get_init_data_size_offset() + size_of::<u64>() } - /// Get the offset in guest memory to the start of output data. 
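// Sizing sketch (illustrative values, hypothetical host code): a host that
// wants custom I/O buffers can derive a valid scratch size from the same
// helper the validation above uses:
//
//     let mut cfg = SandboxConfiguration::default();
//     cfg.set_input_data_size(64 * 1024);
//     cfg.set_output_data_size(64 * 1024);
//     let min = hyperlight_common::layout::min_scratch_size(
//         cfg.get_input_data_size(),
//         cfg.get_output_data_size(),
//     );
//     cfg.set_scratch_size(min + 0x10000); // headroom for page tables etc.
//
// Anything smaller is rejected with MemoryRequestTooSmall; anything larger
// than MAX_MEMORY_SIZE with MemoryRequestTooBig.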
+ /// Get the guest virtual address of the start of output data. + #[instrument(skip_all, parent = Span::current(), level= "Trace")] + pub(crate) fn get_output_data_buffer_gva(&self) -> u64 { + hyperlight_common::layout::scratch_base_gva(self.scratch_size) + + self.sandbox_memory_config.get_input_data_size() as u64 + } + + /// Get the offset into the host scratch buffer of the start of + /// the output data. #[instrument(skip_all, parent = Span::current(), level= "Trace")] - #[cfg(test)] - pub(crate) fn get_output_data_offset(&self) -> usize { - self.output_data_buffer_offset + pub(crate) fn get_output_data_buffer_scratch_host_offset(&self) -> usize { + self.sandbox_memory_config.get_input_data_size() } /// Get the offset in guest memory to the input data size. @@ -312,11 +307,41 @@ impl SandboxMemoryLayout { self.get_input_data_size_offset() + size_of::() } - /// Get the offset in guest memory to where the guest dispatch function - /// pointer is written + /// Get the guest virtual address of the start of input data + #[instrument(skip_all, parent = Span::current(), level= "Trace")] + fn get_input_data_buffer_gva(&self) -> u64 { + hyperlight_common::layout::scratch_base_gva(self.scratch_size) + } + + /// Get the offset into the host scratch buffer of the start of + /// the input data #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn get_dispatch_function_pointer_offset(&self) -> usize { - self.peb_guest_dispatch_function_ptr_offset + pub(crate) fn get_input_data_buffer_scratch_host_offset(&self) -> usize { + 0 + } + + /// Get the offset from the beginning of the scratch region to the + /// location where page tables will be eagerly copied on restore + #[instrument(skip_all, parent = Span::current(), level= "Trace")] + pub(crate) fn get_pt_base_scratch_offset(&self) -> usize { + (self.sandbox_memory_config.get_input_data_size() + + self.sandbox_memory_config.get_output_data_size()) + .next_multiple_of(hyperlight_common::vmem::PAGE_SIZE) + } + + /// Get the base GPA to which the page tables will be eagerly + /// copied on restore + #[instrument(skip_all, parent = Span::current(), level= "Trace")] + pub(crate) fn get_pt_base_gpa(&self) -> u64 { + hyperlight_common::layout::scratch_base_gpa(self.scratch_size) + + self.get_pt_base_scratch_offset() as u64 + } + + /// Get the first GPA of the scratch region that the host hasn't + /// used for something else + #[instrument(skip_all, parent = Span::current(), level= "Trace")] + pub(crate) fn get_first_free_scratch_gpa(&self) -> u64 { + self.get_pt_base_gpa() + self.pt_size.unwrap_or(0) as u64 } /// Get the offset in guest memory to the heap size @@ -337,7 +362,7 @@ impl SandboxMemoryLayout { /// layout. 
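// Putting the accessors above together, the host-side view of the scratch
// region, from its base, is:
//
//     offset 0                               -> input data buffer
//     offset input_data_size                 -> output data buffer
//     offset (input + output, page-aligned)  -> page tables copied on restore
//     get_first_free_scratch_gpa() onwards   -> free for the guest allocator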
#[instrument(skip_all, parent = Span::current(), level= "Trace")] fn get_unaligned_memory_size(&self) -> usize { - self.pt_offset + self.pt_size.unwrap_or(0) + self.init_data_offset + self.init_data_size } /// get the code offset @@ -374,16 +399,25 @@ } } - /// Get the offset into the snapshot region of the page tables + /// Sets the size of the memory region used for page tables #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(crate) fn get_pt_offset(&self) -> usize { - self.pt_offset + pub(crate) fn set_pt_size(&mut self, size: usize) -> Result<()> { + let min_fixed_scratch = hyperlight_common::layout::min_scratch_size( + self.sandbox_memory_config.get_input_data_size(), + self.sandbox_memory_config.get_output_data_size(), + ); + let min_scratch = min_fixed_scratch + size; + if self.scratch_size < min_scratch { + return Err(MemoryRequestTooSmall(self.scratch_size, min_scratch)); + } + self.pt_size = Some(size); + Ok(()) } - /// Sets the size of the memory region used for page tables + /// Get the size of the memory region used for page tables #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(crate) fn set_pt_size(&mut self, size: usize) { - self.pt_size = Some(size); + pub(crate) fn get_pt_size(&mut self) -> usize { + self.pt_size.unwrap_or(0) } /// Returns the memory regions associated with this memory layout, @@ -413,47 +447,12 @@ } // PEB - let input_data_offset = builder.push_page_aligned( + let heap_offset = builder.push_page_aligned( size_of::<HyperlightPEB>(), MemoryRegionFlags::READ | MemoryRegionFlags::WRITE, Peb, ); - let expected_input_data_offset = TryInto::<usize>::try_into(self.input_data_buffer_offset)?; - - if input_data_offset != expected_input_data_offset { - return Err(new_error!( - "Input Data offset does not match expected Input Data offset expected: {}, actual: {}", - expected_input_data_offset, - input_data_offset - )); - } - - // guest input data - let output_data_offset = builder.push_page_aligned( - self.sandbox_memory_config.get_input_data_size(), - MemoryRegionFlags::READ | MemoryRegionFlags::WRITE, - InputData, - ); - - let expected_output_data_offset = - TryInto::<usize>::try_into(self.output_data_buffer_offset)?; - - if output_data_offset != expected_output_data_offset { - return Err(new_error!( - "Output Data offset does not match expected Output Data offset expected: {}, actual: {}", - expected_output_data_offset, - output_data_offset - )); - } - - // guest output data - let heap_offset = builder.push_page_aligned( - self.sandbox_memory_config.get_output_data_size(), - MemoryRegionFlags::READ | MemoryRegionFlags::WRITE, - OutputData, - ); - let expected_heap_offset = TryInto::<usize>::try_into(self.guest_heap_buffer_offset)?; if heap_offset != expected_heap_offset { @@ -498,31 +497,6 @@ init_data_offset }; - #[cfg(feature = "init-paging")] - // page tables - let final_offset = { - let expected_pt_offset = TryInto::<usize>::try_into(self.pt_offset)?; - - if after_init_offset != expected_pt_offset { - return Err(new_error!( - "Page table offset does not match expected: {}, actual: {}", - expected_pt_offset, - after_init_offset - )); - } - - if let Some(pt_size) = self.pt_size { - builder.push_page_aligned( - pt_size, - MemoryRegionFlags::READ | MemoryRegionFlags::WRITE, - PageTables, - ) - } else { - after_init_offset - } - }; - - #[cfg(not(feature = "init-paging"))] let final_offset = after_init_offset; let expected_final_offset = 
TryInto::<usize>::try_into(self.get_memory_size()?)?; @@ -584,8 +558,10 @@ impl SandboxMemoryLayout { .get_input_data_size() .try_into()?, )?; - let addr = get_address!(input_data_buffer_offset); - shared_mem.write_u64(self.get_input_data_pointer_offset(), addr)?; + shared_mem.write_u64( + self.get_input_data_pointer_offset(), + self.get_input_data_buffer_gva(), + )?; // Set up output buffer pointer shared_mem.write_u64( @@ -594,8 +570,10 @@ .get_output_data_size() .try_into()?, )?; - let addr = get_address!(output_data_buffer_offset); - shared_mem.write_u64(self.get_output_data_pointer_offset(), addr)?; + shared_mem.write_u64( + self.get_output_data_pointer_offset(), + self.get_output_data_buffer_gva(), + )?; // Set up init data pointer shared_mem.write_u64( @@ -612,17 +590,10 @@ // End of setting up the PEB - // Initialize the stack pointers of input data and output data - // to point to the ninth (index 8) byte, which is the first free address - // of the each respective stack. The first 8 bytes are the stack pointer itself. - shared_mem.write_u64( - self.input_data_buffer_offset, - Self::STACK_POINTER_SIZE_BYTES, - )?; - shared_mem.write_u64( - self.output_data_buffer_offset, - Self::STACK_POINTER_SIZE_BYTES, - )?; + // The input and output data regions do not have their layout + // initialised here, because they are in the scratch + // region; they are instead set in + // [`SandboxMemoryManager::update_scratch_bookkeeping`]. Ok(()) } @@ -636,17 +607,12 @@ mod tests { // helper func for testing fn get_expected_memory_size(layout: &SandboxMemoryLayout) -> usize { - let cfg = layout.sandbox_memory_config; let mut expected_size = 0; // in order of layout expected_size += layout.code_size; expected_size += size_of::<HyperlightPEB>().next_multiple_of(PAGE_SIZE_USIZE); - expected_size += cfg.get_input_data_size().next_multiple_of(PAGE_SIZE_USIZE); - - expected_size += cfg.get_output_data_size().next_multiple_of(PAGE_SIZE_USIZE); - expected_size += layout.heap_size.next_multiple_of(PAGE_SIZE_USIZE); expected_size @@ -655,8 +621,7 @@ #[test] fn test_get_memory_size() { let sbox_cfg = SandboxConfiguration::default(); - let sbox_mem_layout = - SandboxMemoryLayout::new(sbox_cfg, 4096, 4096, 0x3000, 0, None).unwrap(); + let sbox_mem_layout = SandboxMemoryLayout::new(sbox_cfg, 4096, 0, None).unwrap(); assert_eq!( sbox_mem_layout.get_memory_size().unwrap(), get_expected_memory_size(&sbox_mem_layout) diff --git a/src/hyperlight_host/src/mem/memory_region.rs b/src/hyperlight_host/src/mem/memory_region.rs index dbe83f1b0..a2b37e982 100644 --- a/src/hyperlight_host/src/mem/memory_region.rs +++ b/src/hyperlight_host/src/mem/memory_region.rs @@ -48,8 +48,6 @@ bitflags! 
{ const WRITE = 2; /// allow guest to execute const EXECUTE = 4; - /// identifier that this is a stack guard page - const STACK_GUARD = 8; } } @@ -118,20 +116,12 @@ impl TryFrom for MemoryRegionFlags { #[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)] /// The type of memory region pub enum MemoryRegionType { - /// The region contains the guest's page tables - PageTables, /// The region contains the guest's code Code, /// The region contains the guest's init data InitData, /// The region contains the PEB Peb, - /// The region contains the Host Function Definitions - HostFunctionDefinitions, - /// The region contains the Input Data - InputData, - /// The region contains the Output Data - OutputData, /// The region contains the Heap Heap, /// The region contains the Guard Page diff --git a/src/hyperlight_host/src/mem/mgr.rs b/src/hyperlight_host/src/mem/mgr.rs index ea390e8b2..8691df380 100644 --- a/src/hyperlight_host/src/mem/mgr.rs +++ b/src/hyperlight_host/src/mem/mgr.rs @@ -24,11 +24,9 @@ use tracing::{Span, instrument}; use super::layout::SandboxMemoryLayout; use super::memory_region::MemoryRegion; -use super::ptr::{GuestPtr, RawPtr}; -use super::ptr_offset::Offset; use super::shared_mem::{ExclusiveSharedMemory, GuestSharedMemory, HostSharedMemory, SharedMemory}; use crate::hypervisor::regs::CommonSpecialRegisters; -use crate::sandbox::snapshot::Snapshot; +use crate::sandbox::snapshot::{NextAction, Snapshot}; use crate::{Result, new_error}; /// A struct that is responsible for laying out and managing the memory @@ -41,10 +39,8 @@ pub(crate) struct SandboxMemoryManager { pub(crate) scratch_mem: S, /// The memory layout of the underlying shared memory pub(crate) layout: SandboxMemoryLayout, - /// Pointer to where to load memory from - pub(crate) load_addr: RawPtr, /// Offset for the execution entrypoint from `load_addr` - pub(crate) entrypoint_offset: Option, + pub(crate) entrypoint: NextAction, /// How many memory regions were mapped after sandbox creation pub(crate) mapped_rgns: u64, /// Buffer for accumulating guest abort messages @@ -128,10 +124,8 @@ impl GuestPageTableBuffer { } } - pub(crate) fn phys_base(&self) -> usize { - self.phys_base - } - + #[cfg(test)] + #[allow(dead_code)] pub(crate) fn size(&self) -> usize { self.buffer.borrow().len() } @@ -151,15 +145,13 @@ where layout: SandboxMemoryLayout, shared_mem: S, scratch_mem: S, - load_addr: RawPtr, - entrypoint_offset: Option, + entrypoint: NextAction, ) -> Self { Self { layout, shared_mem, scratch_mem, - load_addr, - entrypoint_offset, + entrypoint, mapped_rgns: 0, abort_buffer: Vec::new(), } @@ -184,6 +176,7 @@ where root_pt_gpa: u64, rsp_gva: u64, sregs: CommonSpecialRegisters, + entrypoint: NextAction, ) -> Result { Snapshot::new( &mut self.shared_mem, @@ -195,6 +188,7 @@ where root_pt_gpa, rsp_gva, sregs, + entrypoint, ) } } @@ -205,16 +199,8 @@ impl SandboxMemoryManager { let mut shared_mem = ExclusiveSharedMemory::new(s.mem_size())?; shared_mem.copy_from_slice(s.memory(), 0)?; let scratch_mem = ExclusiveSharedMemory::new(s.layout().get_scratch_size())?; - let load_addr: RawPtr = RawPtr::try_from(layout.get_guest_code_address())?; - let entrypoint_gva = s.preinitialise(); - let entrypoint_offset = entrypoint_gva.map(|x| (x - u64::from(&load_addr)).into()); - Ok(Self::new( - layout, - shared_mem, - scratch_mem, - load_addr, - entrypoint_offset, - )) + let entrypoint = s.entrypoint(); + Ok(Self::new(layout, shared_mem, scratch_mem, entrypoint)) } /// Write memory layout @@ -250,8 +236,7 @@ impl SandboxMemoryManager { 
shared_mem: hshm, scratch_mem: hscratch, layout: self.layout, - load_addr: self.load_addr.clone(), - entrypoint_offset: self.entrypoint_offset, + entrypoint: self.entrypoint, mapped_rgns: self.mapped_rgns, abort_buffer: self.abort_buffer, }; @@ -259,38 +244,21 @@ shared_mem: gshm, scratch_mem: gscratch, layout: self.layout, - load_addr: self.load_addr.clone(), - entrypoint_offset: self.entrypoint_offset, + entrypoint: self.entrypoint, mapped_rgns: self.mapped_rgns, abort_buffer: Vec::new(), // Guest doesn't need abort buffer }; - host_mgr.update_scratch_bookkeeping( - (SandboxMemoryLayout::BASE_ADDRESS + self.layout.get_pt_offset()) as u64, - )?; + host_mgr.update_scratch_bookkeeping()?; Ok((host_mgr, guest_mgr)) } } impl SandboxMemoryManager<HostSharedMemory> { - /// Get the address of the dispatch function in memory - #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - pub(crate) fn get_pointer_to_dispatch_function(&self) -> Result<u64> { - let guest_dispatch_function_ptr = self - .shared_mem - .read::<u64>(self.layout.get_dispatch_function_pointer_offset())?; - - // This pointer is written by the guest library but is accessible to - // the guest engine so we should bounds check it before we return it. - - let guest_ptr = GuestPtr::try_from(RawPtr::from(guest_dispatch_function_ptr))?; - guest_ptr.absolute() - } - /// Reads a host function call from memory #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn get_host_function_call(&mut self) -> Result<FunctionCall> { - self.shared_mem.try_pop_buffer_into::<FunctionCall>( - self.layout.output_data_buffer_offset, + self.scratch_mem.try_pop_buffer_into::<FunctionCall>( + self.layout.get_output_data_buffer_scratch_host_offset(), self.layout.sandbox_memory_config.get_output_data_size(), ) } @@ -304,8 +272,8 @@ let mut builder = FlatBufferBuilder::new(); let data = res.encode(&mut builder); - self.shared_mem.push_buffer( - self.layout.input_data_buffer_offset, + self.scratch_mem.push_buffer( + self.layout.get_input_data_buffer_scratch_host_offset(), self.layout.sandbox_memory_config.get_input_data_size(), data, ) @@ -321,19 +289,20 @@ ) })?; - self.shared_mem.push_buffer( - self.layout.input_data_buffer_offset, + self.scratch_mem.push_buffer( + self.layout.get_input_data_buffer_scratch_host_offset(), self.layout.sandbox_memory_config.get_input_data_size(), buffer, - ) + )?; + Ok(()) } /// Reads a function call result from memory. /// A function call result can be either an error or a successful return value. 
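/// A typical host-side round trip through these buffers looks like the
/// following (illustrative, simplified):
///
/// ```ignore
/// mgr.write_guest_function_call(buffer)?;              // push onto the input stack
/// // ... run the guest, which pushes its result onto the output stack ...
/// let result = mgr.get_guest_function_call_result()?;  // pop it back off
/// ```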
#[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn get_guest_function_call_result(&mut self) -> Result<ReturnValue> { - self.shared_mem.try_pop_buffer_into::<ReturnValue>( - self.layout.output_data_buffer_offset, + self.scratch_mem.try_pop_buffer_into::<ReturnValue>( + self.layout.get_output_data_buffer_scratch_host_offset(), self.layout.sandbox_memory_config.get_output_data_size(), ) } @@ -341,8 +310,8 @@ /// Read guest log data from the `SharedMemory` contained within `self` #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn read_guest_log_data(&mut self) -> Result<GuestLogData> { - self.shared_mem.try_pop_buffer_into::<GuestLogData>( - self.layout.output_data_buffer_offset, + self.scratch_mem.try_pop_buffer_into::<GuestLogData>( + self.layout.get_output_data_buffer_scratch_host_offset(), self.layout.sandbox_memory_config.get_output_data_size(), ) } @@ -350,8 +319,8 @@ pub(crate) fn clear_io_buffers(&mut self) { // Clear the output data buffer loop { - let Ok(_) = self.shared_mem.try_pop_buffer_into::<Vec<u8>>( - self.layout.output_data_buffer_offset, + let Ok(_) = self.scratch_mem.try_pop_buffer_into::<Vec<u8>>( + self.layout.get_output_data_buffer_scratch_host_offset(), self.layout.sandbox_memory_config.get_output_data_size(), ) else { break; @@ -359,8 +328,8 @@ } // Clear the input data buffer loop { - let Ok(_) = self.shared_mem.try_pop_buffer_into::<Vec<u8>>( - self.layout.input_data_buffer_offset, + let Ok(_) = self.scratch_mem.try_pop_buffer_into::<Vec<u8>>( + self.layout.get_input_data_buffer_scratch_host_offset(), self.layout.sandbox_memory_config.get_input_data_size(), ) else { break; @@ -398,29 +367,47 @@ Some(gscratch) }; - self.update_scratch_bookkeeping(snapshot.root_pt_gpa())?; + self.layout = *snapshot.layout(); + self.update_scratch_bookkeeping()?; Ok((gsnapshot, gscratch)) } - fn update_scratch_bookkeeping(&mut self, snapshot_pt_base_gpa: u64) -> Result<()> { + #[inline] + fn update_scratch_bookkeeping_item(&mut self, offset: u64, value: u64) -> Result<()> { let scratch_size = self.scratch_mem.mem_size(); + let base_offset = scratch_size - offset as usize; + self.scratch_mem.write::<u64>(base_offset, value) } - let size_offset = - scratch_size - hyperlight_common::layout::SCRATCH_TOP_SIZE_OFFSET as usize; - self.scratch_mem - .write::<u64>(size_offset, scratch_size as u64)?; + fn update_scratch_bookkeeping(&mut self) -> Result<()> { + use hyperlight_common::layout::*; + let scratch_size = self.scratch_mem.mem_size(); + self.update_scratch_bookkeeping_item(SCRATCH_TOP_SIZE_OFFSET, scratch_size as u64)?; + self.update_scratch_bookkeeping_item( + SCRATCH_TOP_ALLOCATOR_OFFSET, + self.layout.get_first_free_scratch_gpa(), + )?; - let alloc_offset = - scratch_size - hyperlight_common::layout::SCRATCH_TOP_ALLOCATOR_OFFSET as usize; + // Initialise the guest input and output data buffers in + // scratch memory. TODO: remove the need for this. 
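+ // (Each buffer starts with an 8-byte stack pointer: writing
+ // STACK_POINTER_SIZE_BYTES, i.e. 8, marks byte 8 as the first free
+ // address of an empty stack, the same convention the removed
+ // PEB-side initialisation in layout.rs used.)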
self.scratch_mem.write::<u64>( - alloc_offset, - hyperlight_common::layout::scratch_base_gpa(scratch_size), + self.layout.get_input_data_buffer_scratch_host_offset(), + SandboxMemoryLayout::STACK_POINTER_SIZE_BYTES, + )?; + self.scratch_mem.write::<u64>( + self.layout.get_output_data_buffer_scratch_host_offset(), + SandboxMemoryLayout::STACK_POINTER_SIZE_BYTES, )?; - let snapshot_pt_base_gpa_offset = scratch_size - - hyperlight_common::layout::SCRATCH_TOP_SNAPSHOT_PT_GPA_BASE_OFFSET as usize; - self.scratch_mem - .write::<u64>(snapshot_pt_base_gpa_offset, snapshot_pt_base_gpa)?; + // Copy the page tables into the scratch region + let snapshot_pt_end = self.shared_mem.mem_size(); + let snapshot_pt_start = snapshot_pt_end - self.layout.get_pt_size(); + self.shared_mem.with_exclusivity(|snap| { + self.scratch_mem.with_exclusivity(|scratch| { + let bytes = &snap.as_slice()[snapshot_pt_start..snapshot_pt_end]; + scratch.copy_from_slice(bytes, self.layout.get_pt_base_scratch_offset()) + }) + })???; Ok(()) } @@ -429,7 +416,7 @@ #[cfg(test)] #[cfg(all(feature = "init-paging", target_arch = "x86_64"))] mod tests { - use hyperlight_common::vmem::PAGE_TABLE_SIZE; + use hyperlight_common::vmem::{MappingKind, PAGE_TABLE_SIZE}; use hyperlight_testing::sandbox_sizes::{LARGE_HEAP_SIZE, MEDIUM_HEAP_SIZE, SMALL_HEAP_SIZE}; use hyperlight_testing::simple_guest_as_string; @@ -472,15 +459,22 @@ // Verify identity mapping (phys == virt for low memory) assert_eq!( - mapping.1, addr, + mapping.phys_base, addr, "{}: {:?} region: address 0x{:x} should identity map, got phys 0x{:x}", - name, region.region_type, addr, mapping.1 + name, region.region_type, addr, mapping.phys_base ); + // Verify kind is Basic + let MappingKind::Basic(bm) = mapping.kind else { + panic!( + "{}: {:?} region: address 0x{:x} should be kind basic, got {:?}", + name, region.region_type, addr, mapping.kind + ); + }; + // Verify writable - let actual = mapping.2.writable; - let expected = region.flags.contains(MemoryRegionFlags::WRITE) - || region.flags.contains(MemoryRegionFlags::STACK_GUARD); + let actual = bm.writable; + let expected = region.flags.contains(MemoryRegionFlags::WRITE); assert_eq!( actual, expected, "{}: {:?} region: address 0x{:x} has writable {}, expected {} (region flags: {:?})", @@ -488,7 +482,7 @@ ); // Verify executable - let actual = mapping.2.executable; + let actual = bm.executable; let expected = region.flags.contains(MemoryRegionFlags::EXECUTE); assert_eq!( actual, expected, @@ -518,6 +512,7 @@ ("large (256MB heap)", { let mut cfg = SandboxConfiguration::default(); cfg.set_heap_size(LARGE_HEAP_SIZE); + cfg.set_scratch_size(0x100000); cfg }), ]; diff --git a/src/hyperlight_host/src/mem/shared_mem.rs b/src/hyperlight_host/src/mem/shared_mem.rs index cae355aae..ae9545e59 100644 --- a/src/hyperlight_host/src/mem/shared_mem.rs +++ b/src/hyperlight_host/src/mem/shared_mem.rs @@ -169,6 +169,8 @@ unsafe impl Send for GuestSharedMemory {} /// communication buffers, allowing it to be used concurrently with a /// GuestSharedMemory. /// +/// # Concurrency model +/// /// Given future requirements for asynchronous I/O with a minimum /// amount of copying (e.g. WASIp3 streams), we would like it to be /// possible to safely access these buffers concurrently with the @@ -299,6 +301,24 @@ unsafe impl Send for GuestSharedMemory {} /// \[4\] P1152R0: Deprecating `volatile`. JF Bastien. /// \[5\] P1382R1: `volatile_load` and `volatile_store`. 
JF Bastien, Paul McKenney, Jeffrey Yasskin, and the indefatigable TBD. /// \[6\] Documentation for std::sync::atomic::fence. +/// +/// # Note \[Keeping mappings in sync between userspace and the guest\] +/// +/// When using this structure with mshv on Linux, it is necessary to +/// be a little bit careful: since the hypervisor is not directly +/// integrated with the host kernel virtual memory subsystem, it is +/// easy for the memory region in userspace to get out of sync with +/// the memory region mapped into the guest. Generally speaking, when +/// the [`SharedMemory`] is mapped into a partition, the MSHV kernel +/// module will call `pin_user_pages(FOLL_PIN|FOLL_WRITE)` on it, +/// which will eagerly perform any CoW and similar work needed to +/// obtain backing pages pinned in memory, and then map precisely +/// those backing pages into the virtual machine. After that, the +/// backing pages mapped into the VM will not change until the region +/// is unmapped or remapped. This means that code in this module needs +/// to be very careful to avoid changing the backing pages of the +/// region in the host userspace, since that would result in +/// hyperlight-host's view of the memory becoming completely divorced +/// from the view of the VM. #[derive(Clone, Debug)] pub struct HostSharedMemory { region: Arc<HostMapping>, @@ -314,11 +334,12 @@ impl ExclusiveSharedMemory { #[cfg(target_os = "linux")] #[instrument(skip_all, parent = Span::current(), level= "Trace")] pub fn new(min_size_bytes: usize) -> Result<Self> { - #[cfg(miri)] - use libc::MAP_PRIVATE; - use libc::{MAP_ANONYMOUS, MAP_FAILED, PROT_READ, PROT_WRITE, c_int, mmap, off_t, size_t}; + use libc::{ + MAP_ANONYMOUS, MAP_FAILED, MAP_PRIVATE, PROT_READ, PROT_WRITE, c_int, mmap, off_t, + size_t, + }; #[cfg(not(miri))] - use libc::{MAP_NORESERVE, MAP_SHARED, PROT_NONE, mprotect}; + use libc::{MAP_NORESERVE, PROT_NONE, mprotect}; if min_size_bytes == 0 { return Err(new_error!("Cannot create shared memory with size 0")); @@ -346,7 +367,7 @@ // allocate the memory #[cfg(not(miri))] - let flags = MAP_ANONYMOUS | MAP_SHARED | MAP_NORESERVE; + let flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE; #[cfg(miri)] let flags = MAP_ANONYMOUS | MAP_PRIVATE; @@ -655,9 +676,10 @@ impl GuestSharedMemory { region_type: MemoryRegionType, ) -> MemoryRegion { let flags = match region_type { - MemoryRegionType::Scratch | MemoryRegionType::Snapshot => { + MemoryRegionType::Scratch => { MemoryRegionFlags::READ | MemoryRegionFlags::WRITE | MemoryRegionFlags::EXECUTE } + MemoryRegionType::Snapshot => MemoryRegionFlags::READ | MemoryRegionFlags::EXECUTE, #[allow(clippy::panic)] // In the future, all the host side knowledge about memory // region types should collapse down to Snapshot vs @@ -764,7 +786,9 @@ pub trait SharedMemory { #[allow(unused_mut)] // unused on some platforms, although not others let mut do_copy = true; // TODO: Compare & add heuristic thresholds: mmap, MADV_DONTNEED, MADV_REMOVE, MADV_FREE (?) - #[cfg(target_os = "linux")] + // TODO: Find a similar lazy zeroing approach that works on MSHV. 
+ // (See Note [Keeping mappings in sync between userspace and the guest]) + #[cfg(all(target_os = "linux", feature = "kvm", not(any(feature = "mshv3"))))] unsafe { let ret = libc::madvise( e.region.ptr as *mut libc::c_void, diff --git a/src/hyperlight_host/src/sandbox/config.rs b/src/hyperlight_host/src/sandbox/config.rs index 044c5f2fb..f12387a0b 100644 --- a/src/hyperlight_host/src/sandbox/config.rs +++ b/src/hyperlight_host/src/sandbox/config.rs @@ -92,7 +92,7 @@ impl SandboxConfiguration { /// The default heap size of a hyperlight sandbox pub const DEFAULT_HEAP_SIZE: u64 = 131072; /// The default size of the scratch region - pub const DEFAULT_SCRATCH_SIZE: usize = 0x40000; + pub const DEFAULT_SCRATCH_SIZE: usize = 0x48000; #[allow(clippy::too_many_arguments)] /// Create a new configuration for a sandbox with the given sizes. diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 138ebdaf6..666ba4f98 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -42,7 +42,6 @@ use crate::mem::memory_region::MemoryRegion; #[cfg(unix)] use crate::mem::memory_region::{MemoryRegionFlags, MemoryRegionType}; use crate::mem::mgr::SandboxMemoryManager; -use crate::mem::ptr::RawPtr; use crate::mem::shared_mem::HostSharedMemory; use crate::metrics::{ METRIC_GUEST_ERROR, METRIC_GUEST_ERROR_LABEL_CODE, maybe_time_and_emit_guest_call, @@ -95,7 +94,6 @@ pub struct MultiUseSandbox { pub(super) host_funcs: Arc>, pub(crate) mem_mgr: SandboxMemoryManager, vm: HyperlightVm, - dispatch_ptr: RawPtr, #[cfg(gdb)] dbg_mem_access_fn: Arc>>, /// If the current state of the sandbox has been captured in a snapshot, @@ -114,7 +112,6 @@ impl MultiUseSandbox { host_funcs: Arc>, mgr: SandboxMemoryManager, vm: HyperlightVm, - dispatch_ptr: RawPtr, #[cfg(gdb)] dbg_mem_access_fn: Arc>>, ) -> MultiUseSandbox { Self { @@ -123,7 +120,6 @@ impl MultiUseSandbox { host_funcs, mem_mgr: mgr, vm, - dispatch_ptr, #[cfg(gdb)] dbg_mem_access_fn, snapshot: None, @@ -178,12 +174,14 @@ impl MultiUseSandbox { .vm .get_snapshot_sregs() .map_err(|e| HyperlightError::HyperlightVmError(e.into()))?; + let entrypoint = self.vm.get_entrypoint(); let memory_snapshot = self.mem_mgr.snapshot( self.id, mapped_regions_vec, root_pt_gpa, stack_top_gpa, sregs, + entrypoint, )?; let snapshot = Arc::new(memory_snapshot); self.snapshot = Some(snapshot.clone()); @@ -319,6 +317,7 @@ impl MultiUseSandbox { })?; self.vm.set_stack_top(snapshot.stack_top_gva()); + self.vm.set_entrypoint(snapshot.entrypoint()); let current_regions: HashSet<_> = self.vm.get_mapped_regions().cloned().collect(); let snapshot_regions: HashSet<_> = snapshot.regions().iter().cloned().collect(); @@ -532,11 +531,6 @@ impl MultiUseSandbox { if self.poisoned { return Err(crate::HyperlightError::PoisonedSandbox); } - if rgn.flags.contains(MemoryRegionFlags::STACK_GUARD) { - // Stack guard pages are an internal implementation detail - // (which really should be moved into the guest) - log_then_return!("Cannot map host memory as a stack guard page"); - } if rgn.flags.contains(MemoryRegionFlags::WRITE) { // TODO: Implement support for writable mappings, which // need to be registered with the memory manager so that @@ -653,7 +647,6 @@ impl MultiUseSandbox { self.mem_mgr.write_guest_function_call(buffer)?; let dispatch_res = self.vm.dispatch_call_from_host( - self.dispatch_ptr.clone(), &mut self.mem_mgr, &self.host_funcs, #[cfg(gdb)] @@ 
-1030,16 +1023,21 @@ mod tests { assert_eq!(res, 0); } - // Tests to ensure that many (1000) function calls can be made in a call context with a small stack (1K) and heap(14K). + // Tests to ensure that many (1000) function calls can be made in a call context with a small stack (24K) and heap(20K). // This test effectively ensures that the stack is being properly reset after each call and we are not leaking memory in the Guest. #[test] fn test_with_small_stack_and_heap() { let mut cfg = SandboxConfiguration::default(); cfg.set_heap_size(20 * 1024); // min_scratch_size already includes 1 page (4k on most - // platforms) of guest stack, so add 20k more to get 24k total - let min_scratch = hyperlight_common::layout::min_scratch_size(); - cfg.set_scratch_size(min_scratch + 0x5000); + // platforms) of guest stack, so add 20k more to get 24k + // total, and then add some more for the eagerly-copied page + // tables on amd64 + let min_scratch = hyperlight_common::layout::min_scratch_size( + cfg.get_input_data_size(), + cfg.get_output_data_size(), + ); + cfg.set_scratch_size(min_scratch + 0x10000 + 0x10000); let mut sbox1: MultiUseSandbox = { let path = simple_guest_as_string().unwrap(); @@ -1423,6 +1421,7 @@ mod tests { for (name, heap_size) in test_cases { let mut cfg = SandboxConfiguration::default(); cfg.set_heap_size(heap_size); + cfg.set_scratch_size(0x100000); let path = simple_guest_as_string().unwrap(); let sbox = UninitializedSandbox::new(GuestBinary::FilePath(path), Some(cfg)) diff --git a/src/hyperlight_host/src/sandbox/outb.rs b/src/hyperlight_host/src/sandbox/outb.rs index 688f37a8d..76ec53cee 100644 --- a/src/hyperlight_host/src/sandbox/outb.rs +++ b/src/hyperlight_host/src/sandbox/outb.rs @@ -297,8 +297,8 @@ mod tests { let log_msg = new_guest_log_data(LogLevel::Information); let guest_log_data_buffer: Vec = log_msg.try_into().unwrap(); - let offset = mgr.layout.get_output_data_offset(); - mgr.get_shared_mem_mut() + let offset = mgr.layout.get_output_data_buffer_scratch_host_offset(); + mgr.scratch_mem .push_buffer( offset, sandbox_cfg.get_output_data_size(), @@ -335,9 +335,9 @@ mod tests { let log_data = new_guest_log_data(level); let guest_log_data_buffer: Vec = log_data.clone().try_into().unwrap(); - mgr.get_shared_mem_mut() + mgr.scratch_mem .push_buffer( - layout.get_output_data_offset(), + layout.get_output_data_buffer_scratch_host_offset(), sandbox_cfg.get_output_data_size(), guest_log_data_buffer.as_slice(), ) @@ -417,9 +417,9 @@ mod tests { subscriber.clear(); let guest_log_data_buffer: Vec = log_data.try_into().unwrap(); - mgr.get_shared_mem_mut() + mgr.scratch_mem .push_buffer( - layout.get_output_data_offset(), + layout.get_output_data_buffer_scratch_host_offset(), sandbox_cfg.get_output_data_size(), guest_log_data_buffer.as_slice(), ) diff --git a/src/hyperlight_host/src/sandbox/snapshot.rs b/src/hyperlight_host/src/sandbox/snapshot.rs index 20cd046ad..53c45ef21 100644 --- a/src/hyperlight_host/src/sandbox/snapshot.rs +++ b/src/hyperlight_host/src/sandbox/snapshot.rs @@ -17,7 +17,7 @@ limitations under the License. 
use std::sync::atomic::{AtomicU64, Ordering}; use hyperlight_common::layout::{scratch_base_gpa, scratch_base_gva}; -use hyperlight_common::vmem::{self, BasicMapping, Mapping, MappingKind, PAGE_SIZE}; +use hyperlight_common::vmem::{self, BasicMapping, CowMapping, Mapping, MappingKind, PAGE_SIZE}; use tracing::{Span, instrument}; use crate::HyperlightError::MemoryRegionSizeMismatch; @@ -33,6 +33,29 @@ use crate::sandbox::uninitialized::{GuestBinary, GuestEnvironment}; pub(super) static SANDBOX_CONFIGURATION_COUNTER: AtomicU64 = AtomicU64::new(0); +/// Presently, a snapshot can be of a preinitialised sandbox, which +/// still needs an initialise function called in order to determine +/// how to call into it, or of an already-properly-initialised sandbox +/// which can be immediately called into. This keeps track of the +/// difference. +/// +/// TODO: this should not necessarily be around in the long term: +/// ideally we would just preinitialise earlier in the snapshot +/// creation process and never need this. +#[derive(Copy, Clone, PartialEq, Eq)] +pub enum NextAction { + /// A sandbox in the preinitialise state still needs to be + /// initialised by calling the initialise function + Initialise(u64), + /// A sandbox in the ready state can immediately be called into, + /// using the dispatch function pointer. + Call(u64), + /// Only when compiling for tests: a sandbox that cannot actually + /// be used + #[cfg(test)] + None, +} + /// A wrapper around a `SharedMemory` reference and a snapshot /// of the memory therein pub struct Snapshot { @@ -67,32 +90,18 @@ pub struct Snapshot { /// It is not a [`blake3::Hash`] because we do not presently /// require constant-time equality checking hash: [u8; 32], - /// The address of the root page table - root_pt_gpa: u64, /// The address of the top of the guest stack stack_top_gva: u64, /// Special register state captured from the vCPU during snapshot. - /// None for snapshots created directly from a binary (before guest runs). - /// Some for snapshots taken from a running sandbox. - /// Note: CR3 in this struct is NOT used on restore - instead, the new - /// root_pt_gpa field is used since page tables are relocated during snapshot. + /// None for snapshots created directly from a binary (before + /// guest runs). Some for snapshots taken from a running sandbox. + /// Note: CR3 in this struct is NOT used on restore, since page + /// tables are relocated during snapshot. sregs: Option, - /// Preinitialisation entry point for snapshots created directly from a - /// guest binary. - /// - /// When creating a snapshot directly from a guest binary, this tracks - /// the address that we need to call into before actually using a - /// sandbox from this snapshot in order to perform guest-side - /// preinitialisation. - /// - /// Long-term, the intention is to run this preinitialisation eagerly as - /// part of the snapshot creation process so that restored sandboxes can - /// begin executing from their normal entry point without requiring this - /// field. Until that refactoring happens, this remains part of the - /// snapshot format and must be preserved. 
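// A sketch of how a consumer is expected to drive NextAction (hypothetical
// driver code, for illustration only):
//
//     match snapshot.entrypoint() {
//         // run the guest's initialise function before first use
//         NextAction::Initialise(gva) => { /* call into the guest at gva */ }
//         // the sandbox is ready; gva is the dispatch function pointer
//         NextAction::Call(gva) => { /* dispatch guest calls via gva */ }
//         #[cfg(test)]
//         NextAction::None => unreachable!("test-only snapshots are never run"),
//     }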
- preinitialise: Option, + /// The next action that should be performed on this snapshot + entrypoint: NextAction, } impl core::convert::AsRef for Snapshot { fn as_ref(&self) -> &Self { @@ -126,7 +135,7 @@ impl hyperlight_common::vmem::TableReadOps for Snapshot { addr } fn root_table(&self) -> u64 { - self.root_pt_gpa + self.root_pt_gpa() } } @@ -243,25 +252,26 @@ fn filtered_mappings<'a>( regions: &[MemoryRegion], scratch_size: usize, root_pt: u64, -) -> Vec<(u64, u64, BasicMapping, &'a [u8])> { +) -> Vec<(Mapping, &'a [u8])> { let op = SharedMemoryPageTableBuffer::new(snap, scratch, scratch_size, root_pt); unsafe { hyperlight_common::vmem::virt_to_phys(&op, 0, hyperlight_common::layout::MAX_GVA as u64) } - .filter_map(move |(gva, gpa, bm)| { + .filter_map(move |mapping| { // the scratch map doesn't count - if gva >= scratch_base_gva(scratch_size) { + if mapping.virt_base >= scratch_base_gva(scratch_size) { return None; } // neither does the mapping of the snapshot's own page tables - if gva >= hyperlight_common::layout::SNAPSHOT_PT_GVA_MIN as u64 - && gva <= hyperlight_common::layout::SNAPSHOT_PT_GVA_MAX as u64 + if mapping.virt_base >= hyperlight_common::layout::SNAPSHOT_PT_GVA_MIN as u64 + && mapping.virt_base <= hyperlight_common::layout::SNAPSHOT_PT_GVA_MAX as u64 { return None; } // todo: is it useful to warn if we can't resolve this? - let contents = unsafe { guest_page(snap, scratch, regions, scratch_size, gpa) }?; - Some((gva, gpa, bm, contents)) + let contents = + unsafe { guest_page(snap, scratch, regions, scratch_size, mapping.phys_base) }?; + Some((mapping, contents)) }) .collect() } @@ -308,30 +318,15 @@ fn map_specials(pt_buf: &GuestPageTableBuffer, scratch_size: usize) { phys_base: scratch_base_gpa(scratch_size), virt_base: scratch_base_gva(scratch_size), len: scratch_size as u64, - kind: MappingKind::BasicMapping(BasicMapping { + kind: MappingKind::Basic(BasicMapping { readable: true, writable: true, - executable: true, + // assume that the guest will map these pages elsewhere if + // it actually needs to execute from them + executable: false, }), }; unsafe { vmem::map(pt_buf, mapping) }; - // Map the page tables themselves, in order to allow the - // guest to update them easily - let mut pt_size_mapped = 0; - while pt_buf.size() > pt_size_mapped { - let mapping = Mapping { - phys_base: (pt_buf.phys_base() + pt_size_mapped) as u64, - virt_base: (hyperlight_common::layout::SNAPSHOT_PT_GVA_MIN + pt_size_mapped) as u64, - len: (pt_buf.size() - pt_size_mapped) as u64, - kind: MappingKind::BasicMapping(BasicMapping { - readable: true, - writable: true, - executable: false, - }), - }; - pt_size_mapped = pt_buf.size(); - unsafe { vmem::map(pt_buf, mapping) }; - } } impl Snapshot { @@ -359,8 +354,6 @@ impl Snapshot { let mut layout = crate::mem::layout::SandboxMemoryLayout::new( cfg, exe_info.loaded_size(), - usize::try_from(cfg.get_heap_size())?, - cfg.get_scratch_size(), guest_blob_size, guest_blob_mem_flags, )?; @@ -379,35 +372,34 @@ impl Snapshot { .transpose()?; #[cfg(feature = "init-paging")] - let pt_base_gpa = { + { // Set up page table entries for the snapshot - let pt_base_gpa = - crate::mem::layout::SandboxMemoryLayout::BASE_ADDRESS + layout.get_pt_offset(); - let pt_buf = GuestPageTableBuffer::new(pt_base_gpa); + let pt_buf = GuestPageTableBuffer::new(layout.get_pt_base_gpa() as usize); use crate::mem::memory_region::{GuestMemoryRegion, MemoryRegionFlags}; // 1. 
Map the (ideally readonly) pages of snapshot data for rgn in layout.get_memory_regions_::<GuestMemoryRegion>(())?.iter() { let readable = rgn.flags.contains(MemoryRegionFlags::READ); - let writable = rgn.flags.contains(MemoryRegionFlags::WRITE) - // Temporary hack: the stack guard page is - // currently checked for in the host, rather than - // the guest, so we need to mark it writable in - // the Stage 1 translation so that the fault - // exception on a write is taken to the - // hypervisor, rather than the guest kernel - || rgn.flags.contains(MemoryRegionFlags::STACK_GUARD); let executable = rgn.flags.contains(MemoryRegionFlags::EXECUTE); + let writable = rgn.flags.contains(MemoryRegionFlags::WRITE); + let kind = if writable { + MappingKind::Cow(CowMapping { + readable, + executable, + }) + } else { + MappingKind::Basic(BasicMapping { + readable, + writable: false, + executable, + }) + }; let mapping = Mapping { phys_base: rgn.guest_region.start as u64, virt_base: rgn.guest_region.start as u64, len: rgn.guest_region.len() as u64, - kind: MappingKind::BasicMapping(BasicMapping { - readable, - writable, - executable, - }), + kind, }; unsafe { vmem::map(&pt_buf, mapping) }; } @@ -416,12 +408,9 @@ map_specials(&pt_buf, layout.get_scratch_size()); let pt_bytes = pt_buf.into_bytes(); - layout.set_pt_size(pt_bytes.len()); + layout.set_pt_size(pt_bytes.len())?; memory.extend(&pt_bytes); - pt_base_gpa }; - #[cfg(not(feature = "init-paging"))] - let pt_base_gpa = 0usize; let exn_stack_top_gva = hyperlight_common::layout::MAX_GVA as u64 - hyperlight_common::layout::SCRATCH_TOP_EXN_STACK_OFFSET @@ -437,10 +426,9 @@ regions: extra_regions, load_info, hash, - root_pt_gpa: pt_base_gpa as u64, stack_top_gva: exn_stack_top_gva, sregs: None, - preinitialise: Some(load_addr + entrypoint_offset), + entrypoint: NextAction::Initialise(load_addr + entrypoint_offset), }) } @@ -463,8 +451,9 @@ root_pt_gpa: u64, stack_top_gva: u64, sregs: CommonSpecialRegisters, + entrypoint: NextAction, ) -> Result<Self> { - let (new_root_pt_gpa, memory) = shared_mem.with_exclusivity(|snap_e| { + let memory = shared_mem.with_exclusivity(|snap_e| { scratch_mem.with_exclusivity(|scratch_e| { let scratch_size = layout.get_scratch_size(); @@ -474,29 +463,40 @@ // Pass 2: copy them, and map them // TODO: Look for opportunities to hugepage map - let pt_base_gpa = SandboxMemoryLayout::BASE_ADDRESS + live_pages.len() * PAGE_SIZE; - let pt_buf = GuestPageTableBuffer::new(pt_base_gpa); + let pt_buf = GuestPageTableBuffer::new(layout.get_pt_base_gpa() as usize); let mut snapshot_memory: Vec<u8> = Vec::new(); for (mapping, contents) in live_pages { let new_offset = snapshot_memory.len(); snapshot_memory.extend(contents); let new_gpa = new_offset + SandboxMemoryLayout::BASE_ADDRESS; + let kind = match mapping.kind { + MappingKind::Cow(cm) => MappingKind::Cow(cm), + MappingKind::Basic(bm) if bm.writable => MappingKind::Cow(CowMapping { + readable: bm.readable, + executable: bm.executable, + }), + MappingKind::Basic(bm) => MappingKind::Basic(BasicMapping { + readable: bm.readable, + writable: false, + executable: bm.executable, + }), + }; let mapping = Mapping { phys_base: new_gpa as u64, - virt_base: gva, + virt_base: mapping.virt_base, len: PAGE_SIZE as u64, - kind: MappingKind::BasicMapping(bm), + kind, }; unsafe { vmem::map(&pt_buf, mapping) }; } // Phase 3: Map the special mappings map_specials(&pt_buf, layout.get_scratch_size()); let pt_bytes = 
pt_buf.into_bytes(); - layout.set_pt_size(pt_bytes.len()); + layout.set_pt_size(pt_bytes.len())?; snapshot_memory.extend(&pt_bytes); - (pt_base_gpa, snapshot_memory) + Ok::<Vec<u8>, crate::HyperlightError>(snapshot_memory) }) - })??; + })???; // We do not need the original regions anymore, as any uses of // them in the guest have been incorporated into the snapshot @@ -513,8 +513,7 @@ hash, stack_top_gva, sregs: Some(sregs), - root_pt_gpa: new_root_pt_gpa as u64, - preinitialise: None, + entrypoint, }) } @@ -550,7 +549,7 @@ } pub(crate) fn root_pt_gpa(&self) -> u64 { - self.root_pt_gpa + self.layout.get_pt_base_gpa() } pub(crate) fn stack_top_gva(&self) -> u64 { @@ -566,8 +565,8 @@ self.sregs.as_ref() } - pub(crate) fn preinitialise(&self) -> Option<u64> { - self.preinitialise + pub(crate) fn entrypoint(&self) -> NextAction { + self.entrypoint } } @@ -592,14 +591,15 @@ mod tests { } fn make_simple_pt_mems() -> (SandboxMemoryManager<HostSharedMemory>, u64) { - let scratch_mem = ExclusiveSharedMemory::new(PAGE_SIZE).unwrap(); + let cfg = crate::sandbox::SandboxConfiguration::default(); + let scratch_mem = ExclusiveSharedMemory::new(cfg.get_scratch_size()).unwrap(); let pt_base = PAGE_SIZE + SandboxMemoryLayout::BASE_ADDRESS; let pt_buf = GuestPageTableBuffer::new(pt_base); let mapping = Mapping { phys_base: SandboxMemoryLayout::BASE_ADDRESS as u64, virt_base: SandboxMemoryLayout::BASE_ADDRESS as u64, len: PAGE_SIZE as u64, - kind: MappingKind::BasicMapping(BasicMapping { + kind: MappingKind::Basic(BasicMapping { readable: true, writable: true, executable: true, @@ -612,13 +612,11 @@ let mut snapshot_mem = ExclusiveSharedMemory::new(PAGE_SIZE + pt_bytes.len()).unwrap(); snapshot_mem.copy_from_slice(&pt_bytes, PAGE_SIZE).unwrap(); - let cfg = crate::sandbox::SandboxConfiguration::default(); let mgr = SandboxMemoryManager::new( - SandboxMemoryLayout::new(cfg, 4096, 2048, 4096, 0x3000, None).unwrap(), + SandboxMemoryLayout::new(cfg, 4096, 0x3000, None).unwrap(), snapshot_mem, scratch_mem, - 0.into(), - None, + super::NextAction::None, ); let (mgr, _) = mgr.build().unwrap(); (mgr, pt_base as u64) @@ -644,6 +642,7 @@ pt_base, 0, default_sregs(), + super::NextAction::None, ) .unwrap(); @@ -675,6 +674,7 @@ pt_base, 0, default_sregs(), + super::NextAction::None, ) .unwrap(); assert_eq!(snapshot.mem_size(), size); @@ -697,6 +697,7 @@ pt_base, 0, default_sregs(), + super::NextAction::None, ) .unwrap(); @@ -713,6 +714,7 @@ pt_base, 0, default_sregs(), + super::NextAction::None, ) .unwrap(); diff --git a/src/hyperlight_host/src/sandbox/uninitialized.rs b/src/hyperlight_host/src/sandbox/uninitialized.rs index 4e13fa0d6..9d602cf70 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized.rs @@ -1094,7 +1094,7 @@ mod tests { // Test 3: Create snapshot with custom scratch size { let mut cfg = SandboxConfiguration::default(); - cfg.set_scratch_size(128 * 1024); // 128KB scratch + cfg.set_scratch_size(256 * 1024); // 256KB scratch let env = GuestEnvironment::new(GuestBinary::FilePath(binary_path.clone()), None); @@ -1142,7 +1142,7 @@ { let mut cfg = SandboxConfiguration::default(); cfg.set_heap_size(32 * 1024 * 1024); // 32MB heap - cfg.set_scratch_size(256 * 1024); // 256KB scratch + cfg.set_scratch_size(256 * 1024 * 2); // 512KB scratch (256KB will be input/output) cfg.set_input_data_size(128 * 1024); // 128KB input cfg.set_output_data_size(128 * 1024); // 128KB 
output diff --git a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs index d9f73bfc8..f042c3f57 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs @@ -25,8 +25,7 @@ use super::uninitialized::SandboxRuntimeConfig; use crate::hypervisor::hyperlight_vm::{HyperlightVm, HyperlightVmError}; use crate::mem::exe::LoadInfo; use crate::mem::mgr::SandboxMemoryManager; -use crate::mem::ptr::{GuestPtr, RawPtr}; -use crate::mem::ptr_offset::Offset; +use crate::mem::ptr::RawPtr; use crate::mem::shared_mem::GuestSharedMemory; #[cfg(gdb)] use crate::sandbox::config::DebugInfo; @@ -34,7 +33,7 @@ use crate::sandbox::config::DebugInfo; use crate::sandbox::trace::MemTraceInfo; #[cfg(target_os = "linux")] use crate::signal_handlers::setup_signal_handlers; -use crate::{MultiUseSandbox, Result, UninitializedSandbox, new_error}; +use crate::{MultiUseSandbox, Result, UninitializedSandbox}; #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] pub(super) fn evolve_impl_multi_use(u_sbox: UninitializedSandbox) -> Result { @@ -77,13 +76,6 @@ pub(super) fn evolve_impl_multi_use(u_sbox: UninitializedSandbox) -> Result Result Result { - let base_ptr = GuestPtr::try_from(Offset::from(0))?; - - let pml4_ptr = { - let pml4_offset_u64 = mgr.layout.get_pt_offset() as u64; - base_ptr + Offset::from(pml4_offset_u64) - }; - let entrypoint_ptr = mgr - .entrypoint_offset - .map(|x| { - let entrypoint_total_offset = mgr.load_addr.clone() + x; - GuestPtr::try_from(entrypoint_total_offset).and_then(|x| x.absolute()) - }) - .transpose()?; - // Create gdb thread if gdb is enabled and the configuration is provided #[cfg(gdb)] let gdb_conn = if let Some(DebugInfo { port }) = rt_cfg.debug_info { @@ -145,8 +122,8 @@ pub(crate) fn set_up_hypervisor_partition( Ok(HyperlightVm::new( mgr.shared_mem, mgr.scratch_mem, - pml4_ptr.absolute()?, - entrypoint_ptr, + mgr.layout.get_pt_base_gpa(), + mgr.entrypoint, stack_top_gva, config, #[cfg(gdb)] diff --git a/src/hyperlight_host/tests/common/mod.rs b/src/hyperlight_host/tests/common/mod.rs index ab58237b8..aeee8c658 100644 --- a/src/hyperlight_host/tests/common/mod.rs +++ b/src/hyperlight_host/tests/common/mod.rs @@ -14,6 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ use hyperlight_host::func::HostFunction; +use hyperlight_host::sandbox::SandboxConfiguration; #[cfg(gdb)] use hyperlight_host::sandbox::config::DebugInfo; use hyperlight_host::{GuestBinary, MultiUseSandbox, Result, UninitializedSandbox}; @@ -33,7 +34,6 @@ pub fn new_uninit() -> Result { pub fn new_uninit_rust() -> Result { #[cfg(gdb)] { - use hyperlight_host::sandbox::SandboxConfiguration; let mut cfg = SandboxConfiguration::default(); let debug_info = DebugInfo { port: 8080 }; cfg.set_guest_debug_info(debug_info); @@ -59,6 +59,27 @@ pub fn new_uninit_c() -> Result { ) } +pub fn get_simpleguest_sandboxes_with_cfg( + writer: Option>, // An optional writer to make sure correct info is passed to the host printer + cfg: SandboxConfiguration, +) -> Vec { + let elf_path = get_c_or_rust_simpleguest_path(); + + let sandboxes = [ + // in hypervisor elf + UninitializedSandbox::new(GuestBinary::FilePath(elf_path.clone()), Some(cfg)).unwrap(), + ]; + + sandboxes + .into_iter() + .map(|mut sandbox| { + if let Some(writer) = writer.clone() { + sandbox.register_print(writer).unwrap(); + } + sandbox.evolve().unwrap() + }) + .collect() +} pub fn get_simpleguest_sandboxes( writer: Option>, // An optional writer to make sure correct info is passed to the host printer ) -> Vec { diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index 083e690e3..4b2426cf2 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -704,13 +704,13 @@ fn log_message() { // - internal_dispatch_function does a log::trace! in debug mode // - logs from trace level tracing spans created as logs because of the tracing `log` feature // - 4 from evolve call (hyperlight_main + halt) - // - 16 from guest call (internal_dispatch_function + others) + // - 12 from guest call (internal_dispatch_function + others) // and are multiplied because we make 6 calls to `log_test_messages` // NOTE: These numbers need to be updated if log messages or spans are added/removed let num_fixed_trace_log = if cfg!(debug_assertions) { - (1 + 20) * 6 + (1 + 16) * 6 } else { - 20 * 6 + 16 * 6 }; let tests = vec![ diff --git a/src/hyperlight_host/tests/sandbox_host_tests.rs b/src/hyperlight_host/tests/sandbox_host_tests.rs index 6d040f061..f392fba14 100644 --- a/src/hyperlight_host/tests/sandbox_host_tests.rs +++ b/src/hyperlight_host/tests/sandbox_host_tests.rs @@ -26,7 +26,7 @@ use hyperlight_host::{ use hyperlight_testing::simple_guest_as_string; pub mod common; // pub to disable dead_code warning -use crate::common::get_simpleguest_sandboxes; +use crate::common::{get_simpleguest_sandboxes, get_simpleguest_sandboxes_with_cfg}; #[test] fn pass_byte_array() { @@ -121,7 +121,9 @@ fn invalid_guest_function_name() { #[test] fn set_static() { - for mut sandbox in get_simpleguest_sandboxes(None).into_iter() { + let mut cfg: SandboxConfiguration = Default::default(); + cfg.set_scratch_size(0x100A000); + for mut sandbox in get_simpleguest_sandboxes_with_cfg(None, cfg).into_iter() { let fn_name = "SetStatic"; let res = sandbox.call::(fn_name, ()); println!("{:?}", res); @@ -212,6 +214,7 @@ fn incorrect_parameter_num() { #[test] fn max_memory_sandbox() { let mut cfg = SandboxConfiguration::default(); + cfg.set_scratch_size(0x40004000); cfg.set_input_data_size(0x40000000); let a = UninitializedSandbox::new( GuestBinary::FilePath(simple_guest_as_string().unwrap()), @@ -224,6 +227,23 @@ fn max_memory_sandbox() { )); } +#[test] +fn small_scratch_sandbox() { + let 
mut cfg = SandboxConfiguration::default(); + cfg.set_scratch_size(0x48000); + cfg.set_input_data_size(0x24000); + cfg.set_output_data_size(0x24000); + let a = UninitializedSandbox::new( + GuestBinary::FilePath(simple_guest_as_string().unwrap()), + Some(cfg), + ); + + assert!(matches!( + a.unwrap_err(), + HyperlightError::MemoryRequestTooSmall(..) + )); +} + #[test] fn iostack_is_working() { for mut sandbox in get_simpleguest_sandboxes(None).into_iter() { diff --git a/src/tests/rust_guests/simpleguest/Cargo.lock b/src/tests/rust_guests/simpleguest/Cargo.lock index 0faead807..8effa6589 100644 --- a/src/tests/rust_guests/simpleguest/Cargo.lock +++ b/src/tests/rust_guests/simpleguest/Cargo.lock @@ -25,9 +25,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.54" +version = "1.2.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583" +checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" dependencies = [ "find-msvc-tools", "shlex", @@ -47,9 +47,9 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "find-msvc-tools" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "flatbuffers" @@ -137,9 +137,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.13.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" dependencies = [ "equivalent", "hashbrown", @@ -147,9 +147,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.17" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "linkme" @@ -422,6 +422,6 @@ dependencies = [ [[package]] name = "zmij" -version = "1.0.17" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02aae0f83f69aafc94776e879363e9771d7ecbffe2c7fbb6c14c5e00dfe88439" +checksum = "0f4a4e8e9dc5c62d159f04fcdbe07f4c3fb710415aab4754bf11505501e3251d" diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index 09f55212a..e958d7dcb 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -41,6 +41,7 @@ use hyperlight_common::flatbuffer_wrappers::function_types::{ use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode; use hyperlight_common::flatbuffer_wrappers::guest_log_level::LogLevel; use hyperlight_common::flatbuffer_wrappers::util::get_flatbuffer_result; +use hyperlight_common::vmem::{BasicMapping, MappingKind}; use hyperlight_guest::error::{HyperlightGuestError, Result}; use hyperlight_guest::exit::{abort_with_code, abort_with_code_and_message}; use hyperlight_guest_bin::exception::arch::{Context, ExceptionInfo}; @@ -582,8 +583,18 @@ fn read_mapped_buffer(base: u64, len: u64, do_map: bool) -> Vec { if do_map { unsafe { - hyperlight_guest_bin::paging::map_region(base as _, base as _, len as u64 + 4096) - }; + 
hyperlight_guest_bin::paging::map_region( + base as _, + base as _, + len as u64 + 4096, + MappingKind::Basic(BasicMapping { + readable: true, + writable: true, + executable: true, + }), + ); + hyperlight_guest_bin::paging::barrier::first_valid_same_ctx(); + } } let data = unsafe { core::slice::from_raw_parts(base, len) }; @@ -603,7 +614,19 @@ fn write_mapped_buffer(base: u64, len: u64) -> bool { let base = base as usize as *mut u8; let len = len as usize; - unsafe { hyperlight_guest_bin::paging::map_region(base as _, base as _, len as u64 + 4096) }; + unsafe { + hyperlight_guest_bin::paging::map_region( + base as _, + base as _, + len as u64 + 4096, + MappingKind::Basic(BasicMapping { + readable: true, + writable: true, + executable: true, + }), + ); + hyperlight_guest_bin::paging::barrier::first_valid_same_ctx(); + }; let data = unsafe { core::slice::from_raw_parts_mut(base, len) }; @@ -619,7 +642,19 @@ fn exec_mapped_buffer(base: u64, len: u64) -> bool { let base = base as usize as *mut u8; let len = len as usize; - unsafe { hyperlight_guest_bin::paging::map_region(base as _, base as _, len as u64 + 4096) }; + unsafe { + hyperlight_guest_bin::paging::map_region( + base as _, + base as _, + len as u64 + 4096, + MappingKind::Basic(BasicMapping { + readable: true, + writable: true, + executable: true, + }), + ); + hyperlight_guest_bin::paging::barrier::first_valid_same_ctx(); + }; let data = unsafe { core::slice::from_raw_parts(base, len) }; diff --git a/src/tests/rust_guests/witguest/Cargo.lock b/src/tests/rust_guests/witguest/Cargo.lock index 3715fc832..bedfdb2da 100644 --- a/src/tests/rust_guests/witguest/Cargo.lock +++ b/src/tests/rust_guests/witguest/Cargo.lock @@ -84,9 +84,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.54" +version = "1.2.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583" +checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" dependencies = [ "find-msvc-tools", "shlex", @@ -141,9 +141,9 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "find-msvc-tools" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "flatbuffers"