diff --git a/litebox/src/fd/mod.rs b/litebox/src/fd/mod.rs index 30f991b1e..e0145032c 100644 --- a/litebox/src/fd/mod.rs +++ b/litebox/src/fd/mod.rs @@ -37,7 +37,12 @@ impl Descriptors { } /// Insert `entry` into the descriptor table, returning an `OwnedFd` to this entry. - pub(crate) fn insert( + #[expect( + clippy::missing_panics_doc, + reason = "panics impossible due to type invariants" + )] + #[must_use] + pub fn insert( &mut self, entry: impl Into, ) -> TypedFd { @@ -104,7 +109,7 @@ impl Descriptors { /// have been cleared out). /// /// If the `fd` was already closed out, then (obviously) it does not return an entry. - pub(crate) fn remove( + pub fn remove( &mut self, fd: &TypedFd, ) -> Option { @@ -285,7 +290,11 @@ impl Descriptors { /// Use the entry at `fd` as read-only. /// /// If the `fd` has been closed, then skips applying `f` and returns `None`. - pub(crate) fn with_entry(&self, fd: &TypedFd, f: F) -> Option + #[expect( + clippy::missing_panics_doc, + reason = "panics impossible due to type invariants" + )] + pub fn with_entry(&self, fd: &TypedFd, f: F) -> Option where Subsystem: FdEnabledSubsystem, F: FnOnce(&Subsystem::Entry) -> R, @@ -300,7 +309,11 @@ impl Descriptors { /// Use the entry at `fd` as mutably. /// /// If the `fd` has been closed, then skips applying `f` and returns `None`. - pub(crate) fn with_entry_mut(&self, fd: &TypedFd, f: F) -> Option + #[expect( + clippy::missing_panics_doc, + reason = "panics impossible due to type invariants" + )] + pub fn with_entry_mut(&self, fd: &TypedFd, f: F) -> Option where Subsystem: FdEnabledSubsystem, F: FnOnce(&mut Subsystem::Entry) -> R, @@ -312,6 +325,20 @@ impl Descriptors { Some(f(entry.as_subsystem_mut::())) } + /// Obtain a handle to the underlying entry for the `fd`. + /// + /// Similar to [`Self::with_entry`], except it does not require maintaining access to the table. + pub fn entry_handle( + &self, + fd: &TypedFd, + ) -> Option> { + // Since the typed FD should not have been created unless we had the correct subsystem in + // the first place, none of this should panic---if it does, someone has done a bad cast + // somewhere. + let entry = self.entries[fd.x.as_usize()?].as_ref()?; + Some(EntryHandle(Arc::clone(&entry.x), PhantomData)) + } + /// Use the entry at `internal_fd` as mutably. /// /// NOTE: Ideally, prefer using [`Self::with_entry_mut`] instead of this, since it provides a @@ -489,6 +516,24 @@ impl Descriptors { } } +/// A handle to a descriptor entry (via [`Descriptors::entry_handle`]) that can be used without +/// maintaining access to the descriptor table itself. +pub struct EntryHandle( + Arc>, + PhantomData, +); +impl + EntryHandle +{ + pub fn with_entry(&self, f: impl FnOnce(&Subsystem::Entry) -> R) -> R { + f(self.0.read().as_subsystem::()) + } + + pub fn with_entry_mut(&self, f: impl FnOnce(&mut Subsystem::Entry) -> R) -> R { + f(self.0.write().as_subsystem_mut::()) + } +} + /// Result of a [`Descriptors::close_and_duplicate_if_shared`] operation pub(crate) enum CloseResult { /// The FD was the last reference and has been closed, returning the entry @@ -621,6 +666,14 @@ impl RawDescriptorStorage { drop(underlying); Ok(ret) } + + /// Returns an iterator over raw integer indices that are currently alive (i.e., occupied). + pub fn iter_alive(&self) -> impl Iterator + '_ { + self.stored_fds + .iter() + .enumerate() + .filter_map(|(i, slot)| slot.as_ref().map(|_| i)) + } } macro_rules! multi_subsystem_generic { @@ -693,14 +746,13 @@ impl RawDescriptorStorage { multi_subsystem_generic! {invoke_matching_subsystem_4, typed_fd_at_raw_4, f1 S1, f2 S2, f3 S3, f4 S4} } -/// LiteBox subsystems that support having file descriptors. +/// A LiteBox subsystem that support having file descriptors. pub trait FdEnabledSubsystem: Sized { - #[doc(hidden)] + /// The per-FD entry type stored in the descriptor table for this subsystem type Entry: FdEnabledSubsystemEntry + 'static; } -/// Entries for a specific [`FdEnabledSubsystem`] -#[doc(hidden)] +/// A per-FD entry stored in the descriptor table for a specific [`FdEnabledSubsystem`] pub trait FdEnabledSubsystemEntry: Send + Sync + core::any::Any {} /// Possible errors from [`RawDescriptorStorage::fd_from_raw_integer`] and @@ -881,7 +933,6 @@ macro_rules! enable_fds_for_subsystem { $entry:ty; $(-> $fd:ident $(<$($fd_param:ident),*>)?;)? ) => { - #[allow(unused, reason = "NOTE(jayb): remove this lint before merging the PR")] #[doc(hidden)] // This wrapper type exists just to make sure `$entry` itself is not public, but we can // still satisfy requirements for `FdEnabledSubsystem`. diff --git a/litebox_shim_linux/src/lib.rs b/litebox_shim_linux/src/lib.rs index cd2fa1420..94904883b 100644 --- a/litebox_shim_linux/src/lib.rs +++ b/litebox_shim_linux/src/lib.rs @@ -48,6 +48,8 @@ pub mod syscalls; pub mod transport; mod wait; +use crate::syscalls::file::get_file_descriptor_flags; + pub type DefaultFS = LinuxFS; pub(crate) type LinuxFS = litebox::fs::layered::FileSystem< @@ -230,7 +232,9 @@ impl LinuxShim { egid, } = task; - let files = Arc::new(syscalls::file::FilesState::new(fs)); + let files = syscalls::file::FilesState::new(fs); + files.set_max_fd(syscalls::process::RLIMIT_NOFILE_CUR - 1); + let files = Arc::new(files); files.initialize_stdio_in_shared_descriptors_table(&self.0); let entrypoints = crate::LinuxShimEntrypoints { @@ -373,180 +377,58 @@ impl syscalls::file::FilesState { type ConstPtr = ::RawConstPointer; type MutPtr = ::RawMutPointer; -struct Descriptors { - descriptors: Vec>>, -} - -impl Descriptors { - fn new() -> Self { - Self { - descriptors: vec![ - Some(Descriptor::LiteBoxRawFd(0)), - Some(Descriptor::LiteBoxRawFd(1)), - Some(Descriptor::LiteBoxRawFd(2)), - ], - } - } - /// Inserts a descriptor at the first available file descriptor number, - /// respecting the RLIMIT_NOFILE limit for the task. - /// - /// Returns the assigned file descriptor number, or the descriptor back on failure - /// if the limit is exceeded. - fn insert( - &mut self, - task: &Task, - descriptor: Descriptor, - ) -> Result> { - self.insert_in_range( - descriptor, - 0, - task.process() - .limits - .get_rlimit_cur(litebox_common_linux::RlimitResource::NOFILE), - ) - } - /// Inserts a descriptor at the first available slot within the specified range [min_idx, max_idx). - /// - /// Automatically grows the descriptor table if needed. Returns the assigned file descriptor number, - /// or the descriptor back if no slot is found within the limit. - fn insert_in_range( - &mut self, - descriptor: Descriptor, - min_idx: usize, - max_idx: usize, - ) -> Result> { - let idx = self - .descriptors - .iter() - .skip(min_idx) - .position(Option::is_none) - .unwrap_or_else(|| { - self.descriptors.push(None); - self.descriptors.len() - 1 - }); - if idx >= max_idx { - return Err(descriptor); - } - let old = self.descriptors[idx].replace(descriptor); - assert!(old.is_none()); - Ok(u32::try_from(idx).unwrap()) - } - /// Attempts to insert a descriptor at a specific file descriptor number, - /// respecting the RLIMIT_NOFILE limit for the task. - /// - /// Returns the previous descriptor at that slot (if any), or the new descriptor back on failure - /// if the index exceeds the limit. - fn insert_at( - &mut self, - task: &Task, - descriptor: Descriptor, - idx: usize, - ) -> Result>, Descriptor> { - if idx - >= task - .process() - .limits - .get_rlimit_cur(litebox_common_linux::RlimitResource::NOFILE) - { - return Err(descriptor); - } - if idx >= self.descriptors.len() { - self.descriptors.resize_with(idx + 1, Default::default); - } - Ok(self - .descriptors - .get_mut(idx) - .and_then(|v| v.replace(descriptor))) - } - fn remove(&mut self, fd: u32) -> Option> { - let fd = fd as usize; - self.descriptors.get_mut(fd)?.take() - } - fn get_fd(&self, fd: u32) -> Option<&Descriptor> { - self.descriptors.get(fd as usize)?.as_ref() - } - - fn len(&self) -> usize { - self.descriptors.len() - } -} - impl Task { fn close_on_exec(&self) { let files = self.files.borrow(); - files - .file_descriptors - .write() - .descriptors - .iter_mut() - .for_each(|slot| { - if let Some(desc) = slot.take() - && let Ok(flags) = desc.get_file_descriptor_flags(&self.global, &files) - { - if flags.contains(litebox_common_linux::FileDescriptorFlags::FD_CLOEXEC) { - let _ = self.do_close(desc); - } else { - *slot = Some(desc); - } - } - }); - } -} - -enum Descriptor { - LiteBoxRawFd(usize), - Eventfd { - file: alloc::sync::Arc>, - close_on_exec: core::sync::atomic::AtomicBool, - }, - Epoll { - file: alloc::sync::Arc>, - close_on_exec: core::sync::atomic::AtomicBool, - }, - Unix { - file: alloc::sync::Arc>, - close_on_exec: core::sync::atomic::AtomicBool, - }, -} - -/// A strongly-typed FD. -/// -/// This enum only ever stores `Arc>`s, and should not store any additional data -/// alongside them (i.e., it is a trivial tagged union across the subsystems being used). -enum StrongFd { - FileSystem(Arc>), - Network(Arc>>), - Pipes(Arc>>), -} -impl StrongFd { - fn from_raw(files: &syscalls::file::FilesState, fd: usize) -> Result { - let rds = files.raw_descriptor_store.read(); - if let Ok(fd) = rds.fd_from_raw_integer::(fd) { - return Ok(StrongFd::FileSystem(fd)); - } - if let Ok(fd) = rds.fd_from_raw_integer::>(fd) { - return Ok(StrongFd::Network(fd)); - } - if let Ok(fd) = rds.fd_from_raw_integer::>(fd) { - return Ok(StrongFd::Pipes(fd)); + let alive_fds: Vec = files.raw_descriptor_store.read().iter_alive().collect(); + for raw_fd in alive_fds { + if let Ok(flags) = get_file_descriptor_flags(raw_fd, &self.global, &files) + && flags.contains(litebox_common_linux::FileDescriptorFlags::FD_CLOEXEC) + { + let _ = self.do_close(raw_fd); + } } - Err(Errno::EBADF) } } impl syscalls::file::FilesState { + #[expect(clippy::too_many_arguments)] pub(crate) fn run_on_raw_fd( &self, fd: usize, fs: impl FnOnce(&TypedFd) -> R, net: impl FnOnce(&TypedFd>) -> R, pipes: impl FnOnce(&TypedFd>) -> R, + eventfd: impl FnOnce(&TypedFd) -> R, + epoll: impl FnOnce(&TypedFd>) -> R, + unix: impl FnOnce(&TypedFd>) -> R, ) -> Result { - match StrongFd::::from_raw(self, fd)? { - StrongFd::FileSystem(fd) => Ok(fs(&fd)), - StrongFd::Network(fd) => Ok(net(&fd)), - StrongFd::Pipes(fd) => Ok(pipes(&fd)), + let rds = self.raw_descriptor_store.read(); + if let Ok(fd) = rds.fd_from_raw_integer(fd) { + drop(rds); + return Ok(fs(&fd)); + } + if let Ok(fd) = rds.fd_from_raw_integer(fd) { + drop(rds); + return Ok(net(&fd)); } + if let Ok(fd) = rds.fd_from_raw_integer(fd) { + drop(rds); + return Ok(pipes(&fd)); + } + if let Ok(fd) = rds.fd_from_raw_integer(fd) { + drop(rds); + return Ok(eventfd(&fd)); + } + if let Ok(fd) = rds.fd_from_raw_integer(fd) { + drop(rds); + return Ok(epoll(&fd)); + } + if let Ok(fd) = rds.fd_from_raw_integer(fd) { + drop(rds); + return Ok(unix(&fd)); + } + Err(Errno::EBADF) } } diff --git a/litebox_shim_linux/src/syscalls/epoll.rs b/litebox_shim_linux/src/syscalls/epoll.rs index 74e7c815a..71755bd39 100644 --- a/litebox_shim_linux/src/syscalls/epoll.rs +++ b/litebox_shim_linux/src/syscalls/epoll.rs @@ -15,13 +15,20 @@ use litebox::{ polling::{Pollee, TryOpError}, wait::{WaitContext, WaitError, Waker}, }, + fd::{FdEnabledSubsystem, FdEnabledSubsystemEntry, TypedFd}, utils::ReinterpretUnsignedExt, }; use litebox_common_linux::{EpollEvent, EpollOp, errno::Errno}; use litebox_platform_multiplex::Platform; use super::file::FilesState; -use crate::{Descriptor, GlobalState, ShimFS, StrongFd}; +use crate::{GlobalState, ShimFS}; + +pub(crate) struct EpollSubsystem(core::marker::PhantomData); +impl FdEnabledSubsystem for EpollSubsystem { + type Entry = EpollFile; +} +impl FdEnabledSubsystemEntry for EpollFile {} bitflags::bitflags! { /// Linux's epoll flags. @@ -35,36 +42,46 @@ bitflags::bitflags! { } pub(crate) enum EpollDescriptor { - Eventfd(Arc>), - Epoll(Arc>), + Eventfd(Arc>), + Epoll(Arc>>), File(Arc>), Socket(Arc), Pipe(Arc>), - Unix(Arc>), + Unix(Arc>>), } impl EpollDescriptor { - pub fn try_from(files: &FilesState, desc: &Descriptor) -> Result { - match desc { - Descriptor::LiteBoxRawFd(raw_fd) => match StrongFd::::from_raw(files, *raw_fd)? { - StrongFd::FileSystem(fd) => Ok(EpollDescriptor::File(fd)), - StrongFd::Network(fd) => Ok(EpollDescriptor::Socket(fd)), - StrongFd::Pipes(fd) => Ok(EpollDescriptor::Pipe(fd)), - }, - Descriptor::Eventfd { file, .. } => Ok(EpollDescriptor::Eventfd(file.clone())), - Descriptor::Epoll { file, .. } => Ok(EpollDescriptor::Epoll(file.clone())), - Descriptor::Unix { file, .. } => Ok(EpollDescriptor::Unix(file.clone())), + pub fn try_from(files: &FilesState, raw_fd: usize) -> Result { + let rds = files.raw_descriptor_store.read(); + if let Ok(fd) = rds.fd_from_raw_integer::(raw_fd) { + return Ok(EpollDescriptor::File(fd)); + } + if let Ok(fd) = rds.fd_from_raw_integer::>(raw_fd) { + return Ok(EpollDescriptor::Socket(fd)); + } + if let Ok(fd) = rds.fd_from_raw_integer::>(raw_fd) { + return Ok(EpollDescriptor::Pipe(fd)); } + if let Ok(fd) = rds.fd_from_raw_integer::(raw_fd) { + return Ok(EpollDescriptor::Eventfd(fd)); + } + if let Ok(fd) = rds.fd_from_raw_integer::>(raw_fd) { + return Ok(EpollDescriptor::Epoll(fd)); + } + if let Ok(fd) = rds.fd_from_raw_integer::>(raw_fd) { + return Ok(EpollDescriptor::Unix(fd)); + } + Err(Errno::EBADF) } } enum DescriptorRef { - Eventfd(Weak>), - Epoll(Weak>), + Eventfd(Weak>), + Epoll(Weak>>), File(Weak>), Socket(Weak), Pipe(Weak>), - Unix(Weak>), + Unix(Weak>>), } impl DescriptorRef { @@ -106,12 +123,15 @@ impl EpollDescriptor { } iop.check_io_events() & (mask | Events::ALWAYS_POLLED) }; - let io_pollable: &dyn IOPollable = match self { - EpollDescriptor::Eventfd(file) => file, + match self { + EpollDescriptor::Eventfd(fd) => { + let handle = global.litebox.descriptor_table().entry_handle(fd)?; + Some(handle.with_entry(|entry| poll(entry))) + } EpollDescriptor::Epoll(_file) => unimplemented!(), EpollDescriptor::File(_file) => { // TODO: probably polling on stdio files, return dummy events for now - return Some(Events::OUT & mask); + Some(Events::OUT & mask) } EpollDescriptor::Socket(fd) => { let proxy = match global.get_proxy(fd) { @@ -121,14 +141,14 @@ impl EpollDescriptor { return None; } }; - return Some(poll(&proxy)); + Some(poll(&proxy)) } - EpollDescriptor::Pipe(fd) => { - return global.pipes.with_iopollable(fd, poll).ok(); + EpollDescriptor::Pipe(fd) => global.pipes.with_iopollable(fd, poll).ok(), + EpollDescriptor::Unix(fd) => { + let handle = global.litebox.descriptor_table().entry_handle(fd)?; + Some(handle.with_entry(|entry| poll(entry))) } - EpollDescriptor::Unix(file) => file, - }; - Some(poll(io_pollable)) + } } } @@ -507,12 +527,11 @@ impl PollSet { waker: Option<&Waker>, ) -> bool { let mut is_ready = false; - let fds = files.file_descriptors.read(); for entry in &mut self.entries { entry.revents = if entry.fd < 0 { continue; - } else if let Some(file) = fds.get_fd(entry.fd.reinterpret_as_unsigned()) - && let Ok(poll_descriptor) = EpollDescriptor::try_from(files, file) + } else if let Ok(poll_descriptor) = + EpollDescriptor::try_from(files, entry.fd.reinterpret_as_unsigned() as usize) { let observer = if !is_ready && let Some(waker) = waker { // TODO: a separate allocation is necessary here @@ -598,7 +617,6 @@ mod test { use alloc::sync::Arc; use litebox::event::Events; use litebox::event::wait::WaitState; - use litebox::utils::ReinterpretUnsignedExt as _; use litebox_common_linux::{EfdFlags, EpollEvent}; use litebox_platform_multiplex::platform; @@ -617,15 +635,22 @@ mod test { #[test] fn test_epoll_with_eventfd() { let (task, epoll) = setup_epoll(); - let eventfd = Arc::new(crate::syscalls::eventfd::EventFile::new( - 0, - EfdFlags::CLOEXEC, - )); + let eventfd = crate::syscalls::eventfd::EventFile::new(0, EfdFlags::CLOEXEC); + let typed = task + .global + .litebox + .descriptor_table_mut() + .insert::(eventfd); + let files = Arc::new(FilesState::new(task.files.borrow().fs.clone())); + let Ok(raw_fd) = files.insert_raw_fd(typed) else { + unreachable!() + }; + let descriptor = super::EpollDescriptor::try_from(&files, raw_fd).unwrap(); epoll .add_interest( &task.global, 10, - &super::EpollDescriptor::Eventfd(eventfd.clone()), + &descriptor, EpollEvent { events: Events::IN.bits(), data: 0, @@ -634,12 +659,23 @@ mod test { .unwrap(); // spawn a thread to write to the eventfd - let copied_eventfd = eventfd.clone(); - std::thread::spawn(move || { - copied_eventfd - .write(&WaitState::new(platform()).context(), 1) - .unwrap(); - }); + { + let global = task.global.clone(); + let files = Arc::clone(&files); + std::thread::spawn(move || { + let typed = files + .raw_descriptor_store + .read() + .fd_from_raw_integer::(raw_fd) + .unwrap(); + let _ = global + .litebox + .descriptor_table() + .with_entry(&typed, |entry| { + entry.write(&WaitState::new(platform()).context(), 1) + }); + }); + } epoll .wait(&task.global, &WaitState::new(platform()).context(), 1024) .unwrap(); @@ -694,24 +730,19 @@ mod test { let task = crate::syscalls::tests::init_platform(None); let mut set = super::PollSet::with_capacity(0); - let eventfd = Arc::new(crate::syscalls::eventfd::EventFile::new( - 0, - EfdFlags::empty(), - )); - - let fd = 10i32; - let descriptor = crate::Descriptor::Eventfd { - file: eventfd.clone(), - close_on_exec: core::sync::atomic::AtomicBool::new(false), - }; + let eventfd = crate::syscalls::eventfd::EventFile::new(0, EfdFlags::empty()); + let typed = task + .global + .litebox + .descriptor_table_mut() + .insert::(eventfd); let no_fds = FilesState::new(task.files.borrow().fs.clone()); - let fds = FilesState::new(task.files.borrow().fs.clone()); - let _ = fds.file_descriptors.write().insert_at( - &task, - descriptor, - fd.reinterpret_as_unsigned() as usize, - ); + let fds = Arc::new(FilesState::new(task.files.borrow().fs.clone())); + let Ok(raw_fd) = fds.insert_raw_fd(typed) else { + unreachable!() + }; + let fd = i32::try_from(raw_fd).unwrap(); set.add_fd(fd, Events::IN); let revents = |set: &super::PollSet| { @@ -724,14 +755,36 @@ mod test { .unwrap(); assert_eq!(revents(&set), Events::NVAL); - eventfd - .write(&WaitState::new(platform()).context(), 1) - .unwrap(); + { + let typed = fds + .raw_descriptor_store + .read() + .fd_from_raw_integer::(raw_fd) + .unwrap(); + task.global + .litebox + .descriptor_table() + .with_entry(&typed, |entry| { + entry.write(&WaitState::new(platform()).context(), 1) + }); + } set.wait(&task.global, &WaitState::new(platform()).context(), &fds) .unwrap(); assert_eq!(revents(&set), Events::IN); - eventfd.read(&WaitState::new(platform()).context()).unwrap(); + { + let typed = fds + .raw_descriptor_store + .read() + .fd_from_raw_integer::(raw_fd) + .unwrap(); + task.global + .litebox + .descriptor_table() + .with_entry(&typed, |entry| { + entry.read(&WaitState::new(platform()).context()) + }); + } set.wait( &task.global, &WaitState::new(platform()) @@ -743,11 +796,21 @@ mod test { assert!(revents(&set).is_empty()); // spawn a thread to write to the eventfd - let copied_eventfd = eventfd.clone(); + let global = task.global.clone(); + let fds_for_thread = Arc::clone(&fds); std::thread::spawn(move || { - copied_eventfd - .write(&WaitState::new(platform()).context(), 1) + let typed = fds_for_thread + .raw_descriptor_store + .read() + .fd_from_raw_integer::(raw_fd) + .unwrap(); + let handle = global + .litebox + .descriptor_table() + .entry_handle(&typed) .unwrap(); + let _ = + handle.with_entry(|entry| entry.write(&WaitState::new(platform()).context(), 1)); }); set.wait(&task.global, &WaitState::new(platform()).context(), &fds) diff --git a/litebox_shim_linux/src/syscalls/eventfd.rs b/litebox_shim_linux/src/syscalls/eventfd.rs index a66c5991f..d73d3877f 100644 --- a/litebox_shim_linux/src/syscalls/eventfd.rs +++ b/litebox_shim_linux/src/syscalls/eventfd.rs @@ -12,11 +12,19 @@ use litebox::{ polling::{Pollee, TryOpError}, wait::WaitContext, }, + fd::{FdEnabledSubsystem, FdEnabledSubsystemEntry}, fs::OFlags, platform::TimeProvider, sync::RawSyncPrimitivesProvider, }; use litebox_common_linux::{EfdFlags, errno::Errno}; +use litebox_platform_multiplex::Platform; + +pub(crate) struct EventfdSubsystem; +impl FdEnabledSubsystem for EventfdSubsystem { + type Entry = EventFile; +} +impl FdEnabledSubsystemEntry for EventFile {} pub(crate) struct EventFile { counter: litebox::sync::Mutex, diff --git a/litebox_shim_linux/src/syscalls/file.rs b/litebox_shim_linux/src/syscalls/file.rs index e90f5a464..922a539e7 100644 --- a/litebox_shim_linux/src/syscalls/file.rs +++ b/litebox_shim_linux/src/syscalls/file.rs @@ -22,8 +22,8 @@ use litebox_common_linux::{ }; use litebox_platform_multiplex::Platform; -use crate::{ConstPtr, Descriptor, Descriptors, GlobalState, MutPtr, ShimFS, Task}; -use core::sync::atomic::Ordering; +use crate::{ConstPtr, GlobalState, MutPtr, ShimFS, Task}; +use core::sync::atomic::{AtomicUsize, Ordering}; /// Task state shared by `CLONE_FS`. pub(crate) struct FsState { @@ -60,20 +60,43 @@ impl FsState { pub(crate) struct FilesState { /// The filesystem implementation, shared across tasks that share file system. pub(crate) fs: alloc::sync::Arc, - pub file_descriptors: litebox::sync::RwLock>, - pub raw_descriptor_store: litebox::sync::RwLock, + pub(crate) raw_descriptor_store: + litebox::sync::RwLock, + max_fd: AtomicUsize, } impl FilesState { - pub fn new(fs: alloc::sync::Arc) -> Self { + pub(crate) fn new(fs: alloc::sync::Arc) -> Self { Self { fs, - file_descriptors: litebox::sync::RwLock::new(Descriptors::new()), raw_descriptor_store: litebox::sync::RwLock::new( litebox::fd::RawDescriptorStorage::new(), ), + max_fd: AtomicUsize::new(usize::MAX), } } + + pub(crate) fn set_max_fd(&self, max_fd: usize) { + self.max_fd.store(max_fd, Ordering::Relaxed); + } + + // Returns Ok(raw_fd) if it fits within the max limits already set up; otherwise returns the + // Err(typed_fd) + pub(crate) fn insert_raw_fd( + &self, + typed_fd: TypedFd, + ) -> Result> { + // XXX(jb): should we try to somehow enforce that it is set at the smallest + // available/unassigned FD number? + let mut rds = self.raw_descriptor_store.write(); + let raw_fd = rds.fd_into_raw_integer(typed_fd); + let max_fd = self.max_fd.load(Ordering::Relaxed); + if raw_fd > max_fd { + let orig = rds.fd_consume_raw_integer::(raw_fd).unwrap(); + return Err(alloc::sync::Arc::into_inner(orig).unwrap()); + } + Ok(raw_fd) + } } /// Path in the file system @@ -185,12 +208,11 @@ impl Task { }; } let files = self.files.borrow(); - let raw_fd = files.raw_descriptor_store.write().fd_into_raw_integer(file); - files - .file_descriptors - .write() - .insert(self, Descriptor::LiteBoxRawFd(raw_fd)) - .map_err(|desc| self.do_close(desc).err().unwrap_or(Errno::EMFILE)) + let raw_fd = files.insert_raw_fd(file).map_err(|file| { + files.fs.close(&file).unwrap(); + Errno::EMFILE + })?; + Ok(u32::try_from(raw_fd).unwrap()) } /// Handle syscall `openat` @@ -219,22 +241,21 @@ impl Task { /// Handle syscall `ftruncate` pub(crate) fn sys_ftruncate(&self, fd: i32, length: usize) -> Result<(), Errno> { - let Ok(fd) = u32::try_from(fd) else { + let Ok(raw_fd) = u32::try_from(fd).and_then(usize::try_from) else { return Err(Errno::EBADF); }; let files = self.files.borrow(); - let file_table = files.file_descriptors.read(); - let desc = file_table.get_fd(fd).ok_or(Errno::EBADF)?; - match desc { - Descriptor::LiteBoxRawFd(raw_fd) => files.run_on_raw_fd( - *raw_fd, + files + .run_on_raw_fd( + raw_fd, |fd| files.fs.truncate(fd, length, false).map_err(Errno::from), |_fd| todo!("net"), |_fd| todo!("pipes"), - ), - _ => Err(Errno::EINVAL), - } - .flatten() + |_fd| Err(Errno::EINVAL), + |_fd| Err(Errno::EINVAL), + |_fd| Err(Errno::EINVAL), + ) + .flatten() } /// Handle syscall `unlinkat` @@ -268,64 +289,73 @@ impl Task { /// `offset` is an optional offset to read from. If `None`, it will read from the current file position. /// If `Some`, it will read from the specified offset without changing the current file position. pub fn sys_read(&self, fd: i32, buf: &mut [u8], offset: Option) -> Result { - let Ok(fd) = u32::try_from(fd) else { + let Ok(raw_fd) = u32::try_from(fd).and_then(usize::try_from) else { return Err(Errno::EBADF); }; let files = self.files.borrow(); - let file_table = files.file_descriptors.read(); - let desc = file_table.get_fd(fd).ok_or(Errno::EBADF)?; - match desc { - Descriptor::LiteBoxRawFd(raw_fd) => { - let raw_fd = *raw_fd; - drop(file_table); - // We need to do this cell dance because otherwise Rust can't recognize that the two - // closures are mutually exclusive. - let buf: core::cell::RefCell<&mut [u8]> = core::cell::RefCell::new(buf); - files - .run_on_raw_fd( - raw_fd, - |fd| { - files - .fs - .read(fd, &mut buf.borrow_mut(), offset) - .map_err(Errno::from) - }, - |fd| { - self.global.receive( - &self.wait_cx(), - fd, - &mut buf.borrow_mut(), - litebox_common_linux::ReceiveFlags::empty(), - None, - ) - }, - |fd| { - self.global - .pipes - .read(&self.wait_cx(), fd, &mut buf.borrow_mut()) - .map_err(Errno::from) - }, + // We need to do this cell dance because otherwise Rust can't recognize that the two + // closures are mutually exclusive. + let buf: core::cell::RefCell<&mut [u8]> = core::cell::RefCell::new(buf); + files + .run_on_raw_fd( + raw_fd, + |fd| { + files + .fs + .read(fd, &mut buf.borrow_mut(), offset) + .map_err(Errno::from) + }, + |fd| { + self.global.receive( + &self.wait_cx(), + fd, + &mut buf.borrow_mut(), + litebox_common_linux::ReceiveFlags::empty(), + None, ) - .flatten() - } - Descriptor::Epoll { .. } => Err(Errno::EINVAL), - Descriptor::Eventfd { file, .. } => { - let file = file.clone(); - drop(file_table); - if buf.len() < size_of::() { - return Err(Errno::EINVAL); - } - let value = file.read(&self.wait_cx())?; - buf[..size_of::()].copy_from_slice(&value.to_le_bytes()); - Ok(size_of::()) - } - Descriptor::Unix { file, .. } => file.recvfrom( - &self.wait_cx(), - buf, - litebox_common_linux::ReceiveFlags::empty(), - None, - ), - } + }, + |fd| { + self.global + .pipes + .read(&self.wait_cx(), fd, &mut buf.borrow_mut()) + .map_err(Errno::from) + }, + |fd| { + let handle = self + .global + .litebox + .descriptor_table() + .entry_handle(fd) + .ok_or(Errno::EBADF)?; + handle.with_entry(|file| { + let buf = &mut buf.borrow_mut(); + if buf.len() < size_of::() { + return Err(Errno::EINVAL); + } + let value = file.read(&self.wait_cx())?; + buf[..size_of::()].copy_from_slice(&value.to_le_bytes()); + Ok(size_of::()) + }) + }, + |_fd| Err(Errno::EINVAL), + |fd| { + let handle = self + .global + .litebox + .descriptor_table() + .entry_handle(fd) + .ok_or(Errno::EBADF)?; + handle.with_entry(|file| { + file.recvfrom( + &self.wait_cx(), + &mut buf.borrow_mut(), + litebox_common_linux::ReceiveFlags::empty(), + None, + ) + }) + }, + ) + .flatten() } /// Handle syscall `write` @@ -333,53 +363,59 @@ impl Task { /// `offset` is an optional offset to write to. If `None`, it will write to the current file position. /// If `Some`, it will write to the specified offset without changing the current file position. pub fn sys_write(&self, fd: i32, buf: &[u8], offset: Option) -> Result { - let Ok(fd) = u32::try_from(fd) else { + let Ok(raw_fd) = u32::try_from(fd).and_then(usize::try_from) else { return Err(Errno::EBADF); }; let files = self.files.borrow(); - let file_table = files.file_descriptors.read(); - let desc = file_table.get_fd(fd).ok_or(Errno::EBADF)?; - let res = match desc { - Descriptor::LiteBoxRawFd(raw_fd) => { - let raw_fd = *raw_fd; - drop(file_table); - files - .run_on_raw_fd( - raw_fd, - |fd| files.fs.write(fd, buf, offset).map_err(Errno::from), - |fd| { - self.global.sendto( - &self.wait_cx(), - fd, - buf, - litebox_common_linux::SendFlags::empty(), - None, - ) - }, - |fd| { - self.global - .pipes - .write(&self.wait_cx(), fd, buf) - .map_err(Errno::from) - }, + let res = files + .run_on_raw_fd( + raw_fd, + |fd| files.fs.write(fd, buf, offset).map_err(Errno::from), + |fd| { + self.global.sendto( + &self.wait_cx(), + fd, + buf, + litebox_common_linux::SendFlags::empty(), + None, ) - .flatten() - } - Descriptor::Epoll { .. } => Err(Errno::EINVAL), - Descriptor::Eventfd { file, .. } => { - let file = file.clone(); - drop(file_table); - let value: u64 = u64::from_le_bytes( - buf[..size_of::()] - .try_into() - .map_err(|_| Errno::EINVAL)?, - ); - file.write(&self.wait_cx(), value) - } - Descriptor::Unix { file, .. } => { - file.sendto(self, buf, litebox_common_linux::SendFlags::empty(), None) - } - }; + }, + |fd| { + self.global + .pipes + .write(&self.wait_cx(), fd, buf) + .map_err(Errno::from) + }, + |fd| { + let handle = self + .global + .litebox + .descriptor_table() + .entry_handle(fd) + .ok_or(Errno::EBADF)?; + handle.with_entry(|file| { + let value: u64 = u64::from_le_bytes( + buf[..size_of::()] + .try_into() + .map_err(|_| Errno::EINVAL)?, + ); + file.write(&self.wait_cx(), value) + }) + }, + |_fd| Err(Errno::EINVAL), + |fd| { + let handle = self + .global + .litebox + .descriptor_table() + .entry_handle(fd) + .ok_or(Errno::EBADF)?; + handle.with_entry(|file| { + file.sendto(self, buf, litebox_common_linux::SendFlags::empty(), None) + }) + }, + ) + .flatten(); if let Err(Errno::EPIPE) = res { unimplemented!("send SIGPIPE to the current task"); } @@ -415,25 +451,21 @@ pub(crate) fn try_into_whence(value: i16) -> Result { impl Task { /// Handle syscall `lseek` pub fn sys_lseek(&self, fd: i32, offset: isize, whence: SeekWhence) -> Result { - let Ok(fd) = u32::try_from(fd) else { + let Ok(raw_fd) = u32::try_from(fd).and_then(usize::try_from) else { return Err(Errno::EBADF); }; let files = self.files.borrow(); - let file_table = files.file_descriptors.read(); - let desc = file_table.get_fd(fd).ok_or(Errno::EBADF)?; - match desc { - Descriptor::LiteBoxRawFd(raw_fd) => files - .run_on_raw_fd( - *raw_fd, - |fd| files.fs.seek(fd, offset, whence).map_err(Errno::from), - |_| Err(Errno::ESPIPE), - |_| Err(Errno::ESPIPE), - ) - .flatten(), - Descriptor::Epoll { .. } | Descriptor::Eventfd { .. } | Descriptor::Unix { .. } => { - Err(Errno::ESPIPE) - } - } + files + .run_on_raw_fd( + raw_fd, + |fd| files.fs.seek(fd, offset, whence).map_err(Errno::from), + |_| Err(Errno::ESPIPE), + |_| Err(Errno::ESPIPE), + |_| Err(Errno::ESPIPE), + |_| Err(Errno::ESPIPE), + |_| Err(Errno::ESPIPE), + ) + .flatten() } /// Handle syscall `mkdir` @@ -447,64 +479,67 @@ impl Task { .map_err(Errno::from) } - pub(crate) fn do_close(&self, desc: Descriptor) -> Result<(), Errno> { + pub(crate) fn do_close(&self, raw_fd: usize) -> Result<(), Errno> { let files = self.files.borrow(); - match desc { - Descriptor::LiteBoxRawFd(raw_fd) => { - let mut rds = files.raw_descriptor_store.write(); - match rds.fd_consume_raw_integer(raw_fd) { - Ok(fd) => { - drop(rds); - files.fs.close(&fd).map_err(Errno::from) - } - Err(litebox::fd::ErrRawIntFd::NotFound) => Err(Errno::EBADF), - Err(litebox::fd::ErrRawIntFd::InvalidSubsystem) => { - match rds - .fd_consume_raw_integer::>(raw_fd) - { - Ok(fd) => { - drop(rds); - self.global.close_socket(&self.wait_cx(), fd) - }, - Err(litebox::fd::ErrRawIntFd::NotFound) => Err(Errno::EBADF), - Err(litebox::fd::ErrRawIntFd::InvalidSubsystem) => { - match rds.fd_consume_raw_integer::>(raw_fd) { - Ok(fd) => { - drop(rds); - self.global.pipes.close(&fd).map_err(Errno::from) - } - Err(litebox::fd::ErrRawIntFd::NotFound) => Err(Errno::EBADF), - Err(litebox::fd::ErrRawIntFd::InvalidSubsystem) => { - // We currently only have fs, net and pipes FDs at the moment, - // if/when we add more, we need to expand this out too. - unreachable!() - } - } - } - } - } - } + let mut rds = files.raw_descriptor_store.write(); + match rds.fd_consume_raw_integer(raw_fd) { + Ok(fd) => { + drop(rds); + return files.fs.close(&fd).map_err(Errno::from); } - Descriptor::Eventfd { .. } | Descriptor::Epoll { .. } | Descriptor::Unix { .. } => { - Ok(()) + Err(litebox::fd::ErrRawIntFd::NotFound) => { + return Err(Errno::EBADF); } + Err(litebox::fd::ErrRawIntFd::InvalidSubsystem) => { + // fallthrough + } + } + if let Ok(fd) = rds.fd_consume_raw_integer(raw_fd) { + drop(rds); + return self.global.close_socket(&self.wait_cx(), fd); + } + if let Ok(fd) = rds.fd_consume_raw_integer(raw_fd) { + drop(rds); + return self.global.pipes.close(&fd).map_err(Errno::from); + } + if let Ok(fd) = rds.fd_consume_raw_integer::(raw_fd) { + drop(rds); + let entry = { + let mut dt = self.global.litebox.descriptor_table_mut(); + dt.remove(&fd) + }; + drop(entry); + return Ok(()); + } + if let Ok(fd) = rds.fd_consume_raw_integer::>(raw_fd) { + drop(rds); + let entry = { + let mut dt = self.global.litebox.descriptor_table_mut(); + dt.remove(&fd) + }; + drop(entry); + return Ok(()); } + if let Ok(fd) = rds.fd_consume_raw_integer::>(raw_fd) { + drop(rds); + let entry = { + let mut dt = self.global.litebox.descriptor_table_mut(); + dt.remove(&fd) + }; + drop(entry); + return Ok(()); + } + // All the above cases should cover all the known subsystems, and we've already + // early-handled the "raw FD not found" case. + unreachable!() } /// Handle syscall `close` pub(crate) fn sys_close(&self, fd: i32) -> Result<(), Errno> { - let Ok(fd) = u32::try_from(fd) else { + let Ok(raw_fd) = u32::try_from(fd).and_then(usize::try_from) else { return Err(Errno::EBADF); }; - let files = self.files.borrow(); - let mut file_table = files.file_descriptors.write(); - match file_table.remove(fd) { - Some(desc) => { - drop(file_table); // drop before potentially blocking `close` - self.do_close(desc) - } - None => Err(Errno::EBADF), - } + self.do_close(raw_fd) } /// Handle syscall `readv` @@ -514,13 +549,11 @@ impl Task { iovec: ConstPtr>>, iovcnt: usize, ) -> Result { - let Ok(fd) = u32::try_from(fd) else { + let Ok(raw_fd) = u32::try_from(fd).and_then(usize::try_from) else { return Err(Errno::EBADF); }; let iovs: &[IoReadVec>] = &iovec.to_owned_slice(iovcnt).ok_or(Errno::EFAULT)?; let files = self.files.borrow(); - let locked_file_descriptors = files.file_descriptors.read(); - let desc = locked_file_descriptors.get_fd(fd).ok_or(Errno::EBADF)?; let mut total_read = 0; let mut kernel_buffer = vec![ 0u8; @@ -540,24 +573,22 @@ impl Task { // TODO: The data transfers performed by readv() and writev() are atomic: the data // written by writev() is written as a single block that is not intermingled with // output from writes in other processes - let size = match desc { - Descriptor::LiteBoxRawFd(raw_fd) => files - .run_on_raw_fd( - *raw_fd, - |fd| { - files - .fs - .read(fd, &mut kernel_buffer, None) - .map_err(Errno::from) - }, - |_fd| todo!("net"), - |_fd| todo!("pipes"), - ) - .flatten()?, - Descriptor::Epoll { .. } => return Err(Errno::EINVAL), - Descriptor::Eventfd { .. } => todo!(), - Descriptor::Unix { .. } => todo!(), - }; + let size = files + .run_on_raw_fd( + raw_fd, + |fd| { + files + .fs + .read(fd, &mut kernel_buffer, None) + .map_err(Errno::from) + }, + |_fd| todo!("net"), + |_fd| todo!("pipes"), + |_fd| todo!("eventfd"), + |_fd| Err(Errno::EINVAL), + |_fd| todo!("unix"), + ) + .flatten()?; iov.iov_base .copy_from_slice(0, &kernel_buffer[..size]) .ok_or(Errno::EFAULT)?; @@ -602,48 +633,40 @@ impl Task { iovec: ConstPtr>>, iovcnt: usize, ) -> Result { - let Ok(fd) = u32::try_from(fd) else { + let Ok(raw_fd) = u32::try_from(fd).and_then(usize::try_from) else { return Err(Errno::EBADF); }; let iovs: &[IoWriteVec>] = &iovec.to_owned_slice(iovcnt).ok_or(Errno::EFAULT)?; let files = self.files.borrow(); - let locked_file_descriptors = files.file_descriptors.read(); - let desc = locked_file_descriptors.get_fd(fd).ok_or(Errno::EBADF)?; // TODO: The data transfers performed by readv() and writev() are atomic: the data // written by writev() is written as a single block that is not intermingled with // output from writes in other processes - let res = match desc { - Descriptor::LiteBoxRawFd(raw_fd) => { - let raw_fd = *raw_fd; - drop(locked_file_descriptors); // drop before potentially blocking write - files - .run_on_raw_fd( - raw_fd, - |fd| { - write_to_iovec(iovs, |buf: &[u8]| { - files.fs.write(fd, buf, None).map_err(Errno::from) - }) - }, - |fd| { - write_to_iovec(iovs, |buf| { - self.global.sendto( - &self.wait_cx(), - fd, - buf, - litebox_common_linux::SendFlags::empty(), - None, - ) - }) - }, - |_fd| todo!("pipes"), - ) - .flatten() - } - Descriptor::Epoll { .. } => Err(Errno::EINVAL), - Descriptor::Eventfd { .. } => todo!(), - Descriptor::Unix { .. } => todo!(), - }; + let res = files + .run_on_raw_fd( + raw_fd, + |fd| { + write_to_iovec(iovs, |buf: &[u8]| { + files.fs.write(fd, buf, None).map_err(Errno::from) + }) + }, + |fd| { + write_to_iovec(iovs, |buf| { + self.global.sendto( + &self.wait_cx(), + fd, + buf, + litebox_common_linux::SendFlags::empty(), + None, + ) + }) + }, + |_fd| todo!("pipes"), + |_fd| todo!("eventfd"), + |_fd| Err(Errno::EINVAL), + |_fd| todo!("unix"), + ) + .flatten(); if let Err(Errno::EPIPE) = res { unimplemented!("send SIGPIPE to the current task"); } @@ -733,183 +756,171 @@ impl Task { } } -impl Descriptor { - fn stat(&self, task: &Task) -> Result { - let fstat = match self { - Descriptor::LiteBoxRawFd(raw_fd) => task - .files - .borrow() - .run_on_raw_fd( - *raw_fd, - |fd| { - task.files - .borrow() - .fs - .fd_file_status(fd) - .map(FileStat::from) - .map_err(Errno::from) - }, - |_fd| { - Ok(FileStat { - // TODO: give correct values - st_dev: 0, - st_ino: 0, - st_nlink: 1, - st_mode: (litebox_common_linux::InodeType::Socket as u32 - | (Mode::RWXU | Mode::RWXG | Mode::RWXO).bits()) - .truncate(), - st_uid: 0, - st_gid: 0, - st_rdev: 0, - st_size: 0, - st_blksize: 4096, - st_blocks: 0, - ..Default::default() - }) - }, - |fd| { - let half_pipe_type = task.global.pipes.half_pipe_type(fd)?; - let read_write_mode = match half_pipe_type { - litebox::pipes::HalfPipeType::SenderHalf => Mode::WUSR, - litebox::pipes::HalfPipeType::ReceiverHalf => Mode::RUSR, - }; - Ok(FileStat { - // TODO: give correct values - st_dev: 0, - st_ino: 0, - st_nlink: 1, - st_mode: (read_write_mode.bits() - | litebox_common_linux::InodeType::NamedPipe as u32) - .truncate(), - st_uid: 0, - st_gid: 0, - st_rdev: 0, - st_size: 0, - st_blksize: 4096, - st_blocks: 0, - ..Default::default() - }) - }, - ) - .flatten()?, - Descriptor::Eventfd { .. } => FileStat { - // TODO: give correct values - st_dev: 0, - st_ino: 0, - st_nlink: 1, - st_mode: (Mode::RUSR | Mode::WUSR).bits().truncate(), - st_uid: 0, - st_gid: 0, - st_rdev: 0, - st_size: 0, - st_blksize: 4096, - st_blocks: 0, - ..Default::default() +fn descriptor_stat(raw_fd: usize, task: &Task) -> Result { + let fstat = task + .files + .borrow() + .run_on_raw_fd( + raw_fd, + |fd| { + task.files + .borrow() + .fs + .fd_file_status(fd) + .map(FileStat::from) + .map_err(Errno::from) }, - Descriptor::Epoll { .. } => FileStat { - // TODO: give correct values - st_dev: 0, - st_ino: 0, - st_nlink: 1, - st_mode: (Mode::RUSR | Mode::WUSR).bits().truncate(), - st_uid: 0, - st_gid: 0, - st_rdev: 0, - st_size: 0, - st_blksize: 0, - st_blocks: 0, - ..Default::default() + |_fd| { + Ok(FileStat { + // TODO: give correct values + st_dev: 0, + st_ino: 0, + st_nlink: 1, + st_mode: (litebox_common_linux::InodeType::Socket as u32 + | (Mode::RWXU | Mode::RWXG | Mode::RWXO).bits()) + .truncate(), + st_uid: 0, + st_gid: 0, + st_rdev: 0, + st_size: 0, + st_blksize: 4096, + st_blocks: 0, + ..Default::default() + }) }, - Descriptor::Unix { .. } => FileStat { - // TODO: give correct values - st_dev: 0, - st_ino: 0, - st_nlink: 1, - st_mode: (litebox_common_linux::InodeType::Socket as u32 - | (Mode::RWXU | Mode::RWXG | Mode::RWXO).bits()) - .truncate(), - st_uid: 0, - st_gid: 0, - st_rdev: 0, - st_size: 0, - st_blksize: 4096, - st_blocks: 0, - ..Default::default() + |fd| { + let half_pipe_type = task.global.pipes.half_pipe_type(fd)?; + let read_write_mode = match half_pipe_type { + litebox::pipes::HalfPipeType::SenderHalf => Mode::WUSR, + litebox::pipes::HalfPipeType::ReceiverHalf => Mode::RUSR, + }; + Ok(FileStat { + // TODO: give correct values + st_dev: 0, + st_ino: 0, + st_nlink: 1, + st_mode: (read_write_mode.bits() + | litebox_common_linux::InodeType::NamedPipe as u32) + .truncate(), + st_uid: 0, + st_gid: 0, + st_rdev: 0, + st_size: 0, + st_blksize: 4096, + st_blocks: 0, + ..Default::default() + }) }, - }; - Ok(fstat) - } + |_fd| { + Ok(FileStat { + // TODO: give correct values + st_dev: 0, + st_ino: 0, + st_nlink: 1, + st_mode: (Mode::RUSR | Mode::WUSR).bits().truncate(), + st_uid: 0, + st_gid: 0, + st_rdev: 0, + st_size: 0, + st_blksize: 4096, + st_blocks: 0, + ..Default::default() + }) + }, + |_fd| { + Ok(FileStat { + // TODO: give correct values + st_dev: 0, + st_ino: 0, + st_nlink: 1, + st_mode: (Mode::RUSR | Mode::WUSR).bits().truncate(), + st_uid: 0, + st_gid: 0, + st_rdev: 0, + st_size: 0, + st_blksize: 0, + st_blocks: 0, + ..Default::default() + }) + }, + |_fd| { + Ok(FileStat { + // TODO: give correct values + st_dev: 0, + st_ino: 0, + st_nlink: 1, + st_mode: (litebox_common_linux::InodeType::Socket as u32 + | (Mode::RWXU | Mode::RWXG | Mode::RWXO).bits()) + .truncate(), + st_uid: 0, + st_gid: 0, + st_rdev: 0, + st_size: 0, + st_blksize: 4096, + st_blocks: 0, + ..Default::default() + }) + }, + ) + .flatten()?; + Ok(fstat) +} - pub(crate) fn get_file_descriptor_flags( - &self, +pub(crate) fn get_file_descriptor_flags( + raw_fd: usize, + global: &GlobalState, + files: &FilesState, +) -> Result { + // Currently, only one such flag is defined: FD_CLOEXEC, the close-on-exec flag. + // See https://www.man7.org/linux/man-pages/man2/F_GETFD.2const.html + fn get_flags( global: &GlobalState, - files: &FilesState, - ) -> Result { - // Currently, only one such flag is defined: FD_CLOEXEC, the close-on-exec flag. - // See https://www.man7.org/linux/man-pages/man2/F_GETFD.2const.html - fn get_flags( - global: &GlobalState, - fd: &TypedFd, - ) -> FileDescriptorFlags { - global - .litebox - .descriptor_table() - .with_metadata(fd, |flags: &FileDescriptorFlags| *flags) - .unwrap_or(FileDescriptorFlags::empty()) - } - match self { - Descriptor::LiteBoxRawFd(raw_fd) => files.run_on_raw_fd( - *raw_fd, - |fd| get_flags(global, fd), - |fd| get_flags(global, fd), - |fd| get_flags(global, fd), - ), - Descriptor::Eventfd { close_on_exec, .. } - | Descriptor::Epoll { close_on_exec, .. } - | Descriptor::Unix { close_on_exec, .. } => Ok( - if close_on_exec.load(core::sync::atomic::Ordering::Relaxed) { - FileDescriptorFlags::FD_CLOEXEC - } else { - FileDescriptorFlags::empty() - }, - ), - } + fd: &TypedFd, + ) -> FileDescriptorFlags { + global + .litebox + .descriptor_table() + .with_metadata(fd, |flags: &FileDescriptorFlags| *flags) + .unwrap_or(FileDescriptorFlags::empty()) } - fn set_file_descriptor_flags( - &self, + files.run_on_raw_fd( + raw_fd, + |fd| get_flags(global, fd), + |fd| get_flags(global, fd), + |fd| get_flags(global, fd), + |fd| get_flags(global, fd), + |fd| get_flags(global, fd), + |fd| get_flags(global, fd), + ) +} + +fn set_file_descriptor_flags( + raw_fd: usize, + global: &GlobalState, + files: &FilesState, + flags: FileDescriptorFlags, +) -> Result<(), Errno> { + fn set_flags( global: &GlobalState, - files: &FilesState, + fd: &TypedFd, flags: FileDescriptorFlags, - ) -> Result<(), Errno> { - fn set_flags( - global: &GlobalState, - fd: &TypedFd, - flags: FileDescriptorFlags, - ) { - let _old = global - .litebox - .descriptor_table_mut() - .set_fd_metadata(fd, flags); - } - - match self { - Descriptor::LiteBoxRawFd(raw_fd) => files.run_on_raw_fd( - *raw_fd, - |fd| set_flags(global, fd, flags), - |fd| set_flags(global, fd, flags), - |fd| set_flags(global, fd, flags), - )?, - Descriptor::Eventfd { close_on_exec, .. } - | Descriptor::Epoll { close_on_exec, .. } - | Descriptor::Unix { close_on_exec, .. } => { - close_on_exec.store( - flags.contains(FileDescriptorFlags::FD_CLOEXEC), - core::sync::atomic::Ordering::Relaxed, - ); - } - } - Ok(()) + ) { + let _old = global + .litebox + .descriptor_table_mut() + .set_fd_metadata(fd, flags); } + + files.run_on_raw_fd( + raw_fd, + |fd| set_flags(global, fd, flags), + |fd| set_flags(global, fd, flags), + |fd| set_flags(global, fd, flags), + |fd| set_flags(global, fd, flags), + |fd| set_flags(global, fd, flags), + |fd| set_flags(global, fd, flags), + )?; + Ok(()) } impl Task { @@ -946,16 +957,10 @@ impl Task { /// Handle syscall `fstat` pub fn sys_fstat(&self, fd: i32) -> Result { - let Ok(fd) = u32::try_from(fd) else { + let Ok(raw_fd) = u32::try_from(fd).and_then(usize::try_from) else { return Err(Errno::EBADF); }; - let files = self.files.borrow(); - files - .file_descriptors - .read() - .get_fd(fd) - .ok_or(Errno::EBADF)? - .stat(self) + descriptor_stat(raw_fd, self) } /// Handle syscall `newfstatat` @@ -978,12 +983,12 @@ impl Task { self.do_stat(path, !flags.contains(AtFlags::AT_SYMLINK_NOFOLLOW))? } FsPath::Cwd => files.fs.file_status(get_cwd())?.into(), - FsPath::Fd(fd) => files - .file_descriptors - .read() - .get_fd(fd) - .ok_or(Errno::EBADF)? - .stat(self)?, + FsPath::Fd(fd) => { + let Ok(raw_fd) = usize::try_from(fd) else { + return Err(Errno::EBADF); + }; + descriptor_stat(raw_fd, self)? + } FsPath::FdRelative { .. } => todo!(), }; Ok(fstat) @@ -994,64 +999,81 @@ impl Task { fd: i32, arg: FcntlArg, ) -> Result { - let Ok(fd) = u32::try_from(fd) else { + let Ok(desc) = u32::try_from(fd).and_then(usize::try_from) else { return Err(Errno::EBADF); }; let files = self.files.borrow(); - let locked_file_descriptors = files.file_descriptors.read(); - let desc = locked_file_descriptors.get_fd(fd).ok_or(Errno::EBADF)?; match arg { - FcntlArg::GETFD => Ok(locked_file_descriptors - .get_fd(fd) - .ok_or(Errno::EBADF)? - .get_file_descriptor_flags(&self.global, &files)? + FcntlArg::GETFD => Ok(get_file_descriptor_flags(desc, &self.global, &files)?.bits()), + FcntlArg::SETFD(flags) => { + set_file_descriptor_flags(desc, &self.global, &files, flags).map(|()| 0) + } + FcntlArg::GETFL => Ok(files + .run_on_raw_fd( + desc, + |fd| { + Ok(self + .global + .litebox + .descriptor_table() + .with_metadata(fd, |crate::StdioStatusFlags(flags)| { + *flags & OFlags::STATUS_FLAGS_MASK + }) + .unwrap_or(OFlags::empty())) + }, + |fd| { + Ok(self + .global + .litebox + .descriptor_table() + .with_metadata(fd, |crate::syscalls::net::SocketOFlags(flags)| { + *flags & OFlags::STATUS_FLAGS_MASK + }) + .unwrap_or(OFlags::empty())) + }, + |fd| { + let pipes = &self.global.pipes; + let flags = OFlags::from(pipes.get_flags(fd).map_err(Errno::from)?); + let dirn = match pipes.half_pipe_type(fd)? { + litebox::pipes::HalfPipeType::SenderHalf => OFlags::WRONLY, + litebox::pipes::HalfPipeType::ReceiverHalf => OFlags::RDONLY, + }; + Ok(dirn | flags) + }, + |fd| { + // TODO: Consider shared metadata table? + let handle = self + .global + .litebox + .descriptor_table() + .entry_handle(fd) + .ok_or(Errno::EBADF)?; + handle.with_entry(|file| Ok(file.get_status())) + }, + |fd| { + // TODO: Consider shared metadata table? + let handle = self + .global + .litebox + .descriptor_table() + .entry_handle(fd) + .ok_or(Errno::EBADF)?; + handle.with_entry(|file| Ok(file.get_status())) + }, + |fd| { + // TODO: Consider shared metadata table? + let handle = self + .global + .litebox + .descriptor_table() + .entry_handle(fd) + .ok_or(Errno::EBADF)?; + handle.with_entry(|file| Ok(file.get_status())) + }, + ) + .flatten()? .bits()), - FcntlArg::SETFD(flags) => locked_file_descriptors - .get_fd(fd) - .ok_or(Errno::EBADF)? - .set_file_descriptor_flags(&self.global, &files, flags) - .map(|()| 0), - FcntlArg::GETFL => match desc { - Descriptor::LiteBoxRawFd(raw_fd) => Ok(files - .run_on_raw_fd( - *raw_fd, - |fd| { - Ok(self - .global - .litebox - .descriptor_table() - .with_metadata(fd, |crate::StdioStatusFlags(flags)| { - *flags & OFlags::STATUS_FLAGS_MASK - }) - .unwrap_or(OFlags::empty())) - }, - |fd| { - Ok(self - .global - .litebox - .descriptor_table() - .with_metadata(fd, |crate::syscalls::net::SocketOFlags(flags)| { - *flags & OFlags::STATUS_FLAGS_MASK - }) - .unwrap_or(OFlags::empty())) - }, - |fd| { - let pipes = &self.global.pipes; - let flags = OFlags::from(pipes.get_flags(fd).map_err(Errno::from)?); - let dirn = match pipes.half_pipe_type(fd)? { - litebox::pipes::HalfPipeType::SenderHalf => OFlags::WRONLY, - litebox::pipes::HalfPipeType::ReceiverHalf => OFlags::RDONLY, - }; - Ok(dirn | flags) - }, - ) - .flatten()? - .bits()), - Descriptor::Eventfd { file, .. } => Ok(file.get_status().bits()), - Descriptor::Epoll { file, .. } => Ok(file.get_status().bits()), - Descriptor::Unix { file, .. } => Ok(file.get_status().bits()), - }, FcntlArg::SETFL(flags) => { let setfl_mask = OFlags::APPEND | OFlags::NONBLOCK @@ -1059,90 +1081,101 @@ impl Task { | OFlags::DIRECT | OFlags::NOATIME; macro_rules! toggle_flags { - ($t:ident) => { + ($t:ident) => {{ let diff = $t.get_status() ^ flags; if diff.intersects(OFlags::APPEND | OFlags::DIRECT | OFlags::NOATIME) { todo!("unsupported flags"); } $t.set_status(flags & setfl_mask, true); $t.set_status(flags.complement() & setfl_mask, false); - }; - } - match desc { - Descriptor::LiteBoxRawFd(raw_fd) => files.run_on_raw_fd( - *raw_fd, - |fd| { - self.global - .litebox - .descriptor_table_mut() - .with_metadata_mut(fd, |crate::StdioStatusFlags(f)| { - let diff = *f ^ flags; - if diff.intersects( - OFlags::APPEND | OFlags::DIRECT | OFlags::NOATIME, - ) { - todo!("unsupported flags"); - } - f.toggle(diff); - }) - .map_err(|err| match err { - MetadataError::ClosedFd => Errno::EBADF, - MetadataError::NoSuchMetadata => { - unimplemented!("SETFL on non-stdio") - } - }) - }, - |fd| { - self.global - .litebox - .descriptor_table_mut() - .with_metadata_mut(fd, |crate::syscalls::net::SocketOFlags(f)| { - let diff = *f ^ flags; - if diff.intersects( - OFlags::APPEND | OFlags::DIRECT | OFlags::NOATIME, - ) { - todo!("unsupported flags"); - } - f.toggle(diff); - }) - .map_err(|err| match err { - MetadataError::ClosedFd => Errno::EBADF, - MetadataError::NoSuchMetadata => { - unreachable!("all sockets have SocketOFlags when created") - } - }) - }, - |fd| { - if flags.intersects(OFlags::NONBLOCK.complement()) { - todo!("unsupported flags for pipes") - } - self.global - .pipes - .update_flags( - fd, - litebox::pipes::Flags::NON_BLOCKING, - flags.intersects(OFlags::NONBLOCK), - ) - .map_err(Errno::from) - }, - )??, - Descriptor::Eventfd { file, .. } => { - toggle_flags!(file); - } - Descriptor::Epoll { .. } => todo!(), - Descriptor::Unix { file, .. } => { - toggle_flags!(file); - } + }}; } + files.run_on_raw_fd( + desc, + |fd| { + self.global + .litebox + .descriptor_table_mut() + .with_metadata_mut(fd, |crate::StdioStatusFlags(f)| { + let diff = *f ^ flags; + if diff + .intersects(OFlags::APPEND | OFlags::DIRECT | OFlags::NOATIME) + { + todo!("unsupported flags"); + } + f.toggle(diff); + }) + .map_err(|err| match err { + MetadataError::ClosedFd => Errno::EBADF, + MetadataError::NoSuchMetadata => { + unimplemented!("SETFL on non-stdio") + } + }) + }, + |fd| { + self.global + .litebox + .descriptor_table_mut() + .with_metadata_mut(fd, |crate::syscalls::net::SocketOFlags(f)| { + let diff = *f ^ flags; + if diff + .intersects(OFlags::APPEND | OFlags::DIRECT | OFlags::NOATIME) + { + todo!("unsupported flags"); + } + f.toggle(diff); + }) + .map_err(|err| match err { + MetadataError::ClosedFd => Errno::EBADF, + MetadataError::NoSuchMetadata => { + unreachable!("all sockets have SocketOFlags when created") + } + }) + }, + |fd| { + if flags.intersects(OFlags::NONBLOCK.complement()) { + todo!("unsupported flags for pipes") + } + self.global + .pipes + .update_flags( + fd, + litebox::pipes::Flags::NON_BLOCKING, + flags.intersects(OFlags::NONBLOCK), + ) + .map_err(Errno::from) + }, + |fd| { + // TODO: Consider shared metadata table? + let handle = self + .global + .litebox + .descriptor_table() + .entry_handle(fd) + .ok_or(Errno::EBADF)?; + handle.with_entry(|file| toggle_flags!(file)); + Ok(()) + }, + |_fd| todo!("epoll"), + |fd| { + // TODO: Consider shared metadata table? + let handle = self + .global + .litebox + .descriptor_table() + .entry_handle(fd) + .ok_or(Errno::EBADF)?; + handle.with_entry(|file| toggle_flags!(file)); + Ok(()) + }, + )??; Ok(0) } FcntlArg::GETLK(lock) => { - let Descriptor::LiteBoxRawFd(raw_fd) = desc else { - return Err(Errno::EBADF); - }; self.files .borrow() .run_on_raw_fd( - *raw_fd, + desc, |_fd| { let mut flock = lock.read_at_offset(0).ok_or(Errno::EFAULT)?; let lock_type = litebox_common_linux::FlockType::try_from(flock.type_) @@ -1159,17 +1192,17 @@ impl Task { }, |_fd| todo!("net"), |_fd| todo!("pipes"), + |_fd| Err(Errno::EBADF), + |_fd| Err(Errno::EBADF), + |_fd| Err(Errno::EBADF), ) .flatten() } FcntlArg::SETLK(lock) | FcntlArg::SETLKW(lock) => { - let Descriptor::LiteBoxRawFd(raw_fd) = desc else { - return Err(Errno::EBADF); - }; self.files .borrow() .run_on_raw_fd( - *raw_fd, + desc, |_fd| { let flock = lock.read_at_offset(0).ok_or(Errno::EFAULT)?; let _ = litebox_common_linux::FlockType::try_from(flock.type_) @@ -1181,6 +1214,9 @@ impl Task { }, |_fd| todo!("net"), |_fd| todo!("pipes"), + |_fd| Err(Errno::EBADF), + |_fd| Err(Errno::EBADF), + |_fd| Err(Errno::EBADF), ) .flatten() } @@ -1200,12 +1236,11 @@ impl Task { if min_fd as usize >= max_fd { return Err(Errno::EINVAL); } - drop(locked_file_descriptors); // drop before acquiring write lock - files - .file_descriptors - .write() - .insert_in_range(new_file, min_fd as usize, max_fd) - .map_err(|desc| self.do_close(desc).err().unwrap_or(Errno::EMFILE)) + if new_file < min_fd as usize || new_file > max_fd { + self.do_close(new_file)?; + return Err(Errno::EMFILE); + } + Ok(new_file.try_into().unwrap()) } _ => unimplemented!(), } @@ -1302,17 +1337,21 @@ impl Task { } let files = self.files.borrow(); - let mut rds = files.raw_descriptor_store.write(); - let wr_raw_fd = rds.fd_into_raw_integer(writer); - let rd_raw_fd = rds.fd_into_raw_integer(reader); - let mut fds = files.file_descriptors.write(); - let w = fds - .insert(self, Descriptor::LiteBoxRawFd(wr_raw_fd)) - .map_err(|desc| self.do_close(desc).err().unwrap_or(Errno::EMFILE))?; - let r = fds - .insert(self, Descriptor::LiteBoxRawFd(rd_raw_fd)) - .map_err(|desc| self.do_close(desc).err().unwrap_or(Errno::EMFILE))?; - Ok((r, w)) + let wr_raw_fd = files.insert_raw_fd(writer).map_err(|writer| { + self.global.pipes.close(&writer).unwrap(); + Errno::EMFILE + })?; + let rd_raw_fd = files.insert_raw_fd(reader).map_err(|reader| { + let writer = files + .raw_descriptor_store + .write() + .fd_consume_raw_integer(wr_raw_fd) + .unwrap(); + self.global.pipes.close(&writer).unwrap(); + self.global.pipes.close(&reader).unwrap(); + Errno::EMFILE + })?; + Ok((rd_raw_fd.try_into().unwrap(), wr_raw_fd.try_into().unwrap())) } pub fn sys_eventfd2(&self, initval: u32, flags: EfdFlags) -> Result { @@ -1323,20 +1362,23 @@ impl Task { } let eventfd = super::eventfd::EventFile::new(u64::from(initval), flags); + let mut dt = self.global.litebox.descriptor_table_mut(); + let typed = dt.insert::(eventfd); + if flags.contains(EfdFlags::CLOEXEC) { + let old = dt.set_fd_metadata(&typed, FileDescriptorFlags::FD_CLOEXEC); + assert!(old.is_none()); + } + drop(dt); let files = self.files.borrow(); - files - .file_descriptors - .write() - .insert( - self, - Descriptor::Eventfd { - file: alloc::sync::Arc::new(eventfd), - close_on_exec: core::sync::atomic::AtomicBool::new( - flags.contains(EfdFlags::CLOEXEC), - ), - }, - ) - .map_err(|desc| self.do_close(desc).err().unwrap_or(Errno::EMFILE)) + let raw_fd = files.insert_raw_fd(typed).map_err(|typed| { + self.global + .litebox + .descriptor_table_mut() + .remove(&typed) + .unwrap(); + Errno::EMFILE + })?; + Ok(raw_fd.try_into().unwrap()) } fn stdio_ioctl( @@ -1398,101 +1440,147 @@ impl Task { fd: i32, arg: IoctlArg, ) -> Result { - let Ok(fd) = u32::try_from(fd) else { + let Ok(desc) = u32::try_from(fd).and_then(usize::try_from) else { return Err(Errno::EBADF); }; let files = self.files.borrow(); - let locked_file_descriptors = files.file_descriptors.read(); - let desc = locked_file_descriptors.get_fd(fd).ok_or(Errno::EBADF)?; match arg { IoctlArg::FIONBIO(arg) => { let val = arg.read_at_offset(0).ok_or(Errno::EFAULT)?; - match desc { - Descriptor::LiteBoxRawFd(raw_fd) => { - self.files.borrow().run_on_raw_fd( - *raw_fd, - |_file_fd| { - // TODO: stdio NONBLOCK? - #[cfg(debug_assertions)] - litebox::log_println!( - self.global.platform, - "Attempted to set non-blocking on raw fd; currently unimplemented" - ); - Ok(()) - }, - |socket_fd| { - if let Err(e) = self.global.litebox.descriptor_table_mut().with_metadata_mut( + self.files + .borrow() + .run_on_raw_fd( + desc, + |_file_fd| { + // TODO: stdio NONBLOCK? + #[cfg(debug_assertions)] + litebox::log_println!( + self.global.platform, + "Attempted to set non-blocking on raw fd; currently unimplemented" + ); + Ok(()) + }, + |socket_fd| { + if let Err(e) = self + .global + .litebox + .descriptor_table_mut() + .with_metadata_mut( socket_fd, |crate::syscalls::net::SocketOFlags(flags)| { flags.set(OFlags::NONBLOCK, val != 0); }, - ) { - match e { - MetadataError::ClosedFd => return Err(Errno::EBADF), - MetadataError::NoSuchMetadata => unreachable!(), - } + ) + { + match e { + MetadataError::ClosedFd => return Err(Errno::EBADF), + MetadataError::NoSuchMetadata => unreachable!(), } - Ok(()) - }, - |fd| { - self.global.pipes .update_flags(fd, litebox::pipes::Flags::NON_BLOCKING, val != 0) - .map_err(Errno::from) - }, - ) - .flatten()?; - } - Descriptor::Eventfd { file, .. } => file.set_status(OFlags::NONBLOCK, val != 0), - Descriptor::Epoll { file, .. } => { - file.set_status(OFlags::NONBLOCK, val != 0); - } - Descriptor::Unix { file, .. } => { - file.set_status(OFlags::NONBLOCK, val != 0); - } - } + } + Ok(()) + }, + |fd| { + self.global + .pipes + .update_flags(fd, litebox::pipes::Flags::NON_BLOCKING, val != 0) + .map_err(Errno::from) + }, + |fd| { + let handle = self + .global + .litebox + .descriptor_table() + .entry_handle(fd) + .ok_or(Errno::EBADF)?; + handle.with_entry(|file| { + file.set_status(OFlags::NONBLOCK, val != 0); + }); + Ok(()) + }, + |fd| { + let handle = self + .global + .litebox + .descriptor_table() + .entry_handle(fd) + .ok_or(Errno::EBADF)?; + handle.with_entry(|file| { + file.set_status(OFlags::NONBLOCK, val != 0); + }); + Ok(()) + }, + |fd| { + let handle = self + .global + .litebox + .descriptor_table() + .entry_handle(fd) + .ok_or(Errno::EBADF)?; + handle.with_entry(|file| { + file.set_status(OFlags::NONBLOCK, val != 0); + }); + Ok(()) + }, + ) + .flatten()?; Ok(0) } - IoctlArg::FIOCLEX => match desc { - Descriptor::LiteBoxRawFd(raw_fd) => files.run_on_raw_fd( - *raw_fd, - |fd| { - let _old = self - .global - .litebox - .descriptor_table_mut() - .set_fd_metadata(fd, FileDescriptorFlags::FD_CLOEXEC); - Ok(0) - }, - |_fd| todo!("net"), - |_fd| todo!("pipes"), - )?, - Descriptor::Eventfd { close_on_exec, .. } - | Descriptor::Epoll { close_on_exec, .. } - | Descriptor::Unix { close_on_exec, .. } => { - close_on_exec.store(true, core::sync::atomic::Ordering::Relaxed); + IoctlArg::FIOCLEX => files.run_on_raw_fd( + desc, + |fd| { + let _old = self + .global + .litebox + .descriptor_table_mut() + .set_fd_metadata(fd, FileDescriptorFlags::FD_CLOEXEC); Ok(0) - } - }, + }, + |_fd| todo!("net"), + |_fd| todo!("pipes"), + |fd| { + let _old = self + .global + .litebox + .descriptor_table_mut() + .set_fd_metadata(fd, FileDescriptorFlags::FD_CLOEXEC); + Ok(0) + }, + |fd| { + let _old = self + .global + .litebox + .descriptor_table_mut() + .set_fd_metadata(fd, FileDescriptorFlags::FD_CLOEXEC); + Ok(0) + }, + |fd| { + let _old = self + .global + .litebox + .descriptor_table_mut() + .set_fd_metadata(fd, FileDescriptorFlags::FD_CLOEXEC); + Ok(0) + }, + )?, IoctlArg::TCGETS(..) | IoctlArg::TCSETS(..) | IoctlArg::TIOCGPTN(..) - | IoctlArg::TIOCGWINSZ(..) => match desc { - Descriptor::LiteBoxRawFd(raw_fd) => files.run_on_raw_fd( - *raw_fd, - |fd| { - if self.is_stdio(&files.fs, fd)? { - self.stdio_ioctl(&arg) - } else { - Err(Errno::ENOTTY) - } - }, - |_fd| Err(Errno::ENOTTY), - |_fd| Err(Errno::ENOTTY), - )?, - Descriptor::Eventfd { .. } | Descriptor::Epoll { .. } | Descriptor::Unix { .. } => { - Err(Errno::ENOTTY) - } - }, + | IoctlArg::TIOCGWINSZ(..) => files.run_on_raw_fd( + desc, + |fd| { + if self.is_stdio(&files.fs, fd)? { + self.stdio_ioctl(&arg) + } else { + Err(Errno::ENOTTY) + } + }, + |_fd| Err(Errno::ENOTTY), + |_fd| Err(Errno::ENOTTY), + |_fd| Err(Errno::ENOTTY), + |_fd| Err(Errno::ENOTTY), + |_fd| Err(Errno::ENOTTY), + )?, _ => { #[cfg(debug_assertions)] litebox::log_println!(self.global.platform, "\n\n\n{:?}\n\n\n", arg); @@ -1508,20 +1596,23 @@ impl Task { } let epoll_file = super::epoll::EpollFile::new(); + let mut dt = self.global.litebox.descriptor_table_mut(); + let typed = dt.insert::>(epoll_file); + if flags.contains(EpollCreateFlags::EPOLL_CLOEXEC) { + let old = dt.set_fd_metadata(&typed, FileDescriptorFlags::FD_CLOEXEC); + assert!(old.is_none()); + } + drop(dt); let files = self.files.borrow(); - files - .file_descriptors - .write() - .insert( - self, - Descriptor::Epoll { - file: alloc::sync::Arc::new(epoll_file), - close_on_exec: core::sync::atomic::AtomicBool::new( - flags.contains(EpollCreateFlags::EPOLL_CLOEXEC), - ), - }, - ) - .map_err(|desc| self.do_close(desc).err().unwrap_or(Errno::EMFILE)) + let raw_fd = files.insert_raw_fd(typed).map_err(|typed| { + self.global + .litebox + .descriptor_table_mut() + .remove(&typed) + .unwrap(); + Errno::EMFILE + })?; + Ok(raw_fd.try_into().unwrap()) } /// Handle syscall `epoll_ctl` @@ -1543,20 +1634,26 @@ impl Task { } let files = self.files.borrow(); - let locked_file_descriptors = files.file_descriptors.read(); - let epoll_entry = locked_file_descriptors.get_fd(epfd).ok_or(Errno::EBADF)?; - let Descriptor::Epoll { file: epoll, .. } = epoll_entry else { - return Err(Errno::EBADF); - }; - let file = locked_file_descriptors.get_fd(fd).ok_or(Errno::EBADF)?; - let file_descriptor = super::epoll::EpollDescriptor::try_from(&files, file)?; + let epoll_fd = files + .raw_descriptor_store + .read() + .fd_from_raw_integer::>(epfd as usize) + .map_err(|_| Errno::EBADF)?; + let file_descriptor = super::epoll::EpollDescriptor::try_from(&files, fd as usize)?; + let event = if op == litebox_common_linux::EpollOp::EpollCtlDel { None } else { Some(event.read_at_offset(0).ok_or(Errno::EFAULT)?) }; - epoll.epoll_ctl(&self.global, op, fd, &file_descriptor, event) + let handle = self + .global + .litebox + .descriptor_table() + .entry_handle(&epoll_fd) + .ok_or(Errno::EBADF)?; + handle.with_entry(|entry| entry.epoll_ctl(&self.global, op, fd, &file_descriptor, event)) } /// Handle syscall `epoll_pwait` @@ -1587,30 +1684,43 @@ impl Task { } else { None }; - let epoll_file = { + let handle = { let files = self.files.borrow(); - let locked_file_descriptors = files.file_descriptors.read(); - match locked_file_descriptors.get_fd(epfd).ok_or(Errno::EBADF)? { - Descriptor::Epoll { file, .. } => file.clone(), - _ => return Err(Errno::EBADF), + { + let raw_fd = usize::try_from(epfd).or(Err(Errno::EBADF))?; + let Ok(fd) = + files + .raw_descriptor_store + .read() + .fd_from_raw_integer::>(raw_fd) + else { + return Err(Errno::EBADF); + }; + self.global + .litebox + .descriptor_table() + .entry_handle(&fd) + .ok_or(Errno::EBADF)? } }; - match epoll_file.wait( - &self.global, - &self.wait_cx().with_timeout(timeout), - maxevents, - ) { - Ok(epoll_events) => { - if !epoll_events.is_empty() { - events - .copy_from_slice(0, &epoll_events) - .ok_or(Errno::EFAULT)?; + handle.with_entry(|epoll_file| { + match epoll_file.wait( + &self.global, + &self.wait_cx().with_timeout(timeout), + maxevents, + ) { + Ok(epoll_events) => { + if !epoll_events.is_empty() { + events + .copy_from_slice(0, &epoll_events) + .ok_or(Errno::EFAULT)?; + } + Ok(epoll_events.len()) } - Ok(epoll_events.len()) + Err(WaitError::TimedOut) => Ok(0), + Err(WaitError::Interrupted) => Err(Errno::EINTR), } - Err(WaitError::TimedOut) => Ok(0), - Err(WaitError::Interrupted) => Err(Errno::EINTR), - } + }) } /// Handle syscall `ppoll`. @@ -1687,7 +1797,11 @@ impl Task { exceptfds: Option<&mut bitvec::vec::BitVec>, timeout: Option, ) -> Result { - let file_table_len = self.files.borrow().file_descriptors.read().len(); + // XXX: semantic issue likely should be fixed here to make sure EBADF is triggered early + // enough if needed. Previously, `file_table_len` used to be + // `self.files.borrow().file_descriptors.read().len()` before `file_descriptors` was + // removed to clean up the table handling. + let file_table_len = usize::MAX; let mut set = super::epoll::PollSet::with_capacity(nfds as usize); for i in 0..nfds { let mut events = litebox::event::Events::empty(); @@ -1815,47 +1929,61 @@ impl Task { Ok(count) } - fn do_dup(&self, file: &Descriptor, flags: OFlags) -> Result, Errno> { + fn do_dup(&self, file: usize, flags: OFlags) -> Result { + self.do_dup_inner(file, flags, None) + } + + fn do_dup_inner( + &self, + file: usize, + flags: OFlags, + target: Option, + ) -> Result { + fn dup( + global: &GlobalState, + files: &FilesState, + fd: &TypedFd, + close_on_exec: bool, + target: Option, + ) -> Result { + let mut dt = global.litebox.descriptor_table_mut(); + let fd: TypedFd<_> = dt.duplicate(fd).ok_or(Errno::EBADF)?; + if close_on_exec { + let old = dt.set_fd_metadata(&fd, FileDescriptorFlags::FD_CLOEXEC); + assert!(old.is_none()); + } + let mut rds = files.raw_descriptor_store.write(); + if let Some(target) = target { + if !rds.fd_into_specific_raw_integer(fd, target) { + return Err(Errno::EBADF); + } + Ok(target) + } else { + Ok(rds.fd_into_raw_integer(fd)) + } + } let close_on_exec = flags.contains(OFlags::CLOEXEC); let files = self.files.borrow(); - match file { - Descriptor::LiteBoxRawFd(raw_fd) => { - fn dup( - global: &GlobalState, - files: &FilesState, - fd: &TypedFd, - close_on_exec: bool, - ) -> Result, Errno> { - let mut dt = global.litebox.descriptor_table_mut(); - let fd: TypedFd<_> = dt.duplicate(fd).ok_or(Errno::EBADF)?; - if close_on_exec { - let old = dt.set_fd_metadata(&fd, FileDescriptorFlags::FD_CLOEXEC); - assert!(old.is_none()); - } - Ok(Descriptor::LiteBoxRawFd( - files.raw_descriptor_store.write().fd_into_raw_integer(fd), - )) - } - files.run_on_raw_fd( - *raw_fd, - |fd| dup(&self.global, &files, fd, close_on_exec), - |fd| dup(&self.global, &files, fd, close_on_exec), - |fd| dup(&self.global, &files, fd, close_on_exec), - )? + let new_fd = files.run_on_raw_fd( + file, + |fd| dup(&self.global, &files, fd, close_on_exec, target), + |fd| dup(&self.global, &files, fd, close_on_exec, target), + |fd| dup(&self.global, &files, fd, close_on_exec, target), + |fd| dup(&self.global, &files, fd, close_on_exec, target), + |fd| dup(&self.global, &files, fd, close_on_exec, target), + |fd| dup(&self.global, &files, fd, close_on_exec, target), + )??; + if target.is_none() { + let max_fd = self + .process() + .limits + .get_rlimit_cur(litebox_common_linux::RlimitResource::NOFILE); + if new_fd >= max_fd { + self.do_close(new_fd)?; + return Err(Errno::EMFILE); } - Descriptor::Eventfd { file, .. } => Ok(Descriptor::Eventfd { - file: file.clone(), - close_on_exec: core::sync::atomic::AtomicBool::new(close_on_exec), - }), - Descriptor::Epoll { file, .. } => Ok(Descriptor::Epoll { - file: file.clone(), - close_on_exec: core::sync::atomic::AtomicBool::new(close_on_exec), - }), - Descriptor::Unix { file, .. } => Ok(Descriptor::Unix { - file: file.clone(), - close_on_exec: core::sync::atomic::AtomicBool::new(close_on_exec), - }), } + Ok(new_fd) } /// Handle syscall `dup/dup2/dup3` @@ -1872,13 +2000,7 @@ impl Task { let Ok(oldfd) = u32::try_from(oldfd) else { return Err(Errno::EBADF); }; - let files = self.files.borrow(); - let new_file = files - .file_descriptors - .read() - .get_fd(oldfd) - .ok_or(Errno::EBADF) - .map(|desc| self.do_dup(desc, flags.unwrap_or(OFlags::empty())))??; + let oldfd_usize = usize::try_from(oldfd).or(Err(Errno::EBADF))?; if let Some(newfd) = newfd { // dup2/dup3 let Ok(newfd) = u32::try_from(newfd) else { @@ -1895,30 +2017,19 @@ impl Task { Ok(oldfd) }; } - match files - .file_descriptors - .write() - .insert_at(self, new_file, newfd as usize) - { - Ok(old_file) => { - // replace an existing file descriptor - if let Some(old_file) = old_file { - self.do_close(old_file)?; - } - Ok(newfd) - } - Err(new_file) => { - // failed to insert due to file limit - Err(self.do_close(new_file).err().unwrap_or(Errno::EMFILE)) - } - } + // Close whatever is at newfd before duping into it + let newfd_usize = usize::try_from(newfd).or(Err(Errno::EBADF))?; + let _ = self.do_close(newfd_usize); + self.do_dup_inner( + oldfd_usize, + flags.unwrap_or(OFlags::empty()), + Some(newfd_usize), + )?; + Ok(newfd) } else { // dup - files - .file_descriptors - .write() - .insert(self, new_file) - .map_err(|desc| self.do_close(desc).err().unwrap_or(Errno::EMFILE)) + let new_file = self.do_dup(oldfd_usize, flags.unwrap_or(OFlags::empty()))?; + Ok(u32::try_from(new_file).unwrap()) } } } @@ -1937,18 +2048,12 @@ impl Task { dirp: MutPtr, count: usize, ) -> Result { - let Ok(fd) = u32::try_from(fd) else { + let Ok(fd) = u32::try_from(fd).and_then(usize::try_from) else { return Err(Errno::EBADF); }; let files = self.files.borrow(); - let locked_file_descriptors = files.file_descriptors.read(); - let Descriptor::LiteBoxRawFd(raw_fd) = - locked_file_descriptors.get_fd(fd).ok_or(Errno::EBADF)? - else { - return Err(Errno::EBADF); - }; files.run_on_raw_fd( - *raw_fd, + fd, |file| { let dir_off: Diroff = self .global @@ -2005,6 +2110,9 @@ impl Task { }, |_fd| todo!("net"), |_fd| todo!("pipes"), + |_fd| Err(Errno::EBADF), + |_fd| Err(Errno::EBADF), + |_fd| Err(Errno::EBADF), )? } } diff --git a/litebox_shim_linux/src/syscalls/mm.rs b/litebox_shim_linux/src/syscalls/mm.rs index 158d5a735..ce6c3513c 100644 --- a/litebox_shim_linux/src/syscalls/mm.rs +++ b/litebox_shim_linux/src/syscalls/mm.rs @@ -102,15 +102,12 @@ impl Task { return None; } - let Ok(fd) = u32::try_from(fd) else { + let Ok(fd) = u32::try_from(fd).and_then(usize::try_from) else { return None; }; let files = self.files.borrow(); - let raw_fd = match files.file_descriptors.read().get_fd(fd)? { - crate::Descriptor::LiteBoxRawFd(raw_fd) => *raw_fd, - _ => return None, - }; + let raw_fd = fd; let static_data = files .run_on_raw_fd( @@ -118,6 +115,9 @@ impl Task { |typed_fd| files.fs.get_static_backing_data(typed_fd), |_| None, |_| None, + |_| None, + |_| None, + |_| None, ) .ok()??; diff --git a/litebox_shim_linux/src/syscalls/net.rs b/litebox_shim_linux/src/syscalls/net.rs index 23aaa26e6..f924b06b6 100644 --- a/litebox_shim_linux/src/syscalls/net.rs +++ b/litebox_shim_linux/src/syscalls/net.rs @@ -7,7 +7,6 @@ use core::{ ffi::CStr, mem::offset_of, net::{Ipv4Addr, SocketAddr, SocketAddrV4}, - sync::atomic::AtomicBool, }; use alloc::string::ToString; @@ -28,12 +27,12 @@ use litebox::{ utils::TruncateExt as _, }; use litebox_common_linux::{ - AddressFamily, IPProtocol, ReceiveFlags, SendFlags, SockFlags, SockType, SocketOption, - SocketOptionName, TcpOption, UnixProtocol, errno::Errno, + AddressFamily, FileDescriptorFlags, IPProtocol, ReceiveFlags, SendFlags, SockFlags, SockType, + SocketOption, SocketOptionName, TcpOption, UnixProtocol, errno::Errno, }; use zerocopy::{FromBytes, IntoBytes}; -use crate::{ConstPtr, Descriptor, MutPtr}; +use crate::{ConstPtr, MutPtr}; use crate::{GlobalState, ShimFS, Task}; use crate::{ Platform, @@ -57,22 +56,6 @@ macro_rules! convert_flags { pub(crate) type SocketFd = litebox::net::SocketFd; impl super::file::FilesState { - fn with_socket_fd( - &self, - raw_fd: usize, - f: impl FnOnce(&SocketFd) -> Result, - ) -> Result { - let rds = self.raw_descriptor_store.read(); - match rds.fd_from_raw_integer(raw_fd) { - Ok(fd) => { - drop(rds); - f(&fd) - } - Err(litebox::fd::ErrRawIntFd::NotFound) => Err(Errno::EBADF), - Err(litebox::fd::ErrRawIntFd::InvalidSubsystem) => Err(Errno::ENOTSOCK), - } - } - /// Helper to dispatch socket operations based on socket type (INET vs Unix). /// /// This method handles the common pattern of: @@ -85,24 +68,30 @@ impl super::file::FilesState { /// For Unix sockets, the `unix_op` closure is called with a cloned Arc to the socket. fn with_socket( &self, + global: &GlobalState, sockfd: u32, inet_op: impl FnOnce(&SocketFd) -> Result, - unix_op: impl FnOnce(Arc>) -> Result, + unix_op: impl FnOnce(&UnixSocket) -> Result, ) -> Result { - let file_table = self.file_descriptors.read(); - match file_table.get_fd(sockfd).ok_or(Errno::EBADF)? { - Descriptor::LiteBoxRawFd(raw_fd) => { - let raw_fd = *raw_fd; - drop(file_table); - self.with_socket_fd(raw_fd, inet_op) - } - Descriptor::Unix { file, .. } => { - let file = file.clone(); - drop(file_table); - unix_op(file) - } - _ => Err(Errno::ENOTSOCK), + let raw_fd = sockfd as usize; + let inet_fd = { + let rds = self.raw_descriptor_store.read(); + rds.fd_from_raw_integer(raw_fd).ok() + }; + if let Some(fd) = inet_fd { + return inet_op(&fd); } + let unix = self + .raw_descriptor_store + .read() + .fd_from_raw_integer::>(raw_fd) + .map_err(|_| Errno::ENOTSOCK)?; + let handle = global + .litebox + .descriptor_table() + .entry_handle(&unix) + .ok_or(Errno::EBADF)?; + handle.with_entry(|entry| unix_op(entry)) } } @@ -947,33 +936,37 @@ impl Task { }; let socket = self.global.net.lock().socket(protocol)?; let _ = self.global.initialize_socket(&socket, ty, flags); - Descriptor::LiteBoxRawFd( - files - .raw_descriptor_store - .write() - .fd_into_raw_integer(socket), - ) + let Ok(raw_fd) = files.insert_raw_fd(socket) else { + unimplemented!() + }; + raw_fd } AddressFamily::UNIX => { let _ = UnixProtocol::try_from(protocol).map_err(|_| Errno::EPROTONOSUPPORT)?; let socket = UnixSocket::new(ty, flags).ok_or(Errno::ESOCKTNOSUPPORT)?; - Descriptor::Unix { - file: Arc::new(socket), - close_on_exec: AtomicBool::new(flags.contains(SockFlags::CLOEXEC)), + let typed = self + .global + .litebox + .descriptor_table_mut() + .insert::>(socket); + if flags.contains(SockFlags::CLOEXEC) { + let old = self + .global + .litebox + .descriptor_table_mut() + .set_fd_metadata(&typed, FileDescriptorFlags::FD_CLOEXEC); + assert!(old.is_none()); } + + files.insert_raw_fd(typed).map_err(|typed| { + let _ = self.global.litebox.descriptor_table_mut().remove(&typed); + Errno::EMFILE + })? } AddressFamily::INET6 | AddressFamily::NETLINK => return Err(Errno::EAFNOSUPPORT), _ => unimplemented!(), }; - files - .file_descriptors - .write() - .insert(self, file) - .map_err(|desc| { - self.do_close(desc) - .expect("closing descriptor should succeed"); - Errno::EMFILE - }) + Ok(u32::try_from(file).unwrap()) } pub(crate) fn sys_socketpair( @@ -1005,15 +998,27 @@ impl Task { let _ = UnixProtocol::try_from(protocol).map_err(|_| Errno::EPROTONOSUPPORT)?; let (sock1, sock2) = UnixSocket::new_connected_pair(ty, flags).ok_or(Errno::ESOCKTNOSUPPORT)?; - let file1 = Descriptor::Unix { - file: Arc::new(sock1), - close_on_exec: AtomicBool::new(flags.contains(SockFlags::CLOEXEC)), - }; - let file2 = Descriptor::Unix { - file: Arc::new(sock2), - close_on_exec: AtomicBool::new(flags.contains(SockFlags::CLOEXEC)), - }; - (file1, file2) + let files = self.files.borrow(); + let mut dt = self.global.litebox.descriptor_table_mut(); + let typed1 = dt.insert::>(sock1); + let typed2 = dt.insert::>(sock2); + if flags.contains(SockFlags::CLOEXEC) { + let old = dt.set_fd_metadata(&typed1, FileDescriptorFlags::FD_CLOEXEC); + assert!(old.is_none()); + let old = dt.set_fd_metadata(&typed2, FileDescriptorFlags::FD_CLOEXEC); + assert!(old.is_none()); + } + drop(dt); + let raw_fd1 = files.insert_raw_fd(typed1).map_err(|typed| { + let _ = self.global.litebox.descriptor_table_mut().remove(&typed); + Errno::EMFILE + })?; + let raw_fd2 = files.insert_raw_fd(typed2).map_err(|typed| { + self.do_close(raw_fd1).unwrap(); + let _ = self.global.litebox.descriptor_table_mut().remove(&typed); + Errno::EMFILE + })?; + (raw_fd1, raw_fd2) } AddressFamily::INET | AddressFamily::INET6 | AddressFamily::NETLINK => { return Err(Errno::EOPNOTSUPP); @@ -1023,34 +1028,7 @@ impl Task { return Err(Errno::EAFNOSUPPORT); } }; - let files = self.files.borrow(); - let fd1 = files - .file_descriptors - .write() - .insert(self, desc1) - .map_err(|desc| { - self.do_close(desc) - .expect("closing descriptor should succeed"); - }); - let Ok(fd1) = fd1 else { - self.do_close(desc2) - .expect("closing descriptor should succeed"); - return Err(Errno::EMFILE); - }; - let fd2 = files - .file_descriptors - .write() - .insert(self, desc2) - .map_err(|desc| { - self.do_close(desc) - .expect("closing descriptor should succeed"); - }); - let Ok(fd2) = fd2 else { - self.sys_close(i32::try_from(fd1).unwrap()) - .expect("close should succeed"); - return Err(Errno::EMFILE); - }; - Ok((fd1, fd2)) + Ok((u32::try_from(desc1).unwrap(), u32::try_from(desc2).unwrap())) } } pub(crate) fn read_sockaddr_from_user( @@ -1201,6 +1179,7 @@ impl Task { let files = self.files.borrow(); let want_peer = peer.is_some(); let (file, peer_addr) = files.with_socket( + &self.global, sockfd, |fd| { let sock_type = self.global.get_socket_type(fd)?; @@ -1215,27 +1194,28 @@ impl Task { .global .initialize_socket(&accepted_file, sock_type, flags); proxy.set_state(SocketState::Connected); - Ok(( - Descriptor::LiteBoxRawFd( - files - .raw_descriptor_store - .write() - .fd_into_raw_integer(accepted_file), - ), - peer_addr, - )) + let Ok(raw_fd) = files.insert_raw_fd(accepted_file) else { + unimplemented!() + }; + Ok((raw_fd, peer_addr)) }, |file| { let mut socket_addr = want_peer.then_some(UnixSocketAddr::Unnamed); let accepted_file = file.accept(&self.wait_cx(), flags, socket_addr.as_mut())?; let peer_addr = socket_addr.map(SocketAddress::Unix); - Ok(( - Descriptor::Unix { - file: Arc::new(accepted_file), - close_on_exec: AtomicBool::new(flags.contains(SockFlags::CLOEXEC)), - }, - peer_addr, - )) + let mut dt = self.global.litebox.descriptor_table_mut(); + let typed = + dt.insert::>(accepted_file); + if flags.contains(SockFlags::CLOEXEC) { + let old = dt.set_fd_metadata(&typed, FileDescriptorFlags::FD_CLOEXEC); + assert!(old.is_none()); + } + drop(dt); + let raw_fd = files.insert_raw_fd(typed).map_err(|typed| { + let _ = self.global.litebox.descriptor_table_mut().remove(&typed); + Errno::EMFILE + })?; + Ok((raw_fd, peer_addr)) }, )?; @@ -1243,15 +1223,7 @@ impl Task { *peer = addr; } - files - .file_descriptors - .write() - .insert(self, file) - .map_err(|desc| { - self.do_close(desc) - .expect("closing descriptor should succeed"); - Errno::EMFILE - }) + Ok(u32::try_from(file).unwrap()) } /// Handle syscall `connect` @@ -1269,6 +1241,7 @@ impl Task { } fn do_connect(&self, sockfd: u32, sockaddr: SocketAddress) -> Result<(), Errno> { self.files.borrow().with_socket( + &self.global, sockfd, |fd| { let addr = sockaddr.clone().inet().ok_or(Errno::EAFNOSUPPORT)?; @@ -1296,6 +1269,7 @@ impl Task { } fn do_bind(&self, sockfd: u32, sockaddr: SocketAddress) -> Result<(), Errno> { self.files.borrow().with_socket( + &self.global, sockfd, |fd| { let addr = sockaddr.clone().inet().ok_or(Errno::EAFNOSUPPORT)?; @@ -1317,6 +1291,7 @@ impl Task { } fn do_listen(&self, sockfd: u32, backlog: u16) -> Result<(), Errno> { self.files.borrow().with_socket( + &self.global, sockfd, |fd| self.global.listen(fd, backlog), |file| file.listen(backlog, &self.global), @@ -1350,6 +1325,7 @@ impl Task { sockaddr: Option, ) -> Result { self.files.borrow().with_socket( + &self.global, sockfd, |fd| { let sockaddr = sockaddr @@ -1408,6 +1384,7 @@ impl Task { .to_owned_slice(msg.msg_iovlen) .ok_or(Errno::EFAULT)?; self.files.borrow().with_socket( + &self.global, sockfd, |fd| { let sock_addr = sock_addr @@ -1478,38 +1455,38 @@ impl Task { ) -> Result { let want_source = source_addr.is_some(); let files = self.files.borrow(); - let file_table = files.file_descriptors.read(); - let (size, addr) = match file_table.get_fd(sockfd).ok_or(Errno::EBADF)? { - Descriptor::LiteBoxRawFd(raw_fd) => { - let raw_fd = *raw_fd; - drop(file_table); - files.with_socket_fd(raw_fd, |fd| { + let raw_fd = usize::try_from(sockfd).or(Err(Errno::EBADF))?; + let (size, addr) = { + // We need to do this cell dance because otherwise Rust can't recognize that the two + // closures are mutually exclusive. + let buf: core::cell::RefCell<&mut [u8]> = core::cell::RefCell::new(buf); + files.with_socket( + &self.global, + raw_fd.truncate(), + |fd| { let mut addr = None; let size = self.global.receive( &self.wait_cx(), fd, - buf, + &mut buf.borrow_mut(), flags, if want_source { Some(&mut addr) } else { None }, )?; let src_addr = addr.map(SocketAddress::Inet); Ok((size, src_addr)) - })? - } - Descriptor::Unix { file, .. } => { - let file = file.clone(); - drop(file_table); - let mut addr = None; - let size = file.recvfrom( - &self.wait_cx(), - buf, - flags, - if want_source { Some(&mut addr) } else { None }, - )?; - let src_addr = addr.map(SocketAddress::Unix); - (size, src_addr) - } - _ => return Err(Errno::ENOTSOCK), + }, + |entry| { + let mut addr = None; + let size = entry.recvfrom( + &self.wait_cx(), + &mut buf.borrow_mut(), + flags, + if want_source { Some(&mut addr) } else { None }, + )?; + let src_addr = addr.map(SocketAddress::Unix); + Ok((size, src_addr)) + }, + )? }; if !flags.contains(ReceiveFlags::TRUNC) { @@ -1548,6 +1525,7 @@ impl Task { optlen: usize, ) -> Result<(), Errno> { self.files.borrow().with_socket( + &self.global, sockfd, |fd| self.global.setsockopt(fd, optname, optval, optlen), |file| file.setsockopt(&self.global, optname, optval, optlen), @@ -1591,6 +1569,7 @@ impl Task { len: u32, ) -> Result { self.files.borrow().with_socket( + &self.global, sockfd, |fd| self.global.getsockopt(fd, optname, optval, len), |file| file.getsockopt(&self.global, optname, optval, len), @@ -1612,6 +1591,7 @@ impl Task { } fn do_getsockname(&self, sockfd: u32) -> Result { self.files.borrow().with_socket( + &self.global, sockfd, |fd| { self.global @@ -1640,6 +1620,7 @@ impl Task { } fn do_getpeername(&self, sockfd: u32) -> Result { self.files.borrow().with_socket( + &self.global, sockfd, |fd| { self.global diff --git a/litebox_shim_linux/src/syscalls/process.rs b/litebox_shim_linux/src/syscalls/process.rs index e51e00c16..70f878cde 100644 --- a/litebox_shim_linux/src/syscalls/process.rs +++ b/litebox_shim_linux/src/syscalls/process.rs @@ -798,7 +798,7 @@ impl Task { } // TODO: enforce the following limits: -const RLIMIT_NOFILE_CUR: usize = 1024 * 1024; +pub(crate) const RLIMIT_NOFILE_CUR: usize = 1024 * 1024; const RLIMIT_NOFILE_MAX: usize = 1024 * 1024; struct AtomicRlimit { @@ -896,7 +896,9 @@ impl Task { } match resource { litebox_common_linux::RlimitResource::NOFILE => { + let new_max_fd = new_limit.rlim_cur.saturating_sub(1); self.thread.process.limits.set_rlimit(resource, new_limit); + self.files.borrow().set_max_fd(new_max_fd); } _ => unimplemented!("Unsupported resource for set_rlimit: {:?}", resource), } diff --git a/litebox_shim_linux/src/syscalls/unix.rs b/litebox_shim_linux/src/syscalls/unix.rs index afec959b3..9ad6d7929 100644 --- a/litebox_shim_linux/src/syscalls/unix.rs +++ b/litebox_shim_linux/src/syscalls/unix.rs @@ -20,6 +20,7 @@ use litebox::{ polling::{Pollee, TryOpError}, wait::WaitContext, }, + fd::{FdEnabledSubsystem, FdEnabledSubsystemEntry}, fs::{Mode, OFlags, errors::OpenError}, sync::{Mutex, RwLock}, utils::TruncateExt as _, @@ -35,6 +36,12 @@ use crate::{ syscalls::net::{SocketOptionValue, SocketOptions}, }; +pub(crate) struct UnixSocketSubsystem(core::marker::PhantomData); +impl FdEnabledSubsystem for UnixSocketSubsystem { + type Entry = UnixSocket; +} +impl FdEnabledSubsystemEntry for UnixSocket {} + /// C-compatible structure for Unix socket addresses. const UNIX_PATH_MAX: usize = 108; #[repr(C)]