From 54a17951541fbe7f7e68c2b14c8af1a5145b78c4 Mon Sep 17 00:00:00 2001 From: Cemil ILIK Date: Sun, 31 May 2026 17:41:35 +0300 Subject: [PATCH 1/6] =?UTF-8?q?feat(sched):=20T-026=20step=20A=20=E2=80=94?= =?UTF-8?q?=20current-task=20cap-table=20+=20user-window=20bindings=20+=20?= =?UTF-8?q?accessors=20(gate=20#3)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds two scheduler parallel arrays (mirroring task_address_space_handles): task_cap_tables (Option<*mut CapabilityTable> — raw ptr, BSP-owned, ADR-0021 bridge; the H1 review fix) + task_user_windows (Option<UserAccessWindow> = [entry_va, stack_top_va), built from add_user_task's existing user_entry/user_sp). add_user_task gains a cap_table param and records both bindings. New pub accessors current_user_table() / current_address_space_handle() / current_user_window() resolve self.current -> slot -> the arrays, returning None (the fail-closed signal) for no-current / unbound slots. The raw ptr makes Scheduler !Send/!Sync; it only ever lives inside the BSP's unconditionally-Sync StaticCell, reached via the *mut Scheduler bridge — no Send/Sync bound broken. No consumer yet (the syscall_entry rewire + fail-closed dispatch is step B). 
Refs: T-026 Co-Authored-By: Claude Opus 4.8 (1M context) --- kernel/src/sched/mod.rs | 109 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/kernel/src/sched/mod.rs b/kernel/src/sched/mod.rs index 36a604f..6a31785 100644 --- a/kernel/src/sched/mod.rs +++ b/kernel/src/sched/mod.rs @@ -55,6 +55,7 @@ use crate::ipc::{ use crate::mm::AddressSpaceHandle; use crate::obj::endpoint::EndpointArena; use crate::obj::{EndpointHandle, TaskHandle, TASK_ARENA_CAPACITY}; +use crate::syscall::user_access::UserAccessWindow; // ─── SchedQueue ─────────────────────────────────────────────────────────────── @@ -267,6 +268,35 @@ pub struct Scheduler { /// [ADR-0028 §Simulation row 3]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md#simulation /// [`Mmu::activate`]: tyrne_hal::Mmu::activate task_address_space_handles: [Option; TASK_ARENA_CAPACITY], + /// Per-task **capability-table** pointer, parallel to `task_handles`. + /// Written by [`add_user_task`][Self::add_user_task]; read by the BSP + /// `syscall_entry` (via [`current_user_table`][Self::current_user_table]) + /// so a syscall resolves capabilities in the **running EL0 task's own** + /// table — gate #3 (T-026), the per-subject unforgeability of + /// [ADR-0014][adr-0014]. A **raw `*mut`**: the table is owned by the BSP + /// (a static), not the scheduler; the scheduler only records the binding, + /// consistent with the [ADR-0021][adr-0021] raw-pointer bridge — no + /// ownership transfer, and the momentary `&mut` the BSP materialises lives + /// only across one `dispatch` call, never across a context switch. `None` + /// for kernel-mode tasks ([`add_task`][Self::add_task]), which make no EL0 + /// syscall; a `None` lookup is the fail-closed signal (never an ambient + /// table). 
The raw pointer makes `Scheduler` `!Send`/`!Sync`; it is only + /// ever held inside the BSP's unconditionally-`Sync` `StaticCell` and + /// reached through the ADR-0021 `*mut Scheduler` bridge, so no `Send`/`Sync` + /// bound is broken. + /// + /// [adr-0014]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0014-capability-representation.md + /// [adr-0021]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0021-raw-pointer-scheduler-ipc-bridge.md + task_cap_tables: [Option<*mut CapabilityTable>; TASK_ARENA_CAPACITY], + /// Per-task user-access window `[entry_va, stack_top_va)`, parallel to + /// `task_handles`. Written by [`add_user_task`][Self::add_user_task] from + /// its `user_entry` / `user_sp` params; read by the BSP `syscall_entry` + /// (via [`current_user_window`][Self::current_user_window]) as the cheap + /// range first-gate the gate-#1 translate-based copy-user validates against + /// ([ADR-0038][adr-0038]). `None` for kernel-mode tasks. + /// + /// [adr-0038]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0038-mmu-translate-and-user-access.md + task_user_windows: [Option; TASK_ARENA_CAPACITY], current: Option, /// Idle-task fallback slot per [ADR-0026]. Written exclusively by /// [`register_idle`]; read by the dispatch sites @@ -316,6 +346,8 @@ impl Scheduler { task_states: [TaskState::Idle; TASK_ARENA_CAPACITY], task_handles: [None; TASK_ARENA_CAPACITY], task_address_space_handles: [None; TASK_ARENA_CAPACITY], + task_cap_tables: [None; TASK_ARENA_CAPACITY], + task_user_windows: [None; TASK_ARENA_CAPACITY], current: None, idle: None, contexts: core::array::from_fn(|_| C::TaskContext::default()), @@ -393,7 +425,22 @@ impl Scheduler { /// user-stack access translate; the `enter_el0` trampoline installs no /// `TTBR0` of its own. 
/// + /// `cap_table` must be a valid pointer to a [`CapabilityTable`] that + /// outlives the task and is not aliased by a live `&mut` across any + /// context switch (the [ADR-0021][adr-0021] raw-pointer-bridge discipline): + /// the scheduler only **records** it (a binding for `syscall_entry` to + /// resolve the task's own capabilities through, gate #3 / T-026); the BSP + /// materialises a momentary `&mut` to it only across one `dispatch` call. + /// It is **not** dereferenced here. + /// /// [ADR-0037]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0037-el0-entry-context.md + /// [adr-0021]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0021-raw-pointer-scheduler-ipc-bridge.md + #[allow( + clippy::too_many_arguments, + reason = "EL0 task registration genuinely needs these distinct inputs \ + (handle, AS, user entry/SP, kernel SP_EL1, cap-table binding); \ + bundling into a struct would only relocate the same fields" + )] pub unsafe fn add_user_task( &mut self, cpu: &C, @@ -402,6 +449,7 @@ impl Scheduler { user_entry: usize, user_sp: usize, kernel_stack_top: *mut u8, + cap_table: *mut CapabilityTable, ) -> Result<(), SchedError> { let idx = handle.slot().index() as usize; // Gate #2 belt-and-braces: `kernel_stack_top` becomes this task's @@ -441,9 +489,55 @@ impl Scheduler { self.task_states[idx] = TaskState::Ready; self.task_handles[idx] = Some(handle); self.task_address_space_handles[idx] = Some(address_space_handle); + // Gate #3 (T-026): record the task's capability-table binding + its + // user-access window so `syscall_entry` resolves the *running* task's + // own caps + bounds its buffers per task. The window is the contiguous + // image+stack span `[entry_va, stack_top_va) = [user_entry, user_sp)`; + // `saturating_sub` yields a zero-length window if the caller violates + // `user_sp >= user_entry` — fail-closed (every non-zero copy then + // faults) rather than wrapping. 
+ self.task_cap_tables[idx] = Some(cap_table); + self.task_user_windows[idx] = Some(UserAccessWindow::new( + user_entry, + user_sp.saturating_sub(user_entry), + )); Ok(()) } + /// The running task's capability-table pointer (gate #3), or `None` if + /// there is no current task or it has no bound table (a kernel-mode task). + /// + /// The BSP `syscall_entry` resolves a syscall's capabilities in **this** + /// table; a `None` is the **fail-closed** signal — the caller must dispatch + /// against an empty table (every lookup → `InvalidHandle`) or short-circuit, + /// never fall back to an ambient table ([ADR-0014][adr-0014] per-subject + /// unforgeability). The returned `*mut` rides the [ADR-0021] bridge: the + /// caller materialises a momentary `&mut` only across one `dispatch`. + /// + /// [adr-0014]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0014-capability-representation.md + #[must_use] + pub fn current_user_table(&self) -> Option<*mut CapabilityTable> { + let idx = self.current?.slot().index() as usize; + self.task_cap_tables[idx] + } + + /// The running task's [`AddressSpaceHandle`] — the translation regime the + /// gate-#1 per-page `Mmu::translate` resolves user pointers through. `None` + /// when there is no current task or the slot is unregistered. + #[must_use] + pub fn current_address_space_handle(&self) -> Option { + let idx = self.current?.slot().index() as usize; + self.task_address_space_handles[idx] + } + + /// The running task's [`UserAccessWindow`] (the cheap range first-gate), + /// or `None` if there is no current task or it has no bound window. + #[must_use] + pub fn current_user_window(&self) -> Option { + let idx = self.current?.slot().index() as usize; + self.task_user_windows[idx] + } + // ── Private helpers ─────────────────────────────────────────────────────── /// Resolve a capability handle to an [`EndpointHandle`]. 
@@ -1646,11 +1740,16 @@ mod tests { // Opaque userspace VAs — the fake records but never dereferences them. let user_entry = 0x0080_0000usize; let user_sp = 0x0080_2000usize; + // A capability-table the binding points at — never dereferenced here + // (no syscall is dispatched in this test). + let mut table = CapabilityTable::new(); + let table_ptr: *mut CapabilityTable = core::ptr::addr_of_mut!(table); // SAFETY: `ktop` is one-past a 512-byte, 16-byte-aligned kernel stack // (AlignedStack repr); `FakeCpu::init_user_context` only records, so // `user_entry` / `user_sp` / `ktop` are never dereferenced and no real - // EL0 entry occurs. + // EL0 entry occurs; `table_ptr` is a valid pointer to a stack-local + // table the scheduler only records (gate #3). unsafe { sched .add_user_task( @@ -1660,6 +1759,7 @@ mod tests { user_entry, user_sp, ktop, + table_ptr, ) .unwrap(); }; @@ -1671,6 +1771,13 @@ mod tests { sched.task_address_space_handles[0], Some(BOOTSTRAP_ADDRESS_SPACE_HANDLE) ); + // … and the gate-#3 bindings are recorded: the cap-table pointer + the + // [entry_va, stack_top_va) user-access window. 
+ assert_eq!(sched.task_cap_tables[0], Some(table_ptr)); + assert_eq!( + sched.task_user_windows[0], + Some(UserAccessWindow::new(user_entry, user_sp - user_entry)) + ); assert_eq!(sched.ready.len(), 1); // … but seeded via the EL0 first-entry path (not init_context): the // context carries the user entry / user SP and the kernel stack (its From 6cbb6844c8478286a4ecdcf421dc16883cf1f213 Mon Sep 17 00:00:00 2001 From: Cemil ILIK Date: Sun, 31 May 2026 17:45:13 +0300 Subject: [PATCH 2/6] =?UTF-8?q?test(syscall):=20T-025=20gate-#1=20review?= =?UTF-8?q?=20follow-up=20=E2=80=94=20BlockMapped=20copy-path=20test=20+?= =?UTF-8?q?=20AF-check=20audit=20note?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Post-merge security review of gate #1 (PR #39) verdict: confused-deputy closed, all-or-nothing clean, lock-shut decoder, read-only translate, fail-closed — all verified. Two minor findings actioned: (BULGU-1) add copy_from_user_block_mapped_page_faults — the probe maps every translate error (incl. BlockMapped) to FaultAddress, but only the NotMapped arm had a copy-path test; uses the existing BlockMappedMmu decorator. (BULGU-3) note in the UNSAFE-2026-0025 amendment that the AF=0->NotMapped filter (d0e5a17) is a pure read-only guard (no write site / frame alloc / new invariant). Skipped (BULGU-2): Mmu::map allowing DEVICE|USER is not a defect here — the copy-time USER check is correct; the proper fix is at map-time (cap_map rejecting DEVICE|USER), a future B6+ item, and no v1 leaf is DEVICE|USER. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/audits/unsafe-log.md | 2 +- kernel/src/syscall/user_access.rs | 24 ++++++++++++++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/docs/audits/unsafe-log.md b/docs/audits/unsafe-log.md index 745cbb7..ea8c062 100644 --- a/docs/audits/unsafe-log.md +++ b/docs/audits/unsafe-log.md @@ -617,7 +617,7 @@ Neither change touches the `copy_nonoverlapping` site itself; both correct contr **Amendment (2026-05-15, T-019 commit 7 — review-round 4 follow-up): argument-preflight alignment check closes a preventable root-frame leak path.** PR #31 review-round 4 P2 observed that `load_image` did not validate `image_base_va` alignment in its argument preflight (row 1); an unaligned base surfaced from the first `cap_map` call inside the image-page loop as `MmuError::MisalignedAddress` → `LoadError::MapFailed`. By that point `cap_create_address_space` had already allocated the root L0 frame for the new AS, which then leaked via the v1 baseline rollback (the existing test `rolls_back_on_misaligned_image_base_va` *asserted* this leak with `pmm.stats().free_frames == pmm_before - 1`, accepting it as the documented v1 trade-off). Fix: row 1's argument preflight now checks `image_base_va.0.is_multiple_of(PAGE_SIZE)` and rejects with new variant `LoadError::MisalignedImageBaseVa(VirtAddr)` before any `cap_create_address_space` invocation — PMM byte-stable on rejection (no root-frame leak). This does not change the `copy_nonoverlapping` site itself but eliminates an internal-API-misuse path that consumed a frame per call. The renamed test `rejects_misaligned_image_base_va_with_pmm_byte_stable` now asserts `pmm.stats().free_frames == pmm_before`. `LoadError` taxonomy grows 9 → 10 variants; the exhaustiveness regression test (`load_error_variants_pattern_match_exhaustively`) is updated and would compile-fail if the new variant is silently removed. 
Two follow-up doc fixes also landed in the same commit (review-round 4 P3): (a) `accepts_image_disjoint_from_pmm_extent` test switched from a heap-allocated `Vec` to a `.rodata`-resident `static [u8; 8]` so disjointness from the PMM extent is structurally guaranteed (the previous silent `if {...} return` skip path is replaced with a hard premise `assert!`); (b) the `LoadError::FrameBudgetExceeded` variant doc-comment was refreshed from "intermediate_budget = 6 is the safe upper bound" (stale since round 3 F1) to a reference to the exact `intermediate_frame_count(...)` helper. -**Amendment (2026-05-31, T-025 / [ADR-0038](../decisions/0038-mmu-translate-and-user-access.md) — a new read-only caller, `QemuVirtMmu::translate`, reuses this walker).** ADR-0038 adds `Mmu::translate` (the realised [ADR-0009](../decisions/0009-mmu-trait.md) walk query) for the syscall copy-user path (gate #1). The aarch64 impl ([`bsp-qemu-virt/src/mmu.rs`](../../bsp-qemu-virt/src/mmu.rs)) reuses **this entry's** `walk_or_alloc_table` in its read-only mode (`unmap = true` + `NullFrameProvider`) for the L0→L2 descent, then a single `core::ptr::read_volatile` of the L3 leaf — **no descriptor is written and no frame is allocated**. The operation is strictly a *subset* of what this entry already audits: the same index-bounded (`< ENTRIES_PER_TABLE`) volatile reads through the high-half direct map (`phys_to_kernel_va`), minus every write; block / absent descriptors surface as `MmuError::BlockMapped` / `NotMapped` exactly as the existing unmap-path read does. The invariants this entry relies on (valid root, index bounds, high-half-direct-map reads) cover the read-only walk verbatim — **no new write site, no new invariant.** Host-tested via `FakeMmu::translate` + the `QemuVirtMmu` read-only walk; the kernel-side per-page copy that consumes it is audited under UNSAFE-2026-0030 (its 2026-05-31 Amendment). 
+**Amendment (2026-05-31, T-025 / [ADR-0038](../decisions/0038-mmu-translate-and-user-access.md) — a new read-only caller, `QemuVirtMmu::translate`, reuses this walker).** ADR-0038 adds `Mmu::translate` (the realised [ADR-0009](../decisions/0009-mmu-trait.md) walk query) for the syscall copy-user path (gate #1). The aarch64 impl ([`bsp-qemu-virt/src/mmu.rs`](../../bsp-qemu-virt/src/mmu.rs)) reuses **this entry's** `walk_or_alloc_table` in its read-only mode (`unmap = true` + `NullFrameProvider`) for the L0→L2 descent, then a single `core::ptr::read_volatile` of the L3 leaf — **no descriptor is written and no frame is allocated**. The operation is strictly a *subset* of what this entry already audits: the same index-bounded (`< ENTRIES_PER_TABLE`) volatile reads through the high-half direct map (`phys_to_kernel_va`), minus every write; block / absent descriptors surface as `MmuError::BlockMapped` / `NotMapped` exactly as the existing unmap-path read does. The invariants this entry relies on (valid root, index bounds, high-half-direct-map reads) cover the read-only walk verbatim — **no new write site, no new invariant.** Host-tested via `FakeMmu::translate` + the `QemuVirtMmu` read-only walk; the kernel-side per-page copy that consumes it is audited under UNSAFE-2026-0030 (its 2026-05-31 Amendment). **AF filter (follow-on `d0e5a17`):** `QemuVirtMmu::translate` also rejects an L3 leaf whose Access Flag is clear (`AF = 0` → `MmuError::NotMapped`), since such a page would take an Access-Flag fault on a real access. This is a pure **read-only guard** — it tests a bit of the already-read L3 leaf descriptor and adds **no write site, no frame allocation, and no new invariant** (covered by this same read-only-walk amendment; called out here so a reader of the 3-line check finds its rationale). 
[t-019-ac]: ../analysis/tasks/phase-b/T-019-task-loader.md#acceptance-criteria [UNSAFE-2026-0026]: #unsafe-2026-0026--pmm-frame-zeroing-via-coreptrwrite_bytes-in-pmmalloc_frame diff --git a/kernel/src/syscall/user_access.rs b/kernel/src/syscall/user_access.rs index 9eece7a..8c8e199 100644 --- a/kernel/src/syscall/user_access.rs +++ b/kernel/src/syscall/user_access.rs @@ -329,8 +329,8 @@ pub fn copy_to_user( mod tests { use super::{copy_from_user, copy_to_user, UserAccessWindow}; use crate::syscall::error::SyscallError; - use tyrne_hal::{MappingFlags, PAGE_SIZE}; - use tyrne_test_hal::FakeUserMem; + use tyrne_hal::{MappingFlags, Mmu, PhysAddr, PhysFrame, VirtAddr, PAGE_SIZE}; + use tyrne_test_hal::{BlockMappedMmu, FakeUserMem}; // The translate-based copy resolves a user VA to a `PhysFrame`, rebases it // via `phys_frame_kernel_ptr` (identity on host) and reads / writes the @@ -505,6 +505,26 @@ mod tests { ); } + #[test] + fn copy_from_user_block_mapped_page_faults() { + // A 2 MiB block-mapped leaf (e.g. the bootstrap kernel map) is not a + // 4 KiB user page; `translate` returns `BlockMapped`, which the probe + // maps to `FaultAddress` — the same reject as an unmapped page. Closes + // the copy-path coverage of the probe's `BlockMapped` translate-error + // arm (the `BlockMappedMmu` decorator injects it). + let mmu = BlockMappedMmu::with_blocked([VirtAddr(UVA)]); + // SAFETY: the inner FakeMmu stores `root` without dereferencing it. 
+ let as_ = + unsafe { mmu.create_address_space(PhysFrame::from_aligned(PhysAddr(0x1000)).unwrap()) }; + let window = UserAccessWindow::new(UVA, PAGE_SIZE); + let mut dst = [0u8; 8]; + assert_eq!( + copy_from_user(&mmu, &as_, &window, UVA, &mut dst), + Err(SyscallError::FaultAddress) + ); + assert_eq!(dst, [0; 8], "no byte copied from a block-mapped leaf"); + } + // ── range gate (window) ──────────────────────────────────────────────────── #[test] From 15a9d804e11fc93a8217ee5fbbe03c2dd76dc395 Mon Sep 17 00:00:00 2001 From: Cemil ILIK Date: Sun, 31 May 2026 17:56:16 +0300 Subject: [PATCH 3/6] feat(mm): widen AddressSpace::inner() to pub for read-only Mmu::translate consumers (T-026 gate #3 prep) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit syscall_entry (gate #3) needs the running task's concrete &M::AddressSpace to pass to the gate-#1 Mmu::translate copy-user path. inner() is &self (shared) so it grants no mutation — all map/unmap still flow through the cap-gated wrappers via inner_mut; exposing the immutable view bypasses no capability check. Refs: T-026 Co-Authored-By: Claude Opus 4.8 (1M context) --- kernel/src/mm/address_space.rs | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/kernel/src/mm/address_space.rs b/kernel/src/mm/address_space.rs index bb00ab6..a1208ba 100644 --- a/kernel/src/mm/address_space.rs +++ b/kernel/src/mm/address_space.rs @@ -130,21 +130,18 @@ impl AddressSpace { mmu.address_space_root(&self.inner) } - /// Return a reference to the BSP-specific inner value. + /// Return a **shared** reference to the BSP-specific inner value. /// - /// Crate-internal: the activation hook (T-018 commit 4) uses - /// this to pass `&Mmu::AddressSpace` to [`Mmu::activate`] on - /// the context-switch path. Outside code accesses an - /// `AddressSpace` only through the cap-gated surface, - /// never through this accessor directly. 
+ /// `pub` for read-only consumers that need the concrete `&M::AddressSpace` + /// the [`Mmu`] trait's by-shared-ref methods take — notably the BSP + /// `syscall_entry` passing the running task's address space to the gate-#1 + /// `Mmu::translate` copy-user path (gate #3 / T-026), and the activation + /// hook passing it to [`Mmu::activate`]. It is `&self` (shared), so it + /// grants **no mutation**: all `map` / `unmap` still flow only through the + /// cap-gated wrappers via the crate-internal [`inner_mut`][Self::inner_mut]. + /// Exposing the immutable view therefore bypasses no capability check. #[must_use] - #[allow( - dead_code, - reason = "T-018 commit 4 (activation hook in yield_now) is the first \ - caller; landed for module-shape completeness so commit 4 \ - adds only the scheduler-side hook, not the accessor surface" - )] - pub(crate) const fn inner(&self) -> &M::AddressSpace { + pub const fn inner(&self) -> &M::AddressSpace { &self.inner } From 385f4b495e6f1169c21c835664198cd267fbec57 Mon Sep 17 00:00:00 2001 From: Cemil ILIK Date: Sun, 31 May 2026 18:28:04 +0300 Subject: [PATCH 4/6] =?UTF-8?q?feat(syscall):=20T-026=20step=20B+C=20?= =?UTF-8?q?=E2=80=94=20gate=20#3=20current-task=20cap-table=20sourcing=20+?= =?UTF-8?q?=20fail-closed=20(B6=20gate=20#3)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit syscall_entry sources the running EL0 task's capability table + address space + user-access window from SCHED.current (scheduler accessors landed in step A), failing closed to the empty FAILCLOSED_TABLE + empty window when no task is current. AddressSpace::inner() widened to pub (read-only) so the BSP can pass the task's &QemuVirtAddressSpace to gate-#1 Mmu::translate. The H2 control-plane fail-closed lands in the dispatcher via SyscallContext.has_current_task (host-testable; the BSP can't construct the #[non_exhaustive] SyscallError): task_yield/task_exit -> InvalidHandle when no task is current. 
The +0x200 smoke is re-sequenced after SCHED init (current=None) and now demonstrates gate-#3 fail-closed (console_write -> InvalidHandle 0x102); SYSCALL_STUB_TABLE retired -> the empty FAILCLOSED_TABLE. Closes the last T-021 carry-forward gate (#1 T-025 + #2 T-023 + #3 T-026 all done). Gates: fmt; host+kernel clippy -D warnings; host tests kernel 257 / hal 46 / test-hal 58 / 3 doc (new: current_accessors_resolve_running_task_bindings_or_none, task_{yield,exit}_with_no_current_task_fails_closed); kernel build; QEMU smoke PASS (2 SVC, clean ERET, zero new fault class); Miri 0 UB. Mandatory UNSAFE-2026-0014 Amendment (cap-table deref) + UNSAFE-2026-0029 Amendment (statics + smoke re-sequence). Refs: T-026 Co-Authored-By: Claude Opus 4.8 (1M context) --- bsp-qemu-virt/src/main.rs | 143 ++++++++--------- bsp-qemu-virt/src/syscall.rs | 150 ++++++++++-------- .../phase-b/T-026-current-task-cap-table.md | 21 +-- docs/audits/unsafe-log.md | 4 + docs/roadmap/current.md | 2 + docs/roadmap/phases/phase-b.md | 2 +- kernel/src/sched/mod.rs | 52 ++++++ kernel/src/syscall/dispatch.rs | 111 ++++++++++++- 8 files changed, 325 insertions(+), 160 deletions(-) diff --git a/bsp-qemu-virt/src/main.rs b/bsp-qemu-virt/src/main.rs index 496eb58..0a7bbbe 100644 --- a/bsp-qemu-virt/src/main.rs +++ b/bsp-qemu-virt/src/main.rs @@ -415,15 +415,20 @@ static EP_CAP_A: StaticCell = StaticCell::new(); /// Task B's endpoint capability handle (index into `TABLE_B`). static EP_CAP_B: StaticCell = StaticCell::new(); -// ─── T-021 syscall-boundary smoke ───────────────────────────────────────────── - -/// The EL1 kernel-stub's capability table — the `caller_table` the syscall -/// dispatcher resolves capabilities in for the B5 `SVC` smoke (see -/// [`syscall::syscall_entry`]). In B5 the only `SVC` comes from a kernel-stub, -/// so it has a dedicated table holding a single debug-console capability; -/// B6 replaces this with the scheduler's current-task table once a real EL0 -/// task exists. 
Distinct from `TABLE_A` / `TABLE_B` (the IPC-demo tables). -static SYSCALL_STUB_TABLE: StaticCell = StaticCell::new(); +// ─── Syscall-boundary fail-closed fallback table (T-026 / gate #3) ──────────── + +/// The **empty** capability table the syscall dispatcher resolves against when +/// [`syscall::syscall_entry`] cannot resolve a running EL0 task from the +/// scheduler (its `current_user_table()` returns `None`) — the **fail-closed** +/// default for gate #3 (T-026). Every cap lookup against it returns +/// `CapError::InvalidHandle`, so a syscall issued with no running task names no +/// capability — never an ambient table (the [ADR-0014] per-subject +/// unforgeability holds). It is **never minted into**: a real EL0 task brings +/// its own table, recorded in the scheduler by `add_user_task` and dereferenced +/// by `syscall_entry`. Distinct from `TABLE_A` / `TABLE_B` (the IPC-demo tables). +/// +/// [ADR-0014]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0014-capability-representation.md +static FAILCLOSED_TABLE: StaticCell = StaticCell::new(); /// Task kernel-object arena — global per [ADR-0016]. Although the v1 demo /// never reads this arena after `create_task` has returned the two @@ -694,84 +699,67 @@ fn task_a() -> ! { } } -// ─── T-021 syscall-boundary smoke ────────────────────────────────────────────── +// ─── Syscall-boundary smoke — gate #3 fail-closed (T-026) ───────────────────── -/// EL1 kernel-stub `SVC` smoke for the B5 syscall boundary ([T-021]). +/// EL1 `SVC` smoke for the syscall boundary, demonstrating **gate #3 +/// fail-closed** ([T-026]). 
/// -/// Issues two `SVC #0` traps **from EL1** — exercising the current-EL -/// `VBAR_EL1 + 0x200` sync vector and the full save → decode → dispatch → -/// `ERET` round-trip (an `SVC` issued at EL1 cannot take the lower-EL `+0x400` -/// vector; that real-EL0 path is B6's smoke per [ADR-0030 §Simulation]): +/// Issues two `SVC #0` traps from EL1 (the current-EL `VBAR_EL1 + 0x200` sync +/// vector — an EL1 `SVC` cannot take the lower-EL `+0x400` path; the real-EL0 +/// round-trip is the B6 wire-up). It runs **after `SCHED` is published but +/// before `start()`**, so `SCHED.current` is `None` — no running EL0 task. The +/// dispatcher therefore **fails closed**: with no current task it resolves +/// capabilities against the empty [`FAILCLOSED_TABLE`] and bounds user buffers +/// with an empty window, so a syscall carries **no authority**: /// -/// 1. **`console_write`** (number `5`) through a granted debug-console -/// capability — the dispatcher's capability check passes, `copy_from_user` -/// validates the buffer against the active address space, and the bytes are -/// emitted on the serial console (the round-trip + emitted-bytes half of B5 -/// acceptance criterion #7). -/// 2. a **reserved-invalid number** (`0`) — the panic-free error path returns -/// `SyscallError::BadSyscallNumber` (status `0x1`) without touching any -/// capability. +/// 1. **`console_write`** (number `5`) → the cap (any handle) is looked up in +/// the empty table → `SyscallError::Cap(InvalidHandle)` (status `0x102`), +/// nothing emitted. Gate #1's per-page `Mmu::translate` boundary (T-025) is +/// never reached — the cap gate rejects first; the positive copy path is +/// host-tested and runs at the B6 wire-up. +/// 2. a **reserved-invalid number** (`0`) → `SyscallError::BadSyscallNumber` +/// (status `0x1`), panic-free, capability untouched. 
/// -/// Runs after the IPC statics are published (the dispatcher's -/// [`SyscallContext`][tyrne_kernel::syscall::SyscallContext] borrows -/// `EP_ARENA` / `IPC_QUEUES`) and before `start()`. `task_yield` / `task_exit` -/// are not driven here — their dispatcher routing is host-tested; their real -/// EL0 semantics land in B6. +/// Both `ERET` cleanly (the `SVC` mechanism is exercised); neither over-grants. +/// `task_yield` / `task_exit` are not driven here — their gate-#3 control-plane +/// fail-closed is host-tested. This supersedes the B5 "stub mints a console cap +/// and emits a greeting" smoke: post-gate-#3 a syscall with no running task is +/// rejected, which is the security property worth demonstrating. /// -/// [T-021]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-021-syscall-dispatch.md -/// [ADR-0030 §Simulation]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0030-syscall-abi.md +/// [T-026]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-026-current-task-cap-table.md #[allow( clippy::cast_possible_truncation, reason = "Tyrne's BSP target is 64-bit aarch64; pointer/usize → u64 \ register-word casts are lossless" )] fn syscall_boundary_smoke(console: &Pl011Uart) { - // Mint a debug-console capability into the kernel-stub's table. + // Initialise the empty fail-closed fallback table the dispatcher resolves + // against when no EL0 task is current (gate #3). Never minted into. // - // SAFETY: `SYSCALL_STUB_TABLE` lives in `.bss`; this is its single write, - // performed before any `SVC` issues. The momentary `&mut` for the - // `insert_root` drops before the trap. Audit: UNSAFE-2026-0010 (StaticCell) - // + UNSAFE-2026-0014 (momentary `&mut`). 
- let cons_cap = unsafe { - (*SYSCALL_STUB_TABLE.0.get()).write(CapabilityTable::new()); - let table = (*SYSCALL_STUB_TABLE.0.get()).assume_init_mut(); - table - .insert_root(Capability::new( - CapRights::CONSOLE_WRITE, - CapObject::DebugConsole, - )) - .expect("debug-console cap mint in empty table cannot fail") - }; - let cons_cap_word = tyrne_kernel::syscall::encode_cap_handle(Some(cons_cap)); + // SAFETY: `FAILCLOSED_TABLE` lives in `.bss`; this is its single write, + // before any `SVC` issues. Audit: UNSAFE-2026-0010 (StaticCell pattern). + unsafe { + (*FAILCLOSED_TABLE.0.get()).write(CapabilityTable::new()); + } - // (1) console_write via SVC: x8 = 5, x0 = cap, x1 = buffer VA, x2 = length. - // - // Gate #1 demonstration (ADR-0038 / T-025): the stub passes a **kernel** - // .rodata VA. The dispatcher's per-page `Mmu::translate` resolves it through - // the bootstrap AS — whose low-identity table maps no `USER` page — so the - // copy is rejected with `SyscallError::FaultAddress` and **nothing is - // emitted**. This is the confused-deputy defence working: even a holder of a - // valid debug-console cap cannot make the kernel copy a non-user (kernel) VA - // to the console. (A real EL0 task with a genuine USER buffer is the B6 - // wire-up; before T-025 this same `SVC` emitted the greeting via the old - // identity-map deref. The mechanism is exhaustively host-tested — see the - // kernel `syscall::user_access` / `syscall::dispatch` tests.) - let kernel_buf: &[u8] = b"tyrne: (gate #1 rejects this kernel-VA buffer)\n"; - let ptr = kernel_buf.as_ptr() as u64; - let len = kernel_buf.len() as u64; + // (1) console_write via SVC with no current task → fail-closed InvalidHandle. + // The cap word (`0`) and the buffer are irrelevant: with `SCHED.current == + // None` the dispatcher resolves against the empty `FAILCLOSED_TABLE`, so the + // cap lookup rejects before the window / per-page translate is ever reached. 
+ let buf: &[u8] = b"tyrne: (no current task; gate #3 fail-closed)\n"; + let cap_word = 0u64; let status: u64; - let written: u64; // SAFETY: `SVC #0` traps to the EL1 current-EL sync vector (+0x200), runs // the panic-free dispatcher, and `ERET`s back here. x8 = number, x0..x2 = - // args; the handler writes x0 = status, x1 = bytes written, clobbers - // x0..x7, preserves x8..x30 + SP_EL0. Audit: UNSAFE-2026-0029. + // args; the handler writes x0 = status, clobbers x0..x7, preserves + // x8..x30 + SP_EL0. Audit: UNSAFE-2026-0029. unsafe { core::arch::asm!( "svc #0", in("x8") 5u64, - inout("x0") cons_cap_word => status, - inout("x1") ptr => written, - in("x2") len, + inout("x0") cap_word => status, + inout("x1") buf.as_ptr() as u64 => _, + in("x2") buf.len() as u64, out("x3") _, out("x4") _, out("x5") _, @@ -803,7 +791,7 @@ fn syscall_boundary_smoke(console: &Pl011Uart) { let mut w = FmtWriter(console); let _ = writeln!( w, - "tyrne: syscall smoke ok (gate #1 rejected kernel-VA console_write: status={status:#x} bytes={written}; bad-number status={bad_status:#x})" + "tyrne: syscall smoke ok (gate #3 fail-closed — no current task: console_write status={status:#x}; bad-number status={bad_status:#x})" ); } @@ -1535,15 +1523,6 @@ extern "C" fn kernel_main_high() -> ! { (*EP_CAP_B.0.get()).write(ep_cap_b); } - // ── Syscall-boundary smoke — T-021 ──────────────────────────────────────── - // - // Exercise the EL0→EL1 `SVC` trap → panic-free dispatcher → `ERET` - // round-trip via an EL1 kernel-stub (the current-EL `+0x200` vector). Runs - // here, after the IPC statics the dispatcher's context borrows are live, and - // before `start()` hands control to the cooperative demo. The real EL0 - // (`+0x400`) round-trip is B6's smoke. - syscall_boundary_smoke(console); - // ── Scheduler setup ─────────────────────────────────────────────────────── let mut sched = Scheduler::::new(); @@ -1600,6 +1579,16 @@ extern "C" fn kernel_main_high() -> ! 
{ (*SCHED.0.get()).write(sched); } + // ── Syscall-boundary smoke — gate #3 fail-closed (T-026) ────────────────── + // + // Sequenced **after `SCHED` is published** (above) but **before `start()`**: + // `syscall_entry` now sources the caller's table / AS / window from + // `SCHED.current` (gate #3), so `SCHED` must be initialised — and `current` + // is `None` here (the scheduler is published but not started), which is + // exactly the fail-closed case this smoke demonstrates. The real EL0 + // `+0x400` round-trip (with a running task) is the B6 wire-up. + syscall_boundary_smoke(console); + console.write_bytes(b"tyrne: starting cooperative scheduler\n"); // Transfer control to Task B (the first ready task). Does not return. diff --git a/bsp-qemu-virt/src/syscall.rs b/bsp-qemu-virt/src/syscall.rs index 33783bf..1b22161 100644 --- a/bsp-qemu-virt/src/syscall.rs +++ b/bsp-qemu-virt/src/syscall.rs @@ -14,22 +14,25 @@ //! [`tyrne_kernel::syscall::dispatch`], and applies the returned //! [`SyscallEffect`] by writing the status + payload back into the frame. //! -//! ## B5 scope and the `0x200` / `0x400` split +//! ## The `0x200` / `0x400` split //! //! The shared trampoline is installed at **both** sync vector slots — current-EL //! (`VBAR_EL1 + 0x200`) and lower-EL-AArch64 (`VBAR_EL1 + 0x400`) — because the -//! save → dispatch → `ERET` mechanism is privilege-entry-agnostic. In B5 the -//! only `SVC` comes from an **EL1 kernel-stub** (see `kernel_entry`'s syscall -//! smoke), which — executing at the *current* EL — takes the `0x200` vector, -//! **not** the lower-EL `0x400` vector. A real EL0 task taking the `0x400` -//! vector (with the EL0↔EL1 privilege transition and copy-user against a -//! separate userspace `TTBR0_EL1`) is verified at runtime in **B6**, per +//! save → dispatch → `ERET` mechanism is privilege-entry-agnostic. The only +//! `SVC` today comes from the [`crate::syscall_boundary_smoke`] EL1 stub, which — +//! 
executing at the *current* EL — takes the `0x200` vector. A real EL0 task +//! taking the `0x400` vector (with the EL0↔EL1 privilege transition and copy-user +//! against a separate userspace `TTBR0_EL1`) is the B6 wire-up, per //! [ADR-0030 §Simulation row-to-verification mapping][adr-0030]. The `0x400` -//! handler is installed now so B6 adds only the EL0 task, not new trap plumbing. +//! handler is installed now so the wire-up adds only the EL0 task, not new trap +//! plumbing. //! -//! `caller_table` is a dedicated **kernel-stub** capability table in B5 -//! ([`crate::SYSCALL_STUB_TABLE`]); B6 replaces it with the scheduler's -//! current-task table once a real EL0 task exists. +//! `caller_table` is sourced per-syscall from the **scheduler's running task** +//! (gate #3 / T-026): `syscall_entry` resolves the current task's own capability +//! table, address space, and user-access window from `SCHED.current`, and **fails +//! closed** when no task is current — the empty [`crate::FAILCLOSED_TABLE`] (every +//! lookup → `InvalidHandle`) + an empty window, so a syscall with no running task +//! names no capability and copies no byte. //! //! Audit: UNSAFE-2026-0029 (the trap-frame asm + this entry's frame //! reads/writes). @@ -85,29 +88,9 @@ pub struct SyscallTrapFrame { // the build before that can ship. (Mirrors the `TrapFrame` 192-byte guard.) const _: () = assert!(core::mem::size_of::() == 272); -/// Length of the syscall copy-from/to-user window in B5: the whole RAM extent, -/// reached through the kernel's high-half direct map (post-T-022 / ADR-0033). -/// -/// The B5 EL1 kernel-stub executes in the high half; its buffer — a -/// `.rodata`-resident `&[u8]` in the kernel image — is reachable at its -/// high-half VA, so the window base is `phys_to_kernel_va(PMM_EXTENT_START)` -/// (see [`syscall_entry`]) and the stub buffer is in range. 
Because the -/// stub's "user" pointer **is** a valid kernel VA, the dispatcher's direct -/// deref works for the stub; B6's real EL0 task instead lives at a *user* VA -/// in its own `TTBR0_EL1`, so B6 derives a tighter per-task window AND -/// replaces the direct deref with a per-page user-VA→kernel-VA translation -/// (T-021 carry-forward gate #1 — see [`UserAccessWindow`]'s module docs). -/// The subtraction is a `const`, so it -/// cannot wrap at runtime: const-eval rejects an underflow at **build time** -/// (an inverted extent is a hard compile error, never a release wrap). The -/// explicit assertion below makes that invariant — and its failure message — -/// unambiguous rather than relying on a raw "subtract with overflow" const-eval -/// error. -const _: () = assert!( - crate::PMM_EXTENT_END >= crate::PMM_EXTENT_START, - "PMM extent must be non-inverted: PMM_EXTENT_END >= PMM_EXTENT_START" -); -const SYSCALL_USER_WINDOW_LEN: usize = crate::PMM_EXTENT_END - crate::PMM_EXTENT_START; +// The B5 whole-RAM-extent `SYSCALL_USER_WINDOW_LEN` is gone: post-gate-#3 +// (T-026) the user-access window is **per task**, sourced from the scheduler's +// current task (`current_user_window()`), not a fixed extent — see `syscall_entry`. /// Rust entry for the `SVC` sync trampoline (`vectors.s`). /// @@ -127,22 +110,33 @@ const SYSCALL_USER_WINDOW_LEN: usize = crate::PMM_EXTENT_END - crate::PMM_EXTENT /// register frame through a raw `*mut SyscallTrapFrame` (the asm calling /// convention passes a pointer, not a `&mut`), and it materialises momentary /// references to the write-once BSP statics via `assume_init_{mut,ref}`. 
-/// **Invariants upheld.** (1) The four statics it reaches -/// (`EP_ARENA` / `IPC_QUEUES` / `SYSCALL_STUB_TABLE` / `CONSOLE`) are all -/// written before the syscall smoke issues any `SVC`; (2) v1 is single-core and -/// the `SVC` handler runs with interrupts masked (exception entry masks `DAIF`), -/// so no peer aliases them mid-call; (3) the momentary `&mut`s are scoped to the +/// **Invariants upheld.** (1) The statics it reaches (`SCHED` / `EP_ARENA` / +/// `IPC_QUEUES` / `CONSOLE` / `MMU` / `AS_ARENA` / `FAILCLOSED_TABLE` / +/// `BOOTSTRAP_AS`) are all written before the syscall smoke issues any `SVC` — +/// the smoke is sequenced *after* `SCHED` is published, so `SCHED.current` reads +/// a valid (empty, not-yet-started) scheduler; (2) v1 is single-core and the +/// `SVC` handler runs with interrupts masked (exception entry masks `DAIF`), so +/// no peer aliases them mid-call; (3) the momentary `&mut`s are scoped to the /// single `dispatch` call and do not cross a context switch — the data-plane /// syscalls do not switch and the control-plane ones return a directive *before* /// any switch, honouring the [ADR-0021] discipline; (4) the frame writes touch /// only `x0`–`x7`, leaving the trampoline's restore of `x8`–`x30` + `SP_EL0` + -/// `ELR_EL1` + `SPSR_EL1` intact. 
**Rejected alternatives.** Passing a `&mut +/// `ELR_EL1` + `SPSR_EL1` intact; (5) **gate #3 (M4):** with a current EL0 task, +/// `caller_table` is `&mut *current_user_table()` — a momentary `&mut` to the +/// task's own capability table (a BSP static recorded by `add_user_task` via the +/// [ADR-0021] raw-pointer bridge), lexically contained to this one `dispatch` +/// call and never crossing a switch; with no current task it is the empty +/// `FAILCLOSED_TABLE` (every lookup → `InvalidHandle`) and `task_as` the +/// never-dereferenced bootstrap-AS placeholder behind an empty window, so a +/// syscall with no running task names no capability and copies no byte +/// (UNSAFE-2026-0014 Amendment). **Rejected alternatives.** Passing a `&mut /// SyscallTrapFrame` from the asm is impossible (asm has no Rust references); /// holding the BSP statics behind a lock would deadlock the interrupts-masked /// handler with no soundness gain under single-core cooperative semantics. /// /// Audit: UNSAFE-2026-0029 (trap-frame asm + frame access) + UNSAFE-2026-0010 -/// (`StaticCell` pattern) + UNSAFE-2026-0014 (momentary `&mut` to kernel state). +/// (`StaticCell` pattern) + UNSAFE-2026-0014 (momentary `&mut` to kernel state, +/// incl. the gate-#3 cap-table-pointer deref). #[unsafe(no_mangle)] pub unsafe extern "C" fn syscall_entry(frame: *mut SyscallTrapFrame) { // SAFETY: `frame` is valid per the trampoline contract above; read the @@ -158,29 +152,46 @@ pub unsafe extern "C" fn syscall_entry(frame: *mut SyscallTrapFrame) { } }; - // SAFETY: build the dispatch context from the write-once BSP statics. All - // four are initialised in `kernel_entry` before the syscall smoke runs; - // single-core + interrupts-masked-in-handler means no aliasing; the - // momentary `&mut`s drop at the end of the `dispatch` call and never cross a - // switch. Audit: UNSAFE-2026-0010 (StaticCell) + UNSAFE-2026-0014 (momentary - // `&mut` to kernel state) + UNSAFE-2026-0029 (the syscall arc). 
+ // SAFETY: build the dispatch context from the **running EL0 task's** + // bindings, sourced from the scheduler (gate #3 / T-026), or the FAIL-CLOSED + // default when no task is current. `SCHED` / `EP_ARENA` / `IPC_QUEUES` / + // `CONSOLE` / `MMU` / `AS_ARENA` / `FAILCLOSED_TABLE` / `BOOTSTRAP_AS` are all + // published before the (post-`SCHED`-init) smoke runs; single-core + + // interrupts-masked ⇒ no aliasing; the momentary `&mut`s drop at the end of + // `dispatch` and never cross a switch. The `&mut *table_ptr` is the gate-#3 + // cap-table dereference (M4 — UNSAFE-2026-0014 Amendment; see this fn's + // `# Safety`). Audit: UNSAFE-2026-0010 + UNSAFE-2026-0014 + UNSAFE-2026-0029. let effect = unsafe { + let sched = (*crate::SCHED.0.get()).assume_init_ref(); + let current_table = sched.current_user_table(); + let current_as = sched.current_address_space_handle(); + let current_window = sched.current_user_window(); + + // task_as: the running task's address space (read-only, for the gate-#1 + // `Mmu::translate`), or the bootstrap AS as a harmless placeholder when + // fail-closed (the empty window rejects every non-zero copy first). A + // stale / absent AS handle also falls back. + let arena = (*crate::AS_ARENA.0.get()).assume_init_ref(); + let task_as = match current_as.and_then(|h| tyrne_kernel::mm::get_address_space(arena, h)) { + Some(asp) => asp.inner(), + None => (*crate::BOOTSTRAP_AS.0.get()).assume_init_ref(), + }; + // caller_table: the running task's own recorded table, or the empty + // FAILCLOSED_TABLE (every lookup → InvalidHandle) when no task is current. 
+ let caller_table = match current_table { + Some(table_ptr) => &mut *table_ptr, + None => (*crate::FAILCLOSED_TABLE.0.get()).assume_init_mut(), + }; + let mut ctx = SyscallContext { ep_arena: (*crate::EP_ARENA.0.get()).assume_init_mut(), queues: (*crate::IPC_QUEUES.0.get()).assume_init_mut(), - caller_table: (*crate::SYSCALL_STUB_TABLE.0.get()).assume_init_mut(), + caller_table, console: (*crate::CONSOLE.0.get()).assume_init_ref(), - user_window: UserAccessWindow::new( - tyrne_hal::phys_to_kernel_va(crate::PMM_EXTENT_START), - SYSCALL_USER_WINDOW_LEN, - ), - // Gate #1 (ADR-0038): the per-page `Mmu::translate` source. In B5 - // this is the bootstrap AS the EL1 stub runs in — whose low-identity - // table maps no `USER` page, so a stub `console_write` of a kernel - // VA is correctly rejected (`FaultAddress`). B6 / gate #3 (T-026) - // sources the running EL0 task's AS from the scheduler instead. + user_window: current_window.unwrap_or_else(UserAccessWindow::empty), mmu: (*crate::MMU.0.get()).assume_init_ref(), - task_as: (*crate::BOOTSTRAP_AS.0.get()).assume_init_ref(), + task_as, + has_current_task: current_table.is_some(), }; dispatch(&mut ctx, args) }; @@ -203,23 +214,24 @@ pub unsafe extern "C" fn syscall_entry(frame: *mut SyscallTrapFrame) { } } SyscallEffect::Reschedule => { - // task_yield. v1 B5 stand-in: there is no scheduler-resident EL0 - // task issuing this (the smoke runs the stub before `start()`), so - // the real `yield_now` wiring lands in B6 once the caller is an EL0 - // task. The dispatcher-level routing (number 3 → Reschedule) is - // host-tested; here we resume with `Ok` (x0 = 0) — task_yield - // "always succeeds in v1" per ADR-0031. + // task_yield. Post-gate-#3 the dispatcher returns `Reschedule` only + // when an EL0 task is current (`has_current_task`); with no current + // task it returns `Resume(InvalidHandle)` instead (handled above). 
+ // So this arm is reached only for a real running task — **dormant** + // until the B6 wire-up (the smoke issues no control-plane `SVC`). + // The v1 stand-in resumes `Ok` (x0 = 0) — task_yield "always succeeds + // in v1" per ADR-0031; real `yield_now` wiring lands in the wire-up. // SAFETY: write x0 only. Audit: UNSAFE-2026-0029. unsafe { (*frame).x0_x1[0] = tyrne_kernel::syscall::OK_STATUS; } } SyscallEffect::Terminate(_code) => { - // task_exit. The ABI says "does not return", but v1 has no EL0 - // context register file to drop — real termination lands in B6. The - // dispatcher-level routing (number 4 → Terminate) is host-tested; - // here we defensively resume with `Ok` so a stray kernel-stub - // task_exit cannot wedge the boot before B6 wires real termination. + // task_exit. As with `Reschedule`, reached only with a current task + // (gate #3 rejects it otherwise) — **dormant** until the B6 wire-up. + // The ABI says "does not return", but v1 has no EL0 context to drop; + // the v1 stand-in resumes `Ok` so a stray `task_exit` cannot wedge + // the boot before the wire-up lands real termination. // SAFETY: write x0 only. Audit: UNSAFE-2026-0029. 
unsafe { (*frame).x0_x1[0] = tyrne_kernel::syscall::OK_STATUS; diff --git a/docs/analysis/tasks/phase-b/T-026-current-task-cap-table.md b/docs/analysis/tasks/phase-b/T-026-current-task-cap-table.md index 566a4b8..9171d06 100644 --- a/docs/analysis/tasks/phase-b/T-026-current-task-cap-table.md +++ b/docs/analysis/tasks/phase-b/T-026-current-task-cap-table.md @@ -2,7 +2,7 @@ - **Phase:** B - **Milestone:** B6 — First userspace "hello" (step 4b of the [B6 dependency-ordered sequence](../../../roadmap/phases/phase-b.md#milestone-b6--first-userspace-hello); closes [T-021 carry-forward **gate #3**](../../../roadmap/phases/phase-b.md#t-021-carry-forward-gates-must-close-before-a-real-el0-task-runs)) -- **Status:** Draft (sibling of [T-025](T-025-user-access-translation.md); opened in the [ADR-0038](../../../decisions/0038-mmu-translate-and-user-access.md) Propose commit because that ADR's dependency chain names it — [ADR-0025 §Rule 1](../../../decisions/0025-adr-governance-amendments.md). **No ADR of its own.**) +- **Status:** In Review (implemented on `t-026-current-task-cap-table` off the merged T-025 `f21eece`; **no ADR of its own** — rides ADR-0030/0021/0014; all gates green incl. 
Miri; **security-relevant — awaiting the explicit EL0-boundary security review per Definition of done**) - **Created:** 2026-05-31 - **Author:** @cemililik (+ Claude Opus 4.8 agent) - **Dependencies:** [T-025](T-025-user-access-translation.md) (the translate-based, ``-generic `SyscallContext` this sources a real task's AS + capability table into); [ADR-0030](../../../decisions/0030-syscall-abi.md) (§Dependency-chain step 7 already names "`SYSCALL_STUB_TABLE` → scheduler current-task table" — this task needs **no new ADR**); [ADR-0021](../../../decisions/0021-raw-pointer-scheduler-ipc-bridge.md) (the raw-pointer scheduler-bridge discipline the per-task `*mut CapabilityTable` rides); [ADR-0014](../../../decisions/0014-capability-representation.md) (per-subject table unforgeability — the property gate #3 preserves); [ADR-0037](../../../decisions/0037-el0-entry-context.md) (`add_user_task` — the registration site the cap-table binding is added to); [ADR-0028](../../../decisions/0028-address-space-data-structure.md) (the `AddressSpaceHandle` → `AddressSpace` lookup the window derivation uses). @@ -21,15 +21,15 @@ The scheduler tracks the current task (`Scheduler::current: Option`) ## Acceptance criteria -- [ ] **Scheduler task→table binding** ([`kernel/src/sched/mod.rs`](../../../../kernel/src/sched/mod.rs)): a per-slot `task_cap_tables: [Option<*mut CapabilityTable>; TASK_ARENA_CAPACITY]` (mirroring `task_address_space_handles`; **raw pointer — tables stay BSP-owned**, no ownership transfer, [ADR-0021](../../../decisions/0021-raw-pointer-scheduler-ipc-bridge.md) discipline), written by `add_user_task` (new `cap_table: *mut CapabilityTable` parameter). **Not** embedded in `Task` (keeps `Task` minimal — `id + address_space_handle`). 
-- [ ] **Per-task window storage** ([`kernel/src/sched/mod.rs`](../../../../kernel/src/sched/mod.rs)): a parallel `task_user_windows: [Option; TASK_ARENA_CAPACITY]` (mirroring `task_address_space_handles`), built by `add_user_task` from its **existing** `user_entry`/`user_sp` params — the `[entry_va, stack_top_va)` image+stack span, `UserAccessWindow::new(user_entry, user_sp - user_entry)` per [T-025](T-025-user-access-translation.md). **Today the `LoadedImage` span is persisted nowhere `syscall_entry` can reach** (`Task` holds only `id + address_space_handle`; the scheduler stores no window; `task_create_from_image` does not read `entry_va`/`stack_top_va`), so this storage is genuinely new — without it the per-task window cannot be reconstructed at syscall time. -- [ ] **Scheduler accessors:** `current_user_table(&self) -> Option<*mut CapabilityTable>`, `current_address_space_handle(&self) -> Option`, and `current_user_window(&self) -> Option` — resolve `current` → slot index → the parallel arrays; `None` when there is no current task or the slot is unregistered (the caller maps `None` to the fail-closed default per the Fail-closed AC, so the accessors stay symmetric — no in-accessor `empty()` substitution). -- [ ] **`syscall_entry` rewire** ([`bsp-qemu-virt/src/syscall.rs`](../../../../bsp-qemu-virt/src/syscall.rs)): source `caller_table` from `SCHED.current_user_table()` (not `SYSCALL_STUB_TABLE`), the AS from `current_address_space_handle()` → `AS_ARENA`, and the per-task `UserAccessWindow` from `SCHED.current_user_window()`; pass `mmu` + `task_as` into the `` `SyscallContext` ([T-025](T-025-user-access-translation.md)). -- [ ] **Fail-closed** (security-critical, never weaken): if `current` is `None`, the slot has no bound table, or the AS handle is stale/absent, `syscall_entry` **must not** fall back to `SYSCALL_STUB_TABLE` or any ambient table. 
It dispatches with an **empty** `CapabilityTable` (every lookup → `CapError::InvalidHandle`) + `UserAccessWindow::empty()` (every non-zero copy → `FaultAddress`). No path resolves a capability in a table other than the verified current task's own ([ADR-0014](../../../decisions/0014-capability-representation.md) preserved). **Control-plane syscalls require a stronger rule:** `task_yield`/`task_exit` return `Reschedule`/`Terminate` directly in the dispatcher ([`dispatch.rs`](../../../../kernel/src/syscall/dispatch.rs)) and **never consult** `ctx.caller_table`/`ctx.user_window`, so the empty-table/empty-window default does **not** protect them — for these `syscall_entry` **must short-circuit to `SyscallError::InvalidHandle` before dispatch** when `current` is `None` or unbound (the trust-boundary check [ADR-0031](../../../decisions/0031-initial-syscall-set.md) demands: "is there a valid current task?"). Data-plane syscalls (`send`/`recv`/`console_write`) take the empty-table/empty-window path. -- [ ] **Dispatcher unchanged:** [`dispatch`](../../../../kernel/src/syscall/dispatch.rs) resolves only against `ctx.caller_table` / `ctx.user_window` / `ctx.mmu` — only the BSP's *source* of those moves. Panic-free, host-tested behaviour intact. -- [ ] **Host tests:** `add_user_task` records the cap-table pointer; `current_user_table()` / `current_address_space_handle()` return the running task's binding and `None` for no-current / unregistered slot; a fake-current-task dispatch resolves a cap **only** in that task's table (a cap absent from it → `InvalidHandle`); the no-current-task path yields the fail-closed empty-table/empty-window outcome (`FaultAddress`/`InvalidHandle`), never an ambient grant. **Control-plane fail-closed:** a no-current-task `task_yield` SVC → `InvalidHandle` (not `Reschedule`); a no-current-task `task_exit` SVC → `InvalidHandle` (not `Terminate`). 
-- [ ] **UNSAFE-2026-0014 Amendment (mandatory, not conditional):** the `syscall_entry` rewire dereferences `task_cap_tables[idx]` (a `*mut CapabilityTable`) to a momentary `&mut CapabilityTable` — a **new dereference site that broadens the raw-pointer surface**, so a dated Amendment to [UNSAFE-2026-0014](../../../audits/unsafe-log.md) is required (per [unsafe-policy](../../../standards/unsafe-policy.md): introducing/broadening an `unsafe` region demands an audit-log entry/update). It records: (1) the `&mut` scope is lexically contained to the one `dispatch` call and never crosses `cpu.context_switch`; (2) pointer validity is established by `add_user_task` and re-checked at `syscall_entry` (fail-closed on unregistered/stale slot); (3) the site follows the existing UNSAFE-2026-0014 discipline (irq-entry / start-prelude / AS-activation); (4) **second-reviewer security review required** (capability-table sourcing is security-sensitive, [unsafe-policy §Review](../../../standards/unsafe-policy.md)). -- [ ] **All gates green:** host tests (+N), host + kernel clippy `-D warnings`, `cargo fmt --check`, kernel build, Miri. **QEMU smoke:** the dormant `+0x200` smoke is re-seeded with a fake current task (or retired in favour of the B6 `+0x400` EL0 smoke); documented in the PR. `SYSCALL_STUB_TABLE` retired from the real-EL0 path. +- [x] **Scheduler task→table binding** ([`kernel/src/sched/mod.rs`](../../../../kernel/src/sched/mod.rs)): a per-slot `task_cap_tables: [Option<*mut CapabilityTable>; TASK_ARENA_CAPACITY]` (mirroring `task_address_space_handles`; **raw pointer — tables stay BSP-owned**, no ownership transfer, [ADR-0021](../../../decisions/0021-raw-pointer-scheduler-ipc-bridge.md) discipline), written by `add_user_task` (new `cap_table: *mut CapabilityTable` parameter). **Not** embedded in `Task` (keeps `Task` minimal — `id + address_space_handle`). 
+- [x] **Per-task window storage** ([`kernel/src/sched/mod.rs`](../../../../kernel/src/sched/mod.rs)): a parallel `task_user_windows: [Option<UserAccessWindow>; TASK_ARENA_CAPACITY]` (mirroring `task_address_space_handles`), built by `add_user_task` from its **existing** `user_entry`/`user_sp` params — the `[entry_va, stack_top_va)` image+stack span, `UserAccessWindow::new(user_entry, user_sp - user_entry)` per [T-025](T-025-user-access-translation.md). **Today the `LoadedImage` span is persisted nowhere `syscall_entry` can reach** (`Task` holds only `id + address_space_handle`; the scheduler stores no window; `task_create_from_image` does not read `entry_va`/`stack_top_va`), so this storage is genuinely new — without it the per-task window cannot be reconstructed at syscall time. +- [x] **Scheduler accessors:** `current_user_table(&self) -> Option<*mut CapabilityTable>`, `current_address_space_handle(&self) -> Option<AddressSpaceHandle>`, and `current_user_window(&self) -> Option<UserAccessWindow>` — resolve `current` → slot index → the parallel arrays; `None` when there is no current task or the slot is unregistered (the caller maps `None` to the fail-closed default per the Fail-closed AC, so the accessors stay symmetric — no in-accessor `empty()` substitution). +- [x] **`syscall_entry` rewire** ([`bsp-qemu-virt/src/syscall.rs`](../../../../bsp-qemu-virt/src/syscall.rs)): source `caller_table` from `SCHED.current_user_table()` (not `SYSCALL_STUB_TABLE`), the AS from `current_address_space_handle()` → `AS_ARENA`, and the per-task `UserAccessWindow` from `SCHED.current_user_window()`; pass `mmu` + `task_as` into the `` `SyscallContext` ([T-025](T-025-user-access-translation.md)). +- [x] **Fail-closed** (security-critical, never weaken): if `current` is `None`, the slot has no bound table, or the AS handle is stale/absent, `syscall_entry` **must not** fall back to any ambient table.
It dispatches with the empty `FAILCLOSED_TABLE` (every lookup → `CapError::InvalidHandle`) + `UserAccessWindow::empty()` (every non-zero copy → `FaultAddress`) + the bootstrap AS as a never-dereferenced placeholder. No path resolves a capability in a table other than the verified current task's own ([ADR-0014](../../../decisions/0014-capability-representation.md) preserved). **Control-plane syscalls require a stronger rule:** `task_yield`/`task_exit` consult **no** capability, so the empty-table/empty-window default does **not** protect them. **Implemented in the dispatcher** (not a BSP short-circuit — the BSP cannot construct the `#[non_exhaustive]` `SyscallError`, and the dispatcher version is host-testable): `SyscallContext` carries a `has_current_task: bool` (set by `syscall_entry` from `current_user_table().is_some()`); `dispatch` returns `InvalidHandle` for `task_yield`/`task_exit` when it is `false` (the trust-boundary check [ADR-0031](../../../decisions/0031-initial-syscall-set.md) demands: "is there a valid current task?"). Data-plane syscalls (`send`/`recv`/`console_write`) take the empty-table/empty-window path. +- [x] **Dispatcher:** [`dispatch`](../../../../kernel/src/syscall/dispatch.rs) resolves capabilities only against `ctx.caller_table` / `ctx.user_window` / `ctx.mmu` (the BSP's *source* of those moves), **plus** the new `has_current_task` gate on the control-plane syscalls (above). Panic-free; the data-plane behaviour is host-test-intact. +- [x] **Host tests:** `add_user_task` records the cap-table pointer; `current_user_table()` / `current_address_space_handle()` return the running task's binding and `None` for no-current / unregistered slot; a fake-current-task dispatch resolves a cap **only** in that task's table (a cap absent from it → `InvalidHandle`); the no-current-task path yields the fail-closed empty-table/empty-window outcome (`FaultAddress`/`InvalidHandle`), never an ambient grant. 
**Control-plane fail-closed:** a no-current-task `task_yield` SVC → `InvalidHandle` (not `Reschedule`); a no-current-task `task_exit` SVC → `InvalidHandle` (not `Terminate`). +- [x] **UNSAFE-2026-0014 Amendment (mandatory, not conditional):** the `syscall_entry` rewire dereferences `task_cap_tables[idx]` (a `*mut CapabilityTable`) to a momentary `&mut CapabilityTable` — a **new dereference site that broadens the raw-pointer surface**, so a dated Amendment to [UNSAFE-2026-0014](../../../audits/unsafe-log.md) is required (per [unsafe-policy](../../../standards/unsafe-policy.md): introducing/broadening an `unsafe` region demands an audit-log entry/update). It records: (1) the `&mut` scope is lexically contained to the one `dispatch` call and never crosses `cpu.context_switch`; (2) pointer validity is established by `add_user_task` and re-checked at `syscall_entry` (fail-closed on unregistered/stale slot); (3) the site follows the existing UNSAFE-2026-0014 discipline (irq-entry / start-prelude / AS-activation); (4) **second-reviewer security review required** (capability-table sourcing is security-sensitive, [unsafe-policy §Review](../../../standards/unsafe-policy.md)). +- [x] **All gates green:** host tests (kernel 257), host + kernel clippy `-D warnings`, `cargo fmt --check`, kernel build, Miri. **QEMU smoke** (maintainer chose the fail-closed demo): the `+0x200` smoke is **re-sequenced after `SCHED` init** so `SCHED.current` is `None`, demonstrating gate-#3 fail-closed — `console_write` → `InvalidHandle` (status `0x102`), `bad-number` → `BadSyscallNumber`; 2 SVC exceptions, clean `ERET`, **zero new fault class**. `SYSCALL_STUB_TABLE` retired → the empty `FAILCLOSED_TABLE` fallback (no longer minted into). ## Out of scope @@ -49,3 +49,4 @@ All acceptance criteria checked; gates green (incl. 
Miri); the smoke change docu ## Review history - **2026-05-31 — opened Draft** in the [ADR-0038](../../../decisions/0038-mmu-translate-and-user-access.md) Propose commit (the ADR's dependency chain names it; [ADR-0025 §Rule 1](../../../decisions/0025-adr-governance-amendments.md)). No ADR of its own — pure plumbing per [ADR-0030 §Dependency-chain step 7](../../../decisions/0030-syscall-abi.md). Implementation follows T-025's merge. +- **2026-05-31 — implemented (→ In Review)** on `t-026-current-task-cap-table` (off the merged T-025 [PR #39](https://github.com/HodeTech/Tyrne/pull/39) `f21eece`). Three steps: **(A)** scheduler `task_cap_tables` (raw `*mut`) + `task_user_windows` parallel arrays + `add_user_task` `cap_table` param + `current_user_table()`/`current_address_space_handle()`/`current_user_window()` accessors (commit `54a1795`); the raw ptr makes `Scheduler` `!Send`/`!Sync`, absorbed by the BSP's unconditionally-`Sync` `StaticCell`. **(B)** [`AddressSpace::inner()` → `pub`](../../../../kernel/src/mm/address_space.rs) (read-only; commit `15a9d80`) so the BSP can pass the task's `&QemuVirtAddressSpace` to `Mmu::translate`; `syscall_entry` rewired to source table/AS/window from `SCHED.current` with the empty `FAILCLOSED_TABLE` fallback; **the smoke moved after `SCHED` init** (so `SCHED.current` reads `None`). **(C)** the H2 control-plane fail-closed landed **in the dispatcher** via `SyscallContext.has_current_task` (host-testable; the BSP can't construct the `#[non_exhaustive]` `SyscallError`) — `task_yield`/`task_exit` → `InvalidHandle` when no task is current. **Smoke decision (maintainer):** fail-closed demo — the stub `console_write` now fail-closes to `InvalidHandle` (status `0x102`) with no current task (the B5 cap-mint + greeting are retired); `SYSCALL_STUB_TABLE` → empty `FAILCLOSED_TABLE`. 
**Mandatory UNSAFE-2026-0014 Amendment** (the `syscall_entry` `&mut *table_ptr` deref) + a UNSAFE-2026-0029 Amendment (the syscall-arc statics change + smoke re-sequence). Gates: fmt, host+kernel clippy `-D warnings`, **host tests kernel 257** (+`current_accessors_resolve_running_task_bindings_or_none` + `task_{yield,exit}_with_no_current_task_fails_closed`), kernel build, **QEMU smoke** PASS (2 SVC, clean ERET, zero new fault), **Miri 0 UB**. **Security-relevant — flagged for the explicit EL0-boundary security review** (the carry-forward DoD item with gate #1's UNSAFE-2026-0030/0032). diff --git a/docs/audits/unsafe-log.md b/docs/audits/unsafe-log.md index ea8c062..9b7b197 100644 --- a/docs/audits/unsafe-log.md +++ b/docs/audits/unsafe-log.md @@ -248,6 +248,8 @@ Both forms are time-stamped so a reader can reconstruct the entry's state at any - **Additional locations (T-018, commit `1b0f1d9` BSP wiring + commit `0d16ea4` scheduler hook):** [`kernel/src/sched/mod.rs::yield_now`](../../kernel/src/sched/mod.rs) (activation-hook invocation just before `cpu.context_switch`), [`kernel/src/sched/mod.rs::ipc_recv_and_yield`](../../kernel/src/sched/mod.rs) (Phase-2 dispatch path's activation hook), [`kernel/src/sched/mod.rs::ipc_send_and_yield`](../../kernel/src/sched/mod.rs) (threads the `activate_address_space` closure into its delegated `yield_now` call on the unblock-receiver-then-yield path; the activation itself fires inside `yield_now` but the closure-as-parameter site is here for traceability), [`kernel/src/sched/mod.rs::start`](../../kernel/src/sched/mod.rs) (first-task activation via momentary `unsafe { (*sched).task_address_space_handles[next_idx] }`), [`kernel/src/sched/mod.rs::address_space_activation_target`](../../kernel/src/sched/mod.rs) (the pure helper that computes the `Option<AddressSpaceHandle>` switch decision — no `unsafe`, but cited here for completeness), and [`bsp-qemu-virt/src/main.rs::activate_address_space`](../../bsp-qemu-virt/src/main.rs) (the closure that 
dereferences the `AS_ARENA` + `MMU` `StaticCell`s and calls [`tyrne_kernel::mm::activate_address_space_handle`](../../kernel/src/mm/address_space.rs)). - **Additional invariant:** the activation closure fires *inside* the scheduler's `IrqGuard` scope but *after* the `&mut Scheduler` borrow drops; the closure's own `&AS_ARENA` + `&MMU` borrows do not alias any live scheduler borrow. The closure is `FnOnce` — called at most once per scheduler invocation; on the no-AS-switch path it is dropped unused (the `Option<AddressSpaceHandle>` computed by `address_space_activation_target` is `None`). + **Amendment (2026-05-31, [T-026](../analysis/tasks/phase-b/T-026-current-task-cap-table.md) / gate #3, mandatory per the task's audit AC): scope extended to the `syscall_entry` cap-table-pointer dereference.** T-026 records each EL0 task's capability-table pointer in the scheduler (`task_cap_tables: [Option<*mut CapabilityTable>; N]`, written by [`add_user_task`](../../kernel/src/sched/mod.rs)); the BSP [`syscall_entry`](../../bsp-qemu-virt/src/syscall.rs) reads it via `Scheduler::current_user_table()` and materialises a momentary `&mut *table_ptr` — the **running task's own** table — for the single `dispatch` call. This is a new dereference site under **this entry's discipline**: (1) the `&mut` is lexically contained to the one `dispatch` call and never crosses `cpu.context_switch` (the data-plane syscalls do not switch; control-plane returns a directive); (2) the pointer's validity is established by `add_user_task`'s `# Safety` contract (a `CapabilityTable` outliving the task, no `&mut` aliased across a switch — the [ADR-0021](../decisions/0021-raw-pointer-scheduler-ipc-bridge.md) bridge) and the binding is *read* from the scheduler, not minted here; (3) **fail-closed** when no task is current — `current_user_table()` returns `None` and `syscall_entry` substitutes the empty `FAILCLOSED_TABLE` (every lookup → `InvalidHandle`), so no over-grant. 
The new `task_cap_tables` array makes `Scheduler` `!Send`/`!Sync`; it lives only inside the BSP's unconditionally-`Sync` `StaticCell` and is reached via the ADR-0021 `*mut Scheduler` bridge, so no `Send`/`Sync` bound is broken. **Second-reviewer required** (capability-table sourcing is security-sensitive, [unsafe-policy §Review.4](../standards/unsafe-policy.md)). Host-tested: `sched::tests::current_accessors_resolve_running_task_bindings_or_none` + `syscall::dispatch::tests::task_{yield,exit}_with_no_current_task_fails_closed`; QEMU smoke shows the no-current-task `console_write` fail-closing to `InvalidHandle` (status `0x102`). + ### UNSAFE-2026-0015 — generic-timer system-register reads (`CNTPCT_EL0`, `CNTFRQ_EL0`) - **Introduced:** 2026-04-23, T-009 — Timer trait implementation for QEMU virt. @@ -661,6 +663,8 @@ Neither change touches the `copy_nonoverlapping` site itself; both correct contr - **Reviewed by:** @cemililik (+ Claude Opus 4.8 agent). Security-sensitive (the EL0→EL1 trust boundary — the single widest untrusted-input surface in the system) → second-reviewer required per [unsafe-policy §Review.4](../standards/unsafe-policy.md). - **Status:** Active. Smoke-verified at runtime: the 2026-05-29 QEMU trace (debug build) shows the EL1 kernel-stub's two `SVC`s taken at the current-EL `+0x200` sync vector — `Taking exception 2 [SVC] ... from EL1 to EL1 ... with ESR 0x15/0x56000000` (EC = SVC64, exactly the value the trampoline routes on) — each `ERET`ing cleanly back to EL1, with the `console_write` syscall emitting `tyrne: hello from the syscall boundary (console_write via SVC)` and the round-trip confirmation `console_write status=0x0, bytes=63; bad-number status=0x1`; `-d int,unimp,guest_errors` shows only the pre-existing PL011-disabled-UART warnings (no new fault class). 
**The lower-EL `+0x400` slot is installed but not yet exercised** — a real EL0 task taking it (with the EL0↔EL1 privilege transition) is B6's runtime verification per [ADR-0030 §Simulation row-to-verification mapping](../decisions/0030-syscall-abi.md#simulation); this status note lifts to cover the `+0x400` path via append-only Amendment when B6's first EL0 task runs. + **Amendment (2026-05-31, [T-026](../analysis/tasks/phase-b/T-026-current-task-cap-table.md) / gate #3): `syscall_entry` now sources the caller context from the scheduler's running task; the smoke is re-sequenced after `SCHED` init.** The statics the "Statics initialised before first `SVC`" invariant named have changed: `syscall_entry` no longer reads a dedicated `SYSCALL_STUB_TABLE`. It reads **`SCHED`** (added) for the running task's `current_user_table()` / `current_address_space_handle()` / `current_user_window()`, resolves the task's `&QemuVirtAddressSpace` from `AS_ARENA`, and falls back to the empty `FAILCLOSED_TABLE` + `BOOTSTRAP_AS` + an empty window when no task is current. `syscall_boundary_smoke` is correspondingly moved to **after** `SCHED` is published (still before `start()`), so `SCHED.current` is a valid (empty) read — `None` — which is exactly the fail-closed case the smoke now demonstrates (`console_write` → `InvalidHandle`; the old "emit a greeting" path is superseded). The new cap-table-pointer dereference is covered by the [UNSAFE-2026-0014](#unsafe-2026-0014--scheduler-free-function-momentary-mut-pattern) 2026-05-31 Amendment. Smoke-verified 2026-05-31: 2 `SVC` exceptions at `+0x200`, clean `ERET`, no new fault class; `console_write status=0x102 ; bad-number status=0x1`. + ### UNSAFE-2026-0030 — validated copy-from/to-user byte move via `core::ptr::copy_nonoverlapping` - **Introduced:** 2026-05-29, [T-021 — EL0→EL1 SVC dispatch](../analysis/tasks/phase-b/T-021-syscall-dispatch.md). 
New entry rather than an Amendment of [UNSAFE-2026-0027](#unsafe-2026-0027--task-loader-frame-byte-copy-via-coreptrcopy_nonoverlapping-in-task_loaderload_image): 0027's scope is a *kernel-orchestrated* copy from a kernel-owned `.rodata` slice into a freshly-allocated PMM frame the loader fully controls; 0030's scope is a copy across the *userspace trust boundary* — the source/destination pointer is a **userspace-supplied integer** the kernel does not own a reference into, gated by a runtime range check against the active address space. The differing ownership-proof chain (caller-owned `PhysFrame` vs. validator-bounded untrusted VA) is the load-bearing axis the audit log discriminates on, so a fresh entry is the honest record. diff --git a/docs/roadmap/current.md b/docs/roadmap/current.md index 08d7122..ee9d3d5 100644 --- a/docs/roadmap/current.md +++ b/docs/roadmap/current.md @@ -4,6 +4,8 @@ A short pointer file updated as work progresses. For the full plan see [`phases/ --- +> **2026-05-31 update — B6 gate #3 closed: T-026 (current-task capability table + per-task window in `syscall_entry`) implemented, In Review. All three T-021 carry-forward gates now closed.** **[T-026](../analysis/tasks/phase-b/T-026-current-task-cap-table.md)** (on `t-026-current-task-cap-table` off the merged T-025 `f21eece`; **no new ADR** — rides ADR-0030/0021/0014) makes `syscall_entry` source the **running EL0 task's** capability table + address space + user-access window from `SCHED.current` (scheduler `task_cap_tables` / `task_user_windows` parallel arrays + `current_*` accessors, written by `add_user_task`), **failing closed** when no task is current — the empty `FAILCLOSED_TABLE` (every lookup → `InvalidHandle`) + an empty window. 
**Control-plane** (`task_yield`/`task_exit`, which consult no capability) is gated in the dispatcher on a new `SyscallContext.has_current_task` (→ `InvalidHandle` when no current task — the H2 review fix; host-testable, since the BSP can't construct the `#[non_exhaustive]` `SyscallError`). `AddressSpace::inner()` widened to `pub` (read-only — no cap-gate bypass) so the BSP can pass the task's `&QemuVirtAddressSpace` to gate-#1 `Mmu::translate`. The `+0x200` smoke is **re-sequenced after `SCHED` init** (so `SCHED.current` is `None`) and now demonstrates gate-#3 **fail-closed** (`console_write` → `InvalidHandle` `0x102`; `bad-number` → `BadSyscallNumber`); `SYSCALL_STUB_TABLE` retired. Two relayed pre-impl reviews hardened the design (H1 window persistence, H2 control-plane, M4 mandatory audit). Mandatory **UNSAFE-2026-0014 Amendment** (the cap-table-pointer deref) + a UNSAFE-2026-0029 Amendment (syscall-arc statics + smoke re-sequence). Gates: **host tests 257 kernel / 46 hal / 58 test-hal / 3 doc**, fmt, host + kernel clippy `-D warnings`, kernel build, QEMU smoke (2 SVC, clean `ERET`, **zero new fault class**), **Miri 0 UB**. **All three [T-021 carry-forward gates](phases/phase-b.md#t-021-carry-forward-gates-must-close-before-a-real-el0-task-runs) are closed** (#1 mechanism T-025, #2 T-023, #3 T-026). **Next:** the `tyrne-user` + `userland/hello` crate + `cargo → objcopy → include_bytes!` build pipeline, then the EL0 `+0x400` wire-up smoke, then B6 closure. **Carry-forward DoD:** the explicit EL0-boundary security review (UNSAFE-2026-0030/0032 + cap-table sourcing) before a real EL0 task runs. This banner supersedes the gate-#1 banner below. 
+> > **2026-05-31 update — B6 gate #1 mechanism landed: ADR-0038 Accepted + T-025 (`Mmu::translate` + per-page user-access translation) implemented, In Review.** [ADR-0038](../decisions/0038-mmu-translate-and-user-access.md) **Accepted** (the read-only `Mmu::translate` walk query — the realised [ADR-0009](../decisions/0009-mmu-trait.md) §Open-questions "translation walk query" — + the per-task user-access policy; arc on `t-025-user-access-translation`: propose → review-round (8 valid / 3 skipped across two relayed reviews) → accept). **[T-025](../analysis/tasks/phase-b/T-025-user-access-translation.md)** implements gate #1's **mechanism** (the security-critical [T-021 carry-forward gate #1](phases/phase-b.md#t-021-carry-forward-gates-must-close-before-a-real-el0-task-runs)): `copy_from_user` / `copy_to_user` become **two-pass** (probe-all-then-copy), generic over `<M: Mmu>`, resolving **every** user page through the task's own address space and **requiring `USER`** (`FaultAddress` on any miss / non-`USER` / block-mapped page — the confused-deputy defence, never a panic), behind a per-task `[entry_va, stack_top_va)` window first-gate; `SyscallContext` gains `mmu` / `task_as`. `vmsav8` gains the inverse decoder `descriptor_bits_to_flags` (lock-shut). The B5 `+0x200` stub `console_write` of a **kernel** VA is now correctly **rejected** (smoke: `status=0x3 bytes=0`, no greeting emitted) — a positive gate-#1 demonstration and the only smoke-trace change. Gates: **host tests 252 kernel / 46 hal / 58 test-hal / 3 doc**, `cargo fmt`, host + kernel clippy `-D warnings`, kernel build, QEMU smoke (exactly 2 SVC exceptions, clean `ERET`, **zero new fault class**), **Miri 0 UB**. UNSAFE-2026-0030 + 0025 Amendments (per-page translation + read-only `translate` caller — no new entries). 
**Next:** gate #3 ([T-026](../analysis/tasks/phase-b/T-026-current-task-cap-table.md)) sources the running EL0 task's AS + capability table from the scheduler, then the `tyrne-user` + `userland/hello` build pipeline + the EL0 `+0x400` wire-up smoke. This banner supersedes the B6-opening banner below. > > **2026-05-31 update — B6 opening: T-022 merged + security-reviewed (Approve); ADR-0037 + T-023 (EL0 entry context) landed; B6's foundational EL0-entry mechanism in place (dormant).** [T-022 / ADR-0033](../decisions/0033-kernel-high-half-migration.md) merged via [PR #36](https://github.com/HodeTech/Tyrne/pull/36) (`b6549d7`); its [high-half migration security review](../analysis/reviews/security-reviews/2026-05-31-T-022-high-half-migration.md) is **Approve** (eight axes, no live finding — a structural kernel/user isolation *strengthening*, kernel absent from `TTBR0_EL1`), discharging the awaiting-review flag. **B6 — the Phase-B-closing milestone — is now open** on branch `b6-el0-entry-context` (off `main`). Landed this session: [ADR-0037](../decisions/0037-el0-entry-context.md) **Accepted** (the EL0 entry-context decision — reuse `Aarch64TaskContext` + a one-shot `enter_el0` `ERET` trampoline; per-task `SP_EL1` = the kernel stack by construction; **D2**: neither extend the context struct nor add EL0 fields to the kernel-object `Task`) + [T-023](../analysis/tasks/phase-b/T-023-el0-entry-context.md) **implemented** (`ContextSwitch::init_user_context` + the BSP `enter_el0` trampoline + `Scheduler::add_user_task`) — **dormant**: no runnable EL0 task yet, `syscall_entry` not EL0-reachable. 
A **2026-05-31 review-round** caught a **HIGH register-leak** — the trampoline now **scrubs the EL0-visible register file before `ERET`** (`x0`–`x30` + `v0`–`v31`; a 2026-05-31 review-round-2 Low extended it to `FPCR`/`FPSR`/`TPIDR_EL0`/`TPIDRRO_EL0`, UNKNOWN-reset on real HW) so no kernel state reaches EL0 — plus the Medium/nit fixes (`SPSR_EL1` register-form, gate-#2 `debug_assert` + a `user_sp`-alignment one, the EL0-trap stack-size contract, `offset_of!` layout asserts, the AS-must-be-active `# Safety`); new [UNSAFE-2026-0032](../audits/unsafe-log.md) (second-reviewer-flagged). Gates: **342 host tests**, `cargo fmt`, host + kernel clippy `-D warnings`, kernel build (the scrub asm + offset asserts assemble), QEMU smoke **byte-stable + fault-clean** (mechanism dormant), Miri **0 UB**. **Remaining B6 threads** (per [phase-b §B6](phases/phase-b.md#milestone-b6--first-userspace-hello)): `task_create_from_image` → the security-critical **gate #1** (per-task user-VA→kernel-VA translation) + gate #3 → scheduler wiring → `tyrne-user` + `userland/hello` + build pipeline → wire-up + EL0 `+0x400` smoke → closure (= Phase B retrospective). This banner supersedes the 2026-05-29 B5-closure banner below. 
diff --git a/docs/roadmap/phases/phase-b.md b/docs/roadmap/phases/phase-b.md index b722cf2..c35d8ec 100644 --- a/docs/roadmap/phases/phase-b.md +++ b/docs/roadmap/phases/phase-b.md @@ -282,7 +282,7 @@ The [T-021](../../analysis/tasks/phase-b/T-021-syscall-dispatch.md) review-round **✅ Mechanism LANDED (2026-05-31, [T-025](../../analysis/tasks/phase-b/T-025-user-access-translation.md) / [ADR-0038](../../decisions/0038-mmu-translate-and-user-access.md)).** `Mmu::translate` (read-only walk) + the **two-pass** per-page translate-based `copy_from_user`/`copy_to_user` (every spanned page resolved through the task's own AS, **`USER` required**, `FaultAddress` on any miss/non-`USER`/block — never panic) + the per-task `[entry_va, stack_top_va)` window first-gate now exist and are exhaustively host-tested (confused-deputy reject, unmapped, read-only, multi-page all-or-nothing). The hard-ordering precondition is satisfied — the translate-based boundary is in **before** any real EL0 task is enabled. In B5 `syscall_entry` passes the bootstrap AS, so the stub's kernel-VA `console_write` is now correctly **rejected** (smoke: `status=0x3`, the gate-#1 demonstration). **Remaining for the wire-up:** sourcing the *running EL0 task's* AS + per-task window from the scheduler so a real task's USER buffer translates — that is **gate #3** ([T-026](../../analysis/tasks/phase-b/T-026-current-task-cap-table.md)). 2. 🚩 **`SP_EL1` initialisation for the `+0x400` entry.** The sync trampoline's first `sub sp, sp, #272` runs on `SP_EL1`, which the CPU does **not** auto-initialise on an EL0→EL1 trap. B6's per-task EL0 context-init must set `SP_EL1` to a valid kernel stack before any EL0 task is schedulable (and should assert it). Subsumed by the "EL0-ready context register file" work (ADR-0033 placeholder) but named here explicitly. -3. 
🚩 **`SYSCALL_STUB_TABLE` → scheduler current-task table.** `syscall_entry` resolves capabilities in the dedicated kernel-stub table in B5; B6 must swap it for the *running EL0 task's* capability table (looked up from the scheduler's current task). Fail-closed if forgotten (handles resolve to `InvalidHandle`, never over-grant), but functionally required for a real task to name its own caps. +3. ✅ **`SYSCALL_STUB_TABLE` → scheduler current-task table — CLOSED (2026-05-31, [T-026](../../analysis/tasks/phase-b/T-026-current-task-cap-table.md)).** `syscall_entry` now sources the *running EL0 task's* capability table, address space, and user-access window from `SCHED.current` (scheduler `task_cap_tables` / `task_user_windows` parallel arrays + `current_*` accessors, written by `add_user_task`), and **fails closed** when no task is current — the empty `FAILCLOSED_TABLE` (every lookup → `InvalidHandle`) + an empty window. The **control-plane** syscalls (`task_yield`/`task_exit`, which consult no capability) are gated in the dispatcher on a `has_current_task` flag → `InvalidHandle` when no task is current (H2). `SYSCALL_STUB_TABLE` retired; mandatory UNSAFE-2026-0014 Amendment (the cap-table-pointer deref). Host-tested + smoke shows the no-current-task fail-closed (`console_write` → `0x102`). The runtime per-task exercise (a real EL0 task naming its own caps) is the B6 wire-up. Two further hazards are later-phase (already tracked, not B6): `ipc_send`'s `unreachable!()` becomes a release panic-from-userspace only under **preemption/SMP** (harden to `Err(QueueFull)` when preemption lands — ADR-0032 / note C3-009); and **fault containment** for an EL0 non-`SVC` sync fault (illegal instruction, unmapped deref) is Phase E / flag K3-4 (the dispatcher itself is already panic-free). 
diff --git a/kernel/src/sched/mod.rs b/kernel/src/sched/mod.rs index 6a31785..1198a05 100644 --- a/kernel/src/sched/mod.rs +++ b/kernel/src/sched/mod.rs @@ -1788,6 +1788,58 @@ mod tests { assert_eq!(sched.contexts[0].stack_top, ktop as usize); } + #[test] + fn current_accessors_resolve_running_task_bindings_or_none() { + // Gate #3 (T-026): `current_user_table` / `current_address_space_handle` + // / `current_user_window` resolve `self.current` → slot → the parallel + // arrays. `None` when no task is current (the fail-closed signal); the + // recorded binding once a task is the running `current`. + let cpu = FakeCpu::new(); + let mut sched: Scheduler = Scheduler::new(); + // Fresh scheduler: no current task → all None. + assert!(sched.current_user_table().is_none()); + assert!(sched.current_address_space_handle().is_none()); + assert!(sched.current_user_window().is_none()); + + let h = task_handle(0); + let mut kstack = AlignedStack::<512>::new(); + let mut table = CapabilityTable::new(); + let table_ptr: *mut CapabilityTable = core::ptr::addr_of_mut!(table); + let user_entry = 0x0080_0000usize; + let user_sp = 0x0080_2000usize; + // SAFETY: opaque VAs; `FakeCpu::init_user_context` only records; `table` + // is a stack-local the scheduler only stores a pointer to (never derefs + // here — no syscall is dispatched). + unsafe { + sched + .add_user_task( + &cpu, + h, + BOOTSTRAP_ADDRESS_SPACE_HANDLE, + user_entry, + user_sp, + kstack.top(), + table_ptr, + ) + .unwrap(); + } + // Registered (Ready) but not yet the running task → still None. + assert!(sched.current_user_table().is_none()); + + // Make it the running task (`current` is private but reachable from this + // child test module; the scheduler normally sets it in start/yield_now). 
+ sched.current = Some(h); + assert_eq!(sched.current_user_table(), Some(table_ptr)); + assert_eq!( + sched.current_address_space_handle(), + Some(BOOTSTRAP_ADDRESS_SPACE_HANDLE) + ); + assert_eq!( + sched.current_user_window(), + Some(UserAccessWindow::new(user_entry, user_sp - user_entry)) + ); + } + #[test] fn yield_now_switches_context_and_updates_current() { let cpu = FakeCpu::new(); diff --git a/kernel/src/syscall/dispatch.rs b/kernel/src/syscall/dispatch.rs index c2abbf1..8f29315 100644 --- a/kernel/src/syscall/dispatch.rs +++ b/kernel/src/syscall/dispatch.rs @@ -94,6 +94,17 @@ pub struct SyscallContext<'a, M: Mmu> { /// are resolved through. Sourced from the scheduler's current task in B6 /// (gate #3 / T-026); in B5 it is the EL1 stub's bootstrap AS. pub task_as: &'a M::AddressSpace, + /// Whether a running EL0 task is current (gate #3 / T-026). The BSP sets it + /// from the scheduler (`current_user_table().is_some()`). The **control-plane** + /// syscalls (`task_yield` / `task_exit`) act on the trusted current-task + /// identity ([ADR-0031][adr-0031]) and consult **no** capability, so the + /// empty fail-closed `caller_table` cannot guard them — the dispatcher + /// instead rejects them with `InvalidHandle` when this is `false` (H2). A + /// data-plane syscall with no current task fails closed via the empty table + /// regardless of this flag. + /// + /// [adr-0031]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0031-initial-syscall-set.md + pub has_current_task: bool, } /// Decode and execute one syscall, returning the trampoline's next action. @@ -113,9 +124,30 @@ pub fn dispatch(ctx: &mut SyscallContext<'_, M>, args: SyscallArgs) -> S match number { SyscallNumber::Send => SyscallEffect::Resume(sys_send(ctx, args.args)), SyscallNumber::Recv => SyscallEffect::Resume(sys_recv(ctx, args.args)), - // Control-plane: act on the caller's own task; see SyscallEffect. 
- SyscallNumber::TaskYield => SyscallEffect::Reschedule, - SyscallNumber::TaskExit => SyscallEffect::Terminate(args.args[0]), + // Control-plane: act on the caller's own task. These consult no + // capability, so the empty fail-closed `caller_table` does not guard + // them — gate #3 (T-026, H2) rejects them here when no EL0 task is + // current (nothing to yield / exit). With a current task they return + // the directive the BSP applies (real `yield_now` / termination is the + // B6 wire-up). + SyscallNumber::TaskYield => { + if ctx.has_current_task { + SyscallEffect::Reschedule + } else { + SyscallEffect::Resume(SyscallReturn::error(SyscallError::from( + CapError::InvalidHandle, + ))) + } + } + SyscallNumber::TaskExit => { + if ctx.has_current_task { + SyscallEffect::Terminate(args.args[0]) + } else { + SyscallEffect::Resume(SyscallReturn::error(SyscallError::from( + CapError::InvalidHandle, + ))) + } + } SyscallNumber::ConsoleWrite => SyscallEffect::Resume(sys_console_write(ctx, args.args)), } } @@ -373,6 +405,7 @@ mod tests { user_window: window, mmu, task_as, + has_current_task: true, }; dispatch( &mut ctx, @@ -397,6 +430,7 @@ mod tests { user_window: UserAccessWindow::empty(), mmu: &mmu, task_as: &task_as, + has_current_task: true, }; let effect = dispatch( &mut ctx, @@ -429,6 +463,7 @@ mod tests { user_window: UserAccessWindow::empty(), mmu: &mmu, task_as: &task_as, + has_current_task: true, }; let effect = dispatch( &mut ctx, @@ -460,6 +495,7 @@ mod tests { user_window: UserAccessWindow::empty(), mmu: &mmu, task_as: &task_as, + has_current_task: true, }; assert_eq!( dispatch(&mut ctx, call(SyscallNumber::TaskYield, [0; 6])), @@ -482,6 +518,7 @@ mod tests { user_window: UserAccessWindow::empty(), mmu: &mmu, task_as: &task_as, + has_current_task: true, }; assert_eq!( dispatch( @@ -492,6 +529,68 @@ mod tests { ); } + // ── control-plane fail-closed (gate #3 / T-026, H2) ────────────────────── + + #[test] + fn task_yield_with_no_current_task_fails_closed() { + 
// Control-plane consults no capability, so the empty fail-closed table + // cannot guard it; the dispatcher rejects task_yield with InvalidHandle + // when no EL0 task is current (nothing to yield) — not Reschedule. + let mut ep_arena = EndpointArena::default(); + let mut queues = IpcQueues::new(); + let mut table = CapabilityTable::new(); + let console = FakeConsole::new(); + let (mmu, task_as) = empty_mmu_as(); + let mut ctx = SyscallContext { + ep_arena: &mut ep_arena, + queues: &mut queues, + caller_table: &mut table, + console: &console, + user_window: UserAccessWindow::empty(), + mmu: &mmu, + task_as: &task_as, + has_current_task: false, + }; + let effect = dispatch(&mut ctx, call(SyscallNumber::TaskYield, [0; 6])); + match effect { + SyscallEffect::Resume(r) => assert_eq!( + r.status, + SyscallError::Cap(crate::cap::CapError::InvalidHandle).as_status() + ), + other => panic!("expected Resume(InvalidHandle), not Reschedule, got {other:?}"), + } + } + + #[test] + fn task_exit_with_no_current_task_fails_closed() { + let mut ep_arena = EndpointArena::default(); + let mut queues = IpcQueues::new(); + let mut table = CapabilityTable::new(); + let console = FakeConsole::new(); + let (mmu, task_as) = empty_mmu_as(); + let mut ctx = SyscallContext { + ep_arena: &mut ep_arena, + queues: &mut queues, + caller_table: &mut table, + console: &console, + user_window: UserAccessWindow::empty(), + mmu: &mmu, + task_as: &task_as, + has_current_task: false, + }; + let effect = dispatch( + &mut ctx, + call(SyscallNumber::TaskExit, [0x2A, 0, 0, 0, 0, 0]), + ); + match effect { + SyscallEffect::Resume(r) => assert_eq!( + r.status, + SyscallError::Cap(crate::cap::CapError::InvalidHandle).as_status() + ), + other => panic!("expected Resume(InvalidHandle), not Terminate, got {other:?}"), + } + } + // ── send / recv ────────────────────────────────────────────────────────── #[test] @@ -516,6 +615,7 @@ mod tests { user_window: UserAccessWindow::empty(), mmu: &mmu, task_as: &task_as, 
+ has_current_task: true, }; let cap_word = encode_cap_handle(Some(ep_cap)); let effect = dispatch( @@ -554,6 +654,7 @@ mod tests { user_window: UserAccessWindow::empty(), mmu: &mmu, task_as: &task_as, + has_current_task: true, }; let cap_word = encode_cap_handle(Some(ep_cap)); let effect = dispatch( @@ -594,6 +695,7 @@ mod tests { user_window: UserAccessWindow::empty(), mmu: &mmu, task_as: &task_as, + has_current_task: true, }; let cap_word = encode_cap_handle(Some(ep_cap)); // Enqueue a message via the send syscall. @@ -664,6 +766,7 @@ mod tests { user_window: UserAccessWindow::empty(), mmu: &mmu, task_as: &task_as, + has_current_task: true, }; let ep_word = encode_cap_handle(Some(ep_cap)); @@ -727,6 +830,7 @@ mod tests { user_window: UserAccessWindow::empty(), mmu: &mmu, task_as: &task_as, + has_current_task: true, }; // x5 = a handle naming no live slot (index far past CAP_TABLE_CAPACITY). let stale_xfer = encode_cap_handle(Some(CapHandle::from_raw(50, 7))); @@ -767,6 +871,7 @@ mod tests { user_window: UserAccessWindow::empty(), mmu: &mmu, task_as: &task_as, + has_current_task: true, }; let effect = dispatch( &mut ctx, From f6bc9db42427aef4bacababf7f6bd601d13e32bb Mon Sep 17 00:00:00 2001 From: Cemil ILIK Date: Sun, 31 May 2026 20:54:00 +0300 Subject: [PATCH 5/6] =?UTF-8?q?fix(syscall):=20T-026=20PR=20#40=20review-r?= =?UTF-8?q?ound=20=E2=80=94=20all-or-nothing=20binding=20+=20FAILCLOSED=20?= =?UTF-8?q?in=20core=20setup=20+=20smoke=20asserts=20+=20null=20guard?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the PR #40 review (4 valid fixes; 2 findings skipped with reason): - syscall_entry: fold has_current_task into the same all-or-nothing match as the data-plane triple (table + user window + generation-checked AS). 
An incomplete running-task binding now also sets has_current_task=false, so the control-plane syscalls (task_yield/task_exit, which consult no capability) are rejected too — not just the data-plane syscalls. The whole caller context is all-or-nothing fail-closed. - main.rs: relocate FAILCLOSED_TABLE init from syscall_boundary_smoke into core kernel_main_high setup (alongside EP_ARENA/IPC_QUEUES), with an explicit ordering invariant, so the security fallback is live independent of whether the diagnostic smoke runs (removes a 'smoke removed -> uninit .bss UB' footgun). - main.rs: syscall_boundary_smoke now ASSERTS both SVC statuses != OK_STATUS before printing, instead of printing 'ok' unconditionally (a fail-closed regression would over-grant and report OK; the failed assertion then panics, so the boot never reaches 'all tasks complete'). - sched::add_user_task: debug_assert!(!cap_table.is_null()) + defensive (!cap_table.is_null()).then_some(cap_table) store, so in a release build a # Safety-contract violation degrades to None -> fail-closed instead of a Some(null) deref (a debug build stops at the assert). - dispatch.rs: refresh the SyscallContext.has_current_task doc to the all-or-nothing semantics. Skipped: (a) 'BlockMapped test missing paren / won't compile' — the test compiles and passes; (b) 'base has_current_task on an explicit is-EL0 property' — in v1 only EL0 tasks (which always have a cap table) issue syscalls, so the equivalence holds (forward-flagged). Adversarially verified before commit: a 5-lens review workflow (all-or-nothing correctness, init-ordering, null-store, smoke-assert, audit-accuracy) found no real issues — the all-or-nothing lens cleared all 8 binding-state combinations. UNSAFE-2026-0014 + 0029 review-round amendments. Gates: fmt; host+kernel clippy -D warnings; host tests kernel 257 / hal 46 / test-hal 58 / 3 doc; kernel build; QEMU smoke PASS (asserts hold: console_write 0x102, bad-number 0x1; 2 SVC, clean ERET, zero new faults); Miri 0 UB (kernel 257). 
Refs: T-026 Co-Authored-By: Claude Opus 4.8 (1M context) --- bsp-qemu-virt/src/main.rs | 35 ++++++++++++++----- bsp-qemu-virt/src/syscall.rs | 64 +++++++++++++++++++--------------- docs/audits/unsafe-log.md | 4 +-- kernel/src/sched/mod.rs | 13 ++++++- kernel/src/syscall/dispatch.rs | 5 ++- 5 files changed, 81 insertions(+), 40 deletions(-) diff --git a/bsp-qemu-virt/src/main.rs b/bsp-qemu-virt/src/main.rs index 0a7bbbe..9524ac2 100644 --- a/bsp-qemu-virt/src/main.rs +++ b/bsp-qemu-virt/src/main.rs @@ -733,14 +733,9 @@ fn task_a() -> ! { register-word casts are lossless" )] fn syscall_boundary_smoke(console: &Pl011Uart) { - // Initialise the empty fail-closed fallback table the dispatcher resolves - // against when no EL0 task is current (gate #3). Never minted into. - // - // SAFETY: `FAILCLOSED_TABLE` lives in `.bss`; this is its single write, - // before any `SVC` issues. Audit: UNSAFE-2026-0010 (StaticCell pattern). - unsafe { - (*FAILCLOSED_TABLE.0.get()).write(CapabilityTable::new()); - } + // `FAILCLOSED_TABLE` is initialised in core setup (see `kernel_main_high`), + // not here: the fail-closed fallback is a security mechanism that must be live + // independent of this diagnostic. This smoke only *exercises* it. // (1) console_write via SVC with no current task → fail-closed InvalidHandle. // The cap word (`0`) and the buffer are irrelevant: with `SCHED.current == @@ -788,6 +783,21 @@ fn syscall_boundary_smoke(console: &Pl011Uart) { ); } + // Assert the security property, don't just print it: with no current task + // both syscalls MUST be rejected (non-OK status). The exact codes + // (InvalidHandle / BadSyscallNumber) are pinned by the host dispatcher tests; + // here the load-bearing check is that neither returned `OK_STATUS` — a + // fail-closed regression would over-grant and report OK. A failed assertion + // panics, so boot never reaches "all tasks complete" and the smoke fails. 
+ assert!( + status != tyrne_kernel::syscall::OK_STATUS, + "gate #3: console_write with no current task must fail closed, got OK" + ); + assert!( + bad_status != tyrne_kernel::syscall::OK_STATUS, + "gate #3: reserved-invalid syscall number must be rejected, got OK" + ); + let mut w = FmtWriter(console); let _ = writeln!( w, @@ -1517,6 +1527,15 @@ extern "C" fn kernel_main_high() -> ! { unsafe { (*EP_ARENA.0.get()).write(ep_arena); (*IPC_QUEUES.0.get()).write(IpcQueues::new()); + // The empty fail-closed fallback table `syscall_entry` resolves against + // when no EL0 task is current (gate #3 / T-026). Initialised here in core + // setup — alongside the other syscall statics — so the security fallback + // is always live and never coupled to whether the diagnostic smoke runs. + // INVARIANT: this write must precede `start()` (and the syscall smoke's + // first `SVC`), so every `syscall_entry` None-path reads an initialised + // table; it does, since both happen later in `kernel_main_high`. Never + // minted into. 
+ (*FAILCLOSED_TABLE.0.get()).write(CapabilityTable::new()); (*TABLE_A.0.get()).write(table_a); (*TABLE_B.0.get()).write(table_b); (*EP_CAP_A.0.get()).write(ep_cap_a); diff --git a/bsp-qemu-virt/src/syscall.rs b/bsp-qemu-virt/src/syscall.rs index 1b22161..221ad2d 100644 --- a/bsp-qemu-virt/src/syscall.rs +++ b/bsp-qemu-virt/src/syscall.rs @@ -125,11 +125,12 @@ const _: () = assert!(core::mem::size_of::() == 272); /// `caller_table` is `&mut *current_user_table()` — a momentary `&mut` to the /// task's own capability table (a BSP static recorded by `add_user_task` via the /// [ADR-0021] raw-pointer bridge), lexically contained to this one `dispatch` -/// call and never crossing a switch; with no current task it is the empty -/// `FAILCLOSED_TABLE` (every lookup → `InvalidHandle`) and `task_as` the -/// never-dereferenced bootstrap-AS placeholder behind an empty window, so a -/// syscall with no running task names no capability and copies no byte -/// (UNSAFE-2026-0014 Amendment). **Rejected alternatives.** Passing a `&mut +/// call and never crossing a switch; with no current task, missing task window, +/// or stale / absent task address-space handle, it is the empty `FAILCLOSED_TABLE` +/// (every lookup → `InvalidHandle`) and `task_as` the never-dereferenced +/// bootstrap-AS placeholder behind an empty window, so an incomplete running-task +/// context names no capability and copies no byte (UNSAFE-2026-0014 Amendment). +/// **Rejected alternatives.** Passing a `&mut /// SyscallTrapFrame` from the asm is impossible (asm has no Rust references); /// holding the BSP statics behind a lock would deadlock the interrupts-masked /// handler with no soundness gain under single-core cooperative semantics. 
@@ -154,44 +155,51 @@ pub unsafe extern "C" fn syscall_entry(frame: *mut SyscallTrapFrame) { // SAFETY: build the dispatch context from the **running EL0 task's** // bindings, sourced from the scheduler (gate #3 / T-026), or the FAIL-CLOSED - // default when no task is current. `SCHED` / `EP_ARENA` / `IPC_QUEUES` / - // `CONSOLE` / `MMU` / `AS_ARENA` / `FAILCLOSED_TABLE` / `BOOTSTRAP_AS` are all - // published before the (post-`SCHED`-init) smoke runs; single-core + - // interrupts-masked ⇒ no aliasing; the momentary `&mut`s drop at the end of - // `dispatch` and never cross a switch. The `&mut *table_ptr` is the gate-#3 - // cap-table dereference (M4 — UNSAFE-2026-0014 Amendment; see this fn's - // `# Safety`). Audit: UNSAFE-2026-0010 + UNSAFE-2026-0014 + UNSAFE-2026-0029. + // default when the running-task syscall context is incomplete. `SCHED` / + // `EP_ARENA` / `IPC_QUEUES` / `CONSOLE` / `MMU` / `AS_ARENA` / + // `FAILCLOSED_TABLE` / `BOOTSTRAP_AS` are all published before the + // (post-`SCHED`-init) smoke runs; single-core + interrupts-masked ⇒ no + // aliasing; the momentary `&mut`s drop at the end of `dispatch` and never + // cross a switch. The `&mut *table_ptr` is the gate-#3 cap-table dereference + // (M4 — UNSAFE-2026-0014 Amendment; see this fn's `# Safety`). Audit: + // UNSAFE-2026-0010 + UNSAFE-2026-0014 + UNSAFE-2026-0029. let effect = unsafe { let sched = (*crate::SCHED.0.get()).assume_init_ref(); let current_table = sched.current_user_table(); let current_as = sched.current_address_space_handle(); let current_window = sched.current_user_window(); - // task_as: the running task's address space (read-only, for the gate-#1 - // `Mmu::translate`), or the bootstrap AS as a harmless placeholder when - // fail-closed (the empty window rejects every non-zero copy first). A - // stale / absent AS handle also falls back. + // Accept the running task's syscall context only as a complete unit: + // table + user window + generation-checked AS. 
Any missing / stale piece + // makes the whole context all-or-nothing fail-closed — the empty table + + // empty window (so data-plane syscalls grant no cap and copy no byte from + // a partially bound task) AND `has_current_task = false` (so the + // control-plane syscalls, which consult no capability, are rejected too). let arena = (*crate::AS_ARENA.0.get()).assume_init_ref(); - let task_as = match current_as.and_then(|h| tyrne_kernel::mm::get_address_space(arena, h)) { - Some(asp) => asp.inner(), - None => (*crate::BOOTSTRAP_AS.0.get()).assume_init_ref(), - }; - // caller_table: the running task's own recorded table, or the empty - // FAILCLOSED_TABLE (every lookup → InvalidHandle) when no task is current. - let caller_table = match current_table { - Some(table_ptr) => &mut *table_ptr, - None => (*crate::FAILCLOSED_TABLE.0.get()).assume_init_mut(), - }; + let resolved_task_as = + current_as.and_then(|h| tyrne_kernel::mm::get_address_space(arena, h)); + let (caller_table, user_window, task_as, has_current_task) = + match (current_table, current_window, resolved_task_as) { + (Some(table_ptr), Some(window), Some(asp)) => { + (&mut *table_ptr, window, asp.inner(), true) + } + _ => ( + (*crate::FAILCLOSED_TABLE.0.get()).assume_init_mut(), + UserAccessWindow::empty(), + (*crate::BOOTSTRAP_AS.0.get()).assume_init_ref(), + false, + ), + }; let mut ctx = SyscallContext { ep_arena: (*crate::EP_ARENA.0.get()).assume_init_mut(), queues: (*crate::IPC_QUEUES.0.get()).assume_init_mut(), caller_table, console: (*crate::CONSOLE.0.get()).assume_init_ref(), - user_window: current_window.unwrap_or_else(UserAccessWindow::empty), + user_window, mmu: (*crate::MMU.0.get()).assume_init_ref(), task_as, - has_current_task: current_table.is_some(), + has_current_task, }; dispatch(&mut ctx, args) }; diff --git a/docs/audits/unsafe-log.md b/docs/audits/unsafe-log.md index 9b7b197..8287b52 100644 --- a/docs/audits/unsafe-log.md +++ b/docs/audits/unsafe-log.md @@ -248,7 +248,7 @@ Both forms 
are time-stamped so a reader can reconstruct the entry's state at any - **Additional locations (T-018, commit `1b0f1d9` BSP wiring + commit `0d16ea4` scheduler hook):** [`kernel/src/sched/mod.rs::yield_now`](../../kernel/src/sched/mod.rs) (activation-hook invocation just before `cpu.context_switch`), [`kernel/src/sched/mod.rs::ipc_recv_and_yield`](../../kernel/src/sched/mod.rs) (Phase-2 dispatch path's activation hook), [`kernel/src/sched/mod.rs::ipc_send_and_yield`](../../kernel/src/sched/mod.rs) (threads the `activate_address_space` closure into its delegated `yield_now` call on the unblock-receiver-then-yield path; the activation itself fires inside `yield_now` but the closure-as-parameter site is here for traceability), [`kernel/src/sched/mod.rs::start`](../../kernel/src/sched/mod.rs) (first-task activation via momentary `unsafe { (*sched).task_address_space_handles[next_idx] }`), [`kernel/src/sched/mod.rs::address_space_activation_target`](../../kernel/src/sched/mod.rs) (the pure helper that computes the `Option` switch decision — no `unsafe`, but cited here for completeness), and [`bsp-qemu-virt/src/main.rs::activate_address_space`](../../bsp-qemu-virt/src/main.rs) (the closure that dereferences the `AS_ARENA` + `MMU` `StaticCell`s and calls [`tyrne_kernel::mm::activate_address_space_handle`](../../kernel/src/mm/address_space.rs)). - **Additional invariant:** the activation closure fires *inside* the scheduler's `IrqGuard` scope but *after* the `&mut Scheduler` borrow drops; the closure's own `&AS_ARENA` + `&MMU` borrows do not alias any live scheduler borrow. The closure is `FnOnce` — called at most once per scheduler invocation; on the no-AS-switch path it is dropped unused (the `Option` computed by `address_space_activation_target` is `None`). 
- **Amendment (2026-05-31, [T-026](../analysis/tasks/phase-b/T-026-current-task-cap-table.md) / gate #3, mandatory per the task's audit AC): scope extended to the `syscall_entry` cap-table-pointer dereference.** T-026 records each EL0 task's capability-table pointer in the scheduler (`task_cap_tables: [Option<*mut CapabilityTable>; N]`, written by [`add_user_task`](../../kernel/src/sched/mod.rs)); the BSP [`syscall_entry`](../../bsp-qemu-virt/src/syscall.rs) reads it via `Scheduler::current_user_table()` and materialises a momentary `&mut *table_ptr` — the **running task's own** table — for the single `dispatch` call. This is a new dereference site under **this entry's discipline**: (1) the `&mut` is lexically contained to the one `dispatch` call and never crosses `cpu.context_switch` (the data-plane syscalls do not switch; control-plane returns a directive); (2) the pointer's validity is established by `add_user_task`'s `# Safety` contract (a `CapabilityTable` outliving the task, no `&mut` aliased across a switch — the [ADR-0021](../decisions/0021-raw-pointer-scheduler-ipc-bridge.md) bridge) and the binding is *read* from the scheduler, not minted here; (3) **fail-closed** when no task is current — `current_user_table()` returns `None` and `syscall_entry` substitutes the empty `FAILCLOSED_TABLE` (every lookup → `InvalidHandle`), so no over-grant. The new `task_cap_tables` array makes `Scheduler` `!Send`/`!Sync`; it lives only inside the BSP's unconditionally-`Sync` `StaticCell` and is reached via the ADR-0021 `*mut Scheduler` bridge, so no `Send`/`Sync` bound is broken. **Second-reviewer required** (capability-table sourcing is security-sensitive, [unsafe-policy §Review.4](../standards/unsafe-policy.md)). 
Host-tested: `sched::tests::current_accessors_resolve_running_task_bindings_or_none` + `syscall::dispatch::tests::task_{yield,exit}_with_no_current_task_fails_closed`; QEMU smoke shows the no-current-task `console_write` fail-closing to `InvalidHandle` (status `0x102`). + **Amendment (2026-05-31, [T-026](../analysis/tasks/phase-b/T-026-current-task-cap-table.md) / gate #3, mandatory per the task's audit AC): scope extended to the `syscall_entry` cap-table-pointer dereference.** T-026 records each EL0 task's capability-table pointer in the scheduler (`task_cap_tables: [Option<*mut CapabilityTable>; N]`, written by [`add_user_task`](../../kernel/src/sched/mod.rs)); the BSP [`syscall_entry`](../../bsp-qemu-virt/src/syscall.rs) reads it via `Scheduler::current_user_table()` and materialises a momentary `&mut *table_ptr` — the **running task's own** table — for the single `dispatch` call. This is a new dereference site under **this entry's discipline**: (1) the `&mut` is lexically contained to the one `dispatch` call and never crosses `cpu.context_switch` (the data-plane syscalls do not switch; control-plane returns a directive); (2) the pointer's validity is established by `add_user_task`'s `# Safety` contract (a `CapabilityTable` outliving the task, no `&mut` aliased across a switch — the [ADR-0021](../decisions/0021-raw-pointer-scheduler-ipc-bridge.md) bridge) and the binding is *read* from the scheduler, not minted here; (3) **fail-closed** when no task is current or the running-task syscall context is incomplete — the table, user window, and generation-checked task address space must resolve as a unit; otherwise `syscall_entry` substitutes the empty `FAILCLOSED_TABLE` (every lookup → `InvalidHandle`), the empty window, and the bootstrap-AS placeholder, so no over-grant. 
The new `task_cap_tables` array makes `Scheduler` `!Send`/`!Sync`; it lives only inside the BSP's unconditionally-`Sync` `StaticCell` and is reached via the ADR-0021 `*mut Scheduler` bridge, so no `Send`/`Sync` bound is broken. **Second-reviewer required** (capability-table sourcing is security-sensitive, [unsafe-policy §Review.4](../standards/unsafe-policy.md)). Host-tested: `sched::tests::current_accessors_resolve_running_task_bindings_or_none` + `syscall::dispatch::tests::task_{yield,exit}_with_no_current_task_fails_closed`; QEMU smoke shows the no-current-task `console_write` fail-closing to `InvalidHandle` (status `0x102`). **Review-round hardening (2026-05-31):** the running-task context resolves as a single all-or-nothing unit — an incomplete binding (any of table / window / generation-checked AS missing or stale) also sets `has_current_task = false`, so the **control-plane** syscalls (`task_yield`/`task_exit`, which consult no capability and so are not guarded by the empty table) are rejected too, not just the data-plane; and `add_user_task` stores `None` rather than `Some(null)` for a null `cap_table` (behind a `debug_assert!`), so a `# Safety`-contract violation degrades to fail-closed instead of a null dereference. ### UNSAFE-2026-0015 — generic-timer system-register reads (`CNTPCT_EL0`, `CNTFRQ_EL0`) @@ -663,7 +663,7 @@ Neither change touches the `copy_nonoverlapping` site itself; both correct contr - **Reviewed by:** @cemililik (+ Claude Opus 4.8 agent). Security-sensitive (the EL0→EL1 trust boundary — the single widest untrusted-input surface in the system) → second-reviewer required per [unsafe-policy §Review.4](../standards/unsafe-policy.md). - **Status:** Active. Smoke-verified at runtime: the 2026-05-29 QEMU trace (debug build) shows the EL1 kernel-stub's two `SVC`s taken at the current-EL `+0x200` sync vector — `Taking exception 2 [SVC] ... from EL1 to EL1 ... 
with ESR 0x15/0x56000000` (EC = SVC64, exactly the value the trampoline routes on) — each `ERET`ing cleanly back to EL1, with the `console_write` syscall emitting `tyrne: hello from the syscall boundary (console_write via SVC)` and the round-trip confirmation `console_write status=0x0, bytes=63; bad-number status=0x1`; `-d int,unimp,guest_errors` shows only the pre-existing PL011-disabled-UART warnings (no new fault class). **The lower-EL `+0x400` slot is installed but not yet exercised** — a real EL0 task taking it (with the EL0↔EL1 privilege transition) is B6's runtime verification per [ADR-0030 §Simulation row-to-verification mapping](../decisions/0030-syscall-abi.md#simulation); this status note lifts to cover the `+0x400` path via append-only Amendment when B6's first EL0 task runs. - **Amendment (2026-05-31, [T-026](../analysis/tasks/phase-b/T-026-current-task-cap-table.md) / gate #3): `syscall_entry` now sources the caller context from the scheduler's running task; the smoke is re-sequenced after `SCHED` init.** The statics the "Statics initialised before first `SVC`" invariant named have changed: `syscall_entry` no longer reads a dedicated `SYSCALL_STUB_TABLE`. It reads **`SCHED`** (added) for the running task's `current_user_table()` / `current_address_space_handle()` / `current_user_window()`, resolves the task's `&QemuVirtAddressSpace` from `AS_ARENA`, and falls back to the empty `FAILCLOSED_TABLE` + `BOOTSTRAP_AS` + an empty window when no task is current. `syscall_boundary_smoke` is correspondingly moved to **after** `SCHED` is published (still before `start()`), so `SCHED.current` is a valid (empty) read — `None` — which is exactly the fail-closed case the smoke now demonstrates (`console_write` → `InvalidHandle`; the old "emit a greeting" path is superseded). The new cap-table-pointer dereference is covered by the [UNSAFE-2026-0014](#unsafe-2026-0014--scheduler-free-function-momentary-mut-pattern) 2026-05-31 Amendment. 
Smoke-verified 2026-05-31: 2 `SVC` exceptions at `+0x200`, clean `ERET`, no new fault class; `console_write status=0x102 ; bad-number status=0x1`. + **Amendment (2026-05-31, [T-026](../analysis/tasks/phase-b/T-026-current-task-cap-table.md) / gate #3): `syscall_entry` now sources the caller context from the scheduler's running task; the smoke is re-sequenced after `SCHED` init.** The statics the "Statics initialised before first `SVC`" invariant named have changed: `syscall_entry` no longer reads a dedicated `SYSCALL_STUB_TABLE`. It reads **`SCHED`** (added) for the running task's `current_user_table()` / `current_address_space_handle()` / `current_user_window()`, resolves the task's `&QemuVirtAddressSpace` from `AS_ARENA`, and accepts the task data-plane context only when the table, user window, and generation-checked address space all resolve; otherwise it falls back to the empty `FAILCLOSED_TABLE` + `BOOTSTRAP_AS` + an empty window. `syscall_boundary_smoke` is correspondingly moved to **after** `SCHED` is published (still before `start()`), so `SCHED.current` is a valid (empty) read — `None` — which is exactly the fail-closed case the smoke now demonstrates (`console_write` → `InvalidHandle`; the old "emit a greeting" path is superseded). The new cap-table-pointer dereference is covered by the [UNSAFE-2026-0014](#unsafe-2026-0014--scheduler-free-function-momentary-mut-pattern) 2026-05-31 Amendment. Smoke-verified 2026-05-31: 2 `SVC` exceptions at `+0x200`, clean `ERET`, no new fault class; `console_write status=0x102 ; bad-number status=0x1`. **Review-round hardening (2026-05-31):** `FAILCLOSED_TABLE` is now initialised in core `kernel_main_high` setup (alongside `EP_ARENA`/`IPC_QUEUES`), **not** inside `syscall_boundary_smoke`, so the security fallback is live independent of whether the diagnostic runs — its `.write` still precedes any `SVC`, so the "statics initialised before first `SVC`" invariant holds verbatim. 
The smoke now also *asserts* both `SVC` results are non-`OK_STATUS` (a fail-closed regression would over-grant and report OK, panicking the boot before "all tasks complete") rather than printing them unconditionally. ### UNSAFE-2026-0030 — validated copy-from/to-user byte move via `core::ptr::copy_nonoverlapping` diff --git a/kernel/src/sched/mod.rs b/kernel/src/sched/mod.rs index 1198a05..b5a96de 100644 --- a/kernel/src/sched/mod.rs +++ b/kernel/src/sched/mod.rs @@ -469,6 +469,15 @@ impl Scheduler { user_sp.is_multiple_of(16), "add_user_task: user_sp must be 16-byte aligned (becomes SP_EL0)", ); + // `cap_table` is dereferenced by `syscall_entry` (`&mut *table_ptr`) to + // resolve the running task's capabilities; a null pointer would be UB on + // the first syscall. The # Safety contract forbids null — assert it in + // debug, and (at the store below) keep `None` rather than `Some(null)` so + // a release build degrades to fail-closed (no usable table) instead. + debug_assert!( + !cap_table.is_null(), + "add_user_task: cap_table must be non-null (the task's capability table)", + ); // SAFETY: caller guarantees the EL0-entry contract per the # Safety doc. // Forwarding to the BSP's init_user_context, which seeds the context's // x19/x20/lr/sp so the first cooperative restore lands in the enter_el0 @@ -496,7 +505,9 @@ impl Scheduler { // `saturating_sub` yields a zero-length window if the caller violates // `user_sp >= user_entry` — fail-closed (every non-zero copy then // faults) rather than wrapping. - self.task_cap_tables[idx] = Some(cap_table); + // Defensive store: `None` for a null pointer so `current_user_table()` + // yields `None` (→ fail-closed) rather than a dereferenceable `Some(null)`. 
+ self.task_cap_tables[idx] = (!cap_table.is_null()).then_some(cap_table); self.task_user_windows[idx] = Some(UserAccessWindow::new( user_entry, user_sp.saturating_sub(user_entry), diff --git a/kernel/src/syscall/dispatch.rs b/kernel/src/syscall/dispatch.rs index 8f29315..5802def 100644 --- a/kernel/src/syscall/dispatch.rs +++ b/kernel/src/syscall/dispatch.rs @@ -95,7 +95,10 @@ pub struct SyscallContext<'a, M: Mmu> { /// (gate #3 / T-026); in B5 it is the EL1 stub's bootstrap AS. pub task_as: &'a M::AddressSpace, /// Whether a running EL0 task is current (gate #3 / T-026). The BSP sets it - /// from the scheduler (`current_user_table().is_some()`). The **control-plane** + /// `true` only when the running task's capability table, user-access window, + /// and (generation-checked) address space **all** resolve from the scheduler + /// — the same all-or-nothing unit as the data-plane context; any incomplete + /// binding yields `false`. The **control-plane** /// syscalls (`task_yield` / `task_exit`) act on the trusted current-task /// identity ([ADR-0031][adr-0031]) and consult **no** capability, so the /// empty fail-closed `caller_table` cannot guard them — the dispatcher From a76d52def8b55068c9a6a46ed7a3dfcb5a3a6d7e Mon Sep 17 00:00:00 2001 From: Cemil ILIK Date: Sun, 31 May 2026 21:33:41 +0300 Subject: [PATCH 6/6] =?UTF-8?q?test(syscall):=20T-026=20=E2=80=94=20pin=20?= =?UTF-8?q?incomplete-binding=20fallback=20context=20fails=20closed=20on?= =?UTF-8?q?=20both=20planes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the one 'Kısmi' (partial) item from the review verification: there was no host test directly modelling the context the BSP syscall_entry produces on an incomplete running-task binding (empty FAILCLOSED_TABLE + empty window + has_current_task=false). 
The BSP match that assembles it is no_std/no_main and not host-testable directly, but the dispatcher's handling of that exact context is — and the prior suite only covered the planes separately (console_write with has_current_task=true; task_yield with has_current_task=false), never the combined fail-closed context. The new test builds that exact context once and asserts BOTH planes fail closed in it: data-plane console_write -> InvalidHandle (no output, via the empty table) and control-plane task_yield -> InvalidHandle (via the has_current_task gate, not Reschedule). The test is gated on debug_assertions, because console_write (syscall number 5) is debug-only. Gates: fmt; host + kernel clippy -D warnings; kernel tests 258 (+1); Miri 0 UB on the new test. 
Closes the incomplete-context coverage gate #3's BSP + // fallback arm cannot unit-test itself (T-026 review-round). + let mut ep_arena = EndpointArena::default(); + let mut queues = IpcQueues::new(); + let mut table = CapabilityTable::new(); // empty: the FAILCLOSED_TABLE analog + let console = FakeConsole::new(); + let (mmu, task_as) = empty_mmu_as(); + let mut ctx = SyscallContext { + ep_arena: &mut ep_arena, + queues: &mut queues, + caller_table: &mut table, + console: &console, + user_window: UserAccessWindow::empty(), + mmu: &mmu, + task_as: &task_as, + has_current_task: false, + }; + + // Data-plane: console_write fails closed via the empty table (the cap + // gate rejects before the window / translate is ever consulted). + let bogus = encode_cap_handle(Some(CapHandle::from_raw(0, 0))); + match dispatch( + &mut ctx, + call(SyscallNumber::ConsoleWrite, [bogus, 0x40_0000, 5, 0, 0, 0]), + ) { + SyscallEffect::Resume(r) => assert_eq!( + r.status, + SyscallError::Cap(crate::cap::CapError::InvalidHandle).as_status(), + "data-plane console_write must fail closed on an incomplete binding" + ), + other => panic!("expected Resume(InvalidHandle), got {other:?}"), + } + assert!( + console.captured().is_empty(), + "no byte may be emitted from the incomplete-binding fallback context" + ); + + // Control-plane: task_yield fails closed via the has_current_task gate + // (the empty table cannot guard it — it consults no capability). + match dispatch(&mut ctx, call(SyscallNumber::TaskYield, [0; 6])) { + SyscallEffect::Resume(r) => assert_eq!( + r.status, + SyscallError::Cap(crate::cap::CapError::InvalidHandle).as_status(), + "control-plane task_yield must fail closed on an incomplete binding" + ), + other => panic!("expected Resume(InvalidHandle), not Reschedule, got {other:?}"), + } + } + // ── send / recv ────────────────────────────────────────────────────────── #[test]