diff --git a/bsp-qemu-virt/src/main.rs b/bsp-qemu-virt/src/main.rs index dfdcc91..4345000 100644 --- a/bsp-qemu-virt/src/main.rs +++ b/bsp-qemu-virt/src/main.rs @@ -49,6 +49,7 @@ mod exceptions; mod gic; mod mmu; mod mmu_bootstrap; +mod syscall; use console::Pl011Uart; use cpu::QemuVirtCpu; @@ -384,6 +385,16 @@ static EP_CAP_A: StaticCell = StaticCell::new(); /// Task B's endpoint capability handle (index into `TABLE_B`). static EP_CAP_B: StaticCell = StaticCell::new(); +// ─── T-021 syscall-boundary smoke ───────────────────────────────────────────── + +/// The EL1 kernel-stub's capability table — the `caller_table` the syscall +/// dispatcher resolves capabilities in for the B5 `SVC` smoke (see +/// [`syscall::syscall_entry`]). In B5 the only `SVC` comes from a kernel-stub, +/// so it has a dedicated table holding a single debug-console capability; +/// B6 replaces this with the scheduler's current-task table once a real EL0 +/// task exists. Distinct from `TABLE_A` / `TABLE_B` (the IPC-demo tables). +static SYSCALL_STUB_TABLE: StaticCell = StaticCell::new(); + /// Task kernel-object arena — global per [ADR-0016]. Although the v1 demo /// never reads this arena after `create_task` has returned the two /// `TaskHandle`s, global storage is the uniform pattern established by @@ -653,6 +664,109 @@ fn task_a() -> ! { } } +// ─── T-021 syscall-boundary smoke ────────────────────────────────────────────── + +/// EL1 kernel-stub `SVC` smoke for the B5 syscall boundary ([T-021]). +/// +/// Issues two `SVC #0` traps **from EL1** — exercising the current-EL +/// `VBAR_EL1 + 0x200` sync vector and the full save → decode → dispatch → +/// `ERET` round-trip (an `SVC` issued at EL1 cannot take the lower-EL `+0x400` +/// vector; that real-EL0 path is B6's smoke per [ADR-0030 §Simulation]): +/// +/// 1. **`console_write`** (number `5`) through a granted debug-console +/// capability — the dispatcher's capability check passes, `copy_from_user` +/// validates the buffer against the active address space, and the bytes are +/// emitted on the serial console (the round-trip + emitted-bytes half of B5 +/// acceptance criterion #7). +/// 2. a **reserved-invalid number** (`0`) — the panic-free error path returns +/// `SyscallError::BadSyscallNumber` (status `0x1`) without touching any +/// capability. +/// +/// Runs after the IPC statics are published (the dispatcher's +/// [`SyscallContext`][tyrne_kernel::syscall::SyscallContext] borrows +/// `EP_ARENA` / `IPC_QUEUES`) and before `start()`. `task_yield` / `task_exit` +/// are not driven here — their dispatcher routing is host-tested; their real +/// EL0 semantics land in B6. +/// +/// [T-021]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-021-syscall-dispatch.md +/// [ADR-0030 §Simulation]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0030-syscall-abi.md +#[allow( + clippy::cast_possible_truncation, + reason = "Tyrne's BSP target is 64-bit aarch64; pointer/usize → u64 \ + register-word casts are lossless" +)] +fn syscall_boundary_smoke(console: &Pl011Uart) { + // Mint a debug-console capability into the kernel-stub's table. + // + // SAFETY: `SYSCALL_STUB_TABLE` lives in `.bss`; this is its single write, + // performed before any `SVC` issues. The momentary `&mut` for the + // `insert_root` drops before the trap. Audit: UNSAFE-2026-0010 (StaticCell) + // + UNSAFE-2026-0014 (momentary `&mut`). + let cons_cap = unsafe { + (*SYSCALL_STUB_TABLE.0.get()).write(CapabilityTable::new()); + let table = (*SYSCALL_STUB_TABLE.0.get()).assume_init_mut(); + table + .insert_root(Capability::new( + CapRights::CONSOLE_WRITE, + CapObject::DebugConsole, + )) + .expect("debug-console cap mint in empty table cannot fail") + }; + let cons_cap_word = tyrne_kernel::syscall::encode_cap_handle(Some(cons_cap)); + + // (1) console_write via SVC: x8 = 5, x0 = cap, x1 = buffer VA, x2 = length. + let greeting: &[u8] = b"tyrne: hello from the syscall boundary (console_write via SVC)\n"; + let ptr = greeting.as_ptr() as u64; + let len = greeting.len() as u64; + let status: u64; + let written: u64; + // SAFETY: `SVC #0` traps to the EL1 current-EL sync vector (+0x200), runs + // the panic-free dispatcher, and `ERET`s back here. The convention is + // x8 = number, x0..x2 = args; the handler writes x0 = status, x1 = bytes + // written, clobbers x0..x7, and preserves x8..x30 + SP_EL0. The emitted + // greeting bytes are the observable round-trip proof. Audit: UNSAFE-2026-0029. + unsafe { + core::arch::asm!( + "svc #0", + in("x8") 5u64, + inout("x0") cons_cap_word => status, + inout("x1") ptr => written, + in("x2") len, + out("x3") _, + out("x4") _, + out("x5") _, + out("x6") _, + out("x7") _, + ); + } + + // (2) reserved-invalid number 0 → BadSyscallNumber, panic-free. + let bad_status: u64; + // SAFETY: same `SVC` trap mechanism; number 0 is reserved-invalid, so the + // dispatcher returns a typed `SyscallError::BadSyscallNumber` in x0 without + // touching any capability or panicking. Audit: UNSAFE-2026-0029. + unsafe { + core::arch::asm!( + "svc #0", + in("x8") 0u64, + out("x0") bad_status, + out("x1") _, + out("x2") _, + out("x3") _, + out("x4") _, + out("x5") _, + out("x6") _, + out("x7") _, + ); + } + + let mut w = FmtWriter(console); + let _ = writeln!( + w, + "tyrne: syscall smoke ok (console_write status={status:#x}, bytes={written}; bad-number status={bad_status:#x})" + ); +} + // ─── Boot entry ─────────────────────────────────────────────────────────────── // Reset entry (`_start`). See `boot.s` and `docs/architecture/boot.md`. @@ -1216,6 +1330,15 @@ pub extern "C" fn kernel_entry() -> ! { (*EP_CAP_B.0.get()).write(ep_cap_b); } + // ── Syscall-boundary smoke — T-021 ──────────────────────────────────────── + // + // Exercise the EL0→EL1 `SVC` trap → panic-free dispatcher → `ERET` + // round-trip via an EL1 kernel-stub (the current-EL `+0x200` vector). Runs + // here, after the IPC statics the dispatcher's context borrows are live, and + // before `start()` hands control to the cooperative demo. The real EL0 + // (`+0x400`) round-trip is B6's smoke. + syscall_boundary_smoke(console); + // ── Scheduler setup ─────────────────────────────────────────────────────── let mut sched = Scheduler::::new(); diff --git a/bsp-qemu-virt/src/syscall.rs b/bsp-qemu-virt/src/syscall.rs new file mode 100644 index 0000000..aa2ec24 --- /dev/null +++ b/bsp-qemu-virt/src/syscall.rs @@ -0,0 +1,214 @@ +//! BSP-side syscall glue: the `SVC` trap frame and the Rust entry the +//! `vectors.s` sync trampoline calls. +//! +//! The architecture-agnostic, panic-free dispatch logic lives in the kernel +//! ([`tyrne_kernel::syscall`]). This module owns only the **hardware-facing** +//! half: +//! +//! - [`SyscallTrapFrame`] — the `#[repr(C)]` mirror of the register frame the +//! `tyrne_sync_trampoline` in `vectors.s` saves (`x0`–`x30` + `SP_EL0` + +//! `ELR_EL1` + `SPSR_EL1`); its field order and offsets must match the asm +//! `stp` sequence byte-for-byte (a compile-time `size_of` guard catches drift). +//! - [`syscall_entry`] — reads the syscall number + arguments from the saved +//! frame, builds a [`SyscallContext`] from the BSP statics, calls +//! [`tyrne_kernel::syscall::dispatch`], and applies the returned +//! [`SyscallEffect`] by writing the status + payload back into the frame. +//! +//! ## B5 scope and the `0x200` / `0x400` split +//! +//! The shared trampoline is installed at **both** sync vector slots — current-EL +//! (`VBAR_EL1 + 0x200`) and lower-EL-AArch64 (`VBAR_EL1 + 0x400`) — because the +//! save → dispatch → `ERET` mechanism is privilege-entry-agnostic. In B5 the +//! only `SVC` comes from an **EL1 kernel-stub** (see `kernel_entry`'s syscall +//! smoke), which — executing at the *current* EL — takes the `0x200` vector, +//! **not** the lower-EL `0x400` vector. A real EL0 task taking the `0x400` +//! vector (with the EL0↔EL1 privilege transition and copy-user against a +//! separate userspace `TTBR0_EL1`) is verified at runtime in **B6**, per +//! [ADR-0030 §Simulation row-to-verification mapping][adr-0030]. The `0x400` +//! handler is installed now so B6 adds only the EL0 task, not new trap plumbing. +//! +//! `caller_table` is a dedicated **kernel-stub** capability table in B5 +//! ([`crate::SYSCALL_STUB_TABLE`]); B6 replaces it with the scheduler's +//! current-task table once a real EL0 task exists. +//! +//! Audit: UNSAFE-2026-0029 (the trap-frame asm + this entry's frame +//! reads/writes). +//! +//! [adr-0030]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0030-syscall-abi.md + +use tyrne_kernel::syscall::{ + dispatch, SyscallArgs, SyscallContext, SyscallEffect, UserAccessWindow, +}; + +/// Saved-register frame the `tyrne_sync_trampoline` in `vectors.s` populates +/// before branching into [`syscall_entry`] on an `SVC`. +/// +/// `#[repr(C)]` is **mandatory**: the field order and byte offsets must match +/// the asm `stp` sequence in `vectors.s` exactly. The frame is 272 bytes total +/// (`x0`–`x29` as 15 pairs, then `x30`/`SP_EL0`, then `ELR_EL1`/`SPSR_EL1`), +/// 16-byte SP-aligned. Unlike the IRQ [`TrapFrame`][crate::exceptions::TrapFrame] +/// (which saves only the AAPCS64 caller-saved set), the syscall frame saves the +/// **full** general-purpose register file plus `SP_EL0` so it is a complete +/// snapshot of the trapped context — the shape a real EL0 task (B6) and any +/// future preemption arc require. +/// +/// Fields are private: the only reader/writer is [`syscall_entry`] in this +/// module, and keeping the raw register snapshot un-`pub` avoids exposing +/// (or accidentally logging) trapped register contents elsewhere. +#[repr(C)] +pub struct SyscallTrapFrame { + // `x0`–`x29` saved as 15 consecutive pairs at offsets 0x00..0xF0. + x0_x1: [u64; 2], + x2_x3: [u64; 2], + x4_x5: [u64; 2], + x6_x7: [u64; 2], + x8_x9: [u64; 2], + x10_x11: [u64; 2], + x12_x13: [u64; 2], + x14_x15: [u64; 2], + x16_x17: [u64; 2], + x18_x19: [u64; 2], + x20_x21: [u64; 2], + x22_x23: [u64; 2], + x24_x25: [u64; 2], + x26_x27: [u64; 2], + x28_x29: [u64; 2], + /// `x30` (LR) at 0xF0 and `SP_EL0` at 0xF8. + x30_sp_el0: [u64; 2], + /// `ELR_EL1` (return address) at 0x100 and `SPSR_EL1` (saved PSTATE) at 0x108. + elr_spsr: [u64; 2], +} + +// The trampoline reserves exactly 272 bytes and writes through fixed offsets +// mirroring the field order above. A size/layout drift between the asm and this +// `#[repr(C)]` would corrupt saved registers on every syscall; this guard fails +// the build before that can ship. (Mirrors the `TrapFrame` 192-byte guard.) +const _: () = assert!(core::mem::size_of::() == 272); + +/// Length of the syscall copy-from/to-user window in B5: the whole +/// identity-mapped RAM extent the bootstrap address space covers. +/// +/// The B5 EL1 kernel-stub runs on the bootstrap AS, which identity-maps the +/// managed extent (per [ADR-0027 §Decision outcome (a)]), so the stub's buffer +/// — a `.rodata`-resident `&[u8]` in the kernel image — is in range. B6's real +/// EL0 task derives a tighter window from its own mapped region (see +/// [`UserAccessWindow`]'s module docs). The subtraction is a `const`, so it +/// cannot wrap at runtime: const-eval rejects an underflow at **build time** +/// (an inverted extent is a hard compile error, never a release wrap). The +/// explicit assertion below makes that invariant — and its failure message — +/// unambiguous rather than relying on a raw "subtract with overflow" const-eval +/// error. +const _: () = assert!( + crate::PMM_EXTENT_END >= crate::PMM_EXTENT_START, + "PMM extent must be non-inverted: PMM_EXTENT_END >= PMM_EXTENT_START" +); +const SYSCALL_USER_WINDOW_LEN: usize = crate::PMM_EXTENT_END - crate::PMM_EXTENT_START; + +/// Rust entry for the `SVC` sync trampoline (`vectors.s`). +/// +/// Reads the syscall number (`x8`) and arguments (`x0`–`x5`) from the saved +/// `frame`, dispatches through [`tyrne_kernel::syscall::dispatch`], and applies +/// the resulting [`SyscallEffect`] by writing the status (`x0`) and payload +/// (`x1`–`x7`) back into the frame. Returns to the trampoline, which restores +/// the (now result-bearing) frame and `ERET`s. +/// +/// # Safety +/// +/// `extern "C"` so the asm trampoline can `bl` it. `frame` is guaranteed valid +/// by the trampoline (constructed via `stp` immediately before the `bl`, on the +/// kernel stack); this function dereferences it only inside `unsafe` blocks. +/// +/// **Why `unsafe` is required.** The function reads and writes the saved +/// register frame through a raw `*mut SyscallTrapFrame` (the asm calling +/// convention passes a pointer, not a `&mut`), and it materialises momentary +/// references to the write-once BSP statics via `assume_init_{mut,ref}`. +/// **Invariants upheld.** (1) The four statics it reaches +/// (`EP_ARENA` / `IPC_QUEUES` / `SYSCALL_STUB_TABLE` / `CONSOLE`) are all +/// written before the syscall smoke issues any `SVC`; (2) v1 is single-core and +/// the `SVC` handler runs with interrupts masked (exception entry masks `DAIF`), +/// so no peer aliases them mid-call; (3) the momentary `&mut`s are scoped to the +/// single `dispatch` call and do not cross a context switch — the data-plane +/// syscalls do not switch and the control-plane ones return a directive *before* +/// any switch, honouring the [ADR-0021] discipline; (4) the frame writes touch +/// only `x0`–`x7`, leaving the trampoline's restore of `x8`–`x30` + `SP_EL0` + +/// `ELR_EL1` + `SPSR_EL1` intact. **Rejected alternatives.** Passing a `&mut +/// SyscallTrapFrame` from the asm is impossible (asm has no Rust references); +/// holding the BSP statics behind a lock would deadlock the interrupts-masked +/// handler with no soundness gain under single-core cooperative semantics. +/// +/// Audit: UNSAFE-2026-0029 (trap-frame asm + frame access) + UNSAFE-2026-0010 +/// (`StaticCell` pattern) + UNSAFE-2026-0014 (momentary `&mut` to kernel state). +#[unsafe(no_mangle)] +pub unsafe extern "C" fn syscall_entry(frame: *mut SyscallTrapFrame) { + // SAFETY: `frame` is valid per the trampoline contract above; read the + // syscall number (x8) and argument words (x0..x5) out of the saved frame. + // Audit: UNSAFE-2026-0029. + let args = unsafe { + let f = &*frame; + SyscallArgs { + number: f.x8_x9[0], + args: [ + f.x0_x1[0], f.x0_x1[1], f.x2_x3[0], f.x2_x3[1], f.x4_x5[0], f.x4_x5[1], + ], + } + }; + + // SAFETY: build the dispatch context from the write-once BSP statics. All + // four are initialised in `kernel_entry` before the syscall smoke runs; + // single-core + interrupts-masked-in-handler means no aliasing; the + // momentary `&mut`s drop at the end of the `dispatch` call and never cross a + // switch. Audit: UNSAFE-2026-0010 (StaticCell) + UNSAFE-2026-0014 (momentary + // `&mut` to kernel state) + UNSAFE-2026-0029 (the syscall arc). + let effect = unsafe { + let mut ctx = SyscallContext { + ep_arena: (*crate::EP_ARENA.0.get()).assume_init_mut(), + queues: (*crate::IPC_QUEUES.0.get()).assume_init_mut(), + caller_table: (*crate::SYSCALL_STUB_TABLE.0.get()).assume_init_mut(), + console: (*crate::CONSOLE.0.get()).assume_init_ref(), + user_window: UserAccessWindow::new(crate::PMM_EXTENT_START, SYSCALL_USER_WINDOW_LEN), + }; + dispatch(&mut ctx, args) + }; + + match effect { + SyscallEffect::Resume(r) => { + // SAFETY: write the status (x0) + payload (x1..x7) back into the + // saved frame; the trampoline restores them on `ERET`. Touches only + // x0..x7. Audit: UNSAFE-2026-0029. + unsafe { + let f = &mut *frame; + f.x0_x1[0] = r.status; // x0 = status + f.x0_x1[1] = r.payload[0]; // x1 + f.x2_x3[0] = r.payload[1]; // x2 + f.x2_x3[1] = r.payload[2]; // x3 + f.x4_x5[0] = r.payload[3]; // x4 + f.x4_x5[1] = r.payload[4]; // x5 + f.x6_x7[0] = r.payload[5]; // x6 + f.x6_x7[1] = r.payload[6]; // x7 + } + } + SyscallEffect::Reschedule => { + // task_yield. v1 B5 stand-in: there is no scheduler-resident EL0 + // task issuing this (the smoke runs the stub before `start()`), so + // the real `yield_now` wiring lands in B6 once the caller is an EL0 + // task. The dispatcher-level routing (number 3 → Reschedule) is + // host-tested; here we resume with `Ok` (x0 = 0) — task_yield + // "always succeeds in v1" per ADR-0031. + // SAFETY: write x0 only. Audit: UNSAFE-2026-0029. + unsafe { + (*frame).x0_x1[0] = tyrne_kernel::syscall::OK_STATUS; + } + } + SyscallEffect::Terminate(_code) => { + // task_exit. The ABI says "does not return", but v1 has no EL0 + // context register file to drop — real termination lands in B6. The + // dispatcher-level routing (number 4 → Terminate) is host-tested; + // here we defensively resume with `Ok` so a stray kernel-stub + // task_exit cannot wedge the boot before B6 wires real termination. + // SAFETY: write x0 only. Audit: UNSAFE-2026-0029. + unsafe { + (*frame).x0_x1[0] = tyrne_kernel::syscall::OK_STATUS; + } + } + } +} diff --git a/bsp-qemu-virt/src/vectors.s b/bsp-qemu-virt/src/vectors.s index 3da5554..1224f3b 100644 --- a/bsp-qemu-virt/src/vectors.s +++ b/bsp-qemu-virt/src/vectors.s @@ -27,8 +27,14 @@ * * Tyrne runs at EL1 with SPSel = 1 (per ADR-0024's EL drop + * SPSR_EL2 = 0x3c5 = EL1h). An IRQ taken from kernel code lands at - * +0x280; userspace doesn't exist in v1 so the lower-EL entries are - * unreachable. Sync/FIQ/SError on any class trampoline to a panic. + * +0x280. The two *sync* entries (+0x200 current-EL and +0x400 lower-EL + * AArch64) route to the SVC sync trampoline (T-021): on ESR_EL1.EC == + * SVC64 they save the full register frame and call the Rust syscall + * dispatcher; any other sync cause falls through to the panic path. + * In v1 only the +0x200 path fires (an EL1 kernel-stub `SVC`); the + * +0x400 (real EL0) path is wired now but exercised at runtime in B6. + * FIQ/SError on any class, and sync on the unused SP_EL0 / AArch32 + * categories, still trampoline to a panic. * * Each entry is one `b