diff --git a/.agents/skills/README.md b/.agents/skills/README.md index 43efedd..744fef2 100644 --- a/.agents/skills/README.md +++ b/.agents/skills/README.md @@ -73,7 +73,7 @@ Skills are short. If a skill needs more than ~200 lines, either (a) the underlyi | [update-glossary](update-glossary/SKILL.md) | Add or update an entry in the glossary. | | [sync-adr-index](sync-adr-index/SKILL.md) | Rebuild the ADR index table from the files on disk. | | [start-task](start-task/SKILL.md) | Open a new roadmap task from the user-story template. | -| [conduct-review](conduct-review/SKILL.md) | Produce a milestone retrospective in `docs/roadmap/reviews/`. | +| [conduct-review](conduct-review/SKILL.md) | Produce a business / code / security / performance review artifact in `docs/analysis/reviews/-reviews/`. | | [add-bsp](add-bsp/SKILL.md) | Add a new Board Support Package crate — crate skeleton, boot checklist, console, context switch, smoke test. | ## Conventions for adding a new skill diff --git a/.agents/skills/add-bsp/SKILL.md b/.agents/skills/add-bsp/SKILL.md index b21627e..b3461a3 100644 --- a/.agents/skills/add-bsp/SKILL.md +++ b/.agents/skills/add-bsp/SKILL.md @@ -6,6 +6,8 @@ when-to-use: When adding support for a new hardware target (e.g. Raspberry Pi 4, # Add BSP +> **Length note.** This skill intentionally exceeds the ~200-line soft limit in [the skills README](../README.md). Bringing up a new board is irreducibly long — the `boot.s` template (step 4) and the smoke-test diagnostic table (step 10) are load-bearing checklist content, not padding, and a partial procedure here would be more dangerous than a long one. Kept whole rather than split or moved to a guide. 
+ ## Inputs Before starting, the agent must have: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 36e20f5..a14ca18 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,14 +2,26 @@ # # Goal: every hygiene check that a contributor is expected to pass # locally before opening a PR is also enforced here. A red CI blocks -# merge. The Miri and coverage jobs are slower and run conditionally -# to keep median PR feedback tight; they are still required for merge -# into `main`. +# merge. +# +# What actually gates merge (configured in GitHub branch protection): +# - lint-and-host-test, kernel-build, host-stable-check (fast lane) +# - miri (required, slow) +# The `miri` job runs the host-test suite under Stacked Borrows; it is +# slower (~10–15 min) but a Miri regression is a hard stop. The +# `coverage` job is INFORMATIONAL only (it sets `continue-on-error: true`) +# and must NOT be added to the required-checks list until the post-T-011 +# flip removes that flag — see docs/guides/ci.md §"Branch protection". +# +# Toolchain note: the kernel needs nightly (inline asm / lang items), +# so the lint-and-host-test, kernel-build, and miri jobs all run the +# pinned nightly ($NIGHTLY_PIN) — the same toolchain rust-toolchain.toml +# selects for in-repo `cargo` invocations. A separate host-stable-check +# job gives a genuine "host crates build clean on stable" signal. # # When this pipeline was born (2026-04-23, R6 retrospective work) the -# local host-test count was 111; the kernel-build target was -# aarch64-unknown-none. Any time a new crate or target lands, update -# the job matrix below. +# kernel-build target was aarch64-unknown-none. Any time a new crate or +# target lands, update the job matrix below. name: CI @@ -25,10 +37,23 @@ concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true +# Least-privilege for the auto-provisioned GITHUB_TOKEN. 
This pipeline +# only reads the repository and runs builds; it never writes refs, +# publishes artifacts, or touches issues/PRs. Override per-job only if a +# future job genuinely needs more — none do today. +permissions: + contents: read + env: CARGO_TERM_COLOR: always - RUSTFLAGS: -D warnings - # Pinned nightly for miri + coverage jobs. Rolling nightly means a + # NOTE: `-D warnings` is intentionally NOT set as a global RUSTFLAGS env + # var. A process-wide RUSTFLAGS REPLACES (does not merge) the per-target + # `[target.aarch64-unknown-none] rustflags` in .cargo/config.toml, which + # would silently drop `panic=abort` + `force-frame-pointers=yes` from the + # kernel build — CI would then compile a different ELF than a local build. + # Deny-warnings is enforced instead by the `-- -D warnings` already passed + # in the host-clippy / kernel-clippy aliases (.cargo/config.toml). + # Pinned nightly for the kernel + miri + coverage jobs. Rolling nightly means a # miri/llvm-tools regression on the public channel breaks master # without any commit of ours being the cause. Update this pin # intentionally (open an issue citing the pin bump), not silently. @@ -37,67 +62,122 @@ env: NIGHTLY_PIN: nightly-2026-01-15 jobs: - # ─── Fast lane: lint + host tests on stable ───────────────────────────── + # ─── Fast lane: fmt + clippy + host tests (pinned nightly) ────────────── # Expected wall time: ~2 min. Every PR must pass this before anything - # else runs. + # else runs. Runs on the pinned nightly — `rust-toolchain.toml` overrides + # `rustup default` for in-repo cargo anyway, so we select the pin + # explicitly (matching the miri/coverage jobs) instead of pretending to + # run on stable. A genuine stable signal lives in `host-stable-check`. 
lint-and-host-test: - name: fmt + clippy + host tests + name: fmt + clippy + host tests (nightly) runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - name: Install Rust stable with rustfmt + clippy + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + # CI only reads the repo; do not persist the token into local git + # config (least-privilege; avoids credential reuse by later steps). + persist-credentials: false + - name: Install pinned nightly with rustfmt + clippy run: | - rustup update stable --no-self-update - rustup default stable - rustup component add rustfmt clippy + rustup toolchain install $NIGHTLY_PIN --component rustfmt --component clippy --no-self-update + rustup override set $NIGHTLY_PIN - name: Cache cargo registry and build - uses: actions/cache@v4 + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 with: path: | ~/.cargo/bin ~/.cargo/registry ~/.cargo/git target - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + key: ${{ runner.os }}-cargo-${{ env.NIGHTLY_PIN }}-${{ hashFiles('**/Cargo.lock') }} restore-keys: | - ${{ runner.os }}-cargo- + ${{ runner.os }}-cargo-${{ env.NIGHTLY_PIN }}- - name: cargo fmt --check - run: cargo fmt --all -- --check + run: cargo +$NIGHTLY_PIN fmt --all -- --check - name: cargo host-clippy - run: cargo host-clippy + run: cargo +$NIGHTLY_PIN host-clippy - name: cargo host-test - run: cargo host-test + run: cargo +$NIGHTLY_PIN host-test # ─── Kernel build: aarch64-unknown-none ───────────────────────────────── # The bare-metal BSP cannot be built as part of the default workspace # test target (no_std + no_main). Build it explicitly here so the # kernel ELF and its dependencies stay compilable. 
kernel-build: - name: aarch64-unknown-none kernel build + name: aarch64-unknown-none kernel build (nightly) runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - name: Install Rust stable + aarch64 target + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + # CI only reads the repo; do not persist the token into local git + # config (least-privilege; avoids credential reuse by later steps). + persist-credentials: false + - name: Install pinned nightly + aarch64 target run: | - rustup update stable --no-self-update - rustup default stable - rustup target add aarch64-unknown-none - rustup component add clippy + rustup toolchain install $NIGHTLY_PIN --component clippy --no-self-update + rustup override set $NIGHTLY_PIN + rustup target add aarch64-unknown-none --toolchain $NIGHTLY_PIN - name: Cache cargo registry and build - uses: actions/cache@v4 + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 with: path: | ~/.cargo/bin ~/.cargo/registry ~/.cargo/git target - key: ${{ runner.os }}-cargo-aarch64-${{ hashFiles('**/Cargo.lock') }} + key: ${{ runner.os }}-cargo-aarch64-${{ env.NIGHTLY_PIN }}-${{ hashFiles('**/Cargo.lock') }} restore-keys: | - ${{ runner.os }}-cargo-aarch64- + ${{ runner.os }}-cargo-aarch64-${{ env.NIGHTLY_PIN }}- - name: cargo kernel-build - run: cargo kernel-build + run: cargo +$NIGHTLY_PIN kernel-build - name: cargo kernel-clippy - run: cargo kernel-clippy + run: cargo +$NIGHTLY_PIN kernel-clippy + + # ─── Host stable check: host crates compile + test on stable ──────────── + # A genuine "the host-buildable crates compile and pass tests on stable + # Rust" gate. The kernel image itself needs nightly (inline asm / lang + # items) so the bare-metal BSP is NOT built here — this job runs only the + # workspace default-members (kernel, hal, test-hal), which carry no + # nightly-only features today. 
If a host crate ever grows a `#![feature]`, + # this job is the one that will (correctly) go red on stable. + # + # Deliberately build + test only — NOT clippy/fmt with `-D warnings`. + # `clippy::pedantic` is `warn` workspace-wide and stable is a rolling + # toolchain: a future stable release can add a pedantic lint that turns a + # `-D warnings` gate red with no code change of ours. Lint/format + # enforcement therefore lives only on the pinned-nightly jobs (which run + # host-clippy + fmt), keeping this gate immune to upstream lint drift. + host-stable-check: + name: host crates on stable + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + # CI only reads the repo; do not persist the token into local git + # config (least-privilege; avoids credential reuse by later steps). + persist-credentials: false + - name: Install Rust stable + run: | + rustup toolchain install stable --no-self-update + - name: Cache cargo registry and build + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: | + ~/.cargo/bin + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-stable-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo-stable- + # Explicit `+stable` bypasses the rust-toolchain.toml override so this + # job exercises stable for real. `cargo build` / `host-test` operate on + # default-members (host-buildable crates only); the bare-metal BSP is + # never built here. No `-D warnings` lint/fmt step — see the job header. 
+ - name: cargo +stable build (host crates) + run: cargo +stable build + - name: cargo +stable host-test + run: cargo +stable host-test # ─── Miri: aliasing validation ────────────────────────────────────────── # Runs the full host-test suite under Miri's Stacked Borrows checker @@ -108,13 +188,17 @@ jobs: name: miri (Stacked Borrows) runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + # CI only reads the repo; do not persist the token into local git + # config (least-privilege; avoids credential reuse by later steps). + persist-credentials: false - name: Install pinned nightly with miri run: | rustup toolchain install $NIGHTLY_PIN --component miri --no-self-update rustup override set $NIGHTLY_PIN - name: Cache cargo registry and miri target - uses: actions/cache@v4 + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 with: path: | ~/.cargo/bin @@ -139,7 +223,11 @@ jobs: runs-on: ubuntu-latest continue-on-error: true steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + # CI only reads the repo; do not persist the token into local git + # config (least-privilege; avoids credential reuse by later steps). + persist-credentials: false - name: Install pinned nightly with llvm-tools run: | rustup toolchain install $NIGHTLY_PIN --component llvm-tools-preview --no-self-update @@ -155,12 +243,15 @@ jobs: # The cargo-llvm-cov version pin is independent of NIGHTLY_PIN — # the two can be bumped together or independently per # docs/guides/ci.md "Nightly pinning". 
- uses: taiki-e/install-action@v2 + uses: taiki-e/install-action@e0eafa9a0d485c37f97c0f7beb930a58a2facbac # v2.79.4 with: tool: cargo-llvm-cov@0.6.16 - name: Cache cargo registry and llvm-cov target - uses: actions/cache@v4 + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 with: + # ~/.cargo/bin is intentionally omitted here (unlike the sibling + # jobs): cargo-llvm-cov is installed by taiki-e/install-action, not + # `cargo install`, so nothing useful lands in ~/.cargo/bin to cache. path: | ~/.cargo/registry ~/.cargo/git diff --git a/.gitignore b/.gitignore index de15c6b..2e56867 100644 --- a/.gitignore +++ b/.gitignore @@ -41,8 +41,9 @@ tmp/ .tmp/ # Claude Code runtime state — per-session lock files coordinating tools like -# ScheduleWakeup. The .claude/skills/ tree itself IS tracked (project skills -# are source of truth); only runtime lock files are ignored. +# ScheduleWakeup. Only runtime lock files under .claude/ are ignored; the +# tracked project skills live under .agents/skills/ (migrated from +# .claude/skills/ on 2026-05-14) and are committed as the source of truth. .claude/*.lock # Python tooling occasionally used for build scripts diff --git a/CLAUDE.md b/CLAUDE.md index 535a99a..0ed7b41 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ This file is the entry point for Claude-based AI agents (Claude Code, Claude API ## What this project is -Tyrne is a **capability-based microkernel** written in Rust, in the lineage of seL4 and Hubris. The project is **pre-alpha** — most code is not yet written, and the current phase is architecture design captured in Architecture Decision Records (ADRs). Primary development target is QEMU `virt` on aarch64; first real hardware target is the Raspberry Pi 4. +Tyrne is a **capability-based microkernel** written in Rust, in the lineage of seL4 and Hubris. 
The project is **pre-alpha**, but implementation is well underway: the kernel boots end-to-end on QEMU `virt` aarch64 and runs a two-task capability-gated IPC demo. The project is **mid-Phase B** — the MMU, PMM, address-space objects, and task loader (load half) are done; the syscall ABI and first userspace task are next. Architecture is documented as Architecture Decision Records (see the [ADR index](docs/decisions/README.md)); active implementation work lives under `kernel/`, `hal/`, and `bsp-qemu-virt/`. Primary development target is QEMU `virt` on aarch64; first real hardware target is the Raspberry Pi 4. See [README.md](README.md) for the public overview. @@ -13,7 +13,7 @@ See [README.md](README.md) for the public overview. These rules apply to every AI agent acting inside this repository, regardless of model, runner, or tool. 1. **Security-first mindset.** Tyrne is built to be a high-assurance OS. When in doubt, choose the more conservative option. Never weaken a capability check, never introduce ambient authority, never suppress a failing security test. -2. **Memory safety through Rust.** All kernel and userspace code is Rust. Every `unsafe` block must have a comment explaining (a) why it is needed, (b) what invariants it upholds, (c) why safer alternatives were rejected. Audit tracking for `unsafe` is defined in [docs/standards/](docs/standards/). +2. **Memory safety through Rust.** All kernel and userspace code is Rust. Every `unsafe` block must have a comment explaining (a) why it is needed, (b) what invariants it upholds, (c) why safer alternatives were rejected. The `unsafe` policy is in [docs/standards/unsafe-policy.md](docs/standards/unsafe-policy.md); each block's audit entry lives in the log at [docs/audits/unsafe-log.md](docs/audits/unsafe-log.md). 3. **English in the repository.** Source code, comments, doc-comments, documentation, commit messages, PR descriptions, issue text, and this file are English. 
Conversation with the maintainer in chat may be Turkish, but nothing committed to the repo should be. 4. **Mermaid for diagrams.** All architectural diagrams are inline Mermaid code fences. Do not add PNG, SVG, ASCII-art, or other binary diagram formats. 5. **Record decisions as ADRs.** Any non-trivial architectural, security, or process decision is recorded as an ADR in [docs/decisions/](docs/decisions/) using the MADR template. ADRs are append-only; to override an old decision, write a new ADR that supersedes it. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 901c8e8..eae6de6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,6 @@ # Contributing to Tyrne -Thank you for your interest. Tyrne is currently in the **architecture phase** — the foundational design documents are being written and the codebase is not yet open for code contributions. +Thank you for your interest. Tyrne is **pre-alpha** and mid-way through Phase B: the Rust workspace exists, the kernel boots end-to-end on QEMU `virt` aarch64, and it runs a two-task IPC demo. Implementation is active, but the codebase is not yet open for unsolicited code contributions — each subsystem is being grown along the phased roadmap, and changes outside the active milestone's scope force premature rewrites. Issues, references, and ADR review are very welcome (see below). ## What is useful right now @@ -11,7 +11,7 @@ Thank you for your interest. Tyrne is currently in the **architecture phase** ## What is not useful yet -- Pull requests against source code outside the active milestone scope. The Rust workspace exists and the kernel boots end-to-end on QEMU virt (Phase A + B0/B1 closed), but each subsystem is being grown along the phased roadmap in [`docs/roadmap/`](docs/roadmap/); changes outside the current milestone's scope force premature rewrites and are usually not merged. Check [`docs/roadmap/current.md`](docs/roadmap/current.md) before opening a non-trivial PR. 
+- Pull requests against source code outside the active milestone scope. The Rust workspace exists and the kernel boots end-to-end on QEMU virt (Phase A and milestones B0–B3 closed; B4 active), but each subsystem is being grown along the phased roadmap in [`docs/roadmap/`](docs/roadmap/); changes outside the current milestone's scope force premature rewrites and are usually not merged. Check [`docs/roadmap/current.md`](docs/roadmap/current.md) before opening a non-trivial PR. - Feature requests for subsystems that have not yet been designed. File those as discussion issues if you want to influence the design, not as feature requests. ## When the project enters the implementation phase diff --git a/Cargo.toml b/Cargo.toml index 1825b0b..615f461 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,11 +26,14 @@ version = "0.0.1" edition = "2021" authors = ["Cemil İlik"] license = "Apache-2.0" -repository = "https://github.com/cemililik/Tyrne" +repository = "https://github.com/HodeTech/Tyrne" -# Lints applied to every workspace crate that opts in via `[lints] workspace = true`. -# See docs/standards/code-style.md and docs/standards/unsafe-policy.md for the -# rationale behind each. +# Workspace-wide lint LEVELS. These are the authoritative source for lint +# levels — `clippy.toml` holds only numeric thresholds, not levels. A lint set +# here takes effect only in crates that opt in with `[lints] workspace = true` +# in their own manifest (every member crate does). See +# docs/standards/code-style.md (§Lints) and docs/standards/unsafe-policy.md for +# the rationale behind each entry. [workspace.lints.rust] unsafe_op_in_unsafe_fn = "deny" missing_docs = "warn" @@ -50,6 +53,10 @@ todo = "deny" # Release profile: optimized, thin on debug info, panic=abort comes from # .cargo/config.toml rustflags so it only applies to the bare-metal target. +# overflow-checks is kept ON in release deliberately: a silent integer wrap in +# the kernel is a security bug, not a performance win. 
This is consistent with +# the `clippy::arithmetic_side_effects = deny` posture (see +# docs/standards/code-style.md) — do not drop it to "optimize" the build. [profile.release] codegen-units = 1 debug = "line-tables-only" @@ -57,7 +64,8 @@ lto = "fat" opt-level = 3 overflow-checks = true -# Dev profile: reasonable compile times, full debug info, overflow checks on. +# Dev profile: reasonable compile times, full debug info, overflow checks on +# (same security rationale as the release profile above). [profile.dev] debug = 2 opt-level = 1 diff --git a/NOTICE b/NOTICE index 50dfa0a..fe731e4 100644 --- a/NOTICE +++ b/NOTICE @@ -1,7 +1,7 @@ Tyrne — a capability-based microkernel -Copyright 2026 Cemil İlik and Tyrne contributors +Copyright 2026 HodeTech and Tyrne contributors This product includes software developed by the Tyrne project -(https://github.com/cemililik/TyrneOS). +(https://github.com/HodeTech/Tyrne). Licensed under the Apache License, Version 2.0. See LICENSE for the full text. diff --git a/README.md b/README.md index a29f339..72eaac5 100644 --- a/README.md +++ b/README.md @@ -32,13 +32,13 @@ The active task and its current state live in [`docs/roadmap/current.md`](docs/r **Microkernel by construction, not by branding.** The kernel runs exclusively in privileged mode and contains five subsystems: capabilities, IPC, scheduling, memory management, and interrupt dispatch. Drivers, filesystems, and network stacks land in userspace compartments — see [`docs/architecture/overview.md`](docs/architecture/overview.md) for the layer diagram. Adding a feature does not enlarge the trusted computing base unless it strictly has to. -**Memory safety through Rust + audited `unsafe`.** All kernel, HAL, and userspace code is Rust. 
Every `unsafe` block carries a SAFETY comment explaining (a) why it is needed, (b) the invariants it upholds, and (c) why safer alternatives were rejected, and is tracked in [`docs/audits/unsafe-log.md`](docs/audits/unsafe-log.md) with a numbered ID, a reviewed-by line, and a status field. There are currently 27 `unsafe` audit entries; the kernel proper exposes one (`UNSAFE-2026-0027`, the task-loader byte-copy). +**Memory safety through Rust + audited `unsafe`.** All kernel, HAL, and userspace code is Rust. Every `unsafe` block carries a SAFETY comment explaining (a) why it is needed, (b) the invariants it upholds, and (c) why safer alternatives were rejected, and is tracked in [`docs/audits/unsafe-log.md`](docs/audits/unsafe-log.md) with a numbered ID, a reviewed-by line, and a status field. The audit log is the source of truth for the current set of `unsafe` blocks; in the kernel crate, production `unsafe` is a small audited set — the PMM frame-zeroing, the task-loader byte-copy, and the scheduler/IPC raw-pointer bridge — each carrying an audit-log entry. **HAL separation as a hard architectural rule.** Hardware-specific code lives behind a small set of traits — `Console`, `Cpu`, `Mmu`, `Timer`, `IrqController`, `ContextSwitch` — defined in the `tyrne-hal` crate. Each Board Support Package implements those traits for one board. Bringing up a new aarch64 SoC means writing a new BSP, not editing the kernel. **Heterogeneous hardware as a stated goal.** The same kernel is intended to scale from microcontroller-class smart-home devices to single-board computers and eventually to mobile-class SoCs. Hardware tiers (below) make the level of support explicit per target. -**Documented decisions, append-only.** Every non-trivial architectural choice is captured as an Architecture Decision Record under [`docs/decisions/`](docs/decisions/). ADRs are append-only: corrections land as revision notes, supersessions write a new ADR. The current count is 32 accepted ADRs. 
+**Documented decisions, append-only.** Every non-trivial architectural choice is captured as an Architecture Decision Record under [`docs/decisions/`](docs/decisions/). ADRs are append-only: corrections land as revision notes, supersessions write a new ADR. See the [ADR index](docs/decisions/README.md) for the full list with each ADR's title, status, and date. --- @@ -54,7 +54,7 @@ cargo kernel-build cargo kernel-run ``` -You should see, in order: +You should see, in order (the exact frame counts and addresses are representative and vary by build): ```text tyrne: hello from kernel_main @@ -77,7 +77,7 @@ Exit QEMU with `Ctrl-A x`. Full prerequisites, troubleshooting, and a line-by-li **Host-side tests** (no QEMU required): ```sh -cargo host-test # 259 tests across kernel, HAL, test-HAL +cargo host-test # host-side test suite (kernel · HAL · test-HAL) cargo host-clippy # -D warnings cargo kernel-clippy # -D warnings (kernel crate's stricter lints) cargo fmt --check diff --git a/SECURITY.md b/SECURITY.md index 50475a2..c80f696 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -4,13 +4,13 @@ Tyrne is a security-oriented operating system project. Even while it is in pre-a ## Project status and guarantees -Tyrne is **pre-alpha**. The kernel boots end-to-end on QEMU virt aarch64 (Phase A + B0/B1 closed) and runs a two-task IPC demo through to completion, but it is not yet a userspace-bearing OS — no production use is supported, and no security guarantees are made for the current tree. The formal threat model is documented in [`docs/architecture/security-model.md`](docs/architecture/security-model.md) (Accepted) and refined as Phase B progresses; both the model and the codebase will continue to evolve until the project reaches a stable release. +Tyrne is **pre-alpha**. 
The kernel boots end-to-end on QEMU virt aarch64 (Phase A and milestones B0–B3 closed; B4 active) and runs a two-task IPC demo through to completion, but it is not yet a userspace-bearing OS — no production use is supported, and no security guarantees are made for the current tree. The formal threat model is documented in [`docs/architecture/security-model.md`](docs/architecture/security-model.md) (Accepted) and refined as Phase B progresses; both the model and the codebase will continue to evolve until the project reaches a stable release. ## Reporting a security issue Until a dedicated disclosure channel is set up, please report security-relevant observations by opening a **private security advisory** on GitHub: -https://github.com/cemililik/Tyrne/security/advisories/new +https://github.com/HodeTech/Tyrne/security/advisories/new Do not open a public issue for anything that looks like it might be security-sensitive, even in this early phase. diff --git a/bsp-qemu-virt/Cargo.toml b/bsp-qemu-virt/Cargo.toml index 599db8c..580554f 100644 --- a/bsp-qemu-virt/Cargo.toml +++ b/bsp-qemu-virt/Cargo.toml @@ -16,5 +16,9 @@ path = "src/main.rs" tyrne-hal = { path = "../hal" } tyrne-kernel = { path = "../kernel" } +# Inherits the workspace `[lints]` table, which sets the deny-heavy posture +# the BSP's `unsafe`-audit discipline relies on — notably +# `clippy::undocumented_unsafe_blocks` and `clippy::missing_safety_doc` +# (deny) per docs/standards/unsafe-policy.md §Tooling. (C7-013) [lints] workspace = true diff --git a/bsp-qemu-virt/src/console.rs b/bsp-qemu-virt/src/console.rs index e8d1bf5..faca970 100644 --- a/bsp-qemu-virt/src/console.rs +++ b/bsp-qemu-virt/src/console.rs @@ -69,11 +69,17 @@ impl Console for Pl011Uart { // accesses. // Audit: UNSAFE-2026-0005. unsafe { - let fr = (self.base + UARTFR) as *const u32; + // `saturating_add` for the base + register-offset + // arithmetic, matching the MMIO idiom in `gic.rs` and + // `main.rs` (C7-004 consistency). 
Overflow is impossible + // here — `base` is the fixed `0x0900_0000` and the offsets + // are tiny compile-time constants — but a uniform idiom + // keeps the MMIO-arithmetic audit surface small. + let fr = self.base.saturating_add(UARTFR) as *const u32; while read_volatile(fr) & UARTFR_TXFF != 0 { core::hint::spin_loop(); } - let dr = (self.base + UARTDR) as *mut u32; + let dr = self.base.saturating_add(UARTDR) as *mut u32; write_volatile(dr, u32::from(byte)); } } diff --git a/bsp-qemu-virt/src/cpu.rs b/bsp-qemu-virt/src/cpu.rs index 784da89..5ccfc6e 100644 --- a/bsp-qemu-virt/src/cpu.rs +++ b/bsp-qemu-virt/src/cpu.rs @@ -8,13 +8,14 @@ //! - [`tyrne_hal::Timer`] — monotonic time via the ARM Generic Timer's //! **virtual** counter (`CNTVCT_EL0`) and frequency register //! (`CNTFRQ_EL0`); see [ADR-0010]. The deadline-arming half -//! (`arm_deadline` / `cancel_deadline`) is intentionally -//! `unimplemented!()` until GIC + interrupt-vector-table wiring lands — -//! see T-009 task notes. Reading the virtual counter (rather than the -//! physical `CNTPCT_EL0`) keeps the read side aligned with the -//! deferred deadline-arming side, which programs `CNTV_CVAL_EL0` / -//! `CNTV_CTL_EL0` per ADR-0010's references and ADR-0022's first- -//! rider sub-rider. +//! (`arm_deadline` / `cancel_deadline`) is **implemented** (it programs +//! `CNTV_CVAL_EL0` / `CNTV_CTL_EL0` per ADR-0010's 2026-04-28 revision / +//! T-012); it stays runtime-unexercised in the v1 cooperative demo, +//! which arms no deadline, but the code path is complete. Reading the +//! virtual counter (rather than the physical `CNTPCT_EL0`) keeps the +//! read side aligned with the deadline-arming side, which programs +//! `CNTV_CVAL_EL0` / `CNTV_CTL_EL0` per ADR-0010's references and +//! ADR-0022's first-rider sub-rider. //! //! # Safety overview //! @@ -30,8 +31,8 @@ //! audited under `UNSAFE-2026-0018` — rather than duplicated as an inline-asm //! block in this file. //! -//! 
[ADR-0010]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0010-timer-trait.md -//! [ADR-0020]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0020-cpu-trait-v2-context-switch.md +//! [ADR-0010]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0010-timer-trait.md +//! [ADR-0020]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0020-cpu-trait-v2-context-switch.md use core::arch::{asm, naked_asm}; @@ -114,8 +115,8 @@ impl QemuVirtCpu { /// set this register; a zero value would make `now_ns` divide by /// zero and `resolution_ns_for_freq` overflow. Audit: UNSAFE-2026-0015. /// - /// [ADR-0012]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0012-boot-flow-qemu-virt.md - /// [ADR-0024]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0024-el-drop-policy.md + /// [ADR-0012]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0012-boot-flow-qemu-virt.md + /// [ADR-0024]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0024-el-drop-policy.md #[must_use] pub unsafe fn new() -> Self { // Runtime assertion of the ADR-0012 + ADR-0024 boot-time @@ -171,7 +172,7 @@ impl QemuVirtCpu { // read side. // Audit: UNSAFE-2026-0015. // - // [ADR-0012]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0012-boot-flow-qemu-virt.md + // [ADR-0012]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0012-boot-flow-qemu-virt.md unsafe { asm!("mrs {}, cntfrq_el0", out(reg) frequency_hz, options(nostack, nomem)); } @@ -271,6 +272,15 @@ impl Cpu for QemuVirtCpu { // SAFETY: `WFI` halts the core until an interrupt arrives. It does not // modify registers or memory; it only affects CPU power state. // Audit: UNSAFE-2026-0007. + // + // FORWARD HAZARD (C7-003): `nomem` is correct for v1 — `WFI` itself + // touches no memory, and v1's `irq_entry` is ack-and-ignore, so the + // idle loop observes nothing across the wake that `nomem` could + // reorder. 
When a scheduler-wake hook lands (an `irq_entry` that + // writes a flag idle's post-WFI code reads via a non-volatile + // path), `nomem` would permit that read to be hoisted *before* the + // `WFI`. At that point either drop `nomem` here or make the wake + // flag `Atomic`/volatile. Routed to the security pass. unsafe { asm!("wfi", options(nostack, nomem)); } diff --git a/bsp-qemu-virt/src/exceptions.rs b/bsp-qemu-virt/src/exceptions.rs index 3367536..6105380 100644 --- a/bsp-qemu-virt/src/exceptions.rs +++ b/bsp-qemu-virt/src/exceptions.rs @@ -18,9 +18,10 @@ //! trampolines; the Rust functions here run with the GIC's CPU //! interface in "active" state for the duration of `irq_entry`. //! -//! [`docs/architecture/exceptions.md`]: https://github.com/cemililik/Tyrne/blob/main/docs/architecture/exceptions.md +//! [`docs/architecture/exceptions.md`]: https://github.com/HodeTech/Tyrne/blob/main/docs/architecture/exceptions.md use core::arch::asm; +use core::fmt; use core::sync::atomic::{compiler_fence, Ordering}; use tyrne_hal::IrqController; @@ -40,8 +41,10 @@ const TIMER_IRQ_ID: u32 = 27; /// `#[repr(C)]` is mandatory — the field order and offsets must match /// the asm `stp` sequence in `src/vectors.s` byte-for-byte. The frame /// is 192 bytes total; SP alignment is preserved. +/// +/// `Debug` is **hand-written** (not derived) so it can redact the +/// deliberately-uninitialised `_reserved` slot — see that field's note. #[repr(C)] -#[derive(Debug)] pub struct TrapFrame { /// `x0` and `x1` saved at frame offset 0x00. pub x0_x1: [u64; 2], @@ -66,9 +69,41 @@ pub struct TrapFrame { /// `ELR_EL1` (return address) and `SPSR_EL1` (saved PSTATE) at offset 0xA0. pub elr_spsr: [u64; 2], /// Padding — keeps the frame at 192 bytes total (16-byte SP-aligned). 
+ /// + /// **Deliberately uninitialised.** The `vectors.s` trampoline does + /// `sub sp, sp, #192` and writes only offsets `0x00..0xB0`; this slot + /// (`[sp, #0xB0]`) is left holding whatever was previously on the + /// stack. `irq_entry` never reads it, so this is sound. Because the + /// slot holds stale kernel-stack bytes, the hand-written `Debug` impl + /// below redacts it (C7-010) so that printing a `TrapFrame` can never + /// leak those 16 bytes into a log or panic message; zero it in the + /// trampoline as well if the raw value is ever genuinely needed. pub _reserved: [u64; 2], } +impl fmt::Debug for TrapFrame { + /// Formats every saved-register field but **redacts `_reserved`**: + /// that slot is deliberately uninitialised (see its field note), so + /// emitting it would leak up to 16 bytes of stale kernel-stack + /// contents (C7-010). Redaction closes that path with no per-IRQ cost. + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("TrapFrame") + .field("x0_x1", &self.x0_x1) + .field("x2_x3", &self.x2_x3) + .field("x4_x5", &self.x4_x5) + .field("x6_x7", &self.x6_x7) + .field("x8_x9", &self.x8_x9) + .field("x10_x11", &self.x10_x11) + .field("x12_x13", &self.x12_x13) + .field("x14_x15", &self.x14_x15) + .field("x16_x17", &self.x16_x17) + .field("x18_lr", &self.x18_lr) + .field("elr_spsr", &self.elr_spsr) + .field("_reserved", &"<redacted>") + .finish() + } +} + // The trampoline in `vectors.s` reserves exactly 192 bytes of stack // for the frame and writes through fixed offsets that mirror the // field order above. A drift between the asm and the Rust `repr(C)` diff --git a/bsp-qemu-virt/src/gic.rs b/bsp-qemu-virt/src/gic.rs index ca1c680..dc997fb 100644 --- a/bsp-qemu-virt/src/gic.rs +++ b/bsp-qemu-virt/src/gic.rs @@ -19,8 +19,8 @@ //! full design rationale; `UNSAFE-2026-0019` for the audit-log entry //! covering this module's MMIO surface. //! -//!
[ADR-0011]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0011-irq-controller-trait.md -//! [`docs/architecture/exceptions.md`]: https://github.com/cemililik/Tyrne/blob/main/docs/architecture/exceptions.md +//! [ADR-0011]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0011-irq-controller-trait.md +//! [`docs/architecture/exceptions.md`]: https://github.com/HodeTech/Tyrne/blob/main/docs/architecture/exceptions.md use core::ptr::{read_volatile, write_volatile}; diff --git a/bsp-qemu-virt/src/main.rs b/bsp-qemu-virt/src/main.rs index 6ced6f4..dfdcc91 100644 --- a/bsp-qemu-virt/src/main.rs +++ b/bsp-qemu-virt/src/main.rs @@ -15,9 +15,9 @@ //! The boot flow is documented in [`docs/architecture/boot.md`][boot-doc] //! and the memory-layout decisions in [ADR-0012][adr-0012]. //! -//! [adr-0004]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0004-target-platforms.md -//! [adr-0012]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0012-boot-flow-qemu-virt.md -//! [boot-doc]: https://github.com/cemililik/Tyrne/blob/main/docs/architecture/boot.md +//! [adr-0004]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0004-target-platforms.md +//! [adr-0012]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0012-boot-flow-qemu-virt.md +//! [boot-doc]: https://github.com/HodeTech/Tyrne/blob/main/docs/architecture/boot.md #![no_std] #![no_main] @@ -69,7 +69,7 @@ use gic::{QemuVirtGic, QEMU_VIRT_GIC_CPU_INTERFACE_BASE, QEMU_VIRT_GIC_DISTRIBUT // reservations cover everything that must never be handed to a // runtime caller. // -// [ADR-0012]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0012-boot-flow-qemu-virt.md +// [ADR-0012]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0012-boot-flow-qemu-virt.md /// PMM-managed extent base PA. Matches `linker.ld` `MEMORY` `RAM` /// `ORIGIN` for QEMU virt. @@ -107,7 +107,7 @@ type BspPmm = Pmm; /// peripheral addresses. 
QEMU `virt` has exposed this address across /// all versions the project targets. /// -/// [adr-0012]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0012-boot-flow-qemu-virt.md +/// [adr-0012]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0012-boot-flow-qemu-virt.md const PL011_UART_BASE: usize = 0x0900_0000; // ─── StaticCell ─────────────────────────────────────────────────────────────── @@ -161,7 +161,7 @@ impl StaticCell { /// and must not use the pointer to create a `&mut T` that outlives a /// cooperative context switch (ADR-0021). Audit: UNSAFE-2026-0013. /// - /// [ADR-0021]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0021-raw-pointer-scheduler-ipc-bridge.md + /// [ADR-0021]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0021-raw-pointer-scheduler-ipc-bridge.md #[inline] #[allow( clippy::mut_from_ref, @@ -310,7 +310,7 @@ static BOOTSTRAP_AS_TABLE: StaticCell = StaticCell::new(); /// (kernel mappings in userspace AS, EL0-ready context, syscall /// entry). /// -/// [adr-0029]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0029-initial-userspace-image-format.md +/// [adr-0029]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0029-initial-userspace-image-format.md static USERSPACE_IMAGE: &[u8] = &[0x40, 0x05, 0x80, 0x52, 0xc0, 0x03, 0x5f, 0xd6]; /// Base VA the loader places the image at — userspace VA range per @@ -323,7 +323,7 @@ static USERSPACE_IMAGE: &[u8] = &[0x40, 0x05, 0x80, 0x52, 0xc0, 0x03, 0x5f, 0xd6 /// reading the smoke trace. Hard-coded for the placeholder blob; B6's /// `userland` linker script picks the real VA. /// -/// [adr-0027]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md +/// [adr-0027]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md const USERSPACE_IMAGE_BASE_VA: usize = 0x0080_0000; /// Stack region size in `PAGE_SIZE`-multiples. 
Minimum 1; v1's @@ -391,7 +391,7 @@ static EP_CAP_B: StaticCell = StaticCell::new(); /// not on `kernel_entry`'s stack) avoids a second BSP static-cell churn /// when task destruction / status-query APIs arrive in later Phase B work. /// -/// [ADR-0016]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0016-kernel-object-storage.md +/// [ADR-0016]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0016-kernel-object-storage.md static TASK_ARENA: StaticCell = StaticCell::new(); // ─── Idle task ──────────────────────────────────────────────────────────────── @@ -423,8 +423,8 @@ static TASK_ARENA: StaticCell = StaticCell::new(); /// the BSP did not register idle at all (i.e. `register_idle` was not /// called before `start`). /// -/// [ADR-0022]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0022-idle-task-and-typed-scheduler-deadlock.md -/// [ADR-0026]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0026-idle-dispatch-fallback.md +/// [ADR-0022]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0022-idle-task-and-typed-scheduler-deadlock.md +/// [ADR-0026]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0026-idle-dispatch-fallback.md fn idle_entry() -> ! { // SAFETY: CPU is fully initialised in `kernel_entry` before `start()`; // single-core cooperative scheduling prevents concurrent access. @@ -907,17 +907,22 @@ pub extern "C" fn kernel_entry() -> ! { // `kernel/src/mm/address_space.rs::tests`. let bootstrap_root_pa = l0_root.as_usize(); // SAFETY: - // - AS_ARENA was just written above; momentary &mut for the - // create_address_space call drops at scope end. Audit: - // UNSAFE-2026-0010 (StaticCell pattern) + UNSAFE-2026-0014 - // (momentary &mut to the just-initialised arena). + // - AS_ARENA was just written above; the momentary &mut to the + // just-initialised arena (the `assume_init_mut()` line) drops at + // scope end. 
Audit: UNSAFE-2026-0010 (StaticCell pattern) + + // UNSAFE-2026-0014 (momentary &mut to the just-initialised arena). + // These two entries cover ONLY the StaticCell/arena publish + // mechanics, not the `from_existing_root` wrap below. // - `QemuVirtAddressSpace::from_existing_root(l0_root)` requires // `l0_root` to be a currently-live VMSAv8 L0 translation table // (see its `# Safety` doc). `mmu_bootstrap` populated this exact // frame and wrote its PA into `TTBR0_EL1` before this block runs // (we are post-`mmu_bootstrap` at this point); the kernel-half // mappings are installed; the descriptors are correctly encoded - // per the host-tested `tyrne_hal::mmu::vmsav8` encoders. + // per the host-tested `tyrne_hal::mmu::vmsav8` encoders. The wrap + // does NOT zero-fill the live root (which would unmap the running + // kernel) — that is why it cannot route through the zero-fill + // `create_address_space`. Audit: UNSAFE-2026-0028. let bootstrap_as_handle = unsafe { let arena = (*AS_ARENA.0.get()).assume_init_mut(); let inner = mmu::QemuVirtAddressSpace::from_existing_root(l0_root); @@ -1231,7 +1236,7 @@ pub extern "C" fn kernel_entry() -> ! { // UNSAFE-2026-0009 (init_context site) + UNSAFE-2026-0014 // (register_idle's momentary `&mut Scheduler` discipline). // - // [ADR-0026]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0026-idle-dispatch-fallback.md + // [ADR-0026]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0026-idle-dispatch-fallback.md unsafe { sched .add_task( diff --git a/bsp-qemu-virt/src/mmu.rs b/bsp-qemu-virt/src/mmu.rs index fdfa7a0..8011a05 100644 --- a/bsp-qemu-virt/src/mmu.rs +++ b/bsp-qemu-virt/src/mmu.rs @@ -14,10 +14,13 @@ //! (TLB asm + barriers). //! - UNSAFE-2026-0025 — per-call `Mmu::map` / `Mmu::unmap` page-table entry //! writes; lands with the body of those methods (Stage 4). +//! - UNSAFE-2026-0028 — `QemuVirtAddressSpace::from_existing_root` wraps the +//! 
already-live, populated bootstrap L0 root without zero-fill (distinct +//! from `create_address_space`'s zero-filled-root contract). //! -//! [adr-0027]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md -//! [UNSAFE-2026-0023]: https://github.com/cemililik/Tyrne/blob/main/docs/audits/unsafe-log.md -//! [UNSAFE-2026-0024]: https://github.com/cemililik/Tyrne/blob/main/docs/audits/unsafe-log.md +//! [adr-0027]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md +//! [UNSAFE-2026-0023]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md +//! [UNSAFE-2026-0024]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md // `QemuVirtMmu` and its `Mmu` impl are the post-bootstrap address- // space-management surface (per ADR-0027 §Decision outcome (c)). The @@ -120,7 +123,9 @@ impl QemuVirtAddressSpace { /// `TTBR0_EL1`. The bootstrap path is the only well-known /// already-live root in v1. /// - /// [adr-0028]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md + /// Audit: UNSAFE-2026-0028. + /// + /// [adr-0028]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md #[must_use] pub unsafe fn from_existing_root(root: PhysFrame) -> Self { Self { root } @@ -148,9 +153,26 @@ impl QemuVirtMmu { impl Mmu for QemuVirtMmu { type AddressSpace = QemuVirtAddressSpace; + /// # Safety + /// + /// Inherits the [`Mmu::create_address_space`] trait-declaration + /// contract: `root` must be a [`PAGE_SIZE`]-sized physical frame that + /// is exclusively owned by the caller for the lifetime of the + /// resulting address space, and zero-initialised. (For the *already- + /// live* bootstrap root, use [`QemuVirtAddressSpace::from_existing_root`] + /// instead — its contract is the inverse: a populated, non-zero root.) 
unsafe fn create_address_space(&self, root: PhysFrame) -> QemuVirtAddressSpace { - // No allocation; the safety contract of the trait method covers - // exclusive ownership + zero-initialisation of `root`. + // SAFETY: this body performs NO unsafe operation — it stores the + // `PhysFrame` value (an aligned address) without dereferencing it. + // The unsafety is entirely the trait-level caller contract above + // (root exclusively-owned + zero-initialised); the resulting AS's + // soundness when later walked by `Mmu::map`/`unmap` rides those + // preconditions and the UNSAFE-2026-0025 walker invariants. No + // separate audit-log entry: per unsafe-policy §4, a trait-impl + // `unsafe fn` whose body is alloc-free inherits the trait + // declaration's contract; the zero-fill responsibility is the + // caller's (kernel `cap_create_address_space`, covered by + // UNSAFE-2026-0026's PMM zero-fill). QemuVirtAddressSpace { root } } diff --git a/bsp-qemu-virt/src/mmu_bootstrap.rs b/bsp-qemu-virt/src/mmu_bootstrap.rs index a37989c..9fa59b0 100644 --- a/bsp-qemu-virt/src/mmu_bootstrap.rs +++ b/bsp-qemu-virt/src/mmu_bootstrap.rs @@ -24,10 +24,10 @@ //! (`TLBI VMALLE1` / `IC IALLU` / `DSB ISH` / `ISB`). Same scope- //! extension Amendment pattern. //! -//! [adr-0027]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md -//! [UNSAFE-2026-0022]: https://github.com/cemililik/Tyrne/blob/main/docs/audits/unsafe-log.md -//! [UNSAFE-2026-0023]: https://github.com/cemililik/Tyrne/blob/main/docs/audits/unsafe-log.md -//! [UNSAFE-2026-0024]: https://github.com/cemililik/Tyrne/blob/main/docs/audits/unsafe-log.md +//! [adr-0027]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md +//! [UNSAFE-2026-0022]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md +//! [UNSAFE-2026-0023]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md +//! 
[UNSAFE-2026-0024]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md use core::arch::asm; @@ -83,8 +83,8 @@ const BLOCK_2MIB: u64 = 2 * 1024 * 1024; /// instruction-fetch after `SCTLR.M = 1` faults (per ADR-0027 /// §Simulation §Step 3). /// -/// [adr-0027]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md -/// [linker]: https://github.com/cemililik/Tyrne/blob/main/bsp-qemu-virt/linker.ld +/// [adr-0027]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md +/// [linker]: https://github.com/HodeTech/Tyrne/blob/main/bsp-qemu-virt/linker.ld pub unsafe fn mmu_bootstrap() { // `addr_of!` of an `extern "C" static` is itself safe — it does // not dereference the symbol; it just yields the linker-resolved diff --git a/docs/README.md b/docs/README.md index 6f94857..2358a4d 100644 --- a/docs/README.md +++ b/docs/README.md @@ -10,13 +10,16 @@ This is the canonical documentation tree for Tyrne. It is organized by the *kind | [decisions/](decisions/) | **Why is Tyrne built this way?** Architecture Decision Records (ADRs) in MADR format. One ADR per non-trivial choice. | | [guides/](guides/) | **How do I do X?** Task-oriented walkthroughs: setting up the toolchain, running the kernel under QEMU, porting to a new board, writing a new userspace driver. | | [standards/](standards/) | **How should things be written?** Documentation style, code style, commit message style, review checklists, security-review checklist. | +| [analysis/](analysis/) | **What is being built, and how is it going?** Per-task user stories and the business / code / security / performance reviews. | +| [roadmap/](roadmap/) | **What is active, and what is next?** Phase plans and the currently-active milestone. | +| [audits/](audits/) | **What `unsafe` is in the tree?** The append-only `unsafe` audit log — one entry per audited `unsafe` block. 
| | [glossary.md](glossary.md) | Project-specific terminology. | ## Suggested reading order for newcomers 1. [glossary.md](glossary.md) — terms used throughout the project. 2. [decisions/](decisions/) — the numbered ADRs, in order. These capture the reasoning behind the design and are the fastest way to get oriented. -3. [architecture/](architecture/) — start with the overview (Phase 2), then dive into whichever subsystem interests you. +3. [architecture/](architecture/) — start with [`overview.md`](architecture/overview.md), then dive into whichever subsystem interests you. 4. [standards/documentation-style.md](standards/documentation-style.md) — before you send a documentation PR. ## Conventions in this tree diff --git a/docs/analysis/reviews/business-reviews/2026-04-28-B1-closure.md b/docs/analysis/reviews/business-reviews/2026-04-28-B1-closure.md index 9f25729..56575f8 100644 --- a/docs/analysis/reviews/business-reviews/2026-04-28-B1-closure.md +++ b/docs/analysis/reviews/business-reviews/2026-04-28-B1-closure.md @@ -136,7 +136,7 @@ The B0 closure security review §8 recommendation ("architecture docs are a secu ### File renames and stale-link drift — third confirmation -The T-009 mini-retro flagged file-rename / cross-reference drift as a pattern. The B0 retro confirmed it as a pattern (T-008's review needed a second round to catch four broken cross-references). T-012 produced **zero broken cross-references in its own changes** during PR #10's two rounds. Why? The post-fix sweep DoD rule (codified during B0) plus design-first (the `exceptions.md` doc was authoritative from commit-1, so subsequent commits couldn't drift). 
**Pattern confirmed: design-first + post-fix sweep DoD + review-fix-round cadence = stale-link drift suppressed for new T-012 changes.** Pre-existing repo-wide drift — the 2026-04-22 Umbrix → Tyrne rename had moved most filename references to the new shape but left two artefact files (`docs/analysis/reviews/{code,security}-reviews/2026-04-21-umbrix-to-phase-a.md`) at the old name, surfaced by the B1-closure approval-review's N2 finding — was a separate concern outside PR #10's scope; the [`docs(refs):` commit `10e3351`](https://github.com/cemililik/Tyrne/commit/10e3351) (one of the four commits in PR #11, the same PR that lands this retro) renames both files to the new shape and fixes the three remaining link-form references that still point at the old name (the other ~20 references already used the new name and resolve correctly post-rename). The R6 CI follow-up (automated link check) is still desirable, both as a regression gate for new changes and as a backstop against future rename drift. +The T-009 mini-retro flagged file-rename / cross-reference drift as a pattern. The B0 retro confirmed it as a pattern (T-008's review needed a second round to catch four broken cross-references). T-012 produced **zero broken cross-references in its own changes** during PR #10's two rounds. Why? The post-fix sweep DoD rule (codified during B0) plus design-first (the `exceptions.md` doc was authoritative from commit-1, so subsequent commits couldn't drift). 
**Pattern confirmed: design-first + post-fix sweep DoD + review-fix-round cadence = stale-link drift suppressed for new T-012 changes.** Pre-existing repo-wide drift — the 2026-04-22 Umbrix → Tyrne rename had moved most filename references to the new shape but left two artefact files (`docs/analysis/reviews/{code,security}-reviews/2026-04-21-umbrix-to-phase-a.md`) at the old name, surfaced by the B1-closure approval-review's N2 finding — was a separate concern outside PR #10's scope; the [`docs(refs):` commit `10e3351`](https://github.com/HodeTech/Tyrne/commit/10e3351) (one of the four commits in PR #11, the same PR that lands this retro) renames both files to the new shape and fixes the three remaining link-form references that still point at the old name (the other ~20 references already used the new name and resolve correctly post-rename). The R6 CI follow-up (automated link check) is still desirable, both as a regression gate for new changes and as a backstop against future rename drift. ## Adjustments diff --git a/docs/analysis/reviews/business-reviews/2026-05-07-B1-closure.md b/docs/analysis/reviews/business-reviews/2026-05-07-B1-closure.md index 8166f85..4b45925 100644 --- a/docs/analysis/reviews/business-reviews/2026-05-07-B1-closure.md +++ b/docs/analysis/reviews/business-reviews/2026-05-07-B1-closure.md @@ -5,7 +5,7 @@ - **Period:** 2026-04-28 (PR #10 merge — original B1 implementation-complete) → 2026-05-07 (today; PR #15 merged; T-014 verified; B1 truly closed). - **Participants:** @cemililik (+ Claude Opus 4.7 agent as scribe; ten parallel multi-agent comprehensive code-review run on 2026-05-06; bot-driven review-rounds on PRs #12 / #13 / #14 / #15 from coderabbitai-bot, gemini-code-assist-bot, sourcery-ai-bot). 
-> **Canonical source for B1 closure metrics.** This artefact + the [security review](../security-reviews/2026-05-07-B1-closure.md) + the [performance baseline](../performance-optimization-reviews/2026-05-07-B1-closure.md) are the source of truth for B1's closing numbers (test counts, ELF section sizes, smoke trace, audit-log Amendments). Every other location that mentions B1 metrics ([`current.md`](../../../roadmap/current.md), [`phase-b.md`](../../../roadmap/phases/phase-b.md), [`T-014`](../../tasks/phase-b/T-014-idle-dispatch-fallback.md) review-history) is a *summary at its layer of abstraction* and should be read alongside this trio rather than as an independent record. When the metrics drift, the trio is what gets corrected first; downstream summaries follow. Per [sourcery-ai PR #16 review feedback](https://github.com/cemililik/Tyrne/pull/16). +> **Canonical source for B1 closure metrics.** This artefact + the [security review](../security-reviews/2026-05-07-B1-closure.md) + the [performance baseline](../performance-optimization-reviews/2026-05-07-B1-closure.md) are the source of truth for B1's closing numbers (test counts, ELF section sizes, smoke trace, audit-log Amendments). Every other location that mentions B1 metrics ([`current.md`](../../../roadmap/current.md), [`phase-b.md`](../../../roadmap/phases/phase-b.md), [`T-014`](../../tasks/phase-b/T-014-idle-dispatch-fallback.md) review-history) is a *summary at its layer of abstraction* and should be read alongside this trio rather than as an independent record. When the metrics drift, the trio is what gets corrected first; downstream summaries follow. Per [sourcery-ai PR #16 review feedback](https://github.com/HodeTech/Tyrne/pull/16). 
--- diff --git a/docs/analysis/reviews/business-reviews/2026-05-14-B3-closure.md b/docs/analysis/reviews/business-reviews/2026-05-14-B3-closure.md index e4b3c81..359f4b7 100644 --- a/docs/analysis/reviews/business-reviews/2026-05-14-B3-closure.md +++ b/docs/analysis/reviews/business-reviews/2026-05-14-B3-closure.md @@ -1,6 +1,6 @@ # Business review 2026-05-14 — B3 closure retrospective (post-T-017 + T-018) -- **Trigger:** milestone-completion. Phase B / Milestone B3 ("Address space abstraction") closed via T-018's [PR #28](https://github.com/cemililik/Tyrne/pull/28) merge into `main` on 2026-05-14 (merge commit [`47b0a86`](https://github.com/cemililik/Tyrne/commit/47b0a86)). The B3 implementation arc spans two tasks: T-017 (Physical Memory Manager) landed first on 2026-05-10 via PR #26; T-018 (`AddressSpace` kernel object + cap-gated `Mmu::map`/`unmap` wrappers + activation-on-context-switch) followed on 2026-05-11 internally and merged to `main` on 2026-05-14 after five review rounds. +- **Trigger:** milestone-completion. Phase B / Milestone B3 ("Address space abstraction") closed via T-018's [PR #28](https://github.com/HodeTech/Tyrne/pull/28) merge into `main` on 2026-05-14 (merge commit [`47b0a86`](https://github.com/HodeTech/Tyrne/commit/47b0a86)). The B3 implementation arc spans two tasks: T-017 (Physical Memory Manager) landed first on 2026-05-10 via PR #26; T-018 (`AddressSpace` kernel object + cap-gated `Mmu::map`/`unmap` wrappers + activation-on-context-switch) followed on 2026-05-11 internally and merged to `main` on 2026-05-14 after five review rounds. - **Scope:** the B3 PMM bring-up arc (PR #25 ADR-0035 design + PR #26 T-017 implementation) plus the AddressSpace arc (PR #27 ADR-0028 design + PR #28 T-018 implementation). Two cross-cutting additions that landed during T-018's review-round arc are absorbed here: [`MmuError::BlockMapped`][mmu-error] variant + the [`CapabilityTable::depth_of`][cap-depth] `pub(crate)` preflight helper. 
The `.claude/skills/` → `.agents/skills/` migration that also closed during the same arc is recorded as a §Plan-diff item rather than a B3 implementation item. - **Period:** 2026-05-09 (B2 closed; PR #24 closure-trio merged; ADR-0035 drafting reactivated) → 2026-05-14 (today; PR #28 merged; B3 truly closed). - **Participants:** @cemililik (+ Claude Opus 4.7 agent as scribe for the implementation arc; Claude Sonnet 4.6 agent as scribe for the T-018 review-round arc; bot-driven review-rounds on PRs #25 / #26 / #27 / #28 from coderabbitai-bot, gemini-code-assist-bot, sourcery-ai-bot, qodo-code-review-bot). @@ -139,14 +139,14 @@ PR #28 went through **five review rounds** before merge, an unusual depth that d | Round | Trigger | Findings | Applied | Skipped (reason) | |---|---|---|---|---| -| **1** ([`fe2d9bf`](https://github.com/cemililik/Tyrne/commit/fe2d9bf)) | First external pass on PR #28's bundled commits | 2 | 2 | 0 | -| **2** ([`2cc1678`](https://github.com/cemililik/Tyrne/commit/2cc1678)) | Re-read after R1 fixes | 1 | 1 | 1 (block-mapped unmap path — deferred to forward variant addition) | -| **Self-review** ([`e74ad60`](https://github.com/cemililik/Tyrne/commit/e74ad60)) | Maintainer-initiated agent self-review | 3 | 3 | 0 | -| **3** ([`2ed16fc`](https://github.com/cemililik/Tyrne/commit/2ed16fc)) | Re-read after self-review | 6 | 6 | 1 (per-op MAP/UNMAP rights — deferred to B5+ ADR alongside MemoryRegion) | -| **Daily auditor** ([`8b9f52e`](https://github.com/cemililik/Tyrne/commit/8b9f52e)) | A new auditor agent re-read R3's deferred + open items | 4 | 4 | 0 (the deferred `BlockMapped` from R2 + the depth-preflight refinement + the Deadlock-doc widening + the unsafe-log activation-hook enumeration completeness) | -| **Skills migration** ([`0d4e62c`](https://github.com/cemililik/Tyrne/commit/0d4e62c) + [`77d3e7e`](https://github.com/cemililik/Tyrne/commit/77d3e7e)) | Cross-cutting `.claude/skills/` → `.agents/skills/` consolidation surfaced by R4's link-rot 
finding | 1 (migration) | 1 | 0 | -| **R4 / CodeRabbit on R3's HEAD** ([`3b86a71`](https://github.com/cemililik/Tyrne/commit/3b86a71)) | Bot review of the skills migration + audit-log + ADR-0028 row-3 corrections | 6 | 6 | 1 (Nit — duplicated subsystems list; informative for first-time readers) | -| **R5 / CodeRabbit on R4's HEAD** ([`097acfd`](https://github.com/cemililik/Tyrne/commit/097acfd)) | Bot follow-up on `t-018.md` doc drift | 2 | 2 | 0 | +| **1** ([`fe2d9bf`](https://github.com/HodeTech/Tyrne/commit/fe2d9bf)) | First external pass on PR #28's bundled commits | 2 | 2 | 0 | +| **2** ([`2cc1678`](https://github.com/HodeTech/Tyrne/commit/2cc1678)) | Re-read after R1 fixes | 1 | 1 | 1 (block-mapped unmap path — deferred to forward variant addition) | +| **Self-review** ([`e74ad60`](https://github.com/HodeTech/Tyrne/commit/e74ad60)) | Maintainer-initiated agent self-review | 3 | 3 | 0 | +| **3** ([`2ed16fc`](https://github.com/HodeTech/Tyrne/commit/2ed16fc)) | Re-read after self-review | 6 | 6 | 1 (per-op MAP/UNMAP rights — deferred to B5+ ADR alongside MemoryRegion) | +| **Daily auditor** ([`8b9f52e`](https://github.com/HodeTech/Tyrne/commit/8b9f52e)) | A new auditor agent re-read R3's deferred + open items | 4 | 4 | 0 (the deferred `BlockMapped` from R2 + the depth-preflight refinement + the Deadlock-doc widening + the unsafe-log activation-hook enumeration completeness) | +| **Skills migration** ([`0d4e62c`](https://github.com/HodeTech/Tyrne/commit/0d4e62c) + [`77d3e7e`](https://github.com/HodeTech/Tyrne/commit/77d3e7e)) | Cross-cutting `.claude/skills/` → `.agents/skills/` consolidation surfaced by R4's link-rot finding | 1 (migration) | 1 | 0 | +| **R4 / CodeRabbit on R3's HEAD** ([`3b86a71`](https://github.com/HodeTech/Tyrne/commit/3b86a71)) | Bot review of the skills migration + audit-log + ADR-0028 row-3 corrections | 6 | 6 | 1 (Nit — duplicated subsystems list; informative for first-time readers) | +| **R5 / CodeRabbit on R4's HEAD** 
([`097acfd`](https://github.com/HodeTech/Tyrne/commit/097acfd)) | Bot follow-up on `t-018.md` doc drift | 2 | 2 | 0 | The pattern that emerged: agent-driven re-reads compound in value when each round names a different *axis* (correctness, security, doc-discipline, link-integrity). The first round catches obvious bugs; later rounds catch second-order effects of the first round's fixes (round 2's deferred `BlockMapped` made round 4's audit-log clarification load-bearing; round 4's `.agents/skills/` consolidation made round 5's `t-018.md` link drift visible). The cost is real (six review-round commits) but each commit is small (median diff ≈ 50–150 lines) and the result is a final merge with no known open issues at the bot-reviewer layer. diff --git a/docs/analysis/reviews/business-reviews/README.md b/docs/analysis/reviews/business-reviews/README.md index e37322e..3e022eb 100644 --- a/docs/analysis/reviews/business-reviews/README.md +++ b/docs/analysis/reviews/business-reviews/README.md @@ -25,11 +25,11 @@ A business review may point at outcomes from those other reviews as part of "wha | Date | Scope | File | |------|-------|------| | 2026-04-21 | Milestone A2 — Capability table foundation | [2026-04-21-A2-completion.md](2026-04-21-A2-completion.md) | -| 2026-04-21 | A6 completion / Phase A retrospective (A3–A6) | [2026-04-21-A6-completion.md](2026-04-21-A6-completion.md) | +| 2026-04-21 | A6 completion / Phase A retrospective (A3–A6) — predates the five-section template; its headings differ from `master-plan.md` | [2026-04-21-A6-completion.md](2026-04-21-A6-completion.md) | | 2026-04-22 | T-006 mini-retro — raw-pointer scheduler API refactor | [2026-04-22-T-006-mini-retro.md](2026-04-22-T-006-mini-retro.md) | | 2026-04-27 | T-009 mini-retro — Timer impl + lessons-into-rules in ADR-0013 | [2026-04-27-T-009-mini-retro.md](2026-04-27-T-009-mini-retro.md) | | 2026-04-27 | B0 closure retrospective — Phase A exit hygiene + ADR-0025 + cost-of-arc analysis | 
[2026-04-27-B0-closure.md](2026-04-27-B0-closure.md) | -| 2026-04-28 | B1 closure retrospective — drop to EL1 + exception infrastructure (T-013 + T-012) | [2026-04-28-B1-closure.md](2026-04-28-B1-closure.md) | +| 2026-04-28 | B1 closure retrospective — drop to EL1 + exception infrastructure (T-013 + T-012); **superseded by [2026-05-07-B1-closure.md](2026-05-07-B1-closure.md)** — filed pre-smoke, before the B1 smoke regression re-opened the milestone (see the 2026-05-06 smoke-regression mini-retro) | [2026-04-28-B1-closure.md](2026-04-28-B1-closure.md) | | 2026-05-06 | B1 smoke-regression mini-retro — first end-to-end smoke surfaced an idle-dispatch hang inherited from T-007 / ADR-0022 Option A; B1 reopened pending T-014 fix | [2026-05-06-B1-smoke-regression.md](2026-05-06-B1-smoke-regression.md) | | 2026-05-07 | B1 closure retrospective (post-T-014) — fresh closure trio replacing the 2026-04-28 trio's load-bearing role; T-014 + ADR-0026 fixed the smoke regression; α/β/γ closed comprehensive-review Track-E/J/A/B/F/G/I non-blockers | [2026-05-07-B1-closure.md](2026-05-07-B1-closure.md) | | 2026-05-09 | B2 closure retrospective — MMU activation + kernel-half mapping (T-016); ADR-0027 + `MapperFlush` flush-token discipline; closed cleanly on first attempt (no smoke-regression arc) | [2026-05-09-B2-closure.md](2026-05-09-B2-closure.md) | diff --git a/docs/analysis/reviews/business-reviews/master-plan.md b/docs/analysis/reviews/business-reviews/master-plan.md index 8c018c0..a76c7d2 100644 --- a/docs/analysis/reviews/business-reviews/master-plan.md +++ b/docs/analysis/reviews/business-reviews/master-plan.md @@ -56,7 +56,7 @@ A single reviewer can cover all roles sequentially; the benefit of naming them i **Task:** translate the learnings into concrete adjustments. 
-- For each learning that implies action, propose: a new task (to be opened via [`start-task`](../../../../.claude/skills/start-task/SKILL.md)), an ADR to write (via [`write-adr`](../../../../.claude/skills/write-adr/SKILL.md)), or a standard/guide update. +- For each learning that implies action, propose: a new task (to be opened via [`start-task`](../../../../.agents/skills/start-task/SKILL.md)), an ADR to write (via [`write-adr`](../../../../.agents/skills/write-adr/SKILL.md)), or a standard/guide update. - For each adjustment, name the trigger (the next thing to do to act on it). - Do not execute the adjustments here — just record them. Execution happens after the review is committed. diff --git a/docs/analysis/reviews/code-reviews/2026-05-06-full-tree/track-j-hygiene.md b/docs/analysis/reviews/code-reviews/2026-05-06-full-tree/track-j-hygiene.md index 2d415d0..5e7d6ca 100644 --- a/docs/analysis/reviews/code-reviews/2026-05-06-full-tree/track-j-hygiene.md +++ b/docs/analysis/reviews/code-reviews/2026-05-06-full-tree/track-j-hygiene.md @@ -21,7 +21,7 @@ Files with `umbrix` (case-insensitive, excluding `.git/`, `target/`, `docs/analy - [`docs/analysis/reviews/code-reviews/2026-05-06-full-tree-comprehensive-review-plan.md`](../2026-05-06-full-tree-comprehensive-review-plan.md) — four mentions, all describing this Track J's own scope (and the glossary track's "no orphaned entries from the umbrix→tyrne rename" check). Self-referential / scoping prose; not residue. - One filename match — `docs/analysis/technical-analysis/WOSR/12-comparison-with-umbrix.md` — sits inside the explicitly out-of-scope `technical-analysis/` subtree (per the review plan's §2 carve-out). -**Verdict on the residue scan: clean.** Commit [`10e3351`](https://github.com/cemililik/Tyrne/commit/10e3351) closed the file-name half of the rename. The four lines that still mention "Umbrix" are all narrative, not link or identifier residue. 
+**Verdict on the residue scan: clean.** Commit [`10e3351`](https://github.com/HodeTech/Tyrne/commit/10e3351) closed the file-name half of the rename. The four lines that still mention "Umbrix" are all narrative, not link or identifier residue. ## Repository-URL drift (scope cousin to umbrix residue) @@ -30,7 +30,7 @@ While verifying the umbrix scan I noticed a **second, distinct** rename inconsis | Form | Count (excluding `target/`, `.git/`, `technical-analysis/`) | Where | |---|---|---| | `https://github.com/cemililik/TyrneOS` | 66 (across 27 files) | `Cargo.toml`, `SECURITY.md`, every `tyrne-hal` / `tyrne-kernel` / `tyrne-bsp-qemu-virt` rustdoc cross-reference, `docs/guides/run-under-qemu.md`'s clone command | -| `https://github.com/cemililik/Tyrne` | 4 | recent docs only — `T-012-exception-and-irq-infrastructure.md`, `2026-04-28-B1-closure.md` (business + security), `current.md` | +| `https://github.com/HodeTech/Tyrne` | 4 | recent docs only — `T-012-exception-and-irq-infrastructure.md`, `2026-04-28-B1-closure.md` (business + security), `current.md` | | `git@github.com:cemililik/UmbrixOS.git` (origin) | 1 | local `git remote -v` (not in working tree) | The 64 `TyrneOS` URLs all 404 against the actual remote (whatever the canonical name is — `git remote -v` and the four newest references disagree). Track A's cross-track note (`track-a-kernel.md` line 85) already observed "every link points at `cemililik/TyrneOS`" but flagged it as a clean-state observation rather than a finding; in fact none of those URLs resolve. Cross-track note coordinates with Track G (root-doc link integrity) and Track H (rustdoc cross-references) on whether to fix in this review window or hand off as a follow-up commit. See *Findings → Non-blocking → J-NB1* below. 
@@ -135,7 +135,7 @@ Severity: **Non-blocking** — the term is a single word, semantically clear fro - `docs/standards/security-review.md` and `docs/standards/code-review.md` use the English vocabulary (`Critical` / `High` / `Medium` / `Low`). - A reader using the project as a reference for *their own* security-review template will see two competing severity vocabularies in the same review tree. -Recommended action: pick one English replacement (`High` is the natural carry-over) and apply it across the seven affected non-quoted references. The two commit-message-quoted instances ([`db3a4c7`](https://github.com/cemililik/Tyrne/commit/db3a4c7) and the [T-009 mini-retro line 29 quote](../../business-reviews/2026-04-27-T-009-mini-retro.md)) cannot be retroactively edited and should be left as-is. A small follow-up `docs(localization):` commit is the most compact framing. +Recommended action: pick one English replacement (`High` is the natural carry-over) and apply it across the seven affected non-quoted references. The two commit-message-quoted instances ([`db3a4c7`](https://github.com/HodeTech/Tyrne/commit/db3a4c7) and the [T-009 mini-retro line 29 quote](../../business-reviews/2026-04-27-T-009-mini-retro.md)) cannot be retroactively edited and should be left as-is. A small follow-up `docs(localization):` commit is the most compact framing. 
### Observation diff --git a/docs/analysis/reviews/code-reviews/2026-05-07-pr-12-to-17-multi-axis-review.md b/docs/analysis/reviews/code-reviews/2026-05-07-pr-12-to-17-multi-axis-review.md index 290243f..0898c3c 100644 --- a/docs/analysis/reviews/code-reviews/2026-05-07-pr-12-to-17-multi-axis-review.md +++ b/docs/analysis/reviews/code-reviews/2026-05-07-pr-12-to-17-multi-axis-review.md @@ -1,12 +1,12 @@ # Code review 2026-05-07 — PR #12 to PR #17 multi-axis post-merge sweep -- **Change:** PRs [#12](https://github.com/cemililik/Tyrne/pull/12), [#13](https://github.com/cemililik/Tyrne/pull/13), [#14](https://github.com/cemililik/Tyrne/pull/14), [#15](https://github.com/cemililik/Tyrne/pull/15), [#16](https://github.com/cemililik/Tyrne/pull/16), [#17](https://github.com/cemililik/Tyrne/pull/17) (the 14-day window 2026-04-23 → 2026-05-07 closing on `main`). +- **Change:** PRs [#12](https://github.com/HodeTech/Tyrne/pull/12), [#13](https://github.com/HodeTech/Tyrne/pull/13), [#14](https://github.com/HodeTech/Tyrne/pull/14), [#15](https://github.com/HodeTech/Tyrne/pull/15), [#16](https://github.com/HodeTech/Tyrne/pull/16), [#17](https://github.com/HodeTech/Tyrne/pull/17) (the 14-day window 2026-04-23 → 2026-05-07 closing on `main`). - **Merge SHA range:** `298b5d2a..8dc433ee` on `main`; HEAD at review time `c258ee3` (doc-side rollup commit on `t-015-endpoint-rollback-cancel-recv`). - **Reviewer:** @cemililik (+ Claude Opus 4.7 multi-agent fan-out — 8 parallel axis agents). - **Type:** Post-merge multi-axis sweep — *not* a merge gate. Findings flow into a follow-up PR; "Block" reserved for genuine regression/audit-discipline violation. - **Risk class:** Security-sensitive (PR #17 touches IPC + scheduler invariants; cross-references the [2026-05-07 B1 closure security review](../security-reviews/2026-05-07-B1-closure.md), whose single forward-flag this PR closes). 
-> **2026-05-08 closure status:** all 9 hygiene items from §Follow-up backlog closed in [PR #18](https://github.com/cemililik/Tyrne/pull/18) (merge `aa7e6c5`). Forward-flagged item 11 (P10 wall-clock harness) closed by the [2026-05-08 B2-prep integration PR](https://github.com/cemililik/Tyrne/pull/22) (replaces the originally-opened #19 / #20 / #21). Items 10 / 12 / 13 remain forward-flagged on their downstream venues (ADR-0030 / ADR-0019; first userspace-destroy task; B5+ preemption ADR). See §Follow-up backlog at the bottom of this file for per-item closure annotations. +> **2026-05-08 closure status:** all 9 hygiene items from §Follow-up backlog closed in [PR #18](https://github.com/HodeTech/Tyrne/pull/18) (merge `aa7e6c5`). Forward-flagged item 11 (P10 wall-clock harness) closed by the [2026-05-08 B2-prep integration PR](https://github.com/HodeTech/Tyrne/pull/22) (replaces the originally-opened #19 / #20 / #21). Items 10 / 12 / 13 remain forward-flagged on their downstream venues (ADR-0030 / ADR-0019; first userspace-destroy task; B5+ preemption ADR). See §Follow-up backlog at the bottom of this file for per-item closure annotations. ## Scope @@ -43,12 +43,12 @@ The 0 / 1-forward / 13-Minor distribution is consistent with the prior 2026-05-0 | PR | Headline change | Highest-severity finding from this review | |----|-----------------|-------------------------------------------| -| **#12** [`298b5d2a`](https://github.com/cemililik/Tyrne/pull/12) | T-014 idle-dispatch hotfix; ADR-0026 supersedes ADR-0022's *idle-task-location* axis (Option A → separate `Scheduler::idle: Option` slot, dispatched via `ready.dequeue().or(s.idle)`); UNSAFE-2026-0014 3rd Amendment names `register_idle`. | **Minor (G):** ADR-0026 Propose + Accept landed in a single commit (`10dea48`), in tension with `write-adr` §10 "separate commit" but technically permitted by `supersede-adr` §7 "solo-phase combined commit" — needs a clarifying rider reconciling the two skill clauses. 
| -| **#13** [`cfc49249`](https://github.com/cemililik/Tyrne/pull/13) | α doc-fix sweep — closes the 7 Track-E blockers from the 2026-05-06 comprehensive review (GIC v3→v2 in 3 sites, idle-body documentation per ADR-0026, security-model open-question, glossary dead-link). | None new; Track E confirmed all 7 blockers closed and Mermaid diagrams stay valid. | -| **#14** [`1cd810d9`](https://github.com/cemililik/Tyrne/pull/14) | Repo-wide `TyrneOS → Tyrne` URL rename (64 URLs) + `Yüksek → High` localization sweep (5 docs); review-round caught `cd TyrneOS` orphan + `TyrneOS repository` typo in SECURITY.md (both fixed in PR). | **Minor (H):** UNSAFE-2026-0016's body was edited in-place by the localization sweep, technically violating the introducing-commit-boundary discipline — fix is a small Amendment or a `unsafe-policy.md §3` exemption for mechanical localization. (Track B also notes the brief mis-attributed the `tools/run-qemu.sh` Bash 3.2 fix to PR #14 — actual introducing commit is `0f0c97c` on PR #12's branch.) | -| **#15** [`e9fa019a`](https://github.com/cemililik/Tyrne/pull/15) | γ code-side polish — Track A/B/F kernel + HAL + test, Track G/I BSP + integration; `register_idle` `debug_assert!` → `assert!`; γ.6 reverted "defensive loop after start()" (clippy::unreachable_code + too_many_lines); 4 line-ref drops + 1 metadata trim in review-round. | **Praise (B):** clean disposition of Track-G #4 by documented-rejection (`-> !` is the type-system belt-and-braces), plus `Aarch64TaskContext == 168` const-assert mirroring `TrapFrame == 192` extends the existing discipline. | -| **#16** [`95b15aa1`](https://github.com/cemililik/Tyrne/pull/16) | B1 closure trio (business + consolidated security + performance baseline 2026-05-07, replacing the 2026-04-28 trio's load-bearing role); δ items: ADR-0023 Deferred placeholder, ADR-0032 Propose, T-015 Draft, `write-adr` skill §Simulation codification, master-plan AC change "no closure-trio without recorded smoke." 
| **Minor (G):** the §Simulation rule was retro-extracted, not pre-existing — commit `77a578a` codifies it 84 seconds before commit `4aa4b24` proposes ADR-0032 with a Simulation table. Chronology is honest in commit bodies but artefacts read as if rule pre-existed; one-line rider in ADR-0026 §Revision notes naming the codifying commit closes it. Master-plan AC also landed only in `business-reviews/master-plan.md`; security and performance master-plans not cross-referenced. | -| **#17** [`8dc433ee`](https://github.com/cemililik/Tyrne/pull/17) | T-015 implementation — ADR-0032 Accept (separate commit per `write-adr` §10), new `ipc_cancel_recv` primitive, `ipc_recv_and_yield` Phase 2 Deadlock branch upgraded to symmetric scheduler + endpoint rollback; 6 new tests (5 IPC + 1 sched); UNSAFE-2026-0014 4th Amendment names the new Deadlock-branch momentary `&mut EndpointArena` + `&mut IpcQueues` site; ADR-0017 §Revision rider records additive recovery primitive (user-observable surface unchanged). | **Major forward-flag (C):** `RecvWaiting` unit-variant identity gap — kernel records no waiter identity; v1's depth-1 cooperative discipline makes it unobservable, but ADR-0030 (syscall ABI) and ADR-0019 (multi-waiter) must address it. ADR-0032 forecasts the `caller: TaskHandle` signature change correctly at [kernel/src/ipc/mod.rs:447-453](../../../../kernel/src/ipc/mod.rs#L447). **Tracked, no v1 action.** | +| **#12** [`298b5d2a`](https://github.com/HodeTech/Tyrne/pull/12) | T-014 idle-dispatch hotfix; ADR-0026 supersedes ADR-0022's *idle-task-location* axis (Option A → separate `Scheduler::idle: Option` slot, dispatched via `ready.dequeue().or(s.idle)`); UNSAFE-2026-0014 3rd Amendment names `register_idle`. | **Minor (G):** ADR-0026 Propose + Accept landed in a single commit (`10dea48`), in tension with `write-adr` §10 "separate commit" but technically permitted by `supersede-adr` §7 "solo-phase combined commit" — needs a clarifying rider reconciling the two skill clauses. 
| +| **#13** [`cfc49249`](https://github.com/HodeTech/Tyrne/pull/13) | α doc-fix sweep — closes the 7 Track-E blockers from the 2026-05-06 comprehensive review (GIC v3→v2 in 3 sites, idle-body documentation per ADR-0026, security-model open-question, glossary dead-link). | None new; Track E confirmed all 7 blockers closed and Mermaid diagrams stay valid. | +| **#14** [`1cd810d9`](https://github.com/HodeTech/Tyrne/pull/14) | Repo-wide `TyrneOS → Tyrne` URL rename (64 URLs) + `Yüksek → High` localization sweep (5 docs); review-round caught `cd TyrneOS` orphan + `TyrneOS repository` typo in SECURITY.md (both fixed in PR). | **Minor (H):** UNSAFE-2026-0016's body was edited in-place by the localization sweep, technically violating the introducing-commit-boundary discipline — fix is a small Amendment or a `unsafe-policy.md §3` exemption for mechanical localization. (Track B also notes the brief mis-attributed the `tools/run-qemu.sh` Bash 3.2 fix to PR #14 — actual introducing commit is `0f0c97c` on PR #12's branch.) | +| **#15** [`e9fa019a`](https://github.com/HodeTech/Tyrne/pull/15) | γ code-side polish — Track A/B/F kernel + HAL + test, Track G/I BSP + integration; `register_idle` `debug_assert!` → `assert!`; γ.6 reverted "defensive loop after start()" (clippy::unreachable_code + too_many_lines); 4 line-ref drops + 1 metadata trim in review-round. | **Praise (B):** clean disposition of Track-G #4 by documented-rejection (`-> !` is the type-system belt-and-braces), plus `Aarch64TaskContext == 168` const-assert mirroring `TrapFrame == 192` extends the existing discipline. | +| **#16** [`95b15aa1`](https://github.com/HodeTech/Tyrne/pull/16) | B1 closure trio (business + consolidated security + performance baseline 2026-05-07, replacing the 2026-04-28 trio's load-bearing role); δ items: ADR-0023 Deferred placeholder, ADR-0032 Propose, T-015 Draft, `write-adr` skill §Simulation codification, master-plan AC change "no closure-trio without recorded smoke." 
| **Minor (G):** the §Simulation rule was retro-extracted, not pre-existing — commit `77a578a` codifies it 84 seconds before commit `4aa4b24` proposes ADR-0032 with a Simulation table. Chronology is honest in commit bodies but artefacts read as if rule pre-existed; one-line rider in ADR-0026 §Revision notes naming the codifying commit closes it. Master-plan AC also landed only in `business-reviews/master-plan.md`; security and performance master-plans not cross-referenced. | +| **#17** [`8dc433ee`](https://github.com/HodeTech/Tyrne/pull/17) | T-015 implementation — ADR-0032 Accept (separate commit per `write-adr` §10), new `ipc_cancel_recv` primitive, `ipc_recv_and_yield` Phase 2 Deadlock branch upgraded to symmetric scheduler + endpoint rollback; 6 new tests (5 IPC + 1 sched); UNSAFE-2026-0014 4th Amendment names the new Deadlock-branch momentary `&mut EndpointArena` + `&mut IpcQueues` site; ADR-0017 §Revision rider records additive recovery primitive (user-observable surface unchanged). | **Major forward-flag (C):** `RecvWaiting` unit-variant identity gap — kernel records no waiter identity; v1's depth-1 cooperative discipline makes it unobservable, but ADR-0030 (syscall ABI) and ADR-0019 (multi-waiter) must address it. ADR-0032 forecasts the `caller: TaskHandle` signature change correctly at [kernel/src/ipc/mod.rs:447-453](../../../../kernel/src/ipc/mod.rs#L447). **Tracked, no v1 action.** | ## Per-track summary @@ -104,7 +104,7 @@ Patterns repeating across two or more tracks — promote-to-process candidates: Severity-sorted, action-cumulative. **No Blocker**, **no v1 Major**. The Major C-1 is forward-flagged for ADR-0030 / ADR-0019 work and is *not* a B2-prep follow-up. -> **Status (2026-05-08): all 9 hygiene items closed in [PR #18](https://github.com/cemililik/Tyrne/pull/18) (merge `aa7e6c5`); item 11 (P10 wall-clock harness) closed in this branch's integration PR (replaces #19/#20/#21). 
Items 10 / 12 / 13 remain forward-flagged on the appropriate downstream venues. See per-item closure notes below.** +> **Status (2026-05-08): all 9 hygiene items closed in [PR #18](https://github.com/HodeTech/Tyrne/pull/18) (merge `aa7e6c5`); item 11 (P10 wall-clock harness) closed in this branch's integration PR (replaces #19/#20/#21). Items 10 / 12 / 13 remain forward-flagged on the appropriate downstream venues. See per-item closure notes below.** ### Hygiene PR before ADR-0027 drafting (closed by PR #18) @@ -132,7 +132,7 @@ The 2026-05-06 ten-agent comprehensive review missed the T-014 smoke regression. ## References - 8 axis files under [`2026-05-07-pr-12-to-17-multi-axis-review/`](2026-05-07-pr-12-to-17-multi-axis-review/) (track-a..h) -- PR pages: [#12](https://github.com/cemililik/Tyrne/pull/12) · [#13](https://github.com/cemililik/Tyrne/pull/13) · [#14](https://github.com/cemililik/Tyrne/pull/14) · [#15](https://github.com/cemililik/Tyrne/pull/15) · [#16](https://github.com/cemililik/Tyrne/pull/16) · [#17](https://github.com/cemililik/Tyrne/pull/17) +- PR pages: [#12](https://github.com/HodeTech/Tyrne/pull/12) · [#13](https://github.com/HodeTech/Tyrne/pull/13) · [#14](https://github.com/HodeTech/Tyrne/pull/14) · [#15](https://github.com/HodeTech/Tyrne/pull/15) · [#16](https://github.com/HodeTech/Tyrne/pull/16) · [#17](https://github.com/HodeTech/Tyrne/pull/17) - Prior review baseline: [2026-05-06 full-tree comprehensive code review](2026-05-06-full-tree-comprehensive.md) - B1 closure trio (2026-05-07): [business](../business-reviews/2026-05-07-B1-closure.md) · [security](../security-reviews/2026-05-07-B1-closure.md) · [performance](../performance-optimization-reviews/2026-05-07-B1-closure.md) - Load-bearing ADRs: [0017](../../../decisions/0017-ipc-primitive-set.md) · [0019](../../../decisions/0019-scheduler-shape.md) · [0021](../../../decisions/0021-raw-pointer-scheduler-ipc-bridge.md) · 
[0022](../../../decisions/0022-idle-task-and-typed-scheduler-deadlock.md) · [0023](../../../decisions/0023-cross-table-capability-revocation-policy.md) · [0025](../../../decisions/0025-adr-governance-amendments.md) · [0026](../../../decisions/0026-idle-dispatch-fallback.md) · [0032](../../../decisions/0032-endpoint-rollback-and-cancel-recv.md) diff --git a/docs/analysis/reviews/code-reviews/2026-05-07-pr-12-to-17-multi-axis-review/track-a-kernel.md b/docs/analysis/reviews/code-reviews/2026-05-07-pr-12-to-17-multi-axis-review/track-a-kernel.md index f879417..1878291 100644 --- a/docs/analysis/reviews/code-reviews/2026-05-07-pr-12-to-17-multi-axis-review/track-a-kernel.md +++ b/docs/analysis/reviews/code-reviews/2026-05-07-pr-12-to-17-multi-axis-review/track-a-kernel.md @@ -96,7 +96,7 @@ split → ADR-0030), or carry over unchanged with no new conflations. - **→ Track G (BSP):** [`bsp-qemu-virt/src/main.rs::idle_entry`](../../../../../bsp-qemu-virt/src/main.rs) calls `yield_now(SCHED.as_mut_ptr(), cpu).expect(...)` — the `expect` is allowed because the BSP crate is not under the kernel's `clippy::expect_used` denylist. Confirmed (kernel denylist is `kernel/src/lib.rs`-scoped). The fallback discipline "current is idle and queue empty → no switch, return Ok(())" inside `yield_now` is correctly implemented at [sched/mod.rs:738-746](../../../../../kernel/src/sched/mod.rs#L738) — idle's WFI loop won't re-enter the dispatcher in a broken way. -- **→ Track J (hygiene):** PR #14's URL rename (`cemililik/TyrneOS` → `cemililik/Tyrne`) touched `kernel/src/sched/mod.rs` rustdoc footers (16 changes per the merge stat) and `kernel/src/ipc/mod.rs` (8 changes). All are pure URL replacements within rustdoc `[link]: https://...` definitions; no source-behaviour or doc-semantics change. Cross-checked: every cross-reference in the new ADR-0032 / `ipc_cancel_recv` rustdoc uses the new `cemililik/Tyrne` URL (e.g., L46 `[adr-0017]: https://github.com/cemililik/Tyrne/...`). 
No stale URL drift introduced by the new T-015 code. +- **→ Track J (hygiene):** PR #14's URL rename (`cemililik/TyrneOS` → `cemililik/Tyrne`) touched `kernel/src/sched/mod.rs` rustdoc footers (16 changes per the merge stat) and `kernel/src/ipc/mod.rs` (8 changes). All are pure URL replacements within rustdoc `[link]: https://...` definitions; no source-behaviour or doc-semantics change. Cross-checked: every cross-reference in the new ADR-0032 / `ipc_cancel_recv` rustdoc uses the renamed `Tyrne` slug (e.g., L46 `[adr-0017]: https://github.com/HodeTech/Tyrne/...` — repository owner since updated to `HodeTech`). No stale URL drift introduced by the new T-015 code. ## Suggested follow-up actions diff --git a/docs/analysis/reviews/code-reviews/2026-05-07-pr-12-to-17-multi-axis-review/track-f-tests.md b/docs/analysis/reviews/code-reviews/2026-05-07-pr-12-to-17-multi-axis-review/track-f-tests.md index 26516cc..1b16870 100644 --- a/docs/analysis/reviews/code-reviews/2026-05-07-pr-12-to-17-multi-axis-review/track-f-tests.md +++ b/docs/analysis/reviews/code-reviews/2026-05-07-pr-12-to-17-multi-axis-review/track-f-tests.md @@ -40,7 +40,7 @@ ADR-0032's *Decision outcome* simulation table row 3b (the Phase 2 Deadlock bran | `RecvWaiting → Idle` (success path) | #1, #5 | Covered | | Called from `Idle` (no-op error path) | #2 | Covered | | Called from `SendPending` (no-op) | #3 | Covered | -| Called from `RecvComplete` (no-op) | New: `cancel_recv_on_recv_complete_does_not_drop_message_or_cap` | **Closed by [PR #18](https://github.com/cemililik/Tyrne/pull/18) hygiene commit `25854a1`.** The new test drives the endpoint to `RecvComplete { msg, cap: Some(_) }` via `ipc_recv` (registers receiver) → `ipc_send` with a transferred capability (delivers), then calls `ipc_cancel_recv` and asserts both the parked message AND the transferred cap survive on the next `ipc_recv`. The originally-flagged Finding §F-1 is now resolved (see "Findings → Minor (closed)" below). 
| +| Called from `RecvComplete` (no-op) | New: `cancel_recv_on_recv_complete_does_not_drop_message_or_cap` | **Closed by [PR #18](https://github.com/HodeTech/Tyrne/pull/18) hygiene commit `25854a1`.** The new test drives the endpoint to `RecvComplete { msg, cap: Some(_) }` via `ipc_recv` (registers receiver) → `ipc_send` with a transferred capability (delivers), then calls `ipc_cancel_recv` and asserts both the parked message AND the transferred cap survive on the next `ipc_recv`. The originally-flagged Finding §F-1 is now resolved (see "Findings → Minor (closed)" below). | | Bad-cap path — stale generation | **None.** | **Observation §M-1** — `validate_ep_cap` returns `IpcError::InvalidCapability` for a stale lookup; `ep_arena.get(slot)` rejects a stale-generation `EndpointHandle`. Both paths are exercised by other IPC tests' setup but not by a dedicated cancel-stale test. | | Bad-cap path — missing right | #4 | Covered | | Interleaved cancel (cancel after another op) | #5 (cancel-after-cancel); #3 (cancel-after-send) | Covered | @@ -131,7 +131,7 @@ None. ### Minor (closed) -**§F-1 — `cancel_recv` `RecvComplete` branch not directly tested.** *Closed by [PR #18](https://github.com/cemililik/Tyrne/pull/18) commit `25854a1` (post-merge hygiene).* The new test [`cancel_recv_on_recv_complete_does_not_drop_message_or_cap`](../../../../../kernel/src/ipc/mod.rs) drives the endpoint to `RecvComplete { msg, cap: Some(_) }` via send-after-recv with a transferred capability, then calls `ipc_cancel_recv` and asserts both the parked message AND the transferred cap survive on the next `ipc_recv`. Stronger than the originally-recommended ~25-LOC variant because it pins the cap-bearing-state property (which the originally-recommended test did not), satisfying both Track F §F-1 and Track C cap-bearing-state semantics. Test count: 158 → 159 host + miri. 
The doc-comment at [`kernel/src/ipc/mod.rs`](../../../../../kernel/src/ipc/mod.rs) `ipc_cancel_recv` gained a forward-rider clarifying that this no-op is the *recovery* shape; the future B2+ destroy-drain caller will need different semantics for cap-bearing branches (consolidated with Track A §MIN-2 and Track C cap-bearing-state). +**§F-1 — `cancel_recv` `RecvComplete` branch not directly tested.** *Closed by [PR #18](https://github.com/HodeTech/Tyrne/pull/18) commit `25854a1` (post-merge hygiene).* The new test [`cancel_recv_on_recv_complete_does_not_drop_message_or_cap`](../../../../../kernel/src/ipc/mod.rs) drives the endpoint to `RecvComplete { msg, cap: Some(_) }` via send-after-recv with a transferred capability, then calls `ipc_cancel_recv` and asserts both the parked message AND the transferred cap survive on the next `ipc_recv`. Stronger than the originally-recommended ~25-LOC variant because it pins the cap-bearing-state property (which the originally-recommended test did not), satisfying both Track F §F-1 and Track C cap-bearing-state semantics. Test count: 158 → 159 host + miri. The doc-comment at [`kernel/src/ipc/mod.rs`](../../../../../kernel/src/ipc/mod.rs) `ipc_cancel_recv` gained a forward-rider clarifying that this no-op is the *recovery* shape; the future B2+ destroy-drain caller will need different semantics for cap-bearing branches (consolidated with Track A §MIN-2 and Track C cap-bearing-state). **§F-2 — No smoke variant exercises the Deadlock-branch cancel arc.** v1's idle task makes `SchedError::Deadlock` structurally unreachable; the new cancel call site at [lines 971-995](../../../../../kernel/src/sched/mod.rs#L971-L995) is exercised only by host tests + miri. 
Adequate for v1 (per §"QEMU smoke" above), but worth queueing for B5+ preemption work — when preemption lands, the cancel arc will be reachable from real production paths (preempted-mid-Phase-2 per ADR-0032 §Context bullet 3), and a smoke variant or preemption-test will be the natural venue. **Recommend:** open a phase-B5 follow-up note in [phases/phase-b.md](../../../../roadmap/phases/phase-b.md) tagging "Deadlock-cancel arc gains a real exerciser when preemption lands." diff --git a/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review.md b/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review.md index fd7b6d4..f89acf3 100644 --- a/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review.md +++ b/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review.md @@ -1,6 +1,6 @@ # Code review 2026-05-08 — PR #19 / #20 / #21 multi-axis pre-merge sweep -- **Change:** PRs [#19](https://github.com/cemililik/Tyrne/pull/19) (path-drift sweep), [#20](https://github.com/cemililik/Tyrne/pull/20) (ADR-0027 + T-016 open), [#21](https://github.com/cemililik/Tyrne/pull/21) (P10 perf-harness + first measured baseline). +- **Change:** PRs [#19](https://github.com/HodeTech/Tyrne/pull/19) (path-drift sweep), [#20](https://github.com/HodeTech/Tyrne/pull/20) (ADR-0027 + T-016 open), [#21](https://github.com/HodeTech/Tyrne/pull/21) (P10 perf-harness + first measured baseline). - **Branches:** `doc-hygiene-2026-05-06-path-drift-sweep` / `adr-0027-kernel-virtual-memory-layout` / `p10-wall-clock-bench-harness`. - **Reviewer:** @cemililik (+ Claude Opus 4.7 multi-agent fan-out — 4 parallel track agents). - **Type:** **Pre-merge** multi-axis sweep — these PRs together open the B2 milestone; quality is load-bearing for the T-016 implementation arc that follows. 
"Block" reserved for genuine regression / discipline violation; "Approve-with-followups" means a one-line same-branch fixup or a hygiene-PR rider closes the finding. @@ -122,7 +122,7 @@ A hygiene PR after #20 merges captures items 2–11; the perf-harness follow-ups - Track 2 (design): [track-2-pr-20-design.md](2026-05-08-pr-19-20-21-multi-axis-review/track-2-pr-20-design.md) - Track 3 (governance): [track-3-pr-20-governance.md](2026-05-08-pr-19-20-21-multi-axis-review/track-3-pr-20-governance.md) - Track 4 (perf-harness): [track-4-pr-21-perf-harness.md](2026-05-08-pr-19-20-21-multi-axis-review/track-4-pr-21-perf-harness.md) -- PR diffs: [`gh pr view 19`](https://github.com/cemililik/Tyrne/pull/19) / [`gh pr view 20`](https://github.com/cemililik/Tyrne/pull/20) / [`gh pr view 21`](https://github.com/cemililik/Tyrne/pull/21) +- PR diffs: [`gh pr view 19`](https://github.com/HodeTech/Tyrne/pull/19) / [`gh pr view 20`](https://github.com/HodeTech/Tyrne/pull/20) / [`gh pr view 21`](https://github.com/HodeTech/Tyrne/pull/21) - ADR-0027 (kernel virtual memory layout): [`docs/decisions/0027-kernel-virtual-memory-layout.md`](../../../decisions/0027-kernel-virtual-memory-layout.md) - T-016 (MMU activation): [`docs/analysis/tasks/phase-b/T-016-mmu-activation.md`](../../tasks/phase-b/T-016-mmu-activation.md) - ADR-0009 §Revision rider, ADR-0012 §Open questions resolution diff --git a/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review/track-1-pr-19-mechanical.md b/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review/track-1-pr-19-mechanical.md index 935e4ce..c531b10 100644 --- a/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review/track-1-pr-19-mechanical.md +++ b/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review/track-1-pr-19-mechanical.md @@ -1,6 +1,6 @@ # Track 1 — PR #19 mechanical sweep validation -- **PR:** [#19](https://github.com/cemililik/Tyrne/pull/19) +- **PR:** 
[#19](https://github.com/HodeTech/Tyrne/pull/19) - **Branch:** doc-hygiene-2026-05-06-path-drift-sweep - **Commit reviewed:** 2877e0d4ebda67641e5e40e4dbdb52119abce0c6 - **Reviewer:** Claude Opus 4.7 sub-agent (Track 1) diff --git a/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review/track-2-pr-20-design.md b/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review/track-2-pr-20-design.md index 8496451..625f98b 100644 --- a/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review/track-2-pr-20-design.md +++ b/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review/track-2-pr-20-design.md @@ -1,6 +1,6 @@ # Track 2 — PR #20 ADR-0027 design correctness + §Simulation -- **PR:** [#20](https://github.com/cemililik/Tyrne/pull/20) +- **PR:** [#20](https://github.com/HodeTech/Tyrne/pull/20) - **Branch:** adr-0027-kernel-virtual-memory-layout - **Commits reviewed:** dc4d92b (Propose) + bb0a6ba (Accept) + 8b6eef4 (PR-num fix) - **Reviewer:** Claude Opus 4.7 sub-agent (Track 2) diff --git a/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review/track-3-pr-20-governance.md b/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review/track-3-pr-20-governance.md index ba5064a..8d403ca 100644 --- a/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review/track-3-pr-20-governance.md +++ b/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review/track-3-pr-20-governance.md @@ -1,6 +1,6 @@ # Track 3 — PR #20 governance, T-016 scoping, audit-log forward-flags, docs -- **PR:** [#20](https://github.com/cemililik/Tyrne/pull/20) +- **PR:** [#20](https://github.com/HodeTech/Tyrne/pull/20) - **Branch:** adr-0027-kernel-virtual-memory-layout - **Commits reviewed:** dc4d92b + bb0a6ba + 8b6eef4 - **Reviewer:** Claude Opus 4.7 sub-agent (Track 3) diff --git 
a/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review/track-4-pr-21-perf-harness.md b/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review/track-4-pr-21-perf-harness.md index cfa148b..da8c3a5 100644 --- a/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review/track-4-pr-21-perf-harness.md +++ b/docs/analysis/reviews/code-reviews/2026-05-08-pr-19-20-21-multi-axis-review/track-4-pr-21-perf-harness.md @@ -1,6 +1,6 @@ # Track 4 — PR #21 perf-harness implementation -- **PR:** [#21](https://github.com/cemililik/Tyrne/pull/21) +- **PR:** [#21](https://github.com/HodeTech/Tyrne/pull/21) - **Branch:** p10-wall-clock-bench-harness - **Commits reviewed:** `1de8143` + `abf26b9` - **Reviewer:** Claude Opus 4.7 sub-agent (Track 4) — paper review only; harness not executed diff --git a/docs/analysis/reviews/master-review/2026-05-22-152729/tracks/D5a-meta-core.md b/docs/analysis/reviews/master-review/2026-05-22-152729/tracks/D5a-meta-core.md index fda87b2..d750450 100644 --- a/docs/analysis/reviews/master-review/2026-05-22-152729/tracks/D5a-meta-core.md +++ b/docs/analysis/reviews/master-review/2026-05-22-152729/tracks/D5a-meta-core.md @@ -137,9 +137,9 @@ The skills README states: *"If a skill needs more than ~200 lines, either (a) th `NOTICE:5`; `SECURITY.md:13` -NOTICE references `https://github.com/cemililik/TyrneOS` (note: `TyrneOS`). SECURITY.md references `https://github.com/cemililik/Tyrne`. The correct slug per the project name is `Tyrne`. The NOTICE URL likely predates the rename from Umbrix / pre-rename-era tooling. +NOTICE references `https://github.com/cemililik/TyrneOS` (note: `TyrneOS`). SECURITY.md references `https://github.com/HodeTech/Tyrne`. The correct slug per the project name is `Tyrne`. The NOTICE URL likely predates the rename from Umbrix / pre-rename-era tooling. -**Suggested fix:** Update NOTICE line 5 to `https://github.com/cemililik/Tyrne`. 
+**Suggested fix:** Update NOTICE line 5 to `https://github.com/HodeTech/Tyrne`. --- @@ -205,7 +205,7 @@ Every path was verified with `ls` or `git ls-files` at commit 288ddb2. | Reference | Source file:line | Status | Notes | |---|---|---|---| | `docs/roadmap/reviews/` | `.agents/skills/README.md:76` | Stale — path does not exist | Correct path is `docs/analysis/reviews/-reviews/`; the conduct-review SKILL.md itself uses the correct path | -| `https://github.com/cemililik/TyrneOS` | `NOTICE:5` | Stale repo slug | Should be `https://github.com/cemililik/Tyrne` (matches SECURITY.md) | +| `https://github.com/cemililik/TyrneOS` | `NOTICE:5` | Stale repo slug | Should be `https://github.com/HodeTech/Tyrne` (matches SECURITY.md) | | "(Phase 2)" in reading order | `docs/README.md:19` | Orphaned phrase | No Phase-N numbering scheme exists in arch docs | | `docs/standards/` for unsafe audit tracking | `CLAUDE.md:16` | Misleading pointer | Policy is in docs/standards/; log is in docs/audits/; only the former is pointed to | | "most code is not yet written … architecture design" | `CLAUDE.md:7` | Stale status description | Phase B implementation is well underway | diff --git a/docs/analysis/reviews/performance-optimization-reviews/2026-05-07-B1-closure.md b/docs/analysis/reviews/performance-optimization-reviews/2026-05-07-B1-closure.md index 107f041..618c5a2 100644 --- a/docs/analysis/reviews/performance-optimization-reviews/2026-05-07-B1-closure.md +++ b/docs/analysis/reviews/performance-optimization-reviews/2026-05-07-B1-closure.md @@ -7,7 +7,7 @@ - **Target:** QEMU `virt`, aarch64, Cortex-A72 model, single core, 128 MiB RAM (unchanged). - **Build:** release profile (`cargo build --release --target aarch64-unknown-none -p tyrne-bsp-qemu-virt`). 
-> **Canonical source for B1 closure metrics.** This artefact + the [business retrospective](../business-reviews/2026-05-07-B1-closure.md) + the [security review](../security-reviews/2026-05-07-B1-closure.md) are the source of truth for B1's closing footprint / timing numbers. Other locations referencing kernel image size, test counts, or boot-to-end timing ([`current.md`](../../../roadmap/current.md), [`phase-b.md`](../../../roadmap/phases/phase-b.md), [`T-014`](../../tasks/phase-b/T-014-idle-dispatch-fallback.md) review-history) are *summaries at their layer of abstraction*; corrections start here. Per [sourcery-ai PR #16 review feedback](https://github.com/cemililik/Tyrne/pull/16). +> **Canonical source for B1 closure metrics.** This artefact + the [business retrospective](../business-reviews/2026-05-07-B1-closure.md) + the [security review](../security-reviews/2026-05-07-B1-closure.md) are the source of truth for B1's closing footprint / timing numbers. Other locations referencing kernel image size, test counts, or boot-to-end timing ([`current.md`](../../../roadmap/current.md), [`phase-b.md`](../../../roadmap/phases/phase-b.md), [`T-014`](../../tasks/phase-b/T-014-idle-dispatch-fallback.md) review-history) are *summaries at their layer of abstraction*; corrections start here. Per [sourcery-ai PR #16 review feedback](https://github.com/HodeTech/Tyrne/pull/16). 
--- diff --git a/docs/analysis/reviews/performance-optimization-reviews/2026-05-14-B3-closure.md b/docs/analysis/reviews/performance-optimization-reviews/2026-05-14-B3-closure.md index 0c3ebaa..efb4125 100644 --- a/docs/analysis/reviews/performance-optimization-reviews/2026-05-14-B3-closure.md +++ b/docs/analysis/reviews/performance-optimization-reviews/2026-05-14-B3-closure.md @@ -1,7 +1,7 @@ # Performance baseline 2026-05-14 — B3 closure (post-T-017 + T-018) - **Concern:** Did the T-017 PMM bring-up + T-018 `AddressSpace` kernel object + cap-gated `Mmu::map`/`unmap` wrappers + activation-on-context-switch hook shift the kernel image footprint, RAM use, or hot-path instruction counts versus the [post-T-016 B2 closure baseline](2026-05-09-B2-closure.md)? -- **Scope:** All committed code on `main` from [`b0035ce`](https://github.com/cemililik/Tyrne/commit/b0035ce) (PR #23 merge — T-016 / B2 closure baseline) through [`47b0a86`](https://github.com/cemililik/Tyrne/commit/47b0a86) (PR #28 merge — T-018 with five review-rounds applied). The arc covers ADR-0035 + T-017 (`Pmm` bitmap allocator + `FrameProvider` impl + UNSAFE-2026-0026 frame-zeroing); ADR-0028 + T-018 (`AddressSpace` kernel-object + `AddressSpaceArena` + `CapKind::AddressSpace` variant + cap-gated wrappers + activation-on-context-switch hook closure-threading); cross-cutting `MmuError::BlockMapped` variant + `CapabilityTable::depth_of` `pub(crate)` preflight helper; UNSAFE-2026-0014 fifth Amendment scope-extending to the activation hook; UNSAFE-2026-0025's body-correction Amendment for the variant split; and the `.claude/skills/` → `.agents/skills/` directory consolidation. +- **Scope:** All committed code on `main` from [`b0035ce`](https://github.com/HodeTech/Tyrne/commit/b0035ce) (PR #23 merge — T-016 / B2 closure baseline) through [`47b0a86`](https://github.com/HodeTech/Tyrne/commit/47b0a86) (PR #28 merge — T-018 with five review-rounds applied). 
The arc covers ADR-0035 + T-017 (`Pmm` bitmap allocator + `FrameProvider` impl + UNSAFE-2026-0026 frame-zeroing); ADR-0028 + T-018 (`AddressSpace` kernel-object + `AddressSpaceArena` + `CapKind::AddressSpace` variant + cap-gated wrappers + activation-on-context-switch hook closure-threading); cross-cutting `MmuError::BlockMapped` variant + `CapabilityTable::depth_of` `pub(crate)` preflight helper; UNSAFE-2026-0014 fifth Amendment scope-extending to the activation hook; UNSAFE-2026-0025's body-correction Amendment for the variant split; and the `.claude/skills/` → `.agents/skills/` directory consolidation. - **Hypothesis:** Re-baseline artefact, not a hypothesis-driven optimisation cycle. Per the [master plan's pre-flight](master-plan.md#pre-flight-hypothesis), no concrete improvement target is set — the goal is to record the post-T-018 baseline so future hypothesis-driven cycles (B4 userspace bring-up, B5 syscall ABI, B5+ ASID isolation) have a fresh reference point. Implicit non-hypothesis: T-017 + T-018 should add bounded `.text` (PMM bitmap scan + AddressSpace arena methods + cap-wrapper preflights + activation closure threading; each function-body sized), bounded `.bss` (the `Pmm` static — 32K-frame bitmap + reserved-list array — and the `AddressSpaceArena` `StaticCell`), and a measurable boot-to-end timing cost (the boot-time PMM bitmap initialisation pass over 32K frames + the AddressSpaceArena slot-0 wrap + bootstrap-AS cap mint). - **Reviewer:** @cemililik (+ Claude Sonnet 4.6 agent acting in the Baseline + Hotspot + Reporter roles below; Proposal / Measurement / Regression-check sections are short because no proposal is being measured this cycle). - **Target:** QEMU `virt`, aarch64, Cortex-A72 model, single core, 128 MiB RAM (unchanged). 
diff --git a/docs/analysis/reviews/security-reviews/2026-04-28-B1-closure.md b/docs/analysis/reviews/security-reviews/2026-04-28-B1-closure.md index a285595..11ac8b8 100644 --- a/docs/analysis/reviews/security-reviews/2026-04-28-B1-closure.md +++ b/docs/analysis/reviews/security-reviews/2026-04-28-B1-closure.md @@ -1,6 +1,6 @@ # Security review 2026-04-28 — B1 closure consolidated pass -- **Change:** all committed code in branch `development` from `9a66e8b` (PR #9 merge — B0 closure tip, last reviewed by [`2026-04-27-B0-closure.md`](2026-04-27-B0-closure.md)) through [PR #10](https://github.com/cemililik/Tyrne/pull/10)'s merge into `main` (merge commit `7b42bbe`). The arc covers T-012 — GIC v2 driver, EL1 exception vector table, IRQ-handler dispatch, generic-timer IRQ wiring, idle's WFI activation — plus the ADR-0021 §Revision notes 2026-04-28 Amendment, the UNSAFE-2026-0014 commit-`28c5ce9` Amendment, and the three new audit-log entries (UNSAFE-2026-0019 / 0020 / 0021). +- **Change:** all committed code in branch `development` from `9a66e8b` (PR #9 merge — B0 closure tip, last reviewed by [`2026-04-27-B0-closure.md`](2026-04-27-B0-closure.md)) through [PR #10](https://github.com/HodeTech/Tyrne/pull/10)'s merge into `main` (merge commit `7b42bbe`). The arc covers T-012 — GIC v2 driver, EL1 exception vector table, IRQ-handler dispatch, generic-timer IRQ wiring, idle's WFI activation — plus the ADR-0021 §Revision notes 2026-04-28 Amendment, the UNSAFE-2026-0014 commit-`28c5ce9` Amendment, and the three new audit-log entries (UNSAFE-2026-0019 / 0020 / 0021). - **Reviewer:** @cemililik (+ Claude Opus 4.7 agent acting adversarial across the eight axes the security-review master plan defines). - **Separation from code review:** standalone consolidated pass after PR #10's two review-fix sweeps. 
PR #10 had two independent review-style rounds during its lifetime (`d820a88` round-1 and `5c9cf06` round-2, closing 19 findings + 2 nitpicks combined); this artifact is the single security-axis pass over the post-merge surface, deliberately scoped to the *adversarial* dimension the code rounds did not target. - **Unsafe audit cross-reference:** UNSAFE-2026-0019 (GIC v2 MMIO surface), UNSAFE-2026-0020 (EL1 exception vector table install + asm trampolines), UNSAFE-2026-0021 (EL1 virtual generic-timer compare-register writes), UNSAFE-2026-0014 commit-`28c5ce9` Amendment (IRQ-handler frame as a future site of the same momentary-`&mut` pattern). All previously-active entries (UNSAFE-2026-0001 .. UNSAFE-2026-0018 plus their B0-era Amendments) re-verified against the post-PR-#10 source — no in-place body edits, append-only discipline holds. diff --git a/docs/analysis/reviews/security-reviews/2026-05-07-B1-closure.md b/docs/analysis/reviews/security-reviews/2026-05-07-B1-closure.md index e7a7ab8..dd2b3f0 100644 --- a/docs/analysis/reviews/security-reviews/2026-05-07-B1-closure.md +++ b/docs/analysis/reviews/security-reviews/2026-05-07-B1-closure.md @@ -5,7 +5,7 @@ - **Separation from code review:** standalone consolidated pass after the comprehensive multi-agent code review at HEAD `214052d` (2026-05-06) and the four PR-rounds it triggered. The comprehensive review's [Track C](../code-reviews/2026-05-06-full-tree/track-c-security.md) was a full eight-axis pass at 2026-05-06; this artifact is the *closure-trio* security pass scoped to the post-T-014 surface. - **Unsafe audit cross-reference:** UNSAFE-2026-0014 (third Amendment, 2026-05-06; `register_idle` is the new sanctioned site for the same momentary-`&mut Scheduler` pattern); UNSAFE-2026-0019 / 0020 (2026-05-06 partial-verification + post-T-014 smoke Amendments); UNSAFE-2026-0021 (2026-05-06 no-verification Amendment recording that the timer-write site is unreachable in the v1 demo). 
Entries 0001..0021 (with 0012 Removed) re-verified against the post-PR-#15 source — append-only discipline holds, no in-place body edits. -> **Canonical source for B1 closure metrics.** The [business retrospective](../business-reviews/2026-05-07-B1-closure.md) + this artefact + the [performance baseline](../performance-optimization-reviews/2026-05-07-B1-closure.md) are the source of truth for B1's closing audit / security numbers. Other locations referencing B1 audit state ([`current.md`](../../../roadmap/current.md), [`phase-b.md`](../../../roadmap/phases/phase-b.md), [`docs/audits/unsafe-log.md`](../../../audits/unsafe-log.md) entry-body Amendments) are *summaries at their layer of abstraction*; corrections start here. Per [sourcery-ai PR #16 review feedback](https://github.com/cemililik/Tyrne/pull/16). +> **Canonical source for B1 closure metrics.** The [business retrospective](../business-reviews/2026-05-07-B1-closure.md) + this artefact + the [performance baseline](../performance-optimization-reviews/2026-05-07-B1-closure.md) are the source of truth for B1's closing audit / security numbers. Other locations referencing B1 audit state ([`current.md`](../../../roadmap/current.md), [`phase-b.md`](../../../roadmap/phases/phase-b.md), [`docs/audits/unsafe-log.md`](../../../audits/unsafe-log.md) entry-body Amendments) are *summaries at their layer of abstraction*; corrections start here. Per [sourcery-ai PR #16 review feedback](https://github.com/HodeTech/Tyrne/pull/16). 
--- diff --git a/docs/analysis/reviews/security-reviews/2026-05-14-B3-closure.md b/docs/analysis/reviews/security-reviews/2026-05-14-B3-closure.md index 58a7a3e..b9a4234 100644 --- a/docs/analysis/reviews/security-reviews/2026-05-14-B3-closure.md +++ b/docs/analysis/reviews/security-reviews/2026-05-14-B3-closure.md @@ -1,6 +1,6 @@ # Security review 2026-05-14 — B3 closure consolidated pass (post-T-017 + T-018) -- **Change:** all committed code on `main` from [`b0035ce`](https://github.com/cemililik/Tyrne/commit/b0035ce) (PR #23 merge — T-016 / B2 closure baseline) through [`47b0a86`](https://github.com/cemililik/Tyrne/commit/47b0a86) (PR #28 merge — T-018 with five review-rounds applied). The arc covers ADR-0035 (Physical Memory Manager — `Accepted` 2026-05-09); ADR-0028 (Address-space data structure — `Accepted` 2026-05-11); T-017 (`Pmm` bitmap allocator + `FrameProvider` impl); T-018 (`AddressSpace` kernel-object + cap-gated `Mmu::map`/`unmap` wrappers + activation-on-context-switch); the `CapKind::AddressSpace` + `CapObject::AddressSpace` cap-variant additions; the `CapabilityTable::depth_of` `pub(crate)` preflight helper; the `MmuError::BlockMapped` additive variant; the UNSAFE-2026-0014 fifth Amendment (activation-hook scope extension) + the new UNSAFE-2026-0026 entry (PMM frame-zeroing); the `.claude/skills/` → `.agents/skills/` directory consolidation; the housekeeping refresh of `current.md`. +- **Change:** all committed code on `main` from [`b0035ce`](https://github.com/HodeTech/Tyrne/commit/b0035ce) (PR #23 merge — T-016 / B2 closure baseline) through [`47b0a86`](https://github.com/HodeTech/Tyrne/commit/47b0a86) (PR #28 merge — T-018 with five review-rounds applied). 
The arc covers ADR-0035 (Physical Memory Manager — `Accepted` 2026-05-09); ADR-0028 (Address-space data structure — `Accepted` 2026-05-11); T-017 (`Pmm` bitmap allocator + `FrameProvider` impl); T-018 (`AddressSpace` kernel-object + cap-gated `Mmu::map`/`unmap` wrappers + activation-on-context-switch); the `CapKind::AddressSpace` + `CapObject::AddressSpace` cap-variant additions; the `CapabilityTable::depth_of` `pub(crate)` preflight helper; the `MmuError::BlockMapped` additive variant; the UNSAFE-2026-0014 fifth Amendment (activation-hook scope extension) + the new UNSAFE-2026-0026 entry (PMM frame-zeroing); the `.claude/skills/` → `.agents/skills/` directory consolidation; the housekeeping refresh of `current.md`. - **Reviewer:** @cemililik (+ Claude Sonnet 4.6 agent acting adversarial across the eight axes the security-review master plan defines). - **Separation from code review:** standalone consolidated pass after the **five review-rounds** T-018 went through (rounds 1, 2, self-review, 3, daily-auditor, R4 CodeRabbit follow-on, R5 CodeRabbit follow-on; see [business retrospective §"Review-round arc on PR #28"](../business-reviews/2026-05-14-B3-closure.md#review-round-arc-on-pr-28--five-rounds-agent-driven-verification-compounded)). The bot review-rounds were per-PR; this artefact is the *closure-trio* security pass scoped to the post-B2 → post-T-018 surface. 
- **Unsafe audit cross-reference:** UNSAFE-2026-0026 (new; PMM frame-zeroing via `core::ptr::write_bytes` in `Pmm::alloc_frame`); UNSAFE-2026-0014 fifth Amendment (scope extension to the activation-on-context-switch hook + BSP-side activation closure; sites named: `kernel/src/sched/mod.rs::yield_now` + `::ipc_recv_and_yield` + `::ipc_send_and_yield` (delegating) + `::start` + `bsp-qemu-virt/src/main.rs::activate_address_space`; helper `address_space_activation_target` named for completeness as a pure `&self` accessor — no `unsafe`); UNSAFE-2026-0025's body-correction Amendment (block-descriptor unmap path now surfaces `MmuError::BlockMapped` instead of `AlreadyMapped`). Entries 0001..0026 (with 0012 Removed) re-verified against the post-PR-#28 source — append-only discipline holds, no in-place body edits. diff --git a/docs/analysis/reviews/security-reviews/master-plan.md b/docs/analysis/reviews/security-reviews/master-plan.md index 967f27f..74daa26 100644 --- a/docs/analysis/reviews/security-reviews/master-plan.md +++ b/docs/analysis/reviews/security-reviews/master-plan.md @@ -88,7 +88,7 @@ Each role produces a section of the final artifact. Within each role, every item **Adversarial question:** does the change pull in trust that we have not earned? -- Any new dependency went through [`add-dependency`](../../../../.claude/skills/add-dependency/SKILL.md). +- Any new dependency went through [`add-dependency`](../../../../.agents/skills/add-dependency/SKILL.md). - The dependency's trust category is understood; build-time-only is very different from kernel-linked. - `cargo-vet` trust decisions are updated. @@ -105,7 +105,7 @@ Combine the role outputs into a single artifact. The verdict is computed from th - **Approve** — every applicable axis returned OK; flagged items are all minor non-blocking. - **Changes requested** — one or more axes returned a blocking `flagged` outcome. Each is specific and actionable. 
-- **Escalate** — the review surfaces an issue larger than this change (e.g., a trust-model gap that the subsystem exposes). A tracking task is opened via [`start-task`](../../../../.claude/skills/start-task/SKILL.md). +- **Escalate** — the review surfaces an issue larger than this change (e.g., a trust-model gap that the subsystem exposes). A tracking task is opened via [`start-task`](../../../../.agents/skills/start-task/SKILL.md). The verdict is propagated to the corresponding code-review artifact as a cross-reference. diff --git a/docs/analysis/tasks/phase-b/README.md b/docs/analysis/tasks/phase-b/README.md index 71d1840..c49630d 100644 --- a/docs/analysis/tasks/phase-b/README.md +++ b/docs/analysis/tasks/phase-b/README.md @@ -17,5 +17,7 @@ Tasks belonging to [Phase B — Real userspace](../../../roadmap/phases/phase-b. | [T-015](T-015-endpoint-rollback-cancel-recv.md) | Endpoint state rollback on `ipc_recv_and_yield` Deadlock + `ipc_cancel_recv` primitive (implements ADR-0032) | B2 prep | Done (2026-05-07) | | [T-016](T-016-mmu-activation.md) | MMU activation with identity-mapped kernel + `MapperFlush` token discipline (implements ADR-0027) | B2 | Done (2026-05-08; B2 milestone Closed 2026-05-09) | | [T-017](T-017-physical-memory-manager.md) | Physical Memory Manager (PMM): bitmap allocator + reservation tracking + `FrameProvider` impl (implements ADR-0035) | B3 | Done (2026-05-10) | +| [T-018](T-018-address-space-kernel-object.md) | `AddressSpace` kernel object + capability-gated `Mmu::map`/`unmap` wrappers + activation-on-context-switch (implements ADR-0028) | B3 | Done (2026-05-11; live on `main` 2026-05-14 via PR #28) | +| [T-019](T-019-task-loader.md) | Task loader: embedded raw-flat userspace image → `LoadedImage` metadata (implements ADR-0029) | B4 | Done (2026-05-16 via PR #31 merge) | Tasks are added here as they become active. See [`../../../roadmap/phases/phase-b.md`](../../../roadmap/phases/phase-b.md) for the full phase plan. 
diff --git a/docs/analysis/tasks/phase-b/T-012-exception-and-irq-infrastructure.md b/docs/analysis/tasks/phase-b/T-012-exception-and-irq-infrastructure.md index b7a41a3..0c71c89 100644 --- a/docs/analysis/tasks/phase-b/T-012-exception-and-irq-infrastructure.md +++ b/docs/analysis/tasks/phase-b/T-012-exception-and-irq-infrastructure.md @@ -110,4 +110,4 @@ In commit order — sequence chosen so each step is testable on its own and the | 2026-04-28 | @cemililik (+ Claude Opus 4.7 agent) | Promoted `Draft → In Progress` with a *design-first* arc. The B0 closure security review §8 ("Architecture docs are a security multiplier") tightened T-012's Documentation AC to require `docs/architecture/exceptions.md` as a hard deliverable; this entry records that doc landing **ahead of** the code, not as a follow-up. The doc covers: 16-entry vector table layout at `VBAR_EL1` (Mermaid 4×4 grid; only the Current-EL-SP_ELx IRQ entry fires in v1, others trampoline to a panic-class handler); IRQ dispatch flow (trampoline → register-frame save → Rust handler → ack via `IrqController` → dispatch → EOI → eret; Mermaid sequence diagram); GIC v2 driver shape (distributor `0x0800_0000` + CPU interface `0x0801_0000`; per-register table; init sequence; per-method MMIO mapping); generic-timer IRQ wiring (PPI 27, `CNTV_CVAL_EL0` / `CNTV_CTL_EL0` programming, handler hook); idle WFI activation closure; IRQ-handler interaction with the raw-pointer scheduler bridge (likely an ADR-0021 Amendment when the handler is wired). Implementation map at the end of the doc shows the seven Approach steps with status flags (🔵 next, 🟡 follows, 🟢 in-progress / partially done by this doc) so the next implementation commit knows exactly where to land. **No code in this commit** — `bsp-qemu-virt` and `kernel/src/sched/mod.rs` untouched; `bsp-qemu-virt/src/gic.rs` not yet created; boot.s not yet extended; the `unimplemented!()` panics in `` still in place. 
T-012 stays In Progress; the next session brings up the code against this design. `docs/architecture/README.md` index gains an `exceptions.md` row marked Design status. ADR-0024 (post-T-013) status note in the README also corrected (was "EL drop pending T-013"; T-013 has been Done since 2026-04-27). | | 2026-04-28 | @cemililik (+ Claude Opus 4.7 agent) | Promoted `In Progress → In Review`. Three implementation commits land in order: (1) commit `a043079` — GIC v2 driver (`bsp-qemu-virt/src/gic.rs`), 16-entry vector table at `tyrne_vectors` (`bsp-qemu-virt/src/vectors.s`), `MSR VBAR_EL1` install + `MSR DAIFClr, #0x2` unmask in `kernel_entry`; UNSAFE-2026-0019 (GIC MMIO) + UNSAFE-2026-0020 (vector table + trampolines) appended. (2) commit `b4ed68c` — `::arm_deadline` / `cancel_deadline` real implementations; `irq_entry` dispatch on PPI 27 (mask `CNTV_CTL_EL0` + EOI; ack-and-ignore for v1); `idle_entry` body switched from `core::hint::spin_loop()` to `cpu.wait_for_interrupt()`; `tyrne_hal::timer::ns_to_ticks` helper + 5 host tests (148/148 host suite green); UNSAFE-2026-0021 (CNTV_CTL/CVAL writes) appended. (3) commit `28c5ce9` — documentation sweep: ADR-0021 §Revision notes Amendment for IRQ-handler aliasing discipline (extends ADR-0021's no-`&mut`-across-switch rule to the IRQ frame; v1's ack-and-ignore body vacuously satisfies the discipline; future scheduler-touching IRQ arcs follow Option A's momentary-`&mut` pattern); UNSAFE-2026-0014 Amendment naming `irq_entry` as a future site of the same pattern; ADR-0010 §Revision notes recording that the deferred `arm_deadline` / `cancel_deadline` halves are now live; ADR-0022 first rider's *Sub-rider* gains a closure paragraph naming this task and commit; `docs/architecture/README.md` `exceptions.md` row promoted from `Design` to live status; phase-b §B1 README + `docs/roadmap/current.md` updated. 
QEMU smoke + Miri pass deferred to maintainer-side verification (R6 CI skeleton pinned for Miri; QEMU trace reproduces in maintainer's environment, not in this dev container — same disclaimer T-013 used). All four DoD gates that depend on this dev environment pass; the two deferred items are flagged on the DoD checklist with rationale. | | 2026-04-28 | @cemililik (+ Claude Opus 4.7 agent) | Two review-fix sweeps before merge. (a) PR #10 review-round-1 (commit `d820a88`) applied 11 of 12 findings: `irq_entry` / `panic_entry` promoted to `unsafe extern "C" fn` so caller-side preconditions are visible at the type level; `QemuVirtGic::enable` / `disable` now assert `irq.0 < GIC_MAX_IRQ` (1020) per ARM IHI 0048B §4.3.2; `tyrne_hal::timer::ns_to_ticks` switched from floor to ceiling division (via `u128::div_ceil`) so `Timer::arm_deadline`'s "reaches or exceeds `deadline_ns`" contract holds at non-divisor frequencies; saturation test extended with two over-boundary cases; nine doc fixes (Mermaid diagram + saved-register prose in `exceptions.md` matching shipped ack-and-ignore behaviour, ADR-0021 Amendment-already-exists rephrasing, B0-closure retro arithmetic, phase-b T-012 row label). 
(b) PR #10 review-round-2 (commit `5c9cf06`) applied 8 findings + 2 nitpicks: both `irq_entry` inline SAFETY blocks expanded to the full `unsafe-policy.md` §1 triplet (invariants + rejected alternatives + audit ref); `exceptions.md` ADR-0021 amendment language and date corrected (2026-04-27 → 2026-04-28) at three sites + sched::on_timer_irq prose at lines 160-167 rewritten to match shipped ack-and-ignore body; ADR-0021 §Revision notes Amendment cited signature updated `extern "C" fn` → `unsafe extern "C" fn` with explanatory parenthetical; `current.md` "Next review trigger" intermediate-gate clause about WFI activation removed (already landed in `b4ed68c`); compile-time `const _: () = assert!(size_of::() == 192)` guard added; `ns_to_ticks_rounds_up_on_subtick` test (freq=3, ns=333_333_334 → 2) added so a future floor-rounding regression fails the host suite. Host count grew 148 → 149 (+1 sub-tick rounding test). Gates remain green across both rounds. | -| 2026-04-28 | @cemililik (+ independent review agent) | Promoted `In Review → Done` with the merge of [PR #10](https://github.com/cemililik/Tyrne/pull/10) into `main` (merge commit `7b42bbe`). At merge: 149 / 149 host tests, kernel-build / kernel-clippy / host-clippy / fmt all clean. Three implementation commits + one documentation sweep + two review-round commits land as one cohesive `In Review` arc. **Two DoD gates remain explicitly maintainer-side and untouched by this promotion** — `cargo +nightly miri test` and the QEMU smoke run that exercises the deliberate-deadline path; both are pinned for the maintainer's CI / hardware-side runs (R6 GitHub Actions skeleton). UNSAFE-2026-0019 / 0020 / 0021 audit-log entries keep their `Pending QEMU smoke verification at the maintainer's first opportunity` status notes intact; those notes will be lifted to past-tense via append-only Amendments only after the maintainer has actually run the smoke, not as part of the merge promotion. 
T-012 closes B1's exception-infrastructure half: ADR-0010 deferred halves are live (`arm_deadline` / `cancel_deadline` real on `QemuVirtCpu`), ADR-0022 first rider's *Sub-rider* is closed (idle's WFI activation), ADR-0021 carries the IRQ-handler aliasing discipline as the 2026-04-28 Amendment. Next review trigger is the **B1 closure** business + security + performance review trio (per `docs/roadmap/current.md`'s next-review-trigger entry). | +| 2026-04-28 | @cemililik (+ independent review agent) | Promoted `In Review → Done` with the merge of [PR #10](https://github.com/HodeTech/Tyrne/pull/10) into `main` (merge commit `7b42bbe`). At merge: 149 / 149 host tests, kernel-build / kernel-clippy / host-clippy / fmt all clean. Three implementation commits + one documentation sweep + two review-round commits land as one cohesive `In Review` arc. **Two DoD gates remain explicitly maintainer-side and untouched by this promotion** — `cargo +nightly miri test` and the QEMU smoke run that exercises the deliberate-deadline path; both are pinned for the maintainer's CI / hardware-side runs (R6 GitHub Actions skeleton). UNSAFE-2026-0019 / 0020 / 0021 audit-log entries keep their `Pending QEMU smoke verification at the maintainer's first opportunity` status notes intact; those notes will be lifted to past-tense via append-only Amendments only after the maintainer has actually run the smoke, not as part of the merge promotion. T-012 closes B1's exception-infrastructure half: ADR-0010 deferred halves are live (`arm_deadline` / `cancel_deadline` real on `QemuVirtCpu`), ADR-0022 first rider's *Sub-rider* is closed (idle's WFI activation), ADR-0021 carries the IRQ-handler aliasing discipline as the 2026-04-28 Amendment. Next review trigger is the **B1 closure** business + security + performance review trio (per `docs/roadmap/current.md`'s next-review-trigger entry). 
| diff --git a/docs/analysis/tasks/phase-b/T-019-task-loader.md b/docs/analysis/tasks/phase-b/T-019-task-loader.md index 24e9704..2f8bb40 100644 --- a/docs/analysis/tasks/phase-b/T-019-task-loader.md +++ b/docs/analysis/tasks/phase-b/T-019-task-loader.md @@ -2,8 +2,9 @@ - **Phase:** B - **Milestone:** B4 — Task loader (this task is B4's implementation; ADR-0029 settles the format choice) -- **Status:** In Review +- **Status:** Done - **Created:** 2026-05-14 +- **date_done:** 2026-05-16 - **Author:** @cemililik (+ Claude Sonnet 4.6 agent) - **Dependencies:** [ADR-0029](../../../decisions/0029-initial-userspace-image-format.md) — must be `Accepted` before code lands. Also depends on [T-017](T-017-physical-memory-manager.md) (Done 2026-05-10 — `Pmm` provides the frames the loader allocates **and** the `free_frame` rollback surface this task relies on) and [T-018](T-018-address-space-kernel-object.md) (Done 2026-05-11; live 2026-05-14 via PR #28 — provides `cap_create_address_space` + `cap_map` + `cap_unmap` the loader composes). - **Informs:** Closes the B4 milestone's **loader half** — produces a `LoadedImage` metadata struct describing a freshly populated address space. **Does not** mint a runnable task; that work gates on B5 (syscall ABI, ADR-0030) + B6 (first userspace "hello") which together provide: kernel mappings in the userspace AS (currently not present — the AS holds only the image + stack), EL0-ready context register file (currently the `Task` struct carries no PC/SP), and the syscall entry path that lets the task make its first kernel call. The future `task_create_from_image` wrapper that turns a `LoadedImage` into a `CapHandle{CapObject::Task(...)}` lands with B5 or B6 per the [phase-b §B4 §Revision-notes rider](../../../roadmap/phases/phase-b.md#milestone-b4--task-loader) added in the ADR-0029 propose commit. 
First runtime caller of [UNSAFE-2026-0025](../../../audits/unsafe-log.md)'s per-call `Mmu::map` path post-bootstrap; lifts its `Pending QEMU smoke verification` status note via Amendment when the smoke trace exercises a real mapping. @@ -29,10 +30,10 @@ The work touches three subsystems already in tree (PMM, AddressSpace, exception- A checklist of items that must be true for the task to move from `In Review` to `Done`. -- [ ] **ADR-0029 Accepted** before code lands. Same-day Accept after careful re-read is permitted per [ADR-0025 §Revision notes](../../../decisions/0025-adr-governance-amendments.md); Propose commit is separate from the Accept commit per [`write-adr` skill §10](../../../../.agents/skills/write-adr/SKILL.md). -- [ ] **`pub fn load_image(image: &[u8], pmm: &mut Pmm<...>, mmu: &M, table: &mut CapabilityTable, as_arena: &mut AddressSpaceArena, parent_as_cap: CapHandle, new_rights: CapRights, image_base_va: VirtAddr, stack_size_pages: usize) -> Result<LoadedImage, LoadError>`** lands in `kernel/src/obj/task_loader.rs` (new module — Approach decides whether to nest under `obj/` or `mm/`). The signature explicitly takes `&mut Pmm<...>` (**not** `&mut dyn FrameProvider`) because the rollback path requires `Pmm::free_frame` which is not on the trait surface. `parent_as_cap` authorises minting a new child AS via `cap_create_address_space`. **DERIVE rights enforcement** happens inside `cap_create_address_space`'s step 2a (see [`kernel/src/mm/address_space.rs`](../../../../kernel/src/mm/address_space.rs) — `if !parent_cap.rights().contains(CapRights::DERIVE) { return Err(InsufficientRights); }`); T-019's pre-step does a lookup + `CapKind::AddressSpace` sanity check only (the `resolve_address_space_cap` helper at [`kernel/src/mm/address_space.rs`](../../../../kernel/src/mm/address_space.rs) is kind-only by design per its v1 rights-model doc-comment), and lets the DERIVE check surface via `AddressSpaceCreationFailed(CapError(InsufficientRights))` rather than duplicating the check.
`new_rights` is the rights set the new AS cap carries. `image_base_va` is the userspace-linker-defined base VA the image is laid out at; `stack_size_pages` is the stack region size in 4 KiB pages (minimum 1 page = 4 KiB ≥ the [`ContextSwitch::init_context`](../../../../hal/src/cpu.rs) trait's 512-byte stack-backing requirement; 16-byte AAPCS64 alignment is satisfied by construction since `stack_top_va = image_base_va + image_pages * PAGE_SIZE + stack_size_pages * PAGE_SIZE` is `PAGE_SIZE`-aligned ⇒ 16-byte aligned). -- [ ] **`pub struct LoadedImage { pub as_cap: CapHandle, pub entry_va: VirtAddr, pub stack_top_va: VirtAddr, pub image_bytes: usize, pub stack_bytes: usize }`** is the loader's success return type. `as_cap` is the `CapHandle` for the newly-minted AS (wraps `CapObject::AddressSpace(handle)` internally); `entry_va` equals `image_base_va` (raw flat format: offset 0 ↔ entry point); `stack_top_va` is **one-past-the-highest mapped VA** of the stack region — i.e., the first VA above the stack that is NOT mapped. The stack mapped range is `[stack_base, stack_top_va)` half-open; `sp = stack_top_va` at task-creation initialisation is correct because the first userspace push decrements `sp` (e.g. `sp -= 16`) which lands inside the mapped range. `image_bytes` is the byte-count of the image as loaded into the AS (may be smaller than `image_pages * PAGE_SIZE` because tail-zeroing happens on the partial last page). -- [ ] **`pub enum LoadError`** with **explicit rollback-discharging** variants: +- [x] **ADR-0029 Accepted** before code lands. Same-day Accept after careful re-read is permitted per [ADR-0025 §Revision notes](../../../decisions/0025-adr-governance-amendments.md); Propose commit is separate from the Accept commit per [`write-adr` skill §10](../../../../.agents/skills/write-adr/SKILL.md).
+- [x] **`pub fn load_image(image: &[u8], pmm: &mut Pmm<...>, mmu: &M, table: &mut CapabilityTable, as_arena: &mut AddressSpaceArena, parent_as_cap: CapHandle, new_rights: CapRights, image_base_va: VirtAddr, stack_size_pages: usize) -> Result<LoadedImage, LoadError>`** lands in `kernel/src/obj/task_loader.rs` (new module — Approach decides whether to nest under `obj/` or `mm/`). The signature explicitly takes `&mut Pmm<...>` (**not** `&mut dyn FrameProvider`) because the rollback path requires `Pmm::free_frame` which is not on the trait surface. `parent_as_cap` authorises minting a new child AS via `cap_create_address_space`. **DERIVE rights enforcement** happens inside `cap_create_address_space`'s step 2a (see [`kernel/src/mm/address_space.rs`](../../../../kernel/src/mm/address_space.rs) — `if !parent_cap.rights().contains(CapRights::DERIVE) { return Err(InsufficientRights); }`); T-019's pre-step does a lookup + `CapKind::AddressSpace` sanity check only (the `resolve_address_space_cap` helper at [`kernel/src/mm/address_space.rs`](../../../../kernel/src/mm/address_space.rs) is kind-only by design per its v1 rights-model doc-comment), and lets the DERIVE check surface via `AddressSpaceCreationFailed(CapError(InsufficientRights))` rather than duplicating the check. `new_rights` is the rights set the new AS cap carries. `image_base_va` is the userspace-linker-defined base VA the image is laid out at; `stack_size_pages` is the stack region size in 4 KiB pages (minimum 1 page = 4 KiB ≥ the [`ContextSwitch::init_context`](../../../../hal/src/cpu.rs) trait's 512-byte stack-backing requirement; 16-byte AAPCS64 alignment is satisfied by construction since `stack_top_va = image_base_va + image_pages * PAGE_SIZE + stack_size_pages * PAGE_SIZE` is `PAGE_SIZE`-aligned ⇒ 16-byte aligned). +- [x] **`pub struct LoadedImage { pub as_cap: CapHandle, pub entry_va: VirtAddr, pub stack_top_va: VirtAddr, pub image_bytes: usize, pub stack_bytes: usize }`** is the loader's success return type.
`as_cap` is the `CapHandle` for the newly-minted AS (wraps `CapObject::AddressSpace(handle)` internally); `entry_va` equals `image_base_va` (raw flat format: offset 0 ↔ entry point); `stack_top_va` is **one-past-the-highest mapped VA** of the stack region — i.e., the first VA above the stack that is NOT mapped. The stack mapped range is `[stack_base, stack_top_va)` half-open; `sp = stack_top_va` at task-creation initialisation is correct because the first userspace push decrements `sp` (e.g. `sp -= 16`) which lands inside the mapped range. `image_bytes` is the byte-count of the image as loaded into the AS (may be smaller than `image_pages * PAGE_SIZE` because tail-zeroing happens on the partial last page). +- [x] **`pub enum LoadError`** with **explicit rollback-discharging** variants: - `InvalidImage` — `image.is_empty()` returns this **before** any state change (no rollback needed). - `InvalidStackSize` — `stack_size_pages == 0` returns this before any state change. - `MisalignedImageBaseVa(VirtAddr)` — `image_base_va` is not `PAGE_SIZE`-aligned. Returns **before** any state change (no `cap_create_address_space` call, no PMM mutation). Wraps the offending `VirtAddr` for diagnostics. Pre-fix (PR #31 review-round 4 P2), this case surfaced as `MapFailed(MmuMapError(MisalignedAddress))` from the first `cap_map` call inside the image-page loop, by which point `cap_create_address_space` had already allocated the root L0 frame — which then leaked via the v1 baseline rollback. The new argument-preflight catches the misalignment at row 1 and keeps PMM byte-stable on rejection. @@ -43,15 +44,15 @@ A checklist of items that must be true for the task to move from `In Review` to - `AddressSpaceCreationFailed(AddressSpaceError)` — `cap_create_address_space` failure (covers `InsufficientRights` if `parent_as_cap` lacks DERIVE, plus the structurally-unreachable-in-v1 `CapsExhausted` / `DerivationTooDeep` / `ArenaFull` paths). 
T-018's preflight discipline guarantees no PMM/arena state was committed on failure; no rollback needed at this layer. - `OutOfFrames` — direct `Pmm::alloc_frame()` returned `None` mid-loop (image or stack page allocation). Should be structurally unreachable post-FrameBudgetExceeded preflight under v1's single-threaded model, but kept as a defensive variant for budget-calculation bugs and future-concurrency scenarios. **Rollback required:** same shape as `MapFailed` below — undo allocated frames + committed mappings + revoke AS cap. - `MapFailed(AddressSpaceError)` — `cap_map` failure (typically `MmuMapError(MmuError::OutOfFrames)` if the intermediate-frame budget was underestimated, `MmuMapError(MmuError::AlreadyMapped)` if the VA range collides with an existing mapping, or `MmuMapError(MmuError::BlockMapped)` if the VA falls inside a block descriptor). **Rollback required:** leaf frames + partial mappings undone, AS cap invalidated via `cap_drop(loaded_as_cap)`; canonical contract + the cap_drop-vs-cap_revoke + v1 leak baseline + B5+ reclaim ADR pointer in §Approach §"Rollback contract (explicit)" below. -- [ ] **Frame budget preflight is a safe upper bound, not exact.** The check is `1 + image_pages + stack_pages + intermediate_budget <= pmm.stats().free_frames`, where the leading `1` accounts for the root L0 frame that `cap_create_address_space` will allocate. `intermediate_budget = 6` for v1's fresh-AS scenario (worst case: 3 intermediate frames per contiguous VA range × 2 ranges for image + stack). A future exact calculation per VA decomposition (e.g., image + stack sharing an L1 if their VA ranges are close enough to land in the same 1 GiB block) is a possible refinement, but the v1 upper-bound of 6 over-allocates at most ~24 KiB of frame headroom under any realistic image/stack VA placement — well within the 128 MiB extent's budget. Documented inline in `load_image`'s rustdoc. 
-- [ ] **Image page-rounding + tail zeroing.** `image_pages = (image.len() + PAGE_SIZE - 1) / PAGE_SIZE` (ceiling division). The last image page contains `image.len() % PAGE_SIZE` payload bytes followed by zero-fill to the page boundary (the zero-fill is automatic per UNSAFE-2026-0026's PMM contract — `alloc_frame` zero-initialises, then the loader's byte-copy overwrites only the first `image.len()` bytes; the remainder of the last page stays zero). -- [ ] **Mapping flags fixed:** image region maps with `MappingFlags::USER | MappingFlags::EXECUTE`; stack region maps with `MappingFlags::USER | MappingFlags::WRITE`. Per-section flags (`.text` RX-only, `.rodata` R-only, `.data` RW-only, NX on non-text) deferred to ADR-0034 (placeholder; B5+ trigger). -- [ ] **`LoadError` covers every error path; no `panic!` / `unwrap` / `expect` on the kernel-reachable path.** `cargo kernel-clippy -D warnings` clean (the kernel crate's `#![deny(clippy::panic)]` discipline enforces this). -- [ ] **The loader produces a `LoadedImage` but does NOT mint a `CapHandle{CapObject::Task(...)}`.** Per §Context's scope boundary, runnability gates on B5/B6. The smoke trace shows the loader returning a `LoadedImage` + a new banner line (e.g. `tyrne: image loaded (entry = 0x..., sp = 0x...; image bytes N, stack bytes M, AS cap = )`) inserted in a stable position in the boot sequence. Full demo through `tyrne: all tasks complete` still passes; `-d int,unimp,guest_errors` reports no new event classes beyond the pre-existing PL011-disabled-UART noise. -- [ ] **§Approach §Simulation table — every row mapped to a verification artefact** per the [`write-adr` skill §Procedure step 5 — "Decision outcome → Simulation row-to-verification mapping"](../../../../.agents/skills/write-adr/SKILL.md) discipline codified in commit `3ec94b0`. 
-- [ ] **`unsafe` discipline for the frame byte-copy site — new audit entry required.** The loader copies `image[i*PAGE_SIZE..]` into the freshly-allocated frame via `core::ptr::copy_nonoverlapping(src_ptr, frame_pa as *mut u8, copy_len)` (NOT `write_volatile` — the target is plain RAM, not a memory-ordered page-table descriptor; volatile is unnecessary and obscures the operation). The `unsafe` block needs **its own** audit entry — UNSAFE-2026-0025's scope is page-table descriptor writes (specific volatile + ordering semantics), which is a different safety argument from "kernel writes bytes into a freshly-PMM-allocated, identity-mapped, exclusively-owned RAM frame". UNSAFE-2026-0026's scope is the PMM's zero-fill at `alloc_frame` (also distinct — same site shape but different operation: zero-fill vs byte-copy). The expected shape is a new entry like **UNSAFE-2026-0027 — `task_loader::load_image` frame byte-copy via `core::ptr::copy_nonoverlapping`** under the Operation / Invariants / Rejected-alternatives discipline, covering: (a) the raw-pointer construction `frame_pa as *mut u8` (PMM's contract guarantees alignment + exclusive ownership + identity-mapping in v1's kernel AS); (b) the `copy_nonoverlapping` itself (source slice has at least `copy_len` bytes from the image bounds check; destination is exactly `copy_len` bytes from the PAGE_SIZE-aligned frame; non-overlapping is satisfied because source is `.rodata` and destination is freshly-allocated PMM RAM); (c) why `copy_nonoverlapping` was chosen over `write_volatile` (RAM-to-RAM byte copy needs no memory ordering; volatile would falsely imply MMIO semantics). The audit-entry shape — whether to extend UNSAFE-2026-0026 via Amendment or open UNSAFE-2026-0027 standalone — is decided at first implementation commit per the [`justify-unsafe`](../../../../.agents/skills/justify-unsafe/SKILL.md) skill's audit-tag scoping discipline. 
-- [ ] **`cargo fmt --check`, `cargo host-clippy -D warnings`, `cargo kernel-clippy -D warnings`, `cargo host-test`, `cargo kernel-build`** all clean. -- [ ] **Documentation:** a new short chapter `docs/architecture/task-loader.md` (file does not yet exist; lands with implementation) describing the loader sequence + the userspace linker layout this task assumes + the explicit rollback contract; cross-link from [`memory-management.md` §"Address-space objects"](../../../architecture/memory-management.md) and from [`boot.md` §Stage 3](../../../architecture/boot.md). +- [x] **Frame budget preflight is a safe upper bound, not exact.** The check is `1 + image_pages + stack_pages + intermediate_budget <= pmm.stats().free_frames`, where the leading `1` accounts for the root L0 frame that `cap_create_address_space` will allocate. `intermediate_budget = 6` for v1's fresh-AS scenario (worst case: 3 intermediate frames per contiguous VA range × 2 ranges for image + stack). A future exact calculation per VA decomposition (e.g., image + stack sharing an L1 if their VA ranges are close enough to land in the same 1 GiB block) is a possible refinement, but the v1 upper-bound of 6 over-allocates at most ~24 KiB of frame headroom under any realistic image/stack VA placement — well within the 128 MiB extent's budget. Documented inline in `load_image`'s rustdoc. +- [x] **Image page-rounding + tail zeroing.** `image_pages = (image.len() + PAGE_SIZE - 1) / PAGE_SIZE` (ceiling division). When `image.len()` is not a multiple of `PAGE_SIZE`, the last image page contains `image.len() % PAGE_SIZE` payload bytes followed by zero-fill to the page boundary; when it is an exact multiple, the last page is entirely payload and there is no tail to zero (the zero-fill is automatic per UNSAFE-2026-0026's PMM contract — `alloc_frame` zero-initialises, then the loader's byte-copy overwrites only the first `image.len()` bytes; the remainder of the last page stays zero). +- [x] **Mapping flags fixed:** image region maps with `MappingFlags::USER | MappingFlags::EXECUTE`; stack region maps with `MappingFlags::USER | MappingFlags::WRITE`.
Per-section flags (`.text` RX-only, `.rodata` R-only, `.data` RW-only, NX on non-text) deferred to ADR-0034 (placeholder; B5+ trigger). +- [x] **`LoadError` covers every error path; no `panic!` / `unwrap` / `expect` on the kernel-reachable path.** `cargo kernel-clippy -D warnings` clean (the kernel crate's `#![deny(clippy::panic)]` discipline enforces this). +- [x] **The loader produces a `LoadedImage` but does NOT mint a `CapHandle{CapObject::Task(...)}`.** Per §Context's scope boundary, runnability gates on B5/B6. The smoke trace shows the loader returning a `LoadedImage` + a new banner line (e.g. `tyrne: image loaded (entry = 0x..., sp = 0x...; image bytes N, stack bytes M, AS cap = <handle>)`) inserted in a stable position in the boot sequence. Full demo through `tyrne: all tasks complete` still passes; `-d int,unimp,guest_errors` reports no new event classes beyond the pre-existing PL011-disabled-UART noise. +- [x] **§Approach §Simulation table — every row mapped to a verification artefact** per the [`write-adr` skill §Procedure step 5 — "Decision outcome → Simulation row-to-verification mapping"](../../../../.agents/skills/write-adr/SKILL.md) discipline codified in commit `3ec94b0`. +- [x] **`unsafe` discipline for the frame byte-copy site — new audit entry required.** The loader copies `image[i*PAGE_SIZE..]` into the freshly-allocated frame via `core::ptr::copy_nonoverlapping(src_ptr, frame_pa as *mut u8, copy_len)` (NOT `write_volatile` — the target is plain RAM, not a memory-ordered page-table descriptor; volatile is unnecessary and obscures the operation). The `unsafe` block needs **its own** audit entry — UNSAFE-2026-0025's scope is page-table descriptor writes (specific volatile + ordering semantics), which is a different safety argument from "kernel writes bytes into a freshly-PMM-allocated, identity-mapped, exclusively-owned RAM frame".
UNSAFE-2026-0026's scope is the PMM's zero-fill at `alloc_frame` (also distinct — same site shape but different operation: zero-fill vs byte-copy). The expected shape is a new entry like **UNSAFE-2026-0027 — `task_loader::load_image` frame byte-copy via `core::ptr::copy_nonoverlapping`** under the Operation / Invariants / Rejected-alternatives discipline, covering: (a) the raw-pointer construction `frame_pa as *mut u8` (PMM's contract guarantees alignment + exclusive ownership + identity-mapping in v1's kernel AS); (b) the `copy_nonoverlapping` itself (source slice has at least `copy_len` bytes from the image bounds check; destination is exactly `copy_len` bytes from the PAGE_SIZE-aligned frame; non-overlapping is satisfied because source is `.rodata` and destination is freshly-allocated PMM RAM); (c) why `copy_nonoverlapping` was chosen over `write_volatile` (RAM-to-RAM byte copy needs no memory ordering; volatile would falsely imply MMIO semantics). The audit-entry shape — whether to extend UNSAFE-2026-0026 via Amendment or open UNSAFE-2026-0027 standalone — is decided at first implementation commit per the [`justify-unsafe`](../../../../.agents/skills/justify-unsafe/SKILL.md) skill's audit-tag scoping discipline. +- [x] **`cargo fmt --check`, `cargo host-clippy -D warnings`, `cargo kernel-clippy -D warnings`, `cargo host-test`, `cargo kernel-build`** all clean. +- [x] **Documentation:** a new short chapter `docs/architecture/task-loader.md` (file does not yet exist; lands with implementation) describing the loader sequence + the userspace linker layout this task assumes + the explicit rollback contract; cross-link from [`memory-management.md` §"Address-space objects"](../../../architecture/memory-management.md) and from [`boot.md` §Stage 3](../../../architecture/boot.md). ## Out of scope @@ -127,6 +128,7 @@ Listed by error variant for unambiguous reviewer audit. 
**Critical scope note:** | Date | Reviewer | Notes | |------|----------|-------| +| 2026-05-16 | @cemililik (merge confirmation) | **PR #31 merged into `main` at commit `7f876af`** ("Merge pull request #31 from cemililik/t-019-task-loader"). The branch arc continued past the review-round-4 row below with two further follow-up commits not separately rowed here: `5078944` (review-round 5 — P1 + P2 + P3 follow-up; added one PMM host test, taking the suite to **260**) and `eb14c51` (review-round 6 — 5 valid findings). Status flips `In Review → Done`; `date_done: 2026-05-16`; all acceptance-criteria checkboxes above marked satisfied (no acceptance-criteria item was deferred — the B5/B6 deferrals live in §Out of scope). **Host-test count at merge: 260/260** (42 hal + 175 kernel + 43 test-hal); the round-4/round-3 rows below state **259** because that was accurate when written, before the round-5 PMM test landed. All gates clean at merge; QEMU smoke byte-stable through `tyrne: all tasks complete`. B4 implementation half is complete; the B4 closure trio (business + security + performance) is the next review trigger and has not yet fired. | | 2026-05-14 | @cemililik (+ Claude Sonnet 4.6 agent) | Task opened at `Draft` paired with ADR-0029 propose commit (Phase B / Milestone B4). Gates on ADR-0029 `Accepted` before implementation begins. Will move to `In Progress` after ADR Accept; status flips to `In Review` after the full acceptance-criteria checklist passes locally + bot-review-round arc settles. | | 2026-05-14 | @cemililik + reviewer agent (pre-Accept review #1) | Pre-Accept review surfaced 8 findings (P1×4 + P2×3 + P3×1). All verified valid against current code state. 
Architectural decision: T-019 returns a `LoadedImage` opaque struct rather than a `CapHandle{CapObject::Task(...)}`; the "loaded-but-not-runnable" framing matches phase-b §B4's acceptance criteria ("report the entry point and initial stack pointer") and defers the runnability prerequisites (kernel mappings in userspace AS, EL0 context, syscall entry) to B5/B6. Task user-story rewritten under this framing: signature expanded (`Pmm` direct + `Mmu` + `AddressSpaceArena` + `CapRights` + `image_base_va` + `stack_size_pages` explicit), error enum renamed `LoadError` with explicit rollback-discharging variants, §Simulation table refined to 7 rows with row-to-test mapping per the [`write-adr` skill row-to-verification mapping discipline](../../../../.agents/skills/write-adr/SKILL.md). ADR-0029 also touched: §Decision outcome's "(RW + NX initially)" flag claim moved out (flags are loader/T-019 concern); §Positive Consequences rewritten with honest page-loop + intermediate-frame-budget complexity. Phase-b §B4 gained a §Revision-notes rider recording the `LoadedImage` / `task_create_from_image` split. | | 2026-05-14 | @cemililik + reviewer agent (pre-Accept review #2) | Second pre-Accept pass surfaced 9 findings (P1×3 + P2×4 + P2/P3×1 + P3×1). All verified valid against current code state. P1 corrections: (1) Rollback cap-side cleanup scope clarified — the v1 cap-side cleanup is cap-table-only (`free_slot` bumps generation + clears `entry`) and does **not** free the AS arena slot, root L0 frame, or intermediate page-table frames. *(Note: this row's earlier draft used `cap_revoke` for the cap-side step; review #3 corrected this to `cap_drop` because `cap_revoke` walks descendants and leaves the source cap valid + requires `CapRights::REVOKE`, neither of which fits a freshly-minted leaf cap. The "leak baseline" description above stands regardless of which cap-side API is used.)* Full reclaim deferred to future `MemoryRegionCap` + per-AS destroy ADR. 
(2) Frame budget math corrected: `1 + image_pages + stack_pages + intermediate_budget <= pmm.stats().free_frames` (the leading `1` accounts for `cap_create_address_space`'s root L0 allocation; field name corrected from invented `free_count` to actual `free_frames`). (3) DERIVE enforcement location clarified: T-019's step 2 is lookup + kind sanity only; DERIVE rights check is delegated to `cap_create_address_space`'s step 2a and surfaces as `AddressSpaceCreationFailed(CapError(InsufficientRights))`. P2 corrections: (4) phase-b §B4 §2 `MemoryRegionCap` wording corrected to `cap_create_address_space + cap_map` (current reality); MemoryRegionCap explicitly deferred to B5+. (5) `LoadError::OutOfFrames` variant added for direct `pmm.alloc_frame()` failures mid-loop (defensive — should be structurally unreachable post-FrameBudgetExceeded preflight in v1 single-threaded model). (6) Frame byte-copy changed from `write_volatile` (wrong abstraction — implies MMIO ordering for plain RAM) to `core::ptr::copy_nonoverlapping`; new audit-entry (likely UNSAFE-2026-0027) explicitly required for the byte-copy site with `justify-unsafe` skill discipline; entry shape (standalone vs UNSAFE-2026-0026 Amendment) decided at first implementation commit. (7) "Frame budget exact, not over-conservative" wording rewritten as "safe upper bound" — the `intermediate_budget = 6` is an upper bound, not an exact calculation. P3 corrections: (8) ADR-0029 §Build pipeline split into B4-T-019-placeholder vs B6-userland-crate paths. (9) `stack_top_va` semantics corrected from "highest mapped VA" to "one-past-the-highest mapped address" (half-open `[stack_base, stack_top_va)` range convention; matches AAPCS64 `sp` initialisation correctly). | diff --git a/docs/architecture/README.md b/docs/architecture/README.md index e117d72..eeaf380 100644 --- a/docs/architecture/README.md +++ b/docs/architecture/README.md @@ -17,7 +17,8 @@ The architecture is being written in phases. 
Many documents listed below are pla | [`scheduler.md`](scheduler.md) | Cooperative FIFO scheduler: ready queue, idle task, raw-pointer IPC bridge, ContextSwitch trait. | Accepted (v0.0.1 — single-core, no preemption) | | [`ipc.md`](ipc.md) | Inter-process communication: synchronous send/recv, endpoint state machine, capability transfer, scheduler-bridge wrappers. | Accepted (v0.0.1 — depth-1 endpoints) | | [`exceptions.md`](exceptions.md) | Exception vector table, IRQ dispatch, GIC v2 driver, generic-timer IRQ wiring, idle WFI activation. | Accepted (v0.0.1 — T-012 Done 2026-04-28 via PR #10 merge; design + implementation match; maintainer-side QEMU smoke verification of the deliberate-deadline path remains pre-B1-closure work) | -| `memory-management.md` | Physical + virtual memory, MMU/paging, allocators. | Planned — B2 | +| [`memory-management.md`](memory-management.md) | Physical + virtual memory, MMU/paging, allocators, address-space objects, task loader. | Accepted (v0.0.1 — MMU/PMM/AddressSpace/loader; T-016..T-019) | +| [`task-loader.md`](task-loader.md) | Task loader: raw-flat image → populated address space; rollback contract; audit-log surface. | Accepted (v0.0.1 — T-019) | | `drivers.md` | Userspace driver model, capability grants, driver API. | Planned | | `userspace.md` | Init process, system services, shell, root of trust. | Planned | diff --git a/docs/architecture/boot.md b/docs/architecture/boot.md index 33be179..340ffe2 100644 --- a/docs/architecture/boot.md +++ b/docs/architecture/boot.md @@ -1,10 +1,10 @@ # Boot flow -Tyrne boots in four stages: QEMU (or the board firmware) hands control to the ELF entry point, a short assembly stub sets up the runtime environment, a Rust entry function (`kernel_entry`) wires the BSP together, and the portable `tyrne_kernel::run` function takes over. This document is the "how" for Phase 4c on `bsp-qemu-virt`; the "why" for each concrete choice lives in [ADR-0012](../decisions/0012-boot-flow-qemu-virt.md). 
Each future BSP will follow the same stage structure with its own addresses and peripherals. +Tyrne boots in four stages: QEMU (or the board firmware) hands control to the ELF entry point, a short assembly stub sets up the runtime environment, a Rust entry function (`kernel_entry`) wires the BSP together and brings up every kernel subsystem, and finally `start()` transfers control to the cooperative scheduler. This document is the "how" for Phase 4c on `bsp-qemu-virt`; the "why" for each concrete choice lives in [ADR-0012](../decisions/0012-boot-flow-qemu-virt.md). Each future BSP will follow the same stage structure with its own addresses and peripherals. ## Context -The overall three-layer architecture is described in [`overview.md`](overview.md), and the HAL traits the kernel uses are in [`hal.md`](hal.md). This document focuses specifically on the boot path from reset to `kernel_main` steady state, as implemented for the QEMU `virt` aarch64 target. +The overall three-layer architecture is described in [`overview.md`](overview.md), and the HAL traits the kernel uses are in [`hal.md`](hal.md). This document focuses specifically on the boot path from reset to scheduler steady state, as implemented for the QEMU `virt` aarch64 target. ## Design @@ -15,7 +15,7 @@ The four boot stages, each with a tightly bounded responsibility: 1. **Firmware / loader.** QEMU's `-kernel` flag loads the ELF image at its linked-in load address (`0x40080000` per [ADR-0012](../decisions/0012-boot-flow-qemu-virt.md)), sets the PC to the ELF's entry point (`_start`), and enters at EL1 (default QEMU `virt`) or EL2 (`-machine virtualization=on`, or most real-hardware boot stacks delivering at EL2). The device-tree blob address is placed in `x0`; v1 ignores it. 2. **Assembly stub (`_start`).** Three phases: first, K3-12 (interrupts masked via `MSR DAIFSet, #0xf`) executes at the very head of the reset vector so a spurious interrupt cannot escape into an uninstalled vector table. 
Second, the EL drop (per [ADR-0024](../decisions/0024-el-drop-policy.md)) reads `CurrentEL`; on EL2 it configures `HCR_EL2` / `SPSR_EL2` / `ELR_EL2` and `eret`s to a post-drop label, on EL1 it falls through, on EL3 (or any unexpected EL) it halts in a named-label `wfe`-loop (`halt_unsupported_el: wfe ; b halt_unsupported_el`) — there is no Rust panic infrastructure pre-`kernel_entry`. Third, the conventional setup: load `__stack_top` into `SP`, enable FP/SIMD via `CPACR_EL1`, zero the BSS range (`__bss_start` .. `__bss_end`) using 8-byte stores, and branch to `kernel_entry`. If `kernel_entry` ever returns (it shouldn't), the stub falls into a defensive `wfe ; b 2b` halt loop. After phase two, every later instruction runs at EL1 — the precondition T-009's `UNSAFE-2026-0016` runtime check now relies on as a load-bearing invariant rather than a defensive guard. 3. **`kernel_entry` (Rust, in the BSP).** The first Rust code to run. Constructs the BSP's concrete HAL instances (for Phase 4c: the `Pl011Uart` console), installs the EL1 vector table (T-012), captures the boot-to-end timestamp, **activates the MMU** via `mmu_bootstrap` (T-016 / ADR-0027 — this lands the v1 identity layout in `TTBR0_EL1` and flips `SCTLR_EL1.{M,I,C} = 1`; every subsequent MMIO access goes through device-nGnRnE attributes), **initialises the Physical Memory Manager** (T-017 / ADR-0035 — bitmap allocator over the 128 MiB RAM extent with two reserved ranges covering the QEMU firmware region and the kernel image / `.bss` / `.boot_pt` / boot stack), **initialises the address-space arena** (T-018 / ADR-0028 — wraps the already-active L0 root frame as `AddressSpaceArena` slot 0 + mints the bootstrap AS authority cap; no `Mmu::create_address_space` call on the live root per ADR-0028 §Simulation row 0), **loads the embedded userspace placeholder image** via [`task_loader::load_image`](task-loader.md) (T-019 / ADR-0029 — produces a `LoadedImage` describing a freshly populated AS for the embedded `mov 
w0, #42; ret` blob; **does NOT execute** — runnability gates on B5/B6 per phase-b §B4 §Revision-notes; first runtime exerciser of [UNSAFE-2026-0025](../audits/unsafe-log.md) post-bootstrap `Mmu::map`, [UNSAFE-2026-0026](../audits/unsafe-log.md) `Pmm::alloc_frame` zero-fill, and [UNSAFE-2026-0027](../audits/unsafe-log.md) loader byte-copy), initialises the GIC, unmasks `DAIF.I`, prints the timer banner, then sets up the kernel-object arenas + capability tables + IPC + scheduler before transferring control. Marked `#[no_mangle] extern "C"` so the assembly stub can find it. -4. **`tyrne_kernel::run` (portable kernel).** Architecture- and board-agnostic. In Phase 4c v0.0.1 it writes a greeting to the console and halts with a `spin_loop` idle. Subsequent phases will bring up the scheduler, IPC, and capability system here before reaching steady state. +4. **Scheduler start (`start`).** The final call in `kernel_entry` is `start(SCHED.as_mut_ptr(), cpu, activate_address_space)`, which hands control to the cooperative FIFO scheduler and never returns; the scheduler runs the first ready task and drives the cooperative IPC demo until the system halts (see [scheduler.md](scheduler.md)). An early design intended a portable `tyrne_kernel::run` that a BSP would delegate to; the B-phase brought subsystem bring-up into `kernel_entry` instead, and `start` (defined in `kernel/src/sched/mod.rs`) is the actual handoff point. Consolidating the bring-up back into a portable kernel entry is a possible future refactor. ### Boot-time sequence @@ -24,7 +24,6 @@ sequenceDiagram participant QEMU as QEMU virt / firmware participant Asm as _start (asm stub) participant KE as kernel_entry (BSP, Rust) - participant K as tyrne_kernel::run participant U as PL011 UART QEMU->>Asm: PC = _start, DTB in x0 (ignored), entry EL = 1 or 2 @@ -155,7 +154,7 @@ post_eret: ### Panic path -When `tyrne_kernel::run` or any later kernel code panics, control reaches the BSP's `#[panic_handler]` function. 
In Phase 4c, that handler: +When `kernel_entry`, the scheduler, or any later kernel code panics, control reaches the BSP's `#[panic_handler]` function. In Phase 4c, that handler: 1. Reconstructs the `Pl011Uart` (the original instance may not be reachable from the panic context). 2. Writes a short marker (`"\n!! tyrne panic !!\n"`). diff --git a/docs/architecture/hal.md b/docs/architecture/hal.md index 0ddc0d4..78a9b16 100644 --- a/docs/architecture/hal.md +++ b/docs/architecture/hal.md @@ -42,15 +42,15 @@ flowchart TB TIrq["IrqController"] TTimer["Timer"] TConsole["Console"] - TIommu["Iommu (optional)"] + TIommu["Iommu (planned — stub)"] end subgraph BSP["BSP (per-board, selected at build time)"] - BCpu["aarch64 Cpu impl"] + BCpu["aarch64 Cpu + ContextSwitch impl"] BMmu["VMSAv8 Mmu impl"] BIrq["GICv2 / GIC-400 impl"] BTimer["ARM generic timer impl"] BConsole["PL011 / mini-UART impl"] - BIommu["SMMUv3 impl (bsp-qemu-virt)"] + BIommu["Iommu impl (planned — none yet)"] end subgraph HW["Hardware"] CPU["CPU cores"] @@ -73,17 +73,26 @@ This section describes the traits at a high level. Each trait will have a dedica #### `Cpu` -Privileged CPU state and control. Implementations are architecture-specific. +Privileged CPU state and control. Implementations are architecture-specific. The trait as shipped ([`hal/src/cpu.rs`](../../hal/src/cpu.rs)) carries exactly these methods: -- Current core identifier. -- Number of cores online. -- Enable / disable IRQs at the CPU level (PSTATE on aarch64). -- `wait_for_interrupt()` — low-power halt until the next interrupt. -- Context save / restore primitives used by the scheduler. -- Memory barriers that Rust's atomics do not cover. -- Secondary-core start via PSCI (or architecturally equivalent mechanism). +- `current_core_id()` — identifier of the core this call runs on. +- `disable_irqs()` / `restore_irq_state(state)` — mask CPU-level interrupts and later restore the saved mask (PSTATE/DAIF on aarch64). 
Paired to form a critical section; the `IrqGuard` RAII wrapper is layered on top. +- `wait_for_interrupt()` — low-power halt until the next interrupt (`WFI` on aarch64). +- `instruction_barrier()` — synchronize the instruction stream after writing privileged system registers (`ISB` on aarch64). Data memory barriers are covered by Rust's `core::sync::atomic::fence` and are not on this trait. -Most methods are `unsafe fn`. The kernel wraps them with safe helpers that encode the preconditions. +Context save / restore is **not** on `Cpu`; it lives in the separate `ContextSwitch` trait (see below). The trait deliberately exposes no "number of cores online," no secondary-core start (PSCI), and no `enable_interrupts()`; those are **future / planned** surfaces that arrive with the multi-core ADR. Boot-time interrupt unmasking is done by the BSP via DAIF manipulation (`restore_irq_state`) plus the GIC enable sequence, rather than through a dedicated `enable_interrupts()` method on `Cpu`. + +Most methods touch privileged state internally; the kernel wraps them with safe helpers that encode the preconditions. + +#### `ContextSwitch` + +Cooperative register save/restore for task switching. Settled by [ADR-0020](../decisions/0020-cpu-trait-v2-context-switch.md), which split this out of `Cpu` so that `Cpu` stays object-safe and so the `unsafe` audit surface around register manipulation is concentrated in one place rather than diffused across the CPU trait. Defined in [`hal/src/context_switch.rs`](../../hal/src/context_switch.rs). + +- An associated `TaskContext` type — the BSP-specific saved-register layout (`Default + Send`). +- `context_switch(current, next)` — `unsafe`; atomically saves the calling task's callee-saved register set into `current` and restores `next`. The caller must have interrupts disabled across the call. +- `init_context(ctx, entry, stack_top)` — `unsafe`; writes an initial register state so the first restore begins executing `entry` on `stack_top`.
+ +The scheduler is generic over this trait: `Scheduler<C: ContextSwitch + Cpu>` — the BSP type provides both the CPU control surface and the context-switch primitive, and the scheduler never inspects the saved context's contents (see [scheduler.md](scheduler.md)). #### `Mmu` @@ -142,15 +151,15 @@ A byte sink for the earliest possible diagnostic output. The console is used during boot (before the log service is up), during panic (when nothing else can be trusted), and — optionally, gated by a build flag — for debug output in development builds. -#### `Iommu` (platforms with an IOMMU) +#### `Iommu` (planned — stub) -Programs the system IOMMU (SMMUv3 on aarch64 platforms that have one) to scope a peripheral's DMA to the regions granted to its driver. +Programming the system IOMMU to scope a peripheral's DMA to the regions granted to its driver. The eventual responsibilities are: - Install a stream-to-address-space mapping. - Update or remove such a mapping. - Invalidate IOMMU caches when mappings change. -See [security-model.md — Trust boundary 7](security-model.md) for the security role the IOMMU plays. `bsp-qemu-virt` implements this trait; `bsp-pi4` does not (the Pi 4 has no IOMMU) and the trait is therefore either absent or a no-op on that target — an explicit decision in a future ADR. +**Current status.** QEMU virt is GICv2 / no IOMMU in v1; the `Iommu` trait ([`hal/src/lib.rs`](../../hal/src/lib.rs)) is an empty stub (`pub trait Iommu {}`) reserved for a future SMMUv3 ADR. No BSP implements it today — `bsp-qemu-virt` does not, and `bsp-pi4` has no IOMMU hardware at all. See [security-model.md — Trust boundary 7](security-model.md) for the security role the IOMMU will play and the honest description of the present DMA-scoping gap. ### BSP structure @@ -158,7 +167,7 @@ A BSP is a Rust crate named `bsp-<board>` that provides: 1. **An entry point** — the architecture reset vector (`_start`), implemented as a small assembly stub that sets up a stack, zeros BSS, and jumps into Rust early-init. 2.
**Early-init Rust code** that configures the MMU with a minimal identity + high-half mapping, installs exception vectors, and hands control to `kernel_main(boot_info)`. -3. **Implementations of the HAL traits** — `Cpu`, `Mmu`, `IrqController`, `Timer`, `Console`, and `Iommu` where applicable. +3. **Implementations of the HAL traits** — `Cpu`, `ContextSwitch`, `Mmu`, `IrqController`, `Timer`, and `Console`. (`Iommu` is a stub today; no BSP implements it yet — see the `Iommu` trait note above.) 4. **Board-specific constants** — MMIO base addresses, IRQ numbers, expected memory layout — as a `bsp::config` module, not spread across the crate. 5. **A linker script** specifying where the kernel image is loaded, where RAM begins, and any reserved regions. @@ -185,7 +194,7 @@ Runtime multi-board support (one kernel binary that detects its host and selects | Interrupt controller | GICv2 | | Console | PL011 UART at `0x0900_0000` | | Timer | ARM generic timer | -| IOMMU | SMMUv3 (optional, enabled with `-device smmuv3`; CI uses it) | +| IOMMU | none in v1 (QEMU virt is GICv2 / no IOMMU); the `Iommu` trait is a stub reserved for a future SMMUv3 ADR | | Boot loader | none — QEMU loads the ELF and jumps to `_start` | | Secondary-core start | PSCI | | Virtio | present (virtio-mmio); used by userspace drivers in future phases | @@ -235,10 +244,10 @@ sequenceDiagram Early->>Early: install exception vectors Early->>Early: configure Cpu trait state Early->>K: kernel_main(boot_info) - K->>HAL: Cpu::enable_interrupts() K->>HAL: Mmu::activate(kernel_tt) - K->>HAL: IrqController::init() + K->>HAL: IrqController init + GIC enable sequence K->>HAL: Timer::init() + K->>HAL: unmask DAIF.I (BSP, via the CPU IRQ-state path) K->>HAL: Console::write_bytes(b"tyrne: online\n") Note over K: scheduler, init task, steady state ``` diff --git a/docs/architecture/ipc.md b/docs/architecture/ipc.md index e1e1845..d11b447 100644 --- a/docs/architecture/ipc.md +++ b/docs/architecture/ipc.md @@ -11,7 +11,7 
@@ Three Accepted ADRs and one Phase-A task fix the IPC design: - [ADR-0021: Raw-pointer scheduler IPC-bridge API](../decisions/0021-raw-pointer-scheduler-ipc-bridge.md) — the scheduler-side wrapper functions that block / yield / resume around the IPC primitives. The discipline behind the wrappers is summarised in [`scheduler.md`](scheduler.md) §"The raw-pointer bridge". - [T-003 (Phase A IPC primitive set)](../analysis/tasks/phase-a/T-003-ipc-primitives.md) and [T-005 (two-task IPC demo)](../analysis/tasks/phase-a/T-005-two-task-ipc-demo.md) shipped the implementation; [T-006](../analysis/tasks/phase-b/T-006-raw-pointer-scheduler-api.md) refactored the scheduler-side wrapper. -Why a custom IPC layer rather than reusing a port from another microkernel? ADR-0017 §"Decision drivers" enumerates: capability-system interaction (capabilities must move atomically with messages, which most ports do not), tight kernel-object coupling (endpoints share their state machine with the scheduler's wake path), and audit-friendliness (the entire IPC surface fits in one ~990-line file under `unsafe-policy.md` review). +Why a custom IPC layer rather than reusing a port from another microkernel? ADR-0017 §"Decision drivers" enumerates: capability-system interaction (capabilities must move atomically with messages, which most ports do not), tight kernel-object coupling (endpoints share their state machine with the scheduler's wake path), and audit-friendliness (the entire IPC surface fits in one ~1425-line file under `unsafe-policy.md` review). ## Design @@ -62,7 +62,7 @@ Depth is **one** — at most one sender or receiver waits per endpoint at a time The `RecvWaiting → Idle` reverse arc is the recovery primitive [`ipc_cancel_recv`][cancel-recv] added by [ADR-0032](../decisions/0032-endpoint-rollback-and-cancel-recv.md). 
It is consumed exclusively by `ipc_recv_and_yield`'s Phase 2 Deadlock branch in v1 (kernel-internal — no userspace caller exists), keeping the symmetric "error path leaves observable state unchanged" invariant: when the bridge returns `SchedError::Deadlock`, both the scheduler state *and* the endpoint state are restored to their pre-call shape, so a subsequent retry observes a clean `Idle` slot rather than `QueueFull`. Userspace-driven endpoint destroy (Phase B2+) and a future preemption-rollback path (B5+) will reuse the same primitive. -[cancel-recv]: https://github.com/cemililik/Tyrne/blob/main/kernel/src/ipc/mod.rs +[cancel-recv]: https://github.com/HodeTech/Tyrne/blob/main/kernel/src/ipc/mod.rs ### `IpcQueues`: states + slot generations diff --git a/docs/architecture/memory-management.md b/docs/architecture/memory-management.md index 429dc9a..66a5105 100644 --- a/docs/architecture/memory-management.md +++ b/docs/architecture/memory-management.md @@ -6,7 +6,7 @@ It synthesises [ADR-0009](../decisions/0009-mmu-trait.md) (the `Mmu` HAL trait), ## Why a memory-management chapter -The MMU is the architectural surface where capability semantics meet hardware. A `MemoryRegionCap` grant becomes a sequence of [`Mmu::map`](../../hal/src/mmu.rs) calls; a revocation becomes [`Mmu::unmap`](../../hal/src/mmu.rs) plus a TLB invalidation; a page fault from userspace becomes a synchronous exception that the capability system routes back to the offending task. None of that is reachable while translation is off, which is why B2's first commitment is to turn the MMU on. +The MMU is the architectural surface where capability semantics meet hardware. A `MemoryRegionCap` grant becomes a sequence of [`Mmu::map`](../../hal/src/mmu/mod.rs) calls; a revocation becomes [`Mmu::unmap`](../../hal/src/mmu/mod.rs) plus a TLB invalidation; a page fault from userspace becomes a synchronous exception that the capability system routes back to the offending task. 
None of that is reachable while translation is off, which is why B2's first commitment is to turn the MMU on. Activating the MMU is also the project's first architectural state-machine transition that *cannot fail gracefully on the first instruction after the flip*: a typo in any of the page-table entries, the `MAIR_EL1` encoding, the `TCR_EL1` configuration, or the `SCTLR_EL1` write produces a translation fault on the very next instruction-fetch. The simulation table in [ADR-0027 §Decision outcome / §Simulation](../decisions/0027-kernel-virtual-memory-layout.md#simulation) walks the worst-case interaction step-by-step — read it before changing anything in `bsp-qemu-virt/src/mmu.rs` or `bsp-qemu-virt/linker.ld`. @@ -129,7 +129,7 @@ for region in regions { mmu.invalidate_tlb_all(); ``` -Forgetting both `.flush()` and `.ignore()` is a compile error. This is the type-system-side encoding of the discipline the [2026-05-07 B1 closure retro §"What we learned"](../analysis/reviews/business-reviews/2026-05-07-B1-closure.md) codified into the [`write-adr` skill](../../.claude/skills/write-adr/SKILL.md) §Simulation rule: when reviewer attention is the only thing standing between a class of bug and shipping, prefer to encode the discipline in the type system. +Forgetting both `.flush()` and `.ignore()` is a compile error. This is the type-system-side encoding of the discipline the [2026-05-07 B1 closure retro §"What we learned"](../analysis/reviews/business-reviews/2026-05-07-B1-closure.md) codified into the [`write-adr` skill](../../.agents/skills/write-adr/SKILL.md) §Simulation rule: when reviewer attention is the only thing standing between a class of bug and shipping, prefer to encode the discipline in the type system. The token does not carry a `&Mmu` reference (which would require a lifetime parameter and complicate the return signature); the caller passes the `Mmu` reference at `.flush()` time. 
Mirrors the `x86_64` crate's `MapperFlush` shape — Rust ecosystem prior art for the same problem. diff --git a/docs/architecture/overview.md b/docs/architecture/overview.md index 2ff82ca..1fcd1f5 100644 --- a/docs/architecture/overview.md +++ b/docs/architecture/overview.md @@ -66,7 +66,8 @@ The HAL is the trait boundary between the kernel's portable core and any one boa HAL trait surface (final form documented in [`hal.md`](hal.md), Accepted): -- `Cpu` — disable / enable interrupts at the CPU level, halt / wait-for-interrupt, context-switch primitives. +- `Cpu` — mask / restore interrupts at the CPU level, halt / wait-for-interrupt, instruction barriers, current-core identity. +- `ContextSwitch` — register save/restore for cooperative task switching (a separate trait from `Cpu` per [ADR-0020](../decisions/0020-cpu-trait-v2-context-switch.md)). - `Mmu` — translation-table layout, entry installation, TLB invalidation. - `IrqController` — IRQ mask / unmask, acknowledge, end-of-interrupt. - `Timer` — monotonic clock, one-shot deadline arming. @@ -138,9 +139,9 @@ From that point, no code runs inside the kernel except in response to syscalls, IPC is the central operation of a microkernel. Tyrne will offer two IPC flavours, both mediated by the kernel, both capability-controlled. - **Synchronous rendezvous.** A sender issues `send(endpoint_cap, msg)` and blocks until a receiver pairs with the endpoint. Used for request-response. Inspired by seL4. -- **Asynchronous notification.** A sender fires a one-bit (or small-bit-field) notification that accumulates on the receiver's endpoint. Used for interrupts and low-rate signals. No block on the sender side. +- **Asynchronous notification.** A sender fires a one-bit (or small-bit-field) notification that accumulates in a notification object. Used for interrupts and low-rate signals. No block on the sender side. -Both flavours use the same `EndpointCap` kernel object, discriminated by capability rights at send/receive time. 
+The two flavours use **independent** kernel objects. Synchronous rendezvous uses `EndpointCap` (kernel object `Endpoint`). Asynchronous notification uses `NotificationCap` (kernel object `Notification`); a notification accumulates bits in the `Notification` object — no endpoint is involved. The full state machine for both is in [`ipc.md`](ipc.md). ```mermaid sequenceDiagram diff --git a/docs/architecture/scheduler.md b/docs/architecture/scheduler.md index f2f4bee..d7a25a9 100644 --- a/docs/architecture/scheduler.md +++ b/docs/architecture/scheduler.md @@ -24,7 +24,9 @@ classDiagram ready: SchedQueue~TASK_ARENA_CAPACITY~ task_states: [TaskState; TASK_ARENA_CAPACITY] task_handles: [Option~TaskHandle~; TASK_ARENA_CAPACITY] + task_address_space_handles: [Option~AddressSpaceHandle~; TASK_ARENA_CAPACITY] current: Option~TaskHandle~ + idle: Option~TaskHandle~ contexts: [C::TaskContext; TASK_ARENA_CAPACITY] } class SchedQueue~N~ { @@ -47,10 +49,12 @@ A `Scheduler` is parametrised by the BSP's CPU type so t - **`ready`** — a bounded FIFO queue (`SchedQueue`) of `TaskHandle`s. Capacity equals `TASK_ARENA_CAPACITY`, so the queue is structurally never full relative to the number of tasks that can exist. - **`task_states`** — one `TaskState` per arena slot. `Idle` means the slot is unoccupied; `Ready` means the task is in the ready queue or currently running; `Blocked { on }` means the task is parked on a specific endpoint waiting for a message. - **`task_handles`** — caches the slot's `TaskHandle` so the scheduler can wake a specific task by index without re-querying the arena. +- **`task_address_space_handles`** — per-slot `Option<AddressSpaceHandle>`, parallel to `task_handles`. Read by the activation-on-context-switch hook to decide whether `Mmu::activate` must fire before the architectural switch (per [ADR-0028](../decisions/0028-address-space-data-structure.md)). In v1 all tasks share the bootstrap address space, so the hook short-circuits.
- **`current`** — `Some(handle)` of the running task, or `None` before `start` runs and during the brief window inside `ipc_recv_and_yield`'s Phase 2 block. +- **`idle`** — the idle-task fallback slot per [ADR-0026](../decisions/0026-idle-dispatch-fallback.md). Consulted only when `ready.dequeue()` returns `None` (`ready.dequeue().or(s.idle)`); idle's handle is deliberately **not** stored in `ready`, so it can never displace a real `Ready` task. - **`contexts`** — the BSP-defined `TaskContext` array. Each entry is the saved-register block for the slot's task. The scheduler never reads or writes the inside of a context; it only hands them to `cpu.context_switch`. -`Idle` here is the *task-state* sentinel for an unoccupied arena slot. It is unrelated to the **idle task** that ADR-0022 introduces — that is an ordinary `Ready` task whose body spins. +`Idle` here is the *task-state* sentinel for an unoccupied arena slot. It is unrelated to the **idle task** that ADR-0022 introduces and ADR-0026 moved into the dedicated `idle` fallback slot described above. ### Lifecycle of a task slot diff --git a/docs/architecture/security-model.md b/docs/architecture/security-model.md index b665293..298fa8d 100644 --- a/docs/architecture/security-model.md +++ b/docs/architecture/security-model.md @@ -265,7 +265,7 @@ The code the kernel runs is only as trustworthy as the sources it came from. Tyr - Reproducible builds are an explicit goal. - SBOM generation per release. - Signed release artifacts. -- Dependency additions follow the [add-dependency skill](../../.claude/skills/add-dependency/SKILL.md). +- Dependency additions follow the [add-dependency skill](../../.agents/skills/add-dependency/SKILL.md). 
### Boot integrity (future) diff --git a/docs/audits/unsafe-log.md b/docs/audits/unsafe-log.md index 64407c6..fcadaae 100644 --- a/docs/audits/unsafe-log.md +++ b/docs/audits/unsafe-log.md @@ -607,3 +607,21 @@ Neither change touches the `copy_nonoverlapping` site itself; both correct contr [t-019-ac]: ../analysis/tasks/phase-b/T-019-task-loader.md#acceptance-criteria [UNSAFE-2026-0026]: #unsafe-2026-0026--pmm-frame-zeroing-via-coreptrwrite_bytes-in-pmmalloc_frame + +### UNSAFE-2026-0028 — wrap an already-live populated `VMSAv8` L0 root via `QemuVirtAddressSpace::from_existing_root` + +- **Introduced:** 2026-05-22, master-review remediation (WS-I) — audit-trail completion for an `unsafe fn` introduced earlier (T-018, AddressSpace bring-up) without its own log entry. The function and its sole call site are pre-existing; this entry closes the gap reported as master-review finding **MR-011 / X3-001**. +- **Location:** [`bsp-qemu-virt/src/mmu.rs`](../../bsp-qemu-virt/src/mmu.rs) — `QemuVirtAddressSpace::from_existing_root` (declaration); [`bsp-qemu-virt/src/main.rs`](../../bsp-qemu-virt/src/main.rs) — `kernel_entry`'s bootstrap-AS wrap block (sole call site). +- **Operation:** Construct a `QemuVirtAddressSpace` naming an **already-live, already-populated** `VMSAv8` L0 root translation-table frame **without** zero-filling it. This is a contract distinct from [`Mmu::create_address_space`](../../hal/src/mmu/mod.rs) (which requires a *zero-filled*, exclusively-owned root for PMM-allocated frames). `from_existing_root` exists precisely so `kernel_entry` can wrap the live bootstrap L0 frame — the one `mmu_bootstrap` populated and installed into `TTBR0_EL1` — into a kernel `AddressSpace` value without routing through the zero-fill constructor. 
+- **Invariants relied on:** + - **Root is the currently-live bootstrap L0 frame.** `root` is derived from the `__boot_pt_l0` linker symbol — the exact 4 KiB L0 frame `mmu_bootstrap` (UNSAFE-2026-0022/0023/0024) populated and wrote into `TTBR0_EL1`. The frame's 512 × 8-byte descriptors are correctly-encoded `VMSAv8` table/block/page descriptors with at least the kernel-half mappings populated (verified by the host-tested `tyrne_hal::mmu::vmsav8` encoders + the C7 bootstrap descriptor-math review). + - **Caller runs strictly after `mmu_bootstrap`.** The sole call site (`main.rs::kernel_entry`) executes the wrap block only after `mmu_bootstrap` has returned and the MMU is live; the `from_existing_root` precondition ("`root` is currently-live") is therefore satisfied at the call. + - **Exactly one such wrapper per boot — alias-freedom.** The bootstrap root is the only well-known already-live root in v1; `from_existing_root` is called once, on it, and no `create_address_space` ever wraps the same frame. No second wrapper aliases the live root. + - **No zero-fill is performed (and none must be).** Zero-filling a live L0 root would erase the kernel-half mappings the running CPU is actively translating through — i.e. it would unmap the kernel out from under itself. The absence of zero-fill is the *correct* behaviour for this constructor; it is exactly why the operation cannot route through `create_address_space`. + - **Subsequent `map`/`unmap` rely on the UNSAFE-2026-0025 walker invariants.** Any later `Mmu::map`/`Mmu::unmap` on the resulting `QemuVirtAddressSpace` performs `volatile` descriptor reads/writes down this root's chain under the index-bound + leaf-written-last discipline of UNSAFE-2026-0025; passing an arbitrary `PhysFrame` here would dereference garbage bytes as descriptors at the walker level. 
+- **Rejected alternatives:** + - **Route the bootstrap root through `Mmu::create_address_space`.** Rejected: that contract demands a *zero-filled* root (sound for fresh PMM-allocated frames, wrong for the live bootstrap root). Calling it on the live root would either require zero-filling (which would unmap the running kernel) or silently violate the trait's `# Safety` precondition. A separate inherent constructor with its own contract is the honest expression. + - **Make `from_existing_root` safe.** Rejected: the "root is a currently-live, correctly-encoded L0 table" precondition is not expressible in the type system (a `PhysFrame` is an aligned address, not a proof of live-and-populated descriptors), so the constructor must remain `unsafe fn` with a caller-side contract. + - **Attribute the call-site unsafety to UNSAFE-2026-0010 + 0014.** Rejected (was the pre-fix state): 0010 (StaticCell `Sync`) and 0014 (momentary `&mut` to the just-initialised arena) cover the *surrounding* publish mechanics, not the `from_existing_root` operation itself (wrapping a non-zero-filled live root). The call-site SAFETY block now narrows 0010/0014 to the StaticCell/arena lines they actually cover and cites this entry for the wrap. +- **Reviewed by:** @cemililik (+ Claude Opus 4.7 agent). Security-sensitive (boot + MMU root install) → second-reviewer required per [unsafe-policy §Review.4](../standards/unsafe-policy.md). +- **Status:** Active. The contract is sound and the sole caller honours it (C7-P5 / X3-001 confirm `mmu_bootstrap` populates the exact frame and that `kernel_entry` runs post-bootstrap). Smoke-verified at runtime: the 2026-05-14 QEMU trace wraps the bootstrap root on boot and runs the demo to completion (`tyrne: all tasks complete`) with no Translation/Permission faults; this is an audit-trail-completeness fix, not a behaviour change. 
diff --git a/docs/decisions/0004-target-platforms.md b/docs/decisions/0004-target-platforms.md index 9213c8d..ca3c599 100644 --- a/docs/decisions/0004-target-platforms.md +++ b/docs/decisions/0004-target-platforms.md @@ -4,6 +4,8 @@ - **Date:** 2026-04-20 - **Deciders:** @cemililik +> **Correction (2026-05-22).** This ADR's "GICv3" reference (§Decision outcome) is stale: QEMU virt is GICv2 / no IOMMU in v1; the `Iommu` trait is a stub reserved for a future SMMUv3 ADR. See [ADR-0036](0036-qemu-virt-gicv2-no-iommu-v1.md). The original body below is preserved unchanged for the historical record (append-only). + ## Context Tyrne needs a concrete first hardware target to make implementation choices — architecture-specific code, HAL surfaces, toolchain pinning — possible. The long-term vision spans constrained smart-home devices, single-board computers, and eventually mobile-class aarch64 SoCs. The short-term question is: *which platform do we bring up first, and what is the roadmap for the rest?* diff --git a/docs/decisions/0006-workspace-layout.md b/docs/decisions/0006-workspace-layout.md index 0cba056..781a9ee 100644 --- a/docs/decisions/0006-workspace-layout.md +++ b/docs/decisions/0006-workspace-layout.md @@ -4,6 +4,8 @@ - **Date:** 2026-04-20 - **Deciders:** @cemililik +> **Correction (2026-05-22).** This ADR's "GICv3 + PL011 + SMMUv3" BSP-role line (§Decision outcome) is stale: QEMU virt is GICv2 / no IOMMU in v1; the `Iommu` trait is a stub reserved for a future SMMUv3 ADR. See [ADR-0036](0036-qemu-virt-gicv2-no-iommu-v1.md). The original body below is preserved unchanged for the historical record (append-only). + ## Context Phase 4 of the project begins implementation. The architecture documents ([overview.md](../architecture/overview.md), [hal.md](../architecture/hal.md), [security-model.md](../architecture/security-model.md)) have settled that Tyrne is a narrow kernel core, a trait-based HAL, and per-board BSPs that implement the HAL. 
The Cargo workspace must reflect that decomposition concretely: the set of crates, their boundaries, and their roles. diff --git a/docs/decisions/0008-cpu-trait.md b/docs/decisions/0008-cpu-trait.md index 68fea2e..9d92793 100644 --- a/docs/decisions/0008-cpu-trait.md +++ b/docs/decisions/0008-cpu-trait.md @@ -153,6 +153,10 @@ The closure form (Option A) was rejected because the kernel has several places w - Pro: guard can be extended or replaced without touching the `Cpu` trait. - Con: callers write one extra line for RAII. Accepted. +## Revision notes + +- **2026-05-22 — `IrqGuard` changed from `&'a dyn Cpu` to generic `<C: Cpu>`.** The §Decision-outcome sketch shows `IrqGuard<'a>` holding `cpu: &'a dyn Cpu` (dynamic dispatch). The shipped type in [`hal/src/cpu.rs`](../../hal/src/cpu.rs) is `pub struct IrqGuard<'a, C: Cpu>` holding `cpu: &'a C` — a concrete generic, not a trait object. **Rationale (safety-relevant):** coercing a concrete CPU type to a trait object at certain inlining depths can produce vtable references that alias unrelated data in `.rodata`; using a concrete type parameter eliminates the coercion site entirely and also avoids fat-pointer vtable dispatch on critical-section paths. The rationale is documented at the type's rustdoc in `hal/src/cpu.rs`. **The `Cpu` trait itself remains object-safe** — `&dyn Cpu` is still the kernel's canonical handle for calling individual `Cpu` trait methods (the explicit-pair-plus-free-guard decision, Option C, is unchanged); only the `IrqGuard` wrapper switched to a concrete type parameter. This rider corrects the type signature only; the decision (RAII via a free-standing guard layered on an object-safe `Cpu`) stands. + ## References - [ADR-0006: Workspace layout](0006-workspace-layout.md). 
diff --git a/docs/decisions/0012-boot-flow-qemu-virt.md b/docs/decisions/0012-boot-flow-qemu-virt.md index 5564c5c..02fb0e5 100644 --- a/docs/decisions/0012-boot-flow-qemu-virt.md +++ b/docs/decisions/0012-boot-flow-qemu-virt.md @@ -4,6 +4,8 @@ - **Date:** 2026-04-20 - **Deciders:** @cemililik +> **Correction (2026-05-22).** This ADR's "`GICv3` distributor" label (§Decision drivers) is stale: QEMU virt is GICv2 / no IOMMU in v1; the `Iommu` trait is a stub reserved for a future SMMUv3 ADR. The address `0x0800_0000` is correct — only the version label is wrong. See [ADR-0036](0036-qemu-virt-gicv2-no-iommu-v1.md). The original body below is preserved unchanged for the historical record (append-only). + ## Context Phase 4c needs the kernel to boot. Getting the CPU from QEMU's entry point into `kernel_main` requires decisions that bake into the build artifact for the lifetime of the BSP: @@ -151,6 +153,22 @@ Each will be resolved by a future ADR or a paired update. - **Measured boot.** Hooks for a measurement register to record the boot code. Out of scope until Pi 4 hardware support and a TPM / secure-element substitute. - **`.init_array` / C++-style static init.** The kernel does not use these today; if a future dependency pulls them in, the linker script needs to call the init array from Rust. +## Revision notes + +- **2026-05-22 — §Memory-layout diagram updated for the `.boot_pt` reservation (ADR-0027).** The §Memory-layout diagram above predates [ADR-0027](0027-kernel-virtual-memory-layout.md), which added four bootstrap page-table frames (16 KiB) inside `.bss`. The original diagram is preserved (append-only); the current layout is: + + ```mermaid + flowchart TD + A["0x40080000 — _start (.text.boot)"] --> B[".text"] --> C[".rodata"] --> D[".data"] --> E[".bss — zeroed in _start"] + E --> F[".boot_pt — 4 × 4 KiB bootstrap page-table frames (16 KiB);<br/>bracketed by __boot_pt_start / __boot_pt_end;<br/>pre-zeroed by the BSS-zero loop (ADR-0027 / T-016)"] + E --> G["(other BSS)"] + F --> H["(64 KiB) — initial stack region"] + G --> H + H --> I["__stack_top"] + ``` + + The kernel image's load address stays at `0x4008_0000` and is identity-mapped (no mapped-vs-identity split in v1). See the §Open-questions "Boot-time MMU activation" rider above and [`bsp-qemu-virt/linker.ld`](../../bsp-qemu-virt/linker.ld) for the authoritative section ordering. + ## References - [ADR-0004: Target platforms and support tiers](0004-target-platforms.md). diff --git a/docs/decisions/0013-roadmap-and-planning.md b/docs/decisions/0013-roadmap-and-planning.md index 67e677c..120a940 100644 --- a/docs/decisions/0013-roadmap-and-planning.md +++ b/docs/decisions/0013-roadmap-and-planning.md @@ -6,7 +6,7 @@ ## Context -Tyrne is a multi-year solo project with AI assistance. The ADR process ([ADR-0001](0001-microkernel-architecture.md) onward) answers *why* decisions are made; the [standards](../standards/) answer *how* work is done; the [skills](../../.claude/skills/) encode procedures for recurring tasks. What is missing is the **sequencing and tracking layer**: which phase of the project we are in, which milestone is active, which task is being worked on, and what reviews happen when a milestone lands or when a change affects security / performance. +Tyrne is a multi-year solo project with AI assistance. The ADR process ([ADR-0001](0001-microkernel-architecture.md) onward) answers *why* decisions are made; the [standards](../standards/) answer *how* work is done; the [skills](../../.agents/skills/) encode procedures for recurring tasks. What is missing is the **sequencing and tracking layer**: which phase of the project we are in, which milestone is active, which task is being worked on, and what reviews happen when a milestone lands or when a change affects security / performance. 
A project with no deadline still needs a roadmap — not to meet dates, but so that work proceeds in a considered order, so that the maintainer can pause for weeks and return without having to page the entire project back in, and so that contributors (human or AI) arriving later know the direction without guessing. Without a plan, time does not produce progress; it produces ad-hoc choices that accumulate. @@ -126,7 +126,7 @@ Reviews are **event-triggered**, not calendar-triggered. See the individual revi ### Changing the roadmap -- **Adding a task** — use the [`start-task`](../../.claude/skills/start-task/SKILL.md) skill; new tasks get the next T-NNN and land under the right phase folder. +- **Adding a task** — use the [`start-task`](../../.agents/skills/start-task/SKILL.md) skill; new tasks get the next T-NNN and land under the right phase folder. - **Reordering tasks within a milestone** — edit `phases/phase-.md` and each affected task's frontmatter. - **Moving tasks between milestones (same phase)** — edit the phase file and the task's frontmatter. - **Moving tasks between phases** — move the file from `analysis/tasks/phase-/` to `analysis/tasks/phase-/`, update the task's frontmatter, note the move in the task's review-history section. @@ -136,9 +136,9 @@ Reviews are **event-triggered**, not calendar-triggered. See the individual revi ### Skills -- [`start-task`](../../.claude/skills/start-task/SKILL.md) — create a new task file in the correct phase folder, assign next T-NNN, update `current.md` on status transition. -- [`conduct-review`](../../.claude/skills/conduct-review/SKILL.md) — produce a review artifact, taking the review type as input, following that type's master plan. 
-- [`perform-code-review`](../../.claude/skills/perform-code-review/SKILL.md) and [`perform-security-review`](../../.claude/skills/perform-security-review/SKILL.md) — pre-existing skills for executing a review during development; they now also produce an artifact in the corresponding `analysis/reviews//` directory. +- [`start-task`](../../.agents/skills/start-task/SKILL.md) — create a new task file in the correct phase folder, assign next T-NNN, update `current.md` on status transition. +- [`conduct-review`](../../.agents/skills/conduct-review/SKILL.md) — produce a review artifact, taking the review type as input, following that type's master plan. +- [`perform-code-review`](../../.agents/skills/perform-code-review/SKILL.md) and [`perform-security-review`](../../.agents/skills/perform-security-review/SKILL.md) — pre-existing skills for executing a review during development; they now also produce an artifact in the corresponding `analysis/reviews//` directory. ### Integration with ADRs @@ -212,7 +212,7 @@ The roadmap does not replace ADRs. It sequences them. A task may require an ADR ## References -- Existing [ADRs](.), [standards](../standards/), [skills](../../.claude/skills/). +- Existing [ADRs](.), [standards](../standards/), [skills](../../.agents/skills/). - User story format — agile community prior art, adapted here. - Amazon six-pager / working-backwards — partial inspiration for the "what, why, definition of done" structure. - Hubris roadmap practices — public prior art. diff --git a/docs/decisions/0014-capability-representation.md b/docs/decisions/0014-capability-representation.md index 330c98f..bbaceb6 100644 --- a/docs/decisions/0014-capability-representation.md +++ b/docs/decisions/0014-capability-representation.md @@ -251,6 +251,10 @@ Explicit subtree walk, iterative using a small local stack buffer sized for `MAX - **Raising `CAP_TABLE_CAPACITY` and `MAX_DERIVATION_DEPTH`.** Both are revisited when a concrete use-case demands more. 
For now, both are `const` and documented. - **Adopting the `bitflags` crate.** `CapRights` is hand-rolled to keep the kernel dependency-free. [ADR-0009](0009-mmu-trait.md) has the same open question for `MappingFlags`; both may migrate together in a future ADR. +## Revision notes + +- **2026-05-22 — `CapError` carries seven variants, not the five sketched above.** The §Decision-outcome `CapError` sketch lists five variants (`CapsExhausted`, `InvalidHandle`, `WidenedRights`, `InsufficientRights`, `DerivationTooDeep`). The shipped enum in [`kernel/src/cap/mod.rs`](../../kernel/src/cap/mod.rs) adds two more: **`HasChildren`** (a `revoke`/operation refused because the capability still has live derivations) and **`WrongKind`** (a typed-accessor wrapper rejected because the slot holds a different `CapKind`). Both additions are append-only-safe by design: the enum is declared `#[non_exhaustive]` precisely so future ADRs can add variants without it being a breaking change (the sketch's own doc-comment says so). The five-variant sketch is preserved as the historical record; this rider records that the contract grew to seven by additive, `#[non_exhaustive]`-protected extension. The capability-representation decision (typed handle + rights bitfield + derivation tree) is unchanged. + ## References - [ADR-0001 — Capability-based microkernel architecture](0001-microkernel-architecture.md). 
diff --git a/docs/decisions/0017-ipc-primitive-set.md b/docs/decisions/0017-ipc-primitive-set.md index 4a02953..5389341 100644 --- a/docs/decisions/0017-ipc-primitive-set.md +++ b/docs/decisions/0017-ipc-primitive-set.md @@ -213,6 +213,7 @@ The `notify` operation is non-blocking: it ORs bits into the `Notification` word - **2026-04-27 — pointer to architecture doc.** [T-008](../analysis/tasks/phase-b/T-008-architecture-docs.md) created [`docs/architecture/ipc.md`](../architecture/ipc.md), which synthesises this ADR (three-primitive set, endpoint state machine, capability-transfer pre-flight) with [ADR-0021](0021-raw-pointer-scheduler-ipc-bridge.md) (the scheduler-bridge wrappers). The ADR body is unchanged; this rider provides the bidirectional cross-reference T-008's DoD asks for ("ADRs cited from architecture docs are the same ADRs whose §References sections cite the new architecture docs"). - **2026-05-07 — `ipc_cancel_recv` recovery primitive added (ADR-0032 / T-015).** [ADR-0032](0032-endpoint-rollback-and-cancel-recv.md) introduces `ipc_cancel_recv(ep_arena, queues, ep_cap, table)` — a fourth function in `kernel/src/ipc/mod.rs` that reverses an `Idle → RecvWaiting` transition for the calling task. **It is a recovery primitive, not an extension of the user-observable IPC surface this ADR enumerated.** The user-observable set remains `send` / `recv` / `notify`; `cancel_recv` is consumed exclusively by the scheduler bridge's `ipc_recv_and_yield` Deadlock-rollback branch in v1 (kernel-internal). When userspace destroy paths land (Phase B2+), they may invoke it as a "drain receivers" sweep, and the future syscall-ABI ADR (currently pencilled as ADR-0030) decides whether to expose it directly. The `EndpointState` machine itself is unchanged — `cancel_recv` is a single-edge `RecvWaiting → Idle` reverse of an existing arc, not a new state. 
ADR-0017's *Decision outcome* (three-primitive set) is therefore not superseded; this rider records the additive recovery primitive that lands alongside it. +- **2026-05-22 — "ADR-0030" forward-reference is a reserved slot, not yet filed.** The "pencilled as ADR-0030" syscall-ABI reference in the 2026-05-07 rider above is a **reserved slot number** (the `phase-b.md` §B5 ADR ledger formally reserves ADR-0030 for the syscall ABI and ADR-0031 for the initial syscall set), not a claim that a file exists. No `docs/decisions/0030-*.md` file exists yet; the slot opens with B5 userspace work. If the syscall ABI eventually lands under a different number, this reference is the one to update. (Mirrors the ADR-0033/0034 named-but-unallocated placeholder pattern.) ## References diff --git a/docs/decisions/0019-scheduler-shape.md b/docs/decisions/0019-scheduler-shape.md index 8d6b201..dc3b206 100644 --- a/docs/decisions/0019-scheduler-shape.md +++ b/docs/decisions/0019-scheduler-shape.md @@ -213,6 +213,13 @@ impl Scheduler { - **2026-04-27 — pointer to architecture doc.** [T-008](../analysis/tasks/phase-b/T-008-architecture-docs.md) created [`docs/architecture/scheduler.md`](../architecture/scheduler.md), which synthesises this ADR (FIFO ready queue + bounded arena), [ADR-0020](0020-cpu-trait-v2-context-switch.md) (`ContextSwitch` split), [ADR-0021](0021-raw-pointer-scheduler-ipc-bridge.md) (raw-pointer bridge), and [ADR-0022](0022-idle-task-and-typed-scheduler-deadlock.md) (idle task + `SchedError::Deadlock`) into a single readable picture of the scheduler's `how`. The ADR body is unchanged; this rider provides the bidirectional cross-reference T-008's DoD asks for. The "Idle task" open question above was settled by ADR-0022. 
+- **2026-05-22 — the §Public-API sketch is superseded in shape by ADR-0021/0026/0028.** The `&mut self` method sketch in §Public API and the separate `TaskContexts` struct above predate three later ADRs and no longer match the shipped [`kernel/src/sched/mod.rs`](../../kernel/src/sched/mod.rs). The original sketch is preserved (append-only); for the evolved shape, read: + - [ADR-0021](0021-raw-pointer-scheduler-ipc-bridge.md) — `yield_now` / `ipc_send_and_yield` / `ipc_recv_and_yield` are now module-level `unsafe fn` **free functions** taking `*mut Scheduler` (the raw-pointer bridge), not `&mut self` methods. The `TaskContexts` struct is **inlined directly into `Scheduler`** as a `contexts: [C::TaskContext; TASK_ARENA_CAPACITY]` field; no separate `TaskContexts` type exists. + - [ADR-0026](0026-idle-dispatch-fallback.md) — adds the dedicated `idle: Option` fallback slot (the "Idle task" open question's production shape; the 2026-04-27 rider noted ADR-0022 settled it, ADR-0026 then superseded the idle-task-location axis). + - [ADR-0028](0028-address-space-data-structure.md) — adds the `task_address_space_handles: [Option; TASK_ARENA_CAPACITY]` parallel array and the `activate_address_space: impl FnOnce(AddressSpaceHandle)` activation-on-context-switch hook parameter threaded through the bridge functions. + + The §Decision outcome (FIFO ready queue + bounded parallel arenas + `current` slot) is unchanged by these; only the call shape and the field set evolved. + ## References - [ADR-0017: IPC primitive set](0017-ipc-primitive-set.md) — the IPC layer this scheduler wires up. 
diff --git a/docs/decisions/0020-cpu-trait-v2-context-switch.md b/docs/decisions/0020-cpu-trait-v2-context-switch.md index 4e6c7ef..ddfda21 100644 --- a/docs/decisions/0020-cpu-trait-v2-context-switch.md +++ b/docs/decisions/0020-cpu-trait-v2-context-switch.md @@ -315,6 +315,14 @@ impl ContextSwitch for FakeCpu { - **2026-04-27 — pointer to architecture doc.** [T-008](../analysis/tasks/phase-b/T-008-architecture-docs.md) added [`docs/architecture/scheduler.md`](../architecture/scheduler.md), which synthesises this ADR's `Cpu` / `ContextSwitch` split alongside [ADR-0019](0019-scheduler-shape.md) into a single readable picture. The ADR body is unchanged; this rider provides the bidirectional cross-reference T-008's DoD asks for. +- **2026-05-22 — `Aarch64TaskContext` is 168 bytes, not 104; `d8`–`d15` ARE saved (not deferred).** The shipped struct in [`bsp-qemu-virt/src/cpu.rs`](../../bsp-qemu-virt/src/cpu.rs) adds a field `pub d8_d15: [u64; 8]` (8 × 8 = 64 bytes), making the total **168 bytes** — confirmed by the compile-time `const _: () = assert!(core::mem::size_of::<Aarch64TaskContext>() == 168)` and by `context_switch_asm`, which saves and restores `d8`–`d15` on every switch. + + This **supersedes the §Neutral notes above** that state `Aarch64TaskContext` is 104 bytes and that "NEON / FP registers deferred … `d8`–`d15` are not saved in v1," and the §Open-questions "FP / NEON context save … Deferred to Phase B" line. The `d8`–`d15` save was implemented in the **same arc** as the initial draft, not deferred to a later Phase B ADR. + + Rationale: `d8`–`d15` are the lower 64 bits of the AAPCS64 callee-saved SIMD/FP registers `v8`–`v15`, and AAPCS64 requires them to be preserved across calls whenever FP is enabled — i.e. whenever `CPACR_EL1.FPEN ≠ 0`, which the BSP sets. Omitting them would silently corrupt FP/NEON state across context switches (wrong results, not a crash).
+ + The §Decision-outcome `ContextSwitch` *trait* and the `Cpu` v2 split are unchanged by this rider; only the concrete `Aarch64TaskContext` size and register-save set are corrected. (Code-side note: the normative `# Safety` contract in [`hal/src/context_switch.rs`](../../hal/src/context_switch.rs) likewise under-enumerates the callee-saved set and is corrected separately under the same review; this rider records only the ADR-side correction.) + ## References - [ADR-0008: `Cpu` HAL trait signature (v1)](0008-cpu-trait.md) — the existing trait this ADR extends by addition. diff --git a/docs/decisions/0023-cross-table-capability-revocation-policy.md b/docs/decisions/0023-cross-table-capability-revocation-policy.md index 2f76b52..834c8aa 100644 --- a/docs/decisions/0023-cross-table-capability-revocation-policy.md +++ b/docs/decisions/0023-cross-table-capability-revocation-policy.md @@ -35,7 +35,7 @@ When the trigger fires, the replacement ADR will need a Decision outcome that pi - **Option C — Explicit `revoke_transferred(token)` syscall.** Sender records a transfer token at transfer time; cancel-token primitive walks every table and revokes any entry matching the token. Lightweight per-transfer; expensive at revoke time (linear scan over all tables). - **Option D — Defer indefinitely with userspace responsibility** (what v1 implements). Userspace patterns that need post-transfer revocation must implement it in protocol (e.g., periodic re-authentication; supervisor-mediated indirection). -A real ADR-0023 will need a *Simulation* table per the [Decision outcome](#decision-outcome-not-applicable-deferred) discipline introduced by [ADR-0026](0026-idle-dispatch-fallback.md) and codified in the [write-adr skill](../../.claude/skills/write-adr/SKILL.md): walk the worst-case (sender transfers cap to receiver A → A re-derives a sub-cap to B → sender revokes original → both A's copy and B's sub-derivation must die under the chosen Option). 
The Simulation is what surfaces the cross-table-CDT-vs-back-pointer-vs-token tradeoff that prose alone hides. +A real ADR-0023 will need a *Simulation* table per the [Decision outcome](#decision-outcome-not-applicable--deferred) discipline introduced by [ADR-0026](0026-idle-dispatch-fallback.md) and codified in the [write-adr skill](../../.agents/skills/write-adr/SKILL.md): walk the worst-case (sender transfers cap to receiver A → A re-derives a sub-cap to B → sender revokes original → both A's copy and B's sub-derivation must die under the chosen Option). The Simulation is what surfaces the cross-table-CDT-vs-back-pointer-vs-token tradeoff that prose alone hides. ## Decision drivers @@ -48,7 +48,7 @@ These are the drivers a real ADR-0023 will need to weigh; recorded here so the p ## Decision outcome (not applicable — Deferred) -No decision today. The ADR is a placeholder; when a trigger fires (see *Why deferred* above), a real ADR-0023 supersedes this body via the [supersede-adr skill](../../.claude/skills/supersede-adr/SKILL.md), or — preferred for placeholders — this body is rewritten in place with a Status flip from `Deferred` to `Proposed` (then `Accepted`). The Status flip is not subject to the append-only rule that protects original Accepted bodies, because a `Deferred` placeholder is not a load-bearing decision artefact. +No decision today. The ADR is a placeholder; when a trigger fires (see *Why deferred* above), a real ADR-0023 supersedes this body via the [supersede-adr skill](../../.agents/skills/supersede-adr/SKILL.md), or — preferred for placeholders — this body is rewritten in place with a Status flip from `Deferred` to `Proposed` (then `Accepted`). The Status flip is not subject to the append-only rule that protects original Accepted bodies, because a `Deferred` placeholder is not a load-bearing decision artefact. 
### Simulation diff --git a/docs/decisions/0025-adr-governance-amendments.md b/docs/decisions/0025-adr-governance-amendments.md index a10cf1f..c5f1e80 100644 --- a/docs/decisions/0025-adr-governance-amendments.md +++ b/docs/decisions/0025-adr-governance-amendments.md @@ -133,7 +133,7 @@ For the two rules above to be fully in effect: - [ADR-0013 — Roadmap and planning process](0013-roadmap-and-planning.md) — the parent ADR these rules amend. - [`docs/standards/unsafe-policy.md §3`](../standards/unsafe-policy.md) — the audit-log append-only policy whose pattern the ADR rider rule mirrors. -- [`.claude/skills/write-adr/SKILL.md`](../../.claude/skills/write-adr/SKILL.md) — updated in commit `56fd9eb` to encode the dependency-chain procedure; cool-down step removed in the follow-up. +- [`.agents/skills/write-adr/SKILL.md`](../../.agents/skills/write-adr/SKILL.md) — updated in commit `56fd9eb` to encode the dependency-chain procedure; cool-down step removed in the follow-up. - [`docs/decisions/template.md`](template.md) — updated in commit `56fd9eb` to include the "Dependency chain" subsection. - [T-009 mini-retro](../analysis/reviews/business-reviews/2026-04-27-T-009-mini-retro.md) — the retrospective that produced the rules. - [ADR-0021](0021-raw-pointer-scheduler-ipc-bridge.md) and [ADR-0022](0022-idle-task-and-typed-scheduler-deadlock.md) — the four-rider data points that motivated all three drafts (including the withdrawn cool-down). 
diff --git a/docs/decisions/0026-idle-dispatch-fallback.md b/docs/decisions/0026-idle-dispatch-fallback.md index 872badb..808fce7 100644 --- a/docs/decisions/0026-idle-dispatch-fallback.md +++ b/docs/decisions/0026-idle-dispatch-fallback.md @@ -166,8 +166,8 @@ UNSAFE-2026-0021's `Pending QEMU smoke verification` clearance is **not** in T-0 ## Revision notes -- **2026-05-07 — §Simulation rule was retro-extracted from this ADR's *Decision outcome* table.** The discipline now codified in [`write-adr` skill §5 + §Acceptance criteria](../../.claude/skills/write-adr/SKILL.md) (commit `77a578a`, "*for every multi-step state-machine ADR, the body must include a 3–5 row table walking the worst-case interaction through the chosen shape*") was extracted from this ADR's *Decision outcome* §Simulation table on 2026-05-07, **after** the table itself had landed (2026-05-06, single-commit `10dea48`). The 2026-05-07 codifying commit landed 84 seconds before commit `4aa4b24` proposed [ADR-0032](0032-endpoint-rollback-and-cancel-recv.md) — the first ADR drafted *under* the new rule. Read order matters: this ADR's table is the empirical source; the skill rule is the retro-extraction; ADR-0032's table is the first forward application. The [B1 closure retrospective (2026-05-07) §"What we learned"](../analysis/reviews/business-reviews/2026-05-07-B1-closure.md) records the same chronology in narrative form. Recorded here at the artefact layer so future readers do not infer that the simulation-table discipline pre-existed both ADRs. 
-- **2026-05-07 — single-commit Propose+Accept landing reconciliation.** This ADR's Propose+Accept landed in a single commit (`10dea48`) under the [`supersede-adr` skill §7 solo-phase combined-commit clause](../../.claude/skills/supersede-adr/SKILL.md), which permits one combined commit "if the decision is already settled" — the smoke-regression hotfix framing made the supersession near-mechanical (the *typed-error* axis stands; only the *idle-task-location* axis flips A → B). The substance the [`write-adr` skill §10 separate-Accept-commit rule](../../.claude/skills/write-adr/SKILL.md) protects (a careful-re-read pass showing up as its own diff) was instead executed by the multi-agent comprehensive-review's Track A (kernel correctness) running the *same day* and approving the dispatch-path simulation table. The two skill clauses are in tension by their plain text; the `supersede-adr` exception applies here because the supersession is *reorganisational* (no new state-machine surface; the dispatcher chain `ready.dequeue().or(s.idle)` is mechanically derivable from ADR-0022's "idle is in the queue" framing) rather than *introductory*. **Future supersessions should follow the `supersede-adr` §7 combined-commit form only when the same reorganisational property holds; introductory state-machine ADRs (e.g. ADR-0032) follow `write-adr` §10's separate-commit discipline.** [ADR-0032](0032-endpoint-rollback-and-cancel-recv.md)'s separate Propose / Accept commits (`4aa4b24` / `db24d6d`) are the project's first execution of the latter form. The [2026-05-07 PR #12-#17 multi-axis review's Track G §MIN-G1](../analysis/reviews/code-reviews/2026-05-07-pr-12-to-17-multi-axis-review/track-g-process.md) is the codifying source of this reconciliation rider. 
+- **2026-05-07 — §Simulation rule was retro-extracted from this ADR's *Decision outcome* table.** The discipline now codified in [`write-adr` skill §5 + §Acceptance criteria](../../.agents/skills/write-adr/SKILL.md) (commit `77a578a`, "*for every multi-step state-machine ADR, the body must include a 3–5 row table walking the worst-case interaction through the chosen shape*") was extracted from this ADR's *Decision outcome* §Simulation table on 2026-05-07, **after** the table itself had landed (2026-05-06, single-commit `10dea48`). The 2026-05-07 codifying commit landed 84 seconds before commit `4aa4b24` proposed [ADR-0032](0032-endpoint-rollback-and-cancel-recv.md) — the first ADR drafted *under* the new rule. Read order matters: this ADR's table is the empirical source; the skill rule is the retro-extraction; ADR-0032's table is the first forward application. The [B1 closure retrospective (2026-05-07) §"What we learned"](../analysis/reviews/business-reviews/2026-05-07-B1-closure.md) records the same chronology in narrative form. Recorded here at the artefact layer so future readers do not infer that the simulation-table discipline pre-existed both ADRs. +- **2026-05-07 — single-commit Propose+Accept landing reconciliation.** This ADR's Propose+Accept landed in a single commit (`10dea48`) under the [`supersede-adr` skill §7 solo-phase combined-commit clause](../../.agents/skills/supersede-adr/SKILL.md), which permits one combined commit "if the decision is already settled" — the smoke-regression hotfix framing made the supersession near-mechanical (the *typed-error* axis stands; only the *idle-task-location* axis flips A → B). The substance the [`write-adr` skill §10 separate-Accept-commit rule](../../.agents/skills/write-adr/SKILL.md) protects (a careful-re-read pass showing up as its own diff) was instead executed by the multi-agent comprehensive-review's Track A (kernel correctness) running the *same day* and approving the dispatch-path simulation table. 
The two skill clauses are in tension by their plain text; the `supersede-adr` exception applies here because the supersession is *reorganisational* (no new state-machine surface; the dispatcher chain `ready.dequeue().or(s.idle)` is mechanically derivable from ADR-0022's "idle is in the queue" framing) rather than *introductory*. **Future supersessions should follow the `supersede-adr` §7 combined-commit form only when the same reorganisational property holds; introductory state-machine ADRs (e.g. ADR-0032) follow `write-adr` §10's separate-commit discipline.** [ADR-0032](0032-endpoint-rollback-and-cancel-recv.md)'s separate Propose / Accept commits (`4aa4b24` / `db24d6d`) are the project's first execution of the latter form. The [2026-05-07 PR #12-#17 multi-axis review's Track G §MIN-G1](../analysis/reviews/code-reviews/2026-05-07-pr-12-to-17-multi-axis-review/track-g-process.md) is the codifying source of this reconciliation rider. ## References diff --git a/docs/decisions/0027-kernel-virtual-memory-layout.md b/docs/decisions/0027-kernel-virtual-memory-layout.md index e1cc282..3784a5f 100644 --- a/docs/decisions/0027-kernel-virtual-memory-layout.md +++ b/docs/decisions/0027-kernel-virtual-memory-layout.md @@ -12,9 +12,9 @@ The B2 milestone scope, per [phase-b.md §B2](../roadmap/phases/phase-b.md), is 1. **Identity vs. high-half split.** Where does the kernel image live in virtual address space the moment the MMU comes on? Identity-mapped at its physical load address, or relocated to a high-half base (ARM convention `0xFFFF_FFFF_8000_0000+`)? 2. **Memory-type attributes.** `MAIR_EL1` carries up to eight named attribute encodings; the kernel must commit to which indices represent which memory types — at minimum *normal cached* for RAM and *device-nGnRnE* for MMIO — so that page-table entries can encode the right `AttrIndx` value. -3. **TLB-invalidation discipline.** Every mapping mutation needs a matching TLB invalidate. 
Forgetting that step is a class-of-bug that produces stale-translation hazards which only surface under load. The HAL's [`Mmu`](../../hal/src/mmu.rs) trait currently returns `Result<(), MmuError>` from `map` / `unmap` — leaving the *did-the-caller-flush?* question to reviewer judgement. Should the trait surface make the responsibility unmissable? +3. **TLB-invalidation discipline.** Every mapping mutation needs a matching TLB invalidate. Forgetting that step is a class-of-bug that produces stale-translation hazards which only surface under load. The HAL's [`Mmu`](../../hal/src/mmu/mod.rs) trait currently returns `Result<(), MmuError>` from `map` / `unmap` — leaving the *did-the-caller-flush?* question to reviewer judgement. Should the trait surface make the responsibility unmissable? -The MMU-activation moment is itself a multi-step state-machine transition: the kernel runs at PA `0x4008_0000` with `SCTLR_EL1.M = 0`; we build page tables; we configure `MAIR_EL1`, `TCR_EL1`, `TTBR0_EL1`; we set `SCTLR_EL1.M = 1`; from the next instruction onwards the PC and every load go through the TLB. Getting any of those steps in the wrong order produces an instruction-fetch fault on the very next instruction — a class of hazard the [2026-05-06 B1 smoke regression](../analysis/reviews/business-reviews/2026-05-06-B1-smoke-regression.md) taught the project to walk through with a §Simulation table before Accept. 
ADR-0027 is the **first non-recovery-primitive state-machine ADR drafted under the [`write-adr` skill](../../.claude/skills/write-adr/SKILL.md) §Simulation discipline** — ADR-0026's table was the empirical retro-source for the rule (it landed alongside the rule's codification); [ADR-0032](0032-endpoint-rollback-and-cancel-recv.md)'s Propose commit was the first ADR drafted *under* the rule but its subject is a recovery primitive (the [`ipc_cancel_recv`](../../kernel/src/ipc/mod.rs) error-rollback path), so the rule's "multi-step state machine in the productive design" target lands here for the first time. +The MMU-activation moment is itself a multi-step state-machine transition: the kernel runs at PA `0x4008_0000` with `SCTLR_EL1.M = 0`; we build page tables; we configure `MAIR_EL1`, `TCR_EL1`, `TTBR0_EL1`; we set `SCTLR_EL1.M = 1`; from the next instruction onwards the PC and every load go through the TLB. Getting any of those steps in the wrong order produces an instruction-fetch fault on the very next instruction — a class of hazard the [2026-05-06 B1 smoke regression](../analysis/reviews/business-reviews/2026-05-06-B1-smoke-regression.md) taught the project to walk through with a §Simulation table before Accept. ADR-0027 is the **first non-recovery-primitive state-machine ADR drafted under the [`write-adr` skill](../../.agents/skills/write-adr/SKILL.md) §Simulation discipline** — ADR-0026's table was the empirical retro-source for the rule (it landed alongside the rule's codification); [ADR-0032](0032-endpoint-rollback-and-cancel-recv.md)'s Propose commit was the first ADR drafted *under* the rule but its subject is a recovery primitive (the [`ipc_cancel_recv`](../../kernel/src/ipc/mod.rs) error-rollback path), so the rule's "multi-step state machine in the productive design" target lands here for the first time. 
The decision is load-bearing for the next four ADRs the phase-b ledger reserves: ADR-0028 (address-space data structure) inherits the `AddressSpace` shape from this ADR's TTBR / page-table topology; ADR-0029 (initial userspace image format) inherits the kernel-vs-user VA boundary settled here; ADR-0030 (syscall ABI) inherits the page-fault / capability-grant story; ADR-0031 / future MMU follow-ups (ASID assignment, copy-on-write, huge pages) all build on the same layout. @@ -99,7 +99,7 @@ The 5-step shape is identical to Linux's aarch64 boot's `__cpu_setup` → `__pri For this decision to be fully in effect: ```text -1. Extend [`hal::mmu`](../../hal/src/mmu.rs) with the `MapperFlush` +1. Extend [`hal::mmu`](../../hal/src/mmu/mod.rs) with the `MapperFlush` typed flush token; change `Mmu::map` / `Mmu::unmap` return types to thread the token. Update the in-tree `test-hal` impl (currently `tyrne-test-hal::TestMmu`) to return tokens. — T-016 @@ -181,7 +181,7 @@ T-016's `Done` flip gates only on its own DoD (host-tests + miri + clippy + kern - **No change to ADR-0017's IPC primitive set.** The MMU surface is internal infrastructure; user-observable IPC primitives (`send` / `recv` / `notify`) are untouched. ADR-0017 §Revision notes does not need a rider. - **No change to `SchedError` / `IpcError` taxonomies.** MMU faults raise CPU exceptions handled by [T-012](../analysis/tasks/phase-b/T-012-exception-and-irq-infrastructure.md)'s vector table; they do not surface as scheduler / IPC errors in v1. A future ADR (preemption / fault-handling ABI) defines how MMU faults from userspace map to capability-system errors. - **Bootstrap `mmu_bootstrap` runs once per boot.** It is **not** part of `Mmu` trait. The trait's `create_address_space` / `activate` are for *post-bootstrap* address-space management (dynamic mappings, B3+); bootstrap is BSP-internal. 
-- **No new ADR governance burden.** This ADR follows the [`write-adr` skill](../../.claude/skills/write-adr/SKILL.md) §Simulation discipline (codified in commit `77a578a`); the §Dependency chain section satisfies [ADR-0025 §Rule 1](0025-adr-governance-amendments.md) (every forward-reference is grounded in T-016 which opens with this ADR's Propose commit). +- **No new ADR governance burden.** This ADR follows the [`write-adr` skill](../../.agents/skills/write-adr/SKILL.md) §Simulation discipline (codified in commit `77a578a`); the §Dependency chain section satisfies [ADR-0025 §Rule 1](0025-adr-governance-amendments.md) (every forward-reference is grounded in T-016 which opens with this ADR's Propose commit). ## Pros and cons of the options @@ -213,6 +213,10 @@ T-016's `Done` flip gates only on its own DoD (host-tests + miri + clippy + kern - Pro: ADR-0033 placeholder gives a B5 reader a clear forward-pointer. - Con: Adds one named-but-not-yet-opened ADR slot to the project's mental load. *Mitigation:* the slot is named, not allocated; per [ADR-0025 §Rule 1](0025-adr-governance-amendments.md), no T-NNN is opened today, and the ADR-0033 file does not exist until B5 surfaces the requirement (mirrors the ADR-0023 placeholder pattern, which has the file but explicitly Deferred status). +## Revision notes + +- **2026-05-22 — ADR-0030 / ADR-0031 §Context references are reserved slots, not yet-filed files.** §Context names ADR-0030 (syscall ABI) and ADR-0031 (MMU follow-ups / ASID assignment) as future ADRs. These are **reserved slot numbers** — the `phase-b.md` §B5 ADR ledger formally reserves ADR-0030 (Syscall ABI) and ADR-0031 (Initial syscall set / MMU follow-ups) — not claims that files exist today. No `docs/decisions/0030-*.md` or `0031-*.md` file exists yet; both open with B5 userspace work. This matches the §Dependency-chain treatment of ADR-0033 / ADR-0034 (named-but-unallocated placeholders). 
If the syscall ABI eventually lands under a different number, the §Context references here and in [ADR-0017](0017-ipc-primitive-set.md) / [ADR-0028](0028-address-space-data-structure.md) / [ADR-0029](0029-initial-userspace-image-format.md) are the ones to update. + ## References - [ADR-0009 — `Mmu` HAL trait signature (v1)](0009-mmu-trait.md) — the trait this ADR extends with the `MapperFlush` token return type. diff --git a/docs/decisions/0028-address-space-data-structure.md b/docs/decisions/0028-address-space-data-structure.md index 61ec704..b71d561 100644 --- a/docs/decisions/0028-address-space-data-structure.md +++ b/docs/decisions/0028-address-space-data-structure.md @@ -180,6 +180,10 @@ Forward-flag (not blocking Accept, per [ADR-0025 §Rule 1](0025-adr-governance-a - Con: Either heap-allocation (rejected; kernel has no heap) or a typed-arena-of-trait-objects pattern that [ADR-0016](0016-kernel-object-storage.md) doesn't have today. The typed-arena variant would require either `Box` boxed-into-arena-slot (heap) or non-trivial dyn-trait-arena infrastructure (new audit weight). - Con: The trait-object design premise — "the implementation set is open and runtime-variable" — is wrong for the kernel: exactly one `Mmu` impl is statically linked per BSP. Trait objects are the wrong tool for the static-dispatch use case. +## Revision notes + +- **2026-05-22 — activation TLB-flush behaviour: read §Simulation row 3 *together with* §Consequences→Negative.** The implementation landed strictly more conservative than the original "no auto-flush" sketch: `QemuVirtMmu::activate` issues `MSR TTBR0_EL1; ISB; DSB ISHST; TLBI VMALLE1; DSB ISH; ISB` on every address-space switch (a full TLB flush), because single-core v1 runs with `TCR_EL1.AS = 0` (no per-task ASID isolation), so a global invalidate is the safe choice. 
This correction is **already captured** in the §Simulation row 3 "State post" cell ("TLB **flushed** … more conservative than the 'no auto-flush' note in the original design") and in the §Negative bullet ("the original ADR description called this row 'activation-without-TLB-flush', which was advisory; the implementation landed strictly more conservative in T-018 commit 2"). This rider exists only to make the correction discoverable from a single place: the two passages must be read together. [ADR-0033](0027-kernel-virtual-memory-layout.md#decision-outcome) (high-half migration placeholder) will revisit this full-flush behaviour when B5+ introduces ASID-based isolation. + ## References - [ADR-0009 — `Mmu` HAL trait signature (v1)](0009-mmu-trait.md) — the trait this ADR consumes; defines the [`AddressSpace`](../../hal/src/mmu/mod.rs) associated type and the `create_address_space` / `address_space_root` / `activate` / `map` / `unmap` surface. diff --git a/docs/decisions/0029-initial-userspace-image-format.md b/docs/decisions/0029-initial-userspace-image-format.md index 3b65867..09f00c6 100644 --- a/docs/decisions/0029-initial-userspace-image-format.md +++ b/docs/decisions/0029-initial-userspace-image-format.md @@ -39,7 +39,7 @@ Specifically: - **Mapping flags choice** is owned by **T-019**, not this ADR — this ADR settles the **format** (raw flat bytes), not how those bytes get mapped. T-019 §Approach pins the v1 flags as `MappingFlags::USER | MappingFlags::EXECUTE` for the image region and `MappingFlags::USER | MappingFlags::WRITE` for the stack region; the per-section R-only / RX-only / RW-only discipline is the future [ADR-0034 (kernel-image section permissions)][adr-0034-placeholder] placeholder's responsibility, gated on the first attacker-observable execution context (B5+). - **Entry point.** Always at the start of the embedded blob (offset 0 ↔ VA = ``). No `e_entry`-style indirection.
-[adr-0034-placeholder]: 0027-kernel-virtual-memory-layout.md +[adr-0034-placeholder]: 0027-kernel-virtual-memory-layout.md#decision-outcome - **VA placement.** A fixed userspace base VA is implementation-detail of T-019, not this ADR — the VA range scoping decision is owned by [T-019's Approach](../analysis/tasks/phase-b/T-019-task-loader.md) and bounded by [ADR-0027 §Decision outcome (a)](0027-kernel-virtual-memory-layout.md)'s `TTBR0_EL1` range. The loader maps the blob at a single contiguous VA range determined at compile time per the userspace linker script. - **Build pipeline (B4 / T-019 — placeholder blob).** T-019 ships with a **hand-coded** placeholder blob: a small `&[u8]` literal (e.g. `[0x40, 0x00, 0x80, 0xd2, 0xc0, 0x03, 0x5f, 0xd6]` for `mov w0, #42; ret`) embedded into the BSP at compile time — sufficient to exercise the loader's `cap_create_address_space` + `cap_map` + `LoadedImage`-return path under host tests + the smoke trace without depending on a userspace toolchain. **No `cargo build`-to-`objcopy` pipeline lands with T-019.** (The bytes shown here are the Accept-state illustrative literal; see §Revision notes for the post-Accept byte-encoding correction.) - **Build pipeline (B6 — real userspace crate).** B6's `userland/hello/` crate (separate, future task — not opened in B4) is the first **real** userspace binary: a `no_std, no_main` aarch64 crate built via `cargo build --target aarch64-unknown-none` (`cargo build` default for the userland workspace member) and stripped to raw bytes via `objcopy -O binary` (or the equivalent `cargo-binutils` invocation) as a userland-crate build-script step. The kernel embeds the resulting `.bin` via `include_bytes!("../../userland/hello/target/.../hello.bin")` (or similar — the exact path lands with B6). Until B6 lands, T-019's placeholder blob is the only userspace image in tree. 
diff --git a/docs/decisions/0035-physical-memory-manager.md b/docs/decisions/0035-physical-memory-manager.md index ade0908..1f819c6 100644 --- a/docs/decisions/0035-physical-memory-manager.md +++ b/docs/decisions/0035-physical-memory-manager.md @@ -154,7 +154,7 @@ T-017's `Done` flip gates only on its own DoD (host-tests + miri + clippy + kern - **No change to ADR-0017's IPC primitive set.** The PMM is internal infrastructure; user-observable IPC primitives (`send` / `recv` / `notify`) are untouched. - **No change to `SchedError` / `IpcError` taxonomies.** PMM failures surface as `MmuError::OutOfFrames` (already in the enum) when called via `FrameProvider`; direct PMM API failures surface as `PmmError` (a new enum, scoped to PMM-internal callers). - **The `FrameProvider` trait is unchanged.** v1's ADR-0009 surface accepts the new PMM impl without any Revision rider. The PMM is the first real `FrameProvider` impl outside the host-test `VecFrameProvider`; integration is via the existing trait method, no new HAL surface. -- **No new ADR governance burden.** This ADR follows the [`write-adr` skill §Simulation](../../.claude/skills/write-adr/SKILL.md) discipline (codified in commit `77a578a`); the §Dependency chain section satisfies [ADR-0025 §Rule 1](0025-adr-governance-amendments.md) (every forward-reference is grounded in T-017 which opens with this ADR's Propose commit). +- **No new ADR governance burden.** This ADR follows the [`write-adr` skill §Simulation](../../.agents/skills/write-adr/SKILL.md) discipline (codified in commit `77a578a`); the §Dependency chain section satisfies [ADR-0025 §Rule 1](0025-adr-governance-amendments.md) (every forward-reference is grounded in T-017 which opens with this ADR's Propose commit). 
## Pros and cons of the options @@ -205,4 +205,8 @@ T-017's `Done` flip gates only on its own DoD (host-tests + miri + clippy + kern - xv6 PMM (in-frame linked-list of free frames) — prior art for Option C; rejected for forward-compat reasons above. - Linux's `bootmem` allocator (`mm/bootmem.c` / `mm/memblock.c`) — direct prior art for the bitmap shape Option A adopts; Linux replaces bootmem with `buddy` post-init, which is the natural future ADR slot if v1's bitmap surfaces a fragmentation hot path. - seL4's untyped-region model (`src/object/untyped.c`) — capability-mediated frame ownership; forward-flag for B5+ `MemoryRegionCap` work that sits on top of this PMM. + +## Revision notes + +- **2026-05-22 — the "ADR-0028 … no file today" §Context parenthetical is now stale.** §Context says the ADR-0028 slot "is reserved for the address-space data structure … no file today, opens with the second B3 ADR." That was true when this ADR was accepted (2026-05-09); [ADR-0028 (Address-space data structure)](0028-address-space-data-structure.md) was subsequently authored and **Accepted 2026-05-11**. The §Context narrative is preserved (append-only); this rider records that ADR-0028 now exists and is the second B3 ADR the parenthetical anticipated. - Hubris's allocator-less stance (`hubris/lib/userlib/src/lib.rs`) — Cortex-M target with no MMU; direct comparison shows that the PMM only matters when the architecture has an MMU active, which Tyrne does post-T-016. 
diff --git a/docs/decisions/0036-qemu-virt-gicv2-no-iommu-v1.md b/docs/decisions/0036-qemu-virt-gicv2-no-iommu-v1.md new file mode 100644 index 0000000..2a8bf41 --- /dev/null +++ b/docs/decisions/0036-qemu-virt-gicv2-no-iommu-v1.md @@ -0,0 +1,207 @@ +# 0036 — QEMU virt is GICv2 / no-IOMMU in v1; corrects GICv3/SMMUv3 in ADR-0004/0006/0012 + +- **Status:** Accepted +- **Date:** 2026-05-22 +- **Deciders:** @cemililik + +## Context + +Three foundational platform ADRs describe the QEMU `virt` interrupt controller +and IOMMU incorrectly: + +- [ADR-0004 §Decision outcome](0004-target-platforms.md) calls the QEMU `virt` + controller "GICv3". +- [ADR-0006 §Decision outcome](0006-workspace-layout.md) lists the + `tyrne-bsp-qemu-virt` role as implementing "GICv3 + PL011 + SMMUv3". +- [ADR-0012 §Decision drivers](0012-boot-flow-qemu-virt.md) names the + "`GICv3` distributor at `0x0800_0000`". + +The shipped code contradicts all three. The BSP driver is a GICv2 driver +([`bsp-qemu-virt/src/gic.rs:1`](../../bsp-qemu-virt/src/gic.rs) — "GIC v2 driver +for QEMU virt aarch64") that programs the memory-mapped `GICD_*` / `GICC_*` +interface of GICv2 (per ARM IHI 0048B); it does **not** use the GICv3 system-register +interface (`ICC_IAR1_EL1`, `ICC_EOIR1_EL1`, etc.). QEMU's `virt` machine defaults +to a GICv2 (GIC-400-class) controller; GICv3 is only provided when the machine is +launched with `-machine gic-version=3`, which the project's runner does not pass. + +For the IOMMU, [`hal/src/lib.rs:62`](../../hal/src/lib.rs) defines `pub trait Iommu {}` — +an empty marker with no methods and no implementation anywhere in `bsp-qemu-virt/`. +There is no SMMUv3 driver. QEMU `virt` can expose an SMMUv3 only when launched with +`-device smmuv3`, which the project does not do today. + +These are accuracy errors in Accepted ADR bodies. 
Under the append-only rule +([CLAUDE.md rule 5](../../CLAUDE.md)) the bodies of those ADRs cannot be edited to +match reality — doing so would rewrite the historical record. Worse, the append-only +policy has *frozen* the contradiction in place: a reader following the conflict-resolution +convention ("disagree with a decision by writing a new ADR that supersedes the old one" +— [decisions/README.md](README.md)) has no forward pointer telling them the GICv3/SMMUv3 +statements are wrong. This ADR is the corrective record those three ADRs need, and it +authorises one-line top-of-file redirect riders on each (append-only-legal — the riders +do not alter the original bodies). + +This is a **retroactive-recovery ADR** in the sense of the [write-adr skill](../../.agents/skills/write-adr/SKILL.md) +§Procedure step 4: it records a correction after the fact, marked explicitly here, rather +than gating future work. + +## Decision drivers + +- **The build is the source of truth for what hardware v1 targets.** The driver, the + MMIO register set, and the empty `Iommu` trait are unambiguous; the ADRs are the side + that drifted. +- **Append-only preservation of the historical record.** ADR-0004/0006/0012 must keep + their original bodies. A correction belongs in a new ADR plus append-only riders, never + in an edit to the frozen Decision outcome. +- **A reader who hits the stale line must be redirected, not silently misled.** Without a + forward pointer the contradiction is undetectable from inside the old ADR. +- **Honesty about aspirational invariants.** The security model's DMA-capability-scoping + invariant ("a device may only DMA to memory it holds a capability for") depends on an + IOMMU that does not exist on QEMU virt in v1. It must be stated as *future-on-QEMU* + (currently aspirational), not as a property the running system enforces. 
+- **No weakening of any security guarantee.** Correcting the record to say "no IOMMU in + v1" does not remove a guarantee the kernel ever made; it documents that the guarantee is + not yet in force on this target, which is the more conservative statement. + +## Considered options + +1. **Edit the three ADR bodies in place.** Change "GICv3" → "GICv2" and remove the SMMUv3 + claim directly in ADR-0004/0006/0012. +2. **New corrective ADR + append-only top-of-file redirect riders (chosen).** Record the + correction here; append a one-line redirect to each affected ADR pointing readers to + this record, leaving the original bodies intact. +3. **Leave it alone; rely on the architecture docs being correct.** The architecture docs + (`overview.md`, `exceptions.md`, `hal.md`, `phase-b.md`) already say GICv2; treat the + ADR statements as known-stale and do nothing. + +## Decision outcome + +Chosen option: **Option 2 — a new corrective ADR plus append-only top-of-file redirect +riders on ADR-0004, ADR-0006, and ADR-0012.** + +The corrected facts for QEMU `virt` v1 are: + +- **Interrupt controller: GICv2.** QEMU `virt` is GICv2 (GIC-400-class) by default; the + BSP ships a GICv2-only driver using the `GICD_*` / `GICC_*` MMIO interface. GICv3 would + require `-machine gic-version=3` and a system-register driver, neither of which exists in + v1. The Raspberry Pi 4 first-hardware target is also GICv2 (its GIC-400). The address + `0x0800_0000` named in ADR-0012 is **correct** — only the version label "GICv3" is wrong. + +- **IOMMU: none in v1.** **QEMU virt is GICv2 / no IOMMU in v1; the `Iommu` trait is a + stub reserved for a future SMMUv3 ADR.** The empty `pub trait Iommu {}` in + `hal/src/lib.rs` is a deliberate placeholder, not an implemented surface. No SMMUv3 + driver is built; QEMU virt would require `-machine iommu=smmuv3` and a future ADR to introduce + one. 
+ +- **DMA-capability-scoping invariant is future-on-QEMU (currently aspirational).** The + security model's intent that device DMA be confined to capability-granted memory cannot be + enforced without an IOMMU. On QEMU virt v1 there is no IOMMU, so the invariant is not in + force; it becomes enforceable only when a future SMMUv3 ADR lands an `Iommu` + implementation and the kernel programs it. Until then it is documented as aspirational, + not as a running guarantee. This is consistent with `security-model.md`, which already + frames SMMUv3 as future/conditional ("ADR required before the first driver that enables + bus-master DMA"). + +The three affected ADRs keep their Accepted status: each contains exactly one stale clause, +not a wrong decision, so a status flip to `Superseded by 0036` would overstate the change +(their target-platform, workspace-layout, and boot-flow decisions all stand). Instead each +gains a one-line top-of-file redirect rider added in the same change that lands this ADR. + +### Simulation + +Not applicable — this ADR settles a single-shape factual correction; there is no +state-machine to simulate. + +### Dependency chain + +For this decision to be fully in effect: + +```text +1. This corrective ADR exists and is Accepted — ADR-0036 (this file). +2. One-line redirect riders appended to the top of the affected ADR bodies — + ADR-0004, ADR-0006, ADR-0012 (added in the same change as this ADR; + append-only, bodies otherwise unchanged). +3. The future SMMUv3 / IOMMU ADR that gives the DMA-scoping invariant teeth — + no T-NNN today; opens with the first driver that enables bus-master DMA + (per security-model.md), at which point the `Iommu` trait gains a concrete + implementation. Reserved as a forward-flag only; no implementation work + depends on it before that driver exists. +``` + +Steps 1 and 2 are discharged by the change that lands this ADR. 
Step 3 is a named +forward-flag with no slot opened today, mirroring the ADR-0033/0034 placeholder pattern, +because no current implementation work depends on it. + +## Consequences + +### Positive + +- A reader who hits "GICv3" or "SMMUv3" in ADR-0004/0006/0012 now has a forward pointer to + the corrected record, restoring the conflict-resolution path the append-only rule had + frozen. +- The ADR corpus stops contradicting the build and the architecture docs on the interrupt + controller and IOMMU. +- The DMA-scoping invariant is stated honestly as aspirational-on-QEMU, so no reader + assumes a protection the hardware does not provide in v1. + +### Negative + +- One more ADR and three riders to maintain. *Mitigation:* the riders are one line each and + the correction is mechanical; the alternative (in-place edits) would violate the + append-only rule, which is the higher cost. +- The numbering jumps to 0036 (slots 0030/0031/0033/0034 remain reserved placeholders). + *Mitigation:* the gaps are intentional and documented in [decisions/README.md](README.md); + ADR numbers are stable history and are never renumbered. + +### Neutral + +- This ADR makes no new platform decision; it corrects the record of decisions already made. + The first-hardware target (Pi 4) and the QEMU-virt-first strategy are unchanged. +- Phase-C and Phase-D plan files that reuse numbers 0027–0036 for future subjects must + renumber above the live Phase-B ceiling; this ADR consumes 0036, so those plans renumber + to 0037+ (coordinated separately; recorded here so the cross-stream renumbering base is + unambiguous). + +## Pros and cons of the options + +### Option 1 — edit the three ADR bodies in place + +- Pro: a single reader of the old ADR sees the corrected fact with no second hop. +- Con: violates the append-only rule (CLAUDE.md rule 5) — it rewrites Accepted Decision + outcomes and destroys the historical record of what was originally believed. 
+- Con: leaves no trace that a correction happened, so the *why* (build is GICv2/no-IOMMU) is + lost. + +### Option 2 — new corrective ADR + redirect riders (chosen) + +- Pro: append-only-legal; original bodies preserved. +- Pro: records the corrected facts and their rationale in one citable place. +- Pro: gives the reader of any stale line a forward pointer. +- Con: two-hop read (old ADR → redirect → this ADR) and a small maintenance surface. + +### Option 3 — leave it alone + +- Pro: zero work. +- Con: the contradiction stays frozen; an agent or contributor reading the foundational ADRs + is actively misled about the interrupt controller and IOMMU, and the conflict-resolution + convention provides no escape hatch. + +## References + +- [ADR-0004: Target hardware platforms and support tiers](0004-target-platforms.md) — + corrected here (GICv3 → GICv2). +- [ADR-0006: Workspace layout and initial crate boundaries](0006-workspace-layout.md) — + corrected here (GICv3 + SMMUv3 → GICv2; `Iommu` is a stub). +- [ADR-0012: Boot flow and memory layout for `bsp-qemu-virt`](0012-boot-flow-qemu-virt.md) — + corrected here (GICv3 distributor → GICv2 distributor; the `0x0800_0000` address is + correct). +- [ADR-0011: `IrqController` HAL trait signature (v1)](0011-irq-controller-trait.md) — + abstracts both GICv2 and GICv3; its trait surface is unaffected by this correction. +- [`bsp-qemu-virt/src/gic.rs`](../../bsp-qemu-virt/src/gic.rs) — the GICv2 driver (source of + truth for the controller version). +- [`hal/src/lib.rs`](../../hal/src/lib.rs) — `pub trait Iommu {}` (the empty stub). +- [`docs/architecture/security-model.md`](../architecture/security-model.md) — frames + SMMUv3 / DMA scoping as future/conditional; consistent with this ADR. +- ARM *GIC Architecture Specification* — GICv2 `GICD_*` / `GICC_*` MMIO vs. GICv3 `ICC_*` + system-register interface. +- ARM *GIC-400 Technical Reference Manual* — the GICv2 implementation in QEMU virt and Pi 4. 
+- QEMU `virt` machine documentation — https://qemu.readthedocs.io/en/latest/system/arm/virt.html + (default `gic-version=2`; `-machine iommu=smmuv3` for the IOMMU). diff --git a/docs/decisions/README.md b/docs/decisions/README.md index 5b5e209..3b5a615 100644 --- a/docs/decisions/README.md +++ b/docs/decisions/README.md @@ -60,11 +60,17 @@ Each ADR contains: | 0029 | [Initial userspace image format (B4 — raw flat binary)](0029-initial-userspace-image-format.md) | Accepted | 2026-05-14 | | 0032 | [Endpoint state rollback on `ipc_recv_and_yield` Deadlock + `ipc_cancel_recv` primitive](0032-endpoint-rollback-and-cancel-recv.md) | Accepted | 2026-05-07 | | 0035 | [Physical Memory Manager (B3 prerequisite — bitmap allocator)](0035-physical-memory-manager.md) | Accepted | 2026-05-09 | +| 0036 | [QEMU virt is GICv2 / no-IOMMU in v1; corrects GICv3/SMMUv3 in ADR-0004/0006/0012](0036-qemu-virt-gicv2-no-iommu-v1.md) | Accepted | 2026-05-22 | + +> **Numbering gaps.** Slots **0030**, **0031**, **0033**, **0034** are intentionally reserved, not missing: 0030 (syscall ABI) and 0031 (initial syscall set / MMU follow-ups) are reserved for B5 per the `phase-b.md` §B5 ADR ledger; 0033 (high-half migration) and 0034 (kernel-image section permissions) are named-but-unallocated placeholders forward-flagged in ADR-0027/0028/0029. No files exist for these yet; they open when the corresponding work surfaces. ADR numbers are stable history and are never renumbered. ## Creating a new ADR +The authoritative, step-by-step procedure is the [`write-adr` skill](../../.agents/skills/write-adr/SKILL.md) (and [`supersede-adr`](../../.agents/skills/supersede-adr/SKILL.md) when overriding an old ADR). Read it in full before drafting; the summary below is a reminder, not a substitute. + 1. Copy [template.md](template.md) to the next available number: `NNNN-your-slug.md`. 2. Fill it in. Start with status `Proposed`. -3. 
Open a PR (once the PR process is established) or, in the solo phase, commit directly with a descriptive commit message referencing the ADR number. -4. When the decision is settled, change the status to `Accepted`. -5. If a later ADR overrides this one, mark the old one `Superseded by NNNN` and link forward to the new record. Do **not** delete or rewrite the old ADR — the historical reasoning is the point. +3. For an ADR whose subject is a multi-step state machine (capability flows, IPC handshakes, scheduler dispatch, exception/IRQ entry, MMU/TLB transitions, syscall ABI handshakes), include a **§Simulation** table (3–5 rows walking the worst-case interaction) per [ADR-0025](0025-adr-governance-amendments.md) and the `write-adr` skill; other ADR subjects use the one-line "Not applicable" note. Every ADR's §Decision outcome must also include a **§Dependency chain** grounding each forward-reference in a real `T-NNN` (per ADR-0025 §Rule 1). +4. Open a PR (once the PR process is established) or, in the solo phase, commit directly with a descriptive commit message referencing the ADR number. +5. When the decision is settled, re-read end-to-end (per `write-adr` skill §10) and change the status to `Accepted` in a separate commit from the initial `Proposed` draft. +6. If a later ADR overrides this one, mark the old one `Superseded by NNNN` and link forward to the new record. Do **not** delete or rewrite the old ADR — the historical reasoning is the point. diff --git a/docs/decisions/template.md b/docs/decisions/template.md index 2a3a07c..56ce153 100644 --- a/docs/decisions/template.md +++ b/docs/decisions/template.md @@ -8,7 +8,7 @@ Status enum (use one): - Proposed — drafted, awaiting Accept after careful re-read. - Accepted — settled; the project follows this decision. - - Deferred — recognised as needed but explicitly postponed; no file body required if filed-but-deferred (see ADR-0018, ADR-0023). 
+ - Deferred — recognised as needed but explicitly postponed; no file body required if filed-but-deferred (see ADR-0023). - Deprecated — historical; followed for a time but no longer. - Superseded by NNNN — overridden by a later ADR; old body preserved for the historical record (per supersede-adr skill). --> diff --git a/docs/glossary.md b/docs/glossary.md index 9adf571..c171bd7 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -12,6 +12,8 @@ Terminology used throughout Tyrne. Entries are alphabetical. If a term appears i **Arena.** A fixed-capacity slot array that backs a specific kernel-object kind (tasks, endpoints, notifications). Per [ADR-0016](decisions/0016-kernel-object-storage.md), every kernel-object type has its own arena; slots are handed out and returned without heap involvement. See also *Generation tag*. +**Badge.** An immutable value embedded in a derived capability that lets the holder of the original object identify *which* derived capability a message arrived through. Badges are stamped at derivation time and cannot be altered by the holder, so a receiver can distinguish callers without trusting them. Borrowed from seL4, where a badge on an endpoint capability identifies the sender. In Tyrne the badge scheme is specified by [ADR-0018](decisions/0018-badge-scheme-and-reply-recv-deferral.md). See also *Reply capability*. + **BSP (Board Support Package).** The concrete implementation of HAL trait surfaces for a specific board. A BSP plugs into the kernel at build time and provides drivers for on-board peripherals. **Capability.** An unforgeable token, held by a subject (process, task, thread), that authorizes a specific operation on a specific object. In a capability-based system, *having the capability is the permission*; there is no separate access control list to consult. @@ -74,6 +76,8 @@ Terminology used throughout Tyrne. Entries are alphabetical. 
If a term appears i **Ready queue.** The scheduler's bounded FIFO of task handles that are runnable and waiting for the CPU. Tyrne's queue capacity equals the task arena capacity, so it can never refuse an enqueue when the total task count is within the limit. See [ADR-0019](decisions/0019-scheduler-shape.md). +**Reply capability.** A single-use send capability that the kernel auto-issues to a receiver when it accepts a call-style IPC message, naming the original sender so the receiver can reply exactly once without holding a standing capability to that sender. Using or dropping it consumes it. Borrowed from seL4's reply-capability mechanism. In Tyrne the reply-capability scheme is specified — and its eager-versus-lazy issuance deferred — by [ADR-0018](decisions/0018-badge-scheme-and-reply-recv-deferral.md). See also *Badge* and *Endpoint*. + **Rendezvous IPC.** A synchronous IPC model where `ipc_send` and `ipc_recv` meet at an endpoint: the first caller records a waiter, the second delivers and unblocks it, both return with the transfer complete. Tyrne uses rendezvous IPC per [ADR-0017](decisions/0017-ipc-primitive-set.md). **seL4.** A formally verified microkernel in the L4 family. Its verified correctness and capability-based design are reference points for Tyrne, even though Tyrne is not aiming for full formal verification in its first years. @@ -82,6 +86,8 @@ Terminology used throughout Tyrne. Entries are alphabetical. If a term appears i **StaticCell.** A BSP helper in [bsp-qemu-virt](../bsp-qemu-virt/src/main.rs) that wraps `UnsafeCell<MaybeUninit<T>>` to provide write-once-at-boot, share-afterwards static storage for kernel state. It exposes `as_mut_ptr` so callers can derive raw pointers without materialising a `&mut` (see [ADR-0021](decisions/0021-raw-pointer-scheduler-ipc-bridge.md)). +**TCB (Trusted Computing Base).** The set of components that must be correct for the system's security guarantees to hold — code whose compromise would compromise everything. 
Tyrne keeps the TCB deliberately small by running drivers, filesystems, and network stacks in userspace rather than in the kernel, so that adding a feature does not enlarge the trusted core unless it strictly must; the README frames this as "the entire trusted computing base can be audited line by line." The boundary of the TCB is drawn in [architecture/security-model.md](architecture/security-model.md). See also *Microkernel* and *Trust boundary*. + **Trust boundary.** A line in the system at which assumptions about integrity, confidentiality, or availability change. Crossing a trust boundary should require an explicit capability check. Trust boundaries are drawn in [architecture/security-model.md](architecture/security-model.md). **Unsafe (Rust).** A block of Rust code that opts out of some compiler-enforced invariants (e.g., to dereference raw pointers or call FFI). In Tyrne, every `unsafe` block is commented with justification (invariants, rejected alternatives, audit tag) per [`unsafe-policy.md`](standards/unsafe-policy.md), and tracked in the audit log. 
diff --git a/docs/guides/ci.md b/docs/guides/ci.md index 4a47fae..5935881 100644 --- a/docs/guides/ci.md +++ b/docs/guides/ci.md @@ -6,13 +6,26 @@ This guide describes the GitHub Actions pipeline configured in [`.github/workflo | Job | Toolchain | Wall time (expected) | Fails on | |-----|-----------|----------------------|----------| -| `lint-and-host-test` | stable + `rustfmt` + `clippy` | ~2 min | `cargo fmt --check` diff, any clippy warning, any failing host test | -| `kernel-build` | stable + `aarch64-unknown-none` + `clippy` | ~1 min | `cargo kernel-build` error, any kernel-clippy warning | -| `miri` | nightly + `miri` component | ~10–15 min | Any Stacked Borrows violation in `cargo +nightly miri test --workspace --exclude tyrne-bsp-qemu-virt` | -| `coverage` | nightly + `llvm-tools-preview` + `cargo-llvm-cov` | ~3–5 min | **Never** (informational only, `continue-on-error: true`) | +| `lint-and-host-test` | pinned nightly + `rustfmt` + `clippy` | ~2 min | `cargo fmt --check` diff, any clippy warning, any failing host test | +| `kernel-build` | pinned nightly + `aarch64-unknown-none` + `clippy` | ~1 min | `cargo kernel-build` error, any kernel-clippy warning | +| `host-stable-check` | stable (no extra components) | ~2 min | host crates failing to build or test on stable Rust | +| `miri` | pinned nightly + `miri` component | ~10–15 min | Any Stacked Borrows violation in `cargo +nightly miri test --workspace --exclude tyrne-bsp-qemu-virt` | +| `coverage` | pinned nightly + `llvm-tools-preview` + `cargo-llvm-cov` | ~3–5 min | **Never** (informational only, `continue-on-error: true`) | All jobs run on `ubuntu-latest`. Each caches cargo registry + build artefacts keyed by `Cargo.lock` hash, so warm runs are far faster than first runs. +### Why do the kernel jobs run on the pinned nightly, not stable? + +The kernel needs nightly to build (inline asm intrinsics / lang items — see `rust-toolchain.toml` and ADR-0002). 
`rust-toolchain.toml` pins `nightly-2026-01-15` at the repo root, and rustup's override precedence means that file selects the pinned nightly for **every** in-repo `cargo` invocation regardless of `rustup default`. So `lint-and-host-test`, `kernel-build`, `miri`, and `coverage` all select the pin explicitly (`cargo +$NIGHTLY_PIN …`) — the same toolchain a contributor's local `cargo` uses. + +### What does `host-stable-check` give us, then? + +A genuine "the host-buildable crates compile and pass tests on **stable** Rust" signal. It runs `cargo +stable build` and `cargo +stable host-test` over the workspace `default-members` (kernel, hal, test-hal) — the bare-metal BSP is excluded because it needs nightly. The `+stable` prefix bypasses the `rust-toolchain.toml` override so this job exercises stable for real. It deliberately does **not** run clippy/fmt with `-D warnings`: `clippy::pedantic` is `warn` workspace-wide and stable is a rolling toolchain, so a future stable pedantic lint could redden the gate with no code change of ours — lint/format enforcement lives only on the pinned-nightly jobs. If a host crate ever grows a `#![feature(...)]` it does not strictly need, this job is the one that goes red on stable. + +### How the action versions are pinned + +Every third-party GitHub Action is pinned to a full 40-character commit SHA with the human-readable version in a trailing comment (e.g. `actions/checkout@11bd719… # v4.2.2`). Tags are mutable; SHAs are not. This applies the same anti-silent-drift discipline used for `NIGHTLY_PIN` and `cargo-llvm-cov` to the actions that wrap them — `taiki-e/install-action` in particular downloads and executes a prebuilt binary, so a tag repoint would be arbitrary code execution in CI. See [`docs/standards/infrastructure.md`](../standards/infrastructure.md) §"Supply-chain security" → "GitHub Actions pinning" for the refresh path. + ## Triggers - **Every push** to `main` or `development`. 
@@ -31,10 +44,11 @@ The CI matrix mirrors what a contributor should run locally before opening a PR: | `cargo host-test` | `lint-and-host-test` (step 3) | | `cargo kernel-build` | `kernel-build` | | `cargo kernel-clippy` | `kernel-build` | +| `cargo +stable build` + `cargo +stable host-test` (host crates) | `host-stable-check` | | `cargo +nightly miri test --workspace --exclude tyrne-bsp-qemu-virt` | `miri` | | `cargo llvm-cov --workspace --exclude tyrne-bsp-qemu-virt --summary-only` | `coverage` | -If you pass all seven locally, CI should pass too. If CI fails on something you passed locally, the most common cause is that your local `rustup default` is pinned to a version CI doesn't have — run `rustup update stable` and retry. +If you pass these locally, CI should pass too. Your local `cargo` runs the pinned nightly automatically (the `rust-toolchain.toml` override), which is the same toolchain the kernel CI jobs use — so a local/CI divergence on those jobs is rare. If you see one, confirm your pinned nightly is installed (`rustup toolchain list` should show `nightly-2026-01-15`); rustup installs it on first use, but a stale or partial install can drift. For the `host-stable-check` job, reproduce with the explicit prefix: `cargo +stable build` (install stable first with `rustup toolchain install stable` if needed). ## Why is `tyrne-bsp-qemu-virt` excluded from Miri and coverage? @@ -73,5 +87,5 @@ To bump either pin: The `coverage` job is marked `continue-on-error: true`. GitHub's UI renders this as a **neutral / yellow** verdict rather than green or red. Be deliberate when configuring branch-protection rules: - **Do not add `coverage` to the required-checks list** while it is informational; the neutral result does not satisfy `required == passing`, so every push would be blocked even when coverage is fine. -- **Do add `lint-and-host-test`, `kernel-build`, and `miri`** to required checks — those three are the real gates. 
+- **Do add the four real gates** to required checks. GitHub matches each job's display **name**, not its id, so add these exact strings: `fmt + clippy + host tests (nightly)`, `aarch64-unknown-none kernel build (nightly)`, `host crates on stable`, and `miri (Stacked Borrows)`. - When coverage flips from informational to enforcing (planned post-T-011), remove `continue-on-error: true` first, confirm a full run is green, then add the job to branch-protection. diff --git a/docs/guides/run-under-qemu.md b/docs/guides/run-under-qemu.md index d8d46b3..ac5187e 100644 --- a/docs/guides/run-under-qemu.md +++ b/docs/guides/run-under-qemu.md @@ -34,7 +34,7 @@ Build the Phase 4c kernel image and boot it under `qemu-system-aarch64` on the ` 1. **Clone the repository** if you have not already. ```sh - git clone https://github.com/cemililik/Tyrne.git + git clone https://github.com/HodeTech/Tyrne.git cd Tyrne ``` diff --git a/docs/roadmap/README.md b/docs/roadmap/README.md index 2ef8b71..73b4418 100644 --- a/docs/roadmap/README.md +++ b/docs/roadmap/README.md @@ -22,7 +22,7 @@ The plan is separate from the work. Individual tasks and reviews live under [`.. - **Reviews** — they live under [`../analysis/reviews//`](../analysis/reviews/). - **Design rationale for architectural choices** — [`../decisions/`](../decisions/) (ADRs). - **Procedural rules** — [`../standards/`](../standards/). -- **Repeatable procedures** — [`../../.claude/skills/`](../../.claude/skills/). +- **Repeatable procedures** — [`../../.agents/skills/`](../../.agents/skills/). ## How to read the roadmap @@ -42,5 +42,5 @@ See [ADR-0013](../decisions/0013-roadmap-and-planning.md) for the full identifie - **Tweaking a phase's milestones or sub-breakdowns** — edit the phase file. - **Adding or dropping a phase, or reordering phases at the top level** — structural change; requires an ADR that supersedes the affected statements. -- **Opening a new task** — [`start-task`](../../.claude/skills/start-task/SKILL.md) skill. 
-- **Running a review** — [`conduct-review`](../../.claude/skills/conduct-review/SKILL.md) skill. +- **Opening a new task** — [`start-task`](../../.agents/skills/start-task/SKILL.md) skill. +- **Running a review** — [`conduct-review`](../../.agents/skills/conduct-review/SKILL.md) skill. diff --git a/docs/roadmap/current.md b/docs/roadmap/current.md index 1efd4c8..b34fcf5 100644 --- a/docs/roadmap/current.md +++ b/docs/roadmap/current.md @@ -4,11 +4,13 @@ A short pointer file updated as work progresses. For the full plan see [`phases/ --- -> **2026-05-15 update — T-019 implementation In Review on PR #31; branch `t-019-task-loader`.** Implementation arc lands across **7 bisectable commits** ending at `95efd62` (review-round 4 follow-up — the final substantive commit of the arc; subsequent commits on the branch are post-review doc/style polish): (1) `911f2ad` — `task_loader` module skeleton; (2) `5711756` — `load_image` + UNSAFE-2026-0027; (3) `ae31bc8` — BSP wiring + arch doc + UNSAFE-2026-0025/0026 smoke-verification Amendments; (4) `196d3fb` — review-round 1 follow-up (PA-overlap preflight + `ImageOverlapsAllocatableMemory` variant + `mov w0, #42` placeholder bytes correction); (5) `164522d` — review-round 2 follow-up (`phys_frame_kernel_ptr` helper + VA-range preflight + `InvalidImageBaseVa` variant + BSP `CapRights::empty()` justification); (6) `5b1f153` — review-round 3 follow-up (`intermediate_frame_count` exact-count helper replaces off-by-one constant; VA-range check reordered before frame-budget; doc sync); (7) `95efd62` — review-round 4 follow-up (alignment preflight at row 1 + new `MisalignedImageBaseVa(VirtAddr)` variant closing a preventable root-frame-leak path on internal-API misuse; `accepts_image_disjoint_from_pmm_extent` test made deterministic via `.rodata` static; `FrameBudgetExceeded` variant doc-comment refreshed). 
The `LoadError` taxonomy is **10 variants**: `InvalidImage`, `InvalidStackSize`, `MisalignedImageBaseVa(VirtAddr)`, `InvalidImageBaseVa { base, end }`, `InvalidParentCap(CapError)`, `FrameBudgetExceeded { needed, available }`, `ImageOverlapsAllocatableMemory`, `AddressSpaceCreationFailed(AddressSpaceError)`, `OutOfFrames`, `MapFailed(AddressSpaceError)`. **First runtime exerciser of UNSAFE-2026-0025 (post-bootstrap `Mmu::map` page-table descriptor writes) + UNSAFE-2026-0026 (PMM `alloc_frame` zero-fill) + UNSAFE-2026-0027 (new — task-loader `copy_nonoverlapping`)**; all three smoke-verified at runtime via the `tyrne: image loaded (entry = 0x800000; sp = 0x802000; image bytes 8; stack bytes 4096; AS cap = idx 1)` boot line. New `docs/architecture/task-loader.md` chapter synthesises the loader sequence + rollback contract + v1 baseline leaks. Tests at HEAD: **259/259** host-test count (round-4 commit reframes the misaligned-VA test rather than adding new ones; the distinctness assertion gained 2 sub-cases but those land inside an existing test). All gates clean: `cargo fmt --check`, `cargo host-test`, `cargo host-clippy -D warnings`, `cargo kernel-clippy -D warnings`, `cargo kernel-build`. Smoke trace byte-stable through full demo to `tyrne: all tasks complete`; `-d int,unimp,guest_errors` reports only the pre-existing 629 PL011-disabled-UART warnings. PR #31 awaiting reviewer pass; on merge, B4 implementation half closes and B5 (syscall ABI per ADR-0030) opens for the runnability prerequisites. +> **2026-05-16 update — T-019 merged; B4 implementation-complete; closure trio pending.** PR #31 merged into `main` at commit `7f876af` ("Merge pull request #31 from cemililik/t-019-task-loader"), landing T-019 (task loader) on `main`. 
The branch arc continued past the review-round-4 commit named in the 2026-05-15 banner below with two further follow-up commits: `5078944` (review-round 5 — added one PMM host test, taking the suite to **260/260**) and `eb14c51` (review-round 6 — 5 valid findings). T-019 status flips `In Review → Done` (`date_done: 2026-05-16`). **Host-test count at HEAD: 260/260** (42 hal + 175 kernel + 43 test-hal); the 2026-05-15 banner's "259/259" was accurate when written, before the round-5 PMM test landed. B4 is now **implementation-complete**; the **B4 closure trio (business + security + performance reviews) has NOT yet fired** and is the next review trigger (the maintainer sequences it separately). This banner resolves the pre-merge "In Review" state recorded below — that banner is retained as a point-in-time record. +> +> **2026-05-15 update — T-019 implementation In Review on PR #31; branch `t-019-task-loader` (pre-merge snapshot; superseded by the 2026-05-16 banner above).** Implementation arc lands across **7 bisectable commits** ending at `95efd62` (review-round 4 follow-up — the final substantive commit of the arc; subsequent commits on the branch are post-review doc/style polish): (1) `911f2ad` — `task_loader` module skeleton; (2) `5711756` — `load_image` + UNSAFE-2026-0027; (3) `ae31bc8` — BSP wiring + arch doc + UNSAFE-2026-0025/0026 smoke-verification Amendments; (4) `196d3fb` — review-round 1 follow-up (PA-overlap preflight + `ImageOverlapsAllocatableMemory` variant + `mov w0, #42` placeholder bytes correction); (5) `164522d` — review-round 2 follow-up (`phys_frame_kernel_ptr` helper + VA-range preflight + `InvalidImageBaseVa` variant + BSP `CapRights::empty()` justification); (6) `5b1f153` — review-round 3 follow-up (`intermediate_frame_count` exact-count helper replaces off-by-one constant; VA-range check reordered before frame-budget; doc sync); (7) `95efd62` — review-round 4 follow-up (alignment preflight at row 1 + new `MisalignedImageBaseVa(VirtAddr)` 
variant closing a preventable root-frame-leak path on internal-API misuse; `accepts_image_disjoint_from_pmm_extent` test made deterministic via `.rodata` static; `FrameBudgetExceeded` variant doc-comment refreshed). The `LoadError` taxonomy is **10 variants**: `InvalidImage`, `InvalidStackSize`, `MisalignedImageBaseVa(VirtAddr)`, `InvalidImageBaseVa { base, end }`, `InvalidParentCap(CapError)`, `FrameBudgetExceeded { needed, available }`, `ImageOverlapsAllocatableMemory`, `AddressSpaceCreationFailed(AddressSpaceError)`, `OutOfFrames`, `MapFailed(AddressSpaceError)`. **First runtime exerciser of UNSAFE-2026-0025 (post-bootstrap `Mmu::map` page-table descriptor writes) + UNSAFE-2026-0026 (PMM `alloc_frame` zero-fill) + UNSAFE-2026-0027 (new — task-loader `copy_nonoverlapping`)**; all three smoke-verified at runtime via the `tyrne: image loaded (entry = 0x800000; sp = 0x802000; image bytes 8; stack bytes 4096; AS cap = idx 1)` boot line. New `docs/architecture/task-loader.md` chapter synthesises the loader sequence + rollback contract + v1 baseline leaks. Tests at HEAD: **259/259** host-test count (round-4 commit reframes the misaligned-VA test rather than adding new ones; the distinctness assertion gained 2 sub-cases but those land inside an existing test) — *correction: 259 was accurate at this commit; review-round 5 (`5078944`) later added one PMM test, so the count at the PR #31 merge is **260/260** per the 2026-05-16 banner above*. All gates clean: `cargo fmt --check`, `cargo host-test`, `cargo host-clippy -D warnings`, `cargo kernel-clippy -D warnings`, `cargo kernel-build`. Smoke trace byte-stable through full demo to `tyrne: all tasks complete`; `-d int,unimp,guest_errors` reports only the pre-existing 629 PL011-disabled-UART warnings. PR #31 awaiting reviewer pass; on merge, B4 implementation half closes and B5 (syscall ABI per ADR-0030) opens for the runnability prerequisites. 
--- -> **2026-05-14 update — PR #28 merged; T-018 live on `main`; B3 implementation closed; closure trio next.** [PR #28](https://github.com/cemililik/Tyrne/pull/28) merged into `main` as commit `47b0a86` after five review rounds (round-1: 2 valid; round-2: 1 valid + 1 skipped-with-reason; round-3: 6 valid + 1 skipped; round-4 follow-on: CodeRabbit findings on commits `0d4e62c..8b9f52e` — 6 valid + 1 nit skipped; round-5 follow-on: 2 valid). The merge brings T-018 (`AddressSpace` kernel object + cap-gated `Mmu::map`/`unmap` wrappers + activation-on-context-switch) live on `main`. Headline numbers: **226 host tests pass** workspace-wide (was 200 at PR #27 merge; +26 from the T-018 arc — 18 `AddressSpace` cap-wrapper tests from T-018 commits 1–3 + 3 scheduler activation-hook tests + 1 `Task::address_space_handle` round-trip test + 4 review-round regression tests covering cap-derive revocation cascade, depth preflight PMM-leak closure, no-current-task endpoint-state leak, and idle-self deadlock guard). Two parallel side-effects landed in the same arc: (i) commit `8b9f52e` introduced `MmuError::BlockMapped` (additive variant; `Mmu::unmap` returns it instead of `AlreadyMapped` on a block-descriptor walk) + the `CapabilityTable::depth_of` `pub(crate)` preflight helper closing the PMM-leak path in `cap_create_address_space`; (ii) commit `77d3e7e` finished the `.claude/skills/` → `.agents/skills/` migration (deleted the 16-entry duplicate, updated CLAUDE.md / AGENTS.md / 12 live cross-references; the dated review snapshots under `docs/analysis/reviews/code-reviews/2026-05-0{6,7}-*` deliberately retain their `.claude/skills/...` links as point-in-time records). **B3 milestone status:** implementation closed; **next:** B3 closure trio (business + consolidated security + performance baseline), modelled on the [2026-05-09 B2 closure trio](../analysis/reviews/business-reviews/2026-05-09-B2-closure.md). 
+> **2026-05-14 update — PR #28 merged; T-018 live on `main`; B3 implementation closed; closure trio next.** [PR #28](https://github.com/HodeTech/Tyrne/pull/28) merged into `main` as commit `47b0a86` after five review rounds (round-1: 2 valid; round-2: 1 valid + 1 skipped-with-reason; round-3: 6 valid + 1 skipped; round-4 follow-on: CodeRabbit findings on commits `0d4e62c..8b9f52e` — 6 valid + 1 nit skipped; round-5 follow-on: 2 valid). The merge brings T-018 (`AddressSpace` kernel object + cap-gated `Mmu::map`/`unmap` wrappers + activation-on-context-switch) live on `main`. Headline numbers: **226 host tests pass** workspace-wide (was 200 at PR #27 merge; +26 from the T-018 arc — 18 `AddressSpace` cap-wrapper tests from T-018 commits 1–3 + 3 scheduler activation-hook tests + 1 `Task::address_space_handle` round-trip test + 4 review-round regression tests covering cap-derive revocation cascade, depth preflight PMM-leak closure, no-current-task endpoint-state leak, and idle-self deadlock guard). Two parallel side-effects landed in the same arc: (i) commit `8b9f52e` introduced `MmuError::BlockMapped` (additive variant; `Mmu::unmap` returns it instead of `AlreadyMapped` on a block-descriptor walk) + the `CapabilityTable::depth_of` `pub(crate)` preflight helper closing the PMM-leak path in `cap_create_address_space`; (ii) commit `77d3e7e` finished the `.claude/skills/` → `.agents/skills/` migration (deleted the 16-entry duplicate, updated CLAUDE.md / AGENTS.md / 12 live cross-references; the dated review snapshots under `docs/analysis/reviews/code-reviews/2026-05-0{6,7}-*` deliberately retain their `.claude/skills/...` links as point-in-time records). **B3 milestone status:** implementation closed; **next:** B3 closure trio (business + consolidated security + performance baseline), modelled on the [2026-05-09 B2 closure trio](../analysis/reviews/business-reviews/2026-05-09-B2-closure.md). --- @@ -39,7 +41,7 @@ A short pointer file updated as work progresses. 
For the full plan see [`phases/ > **2026-05-08 update — B2 prep bundle landed: ADR-0027 Accepted, T-016 Draft, P10 harness live, prior-review path-drift swept.** Three artefacts land together for B2's MMU-activation milestone in a single integration PR (replaces the 3 originally-opened PRs #19 / #20 / #21): > -> 1. **[ADR-0027 (kernel virtual memory layout)](../decisions/0027-kernel-virtual-memory-layout.md) `Accepted` 2026-05-08** (Propose + careful-re-read separate-commit pair per [`write-adr` skill §10](../../.claude/skills/write-adr/SKILL.md)) with companion [`docs/architecture/memory-management.md`](../architecture/memory-management.md) (design-first, mirroring T-008's `scheduler.md`/`ipc.md` and T-012's `exceptions.md` precedents). The ADR commits to **identity-only mapping in B2** (kernel in `TTBR0_EL1`, `TTBR1_EL1` reserved for the future high-half ADR-0033 placeholder when B5 surfaces per-task `TTBR0_EL1` swap), MAIR indices 0/1 for device-nGnRnE / normal-cached, and a typed `MapperFlush` flush-token discipline at the [`Mmu`](../../hal/src/mmu.rs) trait surface (additive `Result` return type). It is the **first non-recovery-primitive state-machine ADR drafted under [`write-adr` skill §Simulation](../../.claude/skills/write-adr/SKILL.md) discipline** (ADR-0026 was the retro-source; ADR-0032 was the first application but its subject is a recovery primitive). [T-016 (MMU activation)](../analysis/tasks/phase-b/T-016-mmu-activation.md) opens at `Draft` in the same commit per [ADR-0025 §Rule 1](../decisions/0025-adr-governance-amendments.md); implementation moves to `In Progress` post-merge. +> 1. 
**[ADR-0027 (kernel virtual memory layout)](../decisions/0027-kernel-virtual-memory-layout.md) `Accepted` 2026-05-08** (Propose + careful-re-read separate-commit pair per [`write-adr` skill §10](../../.agents/skills/write-adr/SKILL.md)) with companion [`docs/architecture/memory-management.md`](../architecture/memory-management.md) (design-first, mirroring T-008's `scheduler.md`/`ipc.md` and T-012's `exceptions.md` precedents). The ADR commits to **identity-only mapping in B2** (kernel in `TTBR0_EL1`, `TTBR1_EL1` reserved for the future high-half ADR-0033 placeholder when B5 surfaces per-task `TTBR0_EL1` swap), MAIR indices 0/1 for device-nGnRnE / normal-cached, and a typed `MapperFlush` flush-token discipline at the [`Mmu`](../../hal/src/mmu/mod.rs) trait surface (additive `Result` return type). It is the **first non-recovery-primitive state-machine ADR drafted under [`write-adr` skill §Simulation](../../.agents/skills/write-adr/SKILL.md) discipline** (ADR-0026 was the retro-source; ADR-0032 was the first application but its subject is a recovery primitive). [T-016 (MMU activation)](../analysis/tasks/phase-b/T-016-mmu-activation.md) opens at `Draft` in the same commit per [ADR-0025 §Rule 1](../decisions/0025-adr-governance-amendments.md); implementation moves to `In Progress` post-merge. > 2. **[P10 wall-clock benchmark harness](../../tools/perf-harness.sh) live.** First measured baseline at HEAD pre-T-016 (debug build, 20 iterations, 5 s per-run timeout, QEMU TCG): **p10=3.884 ms / p50=4.642 ms / p90=5.584 ms / p99=6.558 ms** with mean 4.711 ms, stddev 0.709 ms — see [`docs/analysis/reports/perf-baseline-2026-05-08-post-pr-19-pre-adr-0027.md`](../analysis/reports/perf-baseline-2026-05-08-post-pr-19-pre-adr-0027.md). The band brackets the previous "~4–6.5 ms typical" anecdote tightly, but it is now a *measured* band on this host rather than an order-of-magnitude observation; B2 perf changes will be evaluated against it. 
Single-run boot-to-end claims in PR bodies are deprecated per the new [`infrastructure.md` §"Performance harness"](../standards/infrastructure.md#performance-harness) policy. P10 was promoted from "queued" to "load-bearing before B2 ADR-0027 implementation" by the [2026-05-07 multi-axis review §D2](../analysis/reviews/code-reviews/2026-05-07-pr-12-to-17-multi-axis-review/track-d-perf.md). > 3. **Path-drift sweep on the 2026-05-06 comprehensive review.** Broken relative-path links across 7 affected track files corrected per the same path-math rules PR #18 applied to the 2026-05-07 review's files (~180 link *instances*; the 2026-05-08 multi-axis review re-counted at 193 instances over 83 unique targets — every broken link resolves post-sweep regardless of the count metric chosen); closes the cross-cutting drift the 2026-05-07 review's gemini disposition recorded as "out-of-scope but follow-up worthy". Mechanical-only sed sweep + realpath validation; no content changes. @@ -50,13 +52,13 @@ A short pointer file updated as work progresses. For the full plan see [`phases/ --- - **Active phase:** B — opened 2026-04-21. **B0 closed 2026-04-27**; **B1 closed 2026-05-07**; **B2 closed 2026-05-09**; **B3 closed 2026-05-14** via PR #29's closure trio (business + security + performance baseline; merge commit `b425dc1`). All four closures lifted `Done` after a verbatim QEMU smoke trace + clean `-d guest_errors` count per the [business master-plan §Acceptance criteria](../analysis/reviews/business-reviews/master-plan.md#acceptance-criteria) rule. **The 2026-04-28 implementation-complete claim for B1 was rolled back on 2026-05-06 by the smoke regression and re-issued 2026-05-07 as a smoke-verified Done** — that remains the only re-open arc to date; B2 and B3 both closed cleanly on first attempt. -- **Active milestone:** **B4 — Task loader.** Opened 2026-05-14 with the ADR-0029 propose commit; gates on ADR-0029 `Accepted` before implementation begins. 
[phase-b.md §B4](phases/phase-b.md#milestone-b4--task-loader): load a userspace binary into a fresh AS, set entry point + initial SP, produce a `TaskCap`. The binary is statically embedded in the kernel image (`include_bytes!`); filesystem / dynamic loading is Phase C / D. Per [phase-b plan §B4 §4](phases/phase-b.md#milestone-b4--task-loader), the loader produces a task but does **not** run it — running gates on B6's syscall-ABI work via B5. -- **Active task:** **T-019 — Task loader** (In Review on PR #31; branch `t-019-task-loader`; ADR-0029 Accepted 2026-05-14 and merged via PR #30; implementation arc lands across **7 bisectable commits** — see the dated banner above for the commit chain summary). Implementation scope per [T-019 §Acceptance criteria](../analysis/tasks/phase-b/T-019-task-loader.md#acceptance-criteria): `pub fn load_image(image, pmm, mmu, table, as_arena, parent_as_cap, new_rights, image_base_va, stack_size_pages) -> Result` lives in `kernel/src/obj/task_loader.rs`; returns a `LoadedImage { as_cap, entry_va, stack_top_va, image_bytes, stack_bytes }` opaque descriptor — **not** a `CapHandle{CapObject::Task(...)}` (runnability prerequisites — kernel mappings in userspace AS + EL0 context + syscall entry — gate on B5/B6 per phase-b §B4 §Revision-notes); leak-path-closure preflight discipline (every rejectable check before first `pmm.alloc_frame`; cap-side rollback uses `cap_drop(loaded_as_cap)`, not `cap_revoke`, for the freshly-minted leaf cap); typed **10-variant** `LoadError` enum (`InvalidImage` / `InvalidStackSize` / `MisalignedImageBaseVa(VirtAddr)` / `InvalidImageBaseVa { base, end }` / `InvalidParentCap(CapError)` / `FrameBudgetExceeded { needed, available }` / `ImageOverlapsAllocatableMemory` / `AddressSpaceCreationFailed(AddressSpaceError)` / `OutOfFrames` / `MapFailed(AddressSpaceError)`); host tests pin every row of the T-019 §Approach §Simulation table per the [`write-adr` skill row-to-verification mapping 
discipline](../../.agents/skills/write-adr/SKILL.md#procedure); smoke trace gains exactly one new banner line; no userspace execution (B6 trigger). UNSAFE-2026-0025 / 0026's `Pending QEMU smoke verification` notes lifted via 2026-05-14 Amendments (T-019 BSP wiring is the first runtime exerciser of both paths post-bootstrap); audit entry UNSAFE-2026-0027 opened standalone for the loader's `core::ptr::copy_nonoverlapping` byte-copy site, with 2026-05-15 Amendments recording: (a) the `Pmm::could_yield_pa_overlapping` PA-overlap preflight runtime-enforcing the non-overlap invariant (review-round 1), (b) the `crate::mm::phys_frame_kernel_ptr` helper centralising the identity-mapping invariant for future ADR-0033 high-half migration (review-round 2 F1), (c) the row-3 VA-range preflight + frame-budget reordering and the `intermediate_frame_count` exact-count helper (review-round 3 F1+F2), (d) the row-1 alignment preflight + `MisalignedImageBaseVa(VirtAddr)` variant closing the preventable root-frame-leak path on misaligned `image_base_va` (review-round 4 P2). -- **In review:** **T-019 — Task loader** (PR #31, branch `t-019-task-loader`; commits `911f2ad` + `5711756` + `ae31bc8` + `196d3fb` + `164522d` + `5b1f153` + this commit; awaiting reviewer pass + maintainer Done flip). +- **Active milestone:** **B4 — Task loader (implementation-complete 2026-05-16; closure trio pending).** Opened 2026-05-14 with the ADR-0029 propose commit; ADR-0029 `Accepted` 2026-05-14 (merged via PR #30); T-019 implementation merged to `main` 2026-05-16 via PR #31 (merge commit `7f876af`). [phase-b.md §B4](phases/phase-b.md#milestone-b4--task-loader): load a userspace binary into a fresh AS, set entry point + initial SP, and produce a `LoadedImage` descriptor (the `task_create_from_image` wrapper that turns it into a runnable `TaskCap` gates on B5/B6 per phase-b §B4 §Revision-notes). 
The binary is statically embedded in the kernel image (`include_bytes!`); filesystem / dynamic loading is Phase C / D. Per [phase-b plan §B4 §4](phases/phase-b.md#milestone-b4--task-loader), the loader produces a populated address space but does **not** run it — running gates on B6's syscall-ABI work via B5. **The B4 closure trio (business + security + performance reviews) has NOT yet fired** — it is the next review trigger, sequenced separately by the maintainer. +- **Active task:** **T-019 — Task loader: Done 2026-05-16** (merged to `main` via PR #31, merge commit `7f876af`; branch `t-019-task-loader` retired; ADR-0029 Accepted 2026-05-14 and merged via PR #30; implementation arc landed across the bisectable commit chain through review-round 6 — see the dated banner above for the chain summary, noting the pre-merge banner pre-dates review-rounds 5–6). Implementation scope per [T-019 §Acceptance criteria](../analysis/tasks/phase-b/T-019-task-loader.md#acceptance-criteria): `pub fn load_image(image, pmm, mmu, table, as_arena, parent_as_cap, new_rights, image_base_va, stack_size_pages) -> Result<LoadedImage, LoadError>` lives in `kernel/src/obj/task_loader.rs`; returns a `LoadedImage { as_cap, entry_va, stack_top_va, image_bytes, stack_bytes }` opaque descriptor — **not** a `CapHandle{CapObject::Task(...)}` (runnability prerequisites — kernel mappings in userspace AS + EL0 context + syscall entry — gate on B5/B6 per phase-b §B4 §Revision-notes); leak-path-closure preflight discipline (every rejectable check before first `pmm.alloc_frame`; cap-side rollback uses `cap_drop(loaded_as_cap)`, not `cap_revoke`, for the freshly-minted leaf cap); typed **10-variant** `LoadError` enum (`InvalidImage` / `InvalidStackSize` / `MisalignedImageBaseVa(VirtAddr)` / `InvalidImageBaseVa { base, end }` / `InvalidParentCap(CapError)` / `FrameBudgetExceeded { needed, available }` / `ImageOverlapsAllocatableMemory` / `AddressSpaceCreationFailed(AddressSpaceError)` / `OutOfFrames` /
`MapFailed(AddressSpaceError)`); host tests pin every row of the T-019 §Approach §Simulation table per the [`write-adr` skill row-to-verification mapping discipline](../../.agents/skills/write-adr/SKILL.md#procedure); smoke trace gains exactly one new banner line; no userspace execution (B6 trigger). UNSAFE-2026-0025 / 0026's `Pending QEMU smoke verification` notes lifted via 2026-05-14 Amendments (T-019 BSP wiring is the first runtime exerciser of both paths post-bootstrap); audit entry UNSAFE-2026-0027 opened standalone for the loader's `core::ptr::copy_nonoverlapping` byte-copy site, with 2026-05-15 Amendments recording: (a) the `Pmm::could_yield_pa_overlapping` PA-overlap preflight runtime-enforcing the non-overlap invariant (review-round 1), (b) the `crate::mm::phys_frame_kernel_ptr` helper centralising the identity-mapping invariant for future ADR-0033 high-half migration (review-round 2 F1), (c) the row-3 VA-range preflight + frame-budget reordering and the `intermediate_frame_count` exact-count helper (review-round 3 F1+F2), (d) the row-1 alignment preflight + `MisalignedImageBaseVa(VirtAddr)` variant closing the preventable root-frame-leak path on misaligned `image_base_va` (review-round 4 P2). +- **In review:** none. (T-019's PR #31 merged 2026-05-16; the B4 closure trio is pending but is a review *trigger*, not an in-flight task.) - **In progress:** none. -- **Working branch:** development branches off `main` per PR pattern; T-016 lives on `t-016-mmu-activation`. No rebase pending. -- **Last completed milestone:** **B1 — Drop to EL1 + exception infrastructure, closed 2026-05-07** via PR #15 merge (`e9fa019`) and the closure trio that landed in PR #16 (`95b15aa`). Required tasks all Done: T-013 (2026-04-27) + T-012 (2026-04-28) + T-014 (2026-05-07). 
Headline numbers at B1 closure: 152 host tests (149 → 152 via T-014's +3); kernel image `.text` 21,792 bytes (-116 vs 2026-04-28); QEMU smoke produces full demo trace + boot-to-end timing line; miri 152/152 clean. ADR-0026 Accepted (supersedes ADR-0022's *idle-task-location* axis only; *typed-error* axis stands). UNSAFE-2026-0019 / 0020 gained *partial-verification* + *post-T-014 smoke* Amendments; UNSAFE-2026-0021 gained *no-verification* Amendment (timer-write site unreachable in v1 demo); all three retain `Pending QEMU smoke verification` for the IRQ-dispatch path. Comprehensive code review (2026-05-06) + α/β/γ doc/code-polish PRs closed 7 Track-E blockers + Track-J/A/B/F/G/I non-blockers. **Post-T-015 amendment (2026-05-07, PR #17):** kernel image `.text` is now **22,020 bytes (+228 vs PR-#16 baseline; +112 vs 2026-04-28)** — the 228-byte delta is the new non-generic `ipc_cancel_recv` body (216 bytes) + the cold cancel-call arm in `ipc_recv_and_yield` (12 bytes); `.rodata` and `.bss` byte-identical to the closure-trio re-baseline. See [`2026-05-07-B1-closure.md` §"Post-T-015 amendment"](../analysis/reviews/performance-optimization-reviews/2026-05-07-B1-closure.md) for the per-symbol decomposition. Previous milestone closure: **B0 — Phase A exit hygiene, closed 2026-04-27** via PR #9 merge (`9a66e8b`). -- **Last completed tasks:** **T-018 — Done 2026-05-11, live on `main` 2026-05-14 via PR #28** (branch `t-018-address-space-kernel-object`, merge commit `47b0a86`). 
T-018 implementation: [`AddressSpace`](../../kernel/src/mm/address_space.rs) kernel-object struct + per-type [`AddressSpaceArena`](../../kernel/src/mm/address_space.rs) (ADR-0016 pattern); `CapKind::AddressSpace` + `CapObject::AddressSpace(AddressSpaceHandle)` variants in [`kernel/src/cap/mod.rs`](../../kernel/src/cap/mod.rs); capability-gated wrappers `cap_create_address_space` / `cap_map` / `cap_unmap` with step-by-step preflights (DERIVE rights → no-widening → depth preflight → arena/cap-table capacity → PMM alloc → arena commit → `cap_derive` cap-table insert); `Task` struct extension with `address_space_handle`; activation-on-context-switch hook threaded through `yield_now` / `start` / `ipc_recv_and_yield` / `ipc_send_and_yield` (closure-as-parameter, fires only when outgoing and incoming task ASes differ — short-circuits in v1's bootstrap-shared topology); BSP wiring in [`bsp-qemu-virt/src/main.rs`](../../bsp-qemu-virt/src/main.rs) wraps the already-live bootstrap root via the new `QemuVirtAddressSpace::from_existing_root` `pub unsafe fn` companion. Cross-cutting additions during the review-round arc: `MmuError::BlockMapped` variant (commit `8b9f52e`) so unmap into a bootstrap block descriptor surfaces a distinct typed error from `AlreadyMapped`; `CapabilityTable::depth_of` `pub(crate)` preflight helper closing the PMM-leak path; UNSAFE-2026-0014 fifth Amendment scope-extends the umbrella to the activation hook + BSP-side activation closure (zero new audit entries — additive scope on the existing `&mut Scheduler` momentary-borrow umbrella). Smoke trace gains one new line `tyrne: address-space-arena ready (1 / 8 slots used; bootstrap AS root = 0x4008d000)` immediately after `tyrne: pmm initialized (...)` and before `tyrne: timer ready (...)`. Full demo runs to `tyrne: all tasks complete`; `-d int,unimp,guest_errors` reports only the pre-existing PL011-disabled-UART noise (unchanged baseline). 
**Earlier:** T-017 — Done 2026-05-10 (PR #27, branch `t-017-physical-memory-manager`) — Physical Memory Manager (`Pmm` bitmap allocator + `FrameProvider` trait + UNSAFE-2026-0026 zero-fill audit). **Earlier:** T-016 — Done 2026-05-08 (branch `t-016-mmu-activation`) — MMU activation, VMSAv8 descriptor encoders, `MapperFlush` flush-token, UNSAFE-2026-0022 / 0023 / 0024 / 0025 introduced. **Earlier:** T-015 — Done 2026-05-07 (PR #17, branch `t-015-endpoint-rollback-cancel-recv`) — `ipc_cancel_recv` recovery primitive + symmetric scheduler+endpoint rollback in `ipc_recv_and_yield`'s Phase 2 Deadlock branch (ADR-0032). **Earlier:** T-014 (2026-05-07 via PR #15), T-012 (2026-04-28 via PR #10), T-013 (2026-04-27 via PR #9). +- **Working branch:** none / awaiting B4 closure trio. Development branches are cut from `main` per the PR pattern; no task branch is currently active and no rebase is pending. +- **Last completed milestone:** **B4 — Task loader, implementation-complete 2026-05-16** via PR #31 merge (`7f876af`); the **B4 closure trio (business + security + performance) has NOT yet fired** — until it does, B4 is "implementation-complete", not formally "Closed" in the sense B0–B3 were. The closure trio is the next review trigger (see "Next review trigger" below); the maintainer sequences it separately. Required task Done: T-019 (Done 2026-05-16). Headline numbers at merge: **260 host tests** (42 hal + 175 kernel + 43 test-hal); QEMU smoke produces the full demo trace through `tyrne: all tasks complete` with the new `tyrne: image loaded (...)` line; miri 260/260 clean; `-d int,unimp,guest_errors` shows only the pre-existing 629 PL011-disabled-UART warnings. ADR-0029 Accepted (2026-05-14); UNSAFE-2026-0027 opened standalone for the loader byte-copy; UNSAFE-2026-0025 / 0026 had their `Pending QEMU smoke verification` notes lifted via 2026-05-14 Amendments (T-019 is the first runtime exerciser of both).
**Previous milestone closures (fully Closed with trios):** **B3 — Address space abstraction, closed 2026-05-14** via PR #29's closure trio (merge commit `b425dc1`); **B2 — MMU activation, closed 2026-05-09** via its closure trio; **B1 — Drop to EL1 + exception infrastructure, closed 2026-05-07** via PR #15 merge (`e9fa019`) + the PR #16 closure trio (`95b15aa`); **B0 — Phase A exit hygiene, closed 2026-04-27** via PR #9 merge (`9a66e8b`). +- **Last completed tasks:** **T-019 — Done 2026-05-16, merged to `main` via PR #31** (branch `t-019-task-loader`, merge commit `7f876af`) — Task loader: `load_image` produces a `LoadedImage` descriptor of a freshly populated userspace AS (10-variant `LoadError`, leak-path-closure preflight chain, UNSAFE-2026-0027 byte-copy entry); does **not** mint a runnable `TaskCap` (B5/B6 prerequisite). **Earlier:** **T-018 — Done 2026-05-11, live on `main` 2026-05-14 via PR #28** (branch `t-018-address-space-kernel-object`, merge commit `47b0a86`). T-018 implementation: [`AddressSpace`](../../kernel/src/mm/address_space.rs) kernel-object struct + per-type [`AddressSpaceArena`](../../kernel/src/mm/address_space.rs) (ADR-0016 pattern); `CapKind::AddressSpace` + `CapObject::AddressSpace(AddressSpaceHandle)` variants in [`kernel/src/cap/mod.rs`](../../kernel/src/cap/mod.rs); capability-gated wrappers `cap_create_address_space` / `cap_map` / `cap_unmap` with step-by-step preflights (DERIVE rights → no-widening → depth preflight → arena/cap-table capacity → PMM alloc → arena commit → `cap_derive` cap-table insert); `Task` struct extension with `address_space_handle`; activation-on-context-switch hook threaded through `yield_now` / `start` / `ipc_recv_and_yield` / `ipc_send_and_yield` (closure-as-parameter, fires only when outgoing and incoming task ASes differ — short-circuits in v1's bootstrap-shared topology); BSP wiring in [`bsp-qemu-virt/src/main.rs`](../../bsp-qemu-virt/src/main.rs) wraps the already-live bootstrap root via the new 
`QemuVirtAddressSpace::from_existing_root` `pub unsafe fn` companion. Cross-cutting additions during the review-round arc: `MmuError::BlockMapped` variant (commit `8b9f52e`) so unmap into a bootstrap block descriptor surfaces a distinct typed error from `AlreadyMapped`; `CapabilityTable::depth_of` `pub(crate)` preflight helper closing the PMM-leak path; UNSAFE-2026-0014 fifth Amendment scope-extends the umbrella to the activation hook + BSP-side activation closure (zero new audit entries — additive scope on the existing `&mut Scheduler` momentary-borrow umbrella). Smoke trace gains one new line `tyrne: address-space-arena ready (1 / 8 slots used; bootstrap AS root = 0x4008d000)` immediately after `tyrne: pmm initialized (...)` and before `tyrne: timer ready (...)`. Full demo runs to `tyrne: all tasks complete`; `-d int,unimp,guest_errors` reports only the pre-existing PL011-disabled-UART noise (unchanged baseline). **Earlier:** T-017 — Done 2026-05-10 (PR #27, branch `t-017-physical-memory-manager`) — Physical Memory Manager (`Pmm` bitmap allocator + `FrameProvider` trait + UNSAFE-2026-0026 zero-fill audit). **Earlier:** T-016 — Done 2026-05-08 (branch `t-016-mmu-activation`) — MMU activation, VMSAv8 descriptor encoders, `MapperFlush` flush-token, UNSAFE-2026-0022 / 0023 / 0024 / 0025 introduced. **Earlier:** T-015 — Done 2026-05-07 (PR #17, branch `t-015-endpoint-rollback-cancel-recv`) — `ipc_cancel_recv` recovery primitive + symmetric scheduler+endpoint rollback in `ipc_recv_and_yield`'s Phase 2 Deadlock branch (ADR-0032). **Earlier:** T-014 (2026-05-07 via PR #15), T-012 (2026-04-28 via PR #10), T-013 (2026-04-27 via PR #9). 
- **Last reviews:** - [B1 closure retrospective (2026-05-07)](../analysis/reviews/business-reviews/2026-05-07-B1-closure.md) — fresh closure trio replacing the 2026-04-28 trio's load-bearing role - [B1 closure consolidated security review (2026-05-07)](../analysis/reviews/security-reviews/2026-05-07-B1-closure.md) — Approve, eight axes pass @@ -81,10 +83,10 @@ A short pointer file updated as work progresses. For the full plan see [`phases/ - [ADR-0024 — EL drop to EL1 policy](../decisions/0024-el-drop-policy.md) — `Accepted` (2026-04-27). Implemented by T-013 (Done 2026-04-27). First ADR to use ADR-0025's *Dependency chain* section in production; same-day Accept after careful re-read per [ADR-0025 §Revision notes](../decisions/0025-adr-governance-amendments.md) (cool-down rule withdrawn pre-Accept). - [ADR-0025 — ADR governance amendments](../decisions/0025-adr-governance-amendments.md) — `Accepted` (2026-04-27). Two normative rules for ADR drafting: (§Rule 1) every forward-reference points at a real T-NNN, (§Rule 2) riders are not failures — their *frequency* is the signal. Cool-down rule withdrawn pre-Accept on maintainer feedback; substance preserved in the write-adr skill's careful-re-read step. - [ADR-0026 — Idle dispatch via separate fallback slot](../decisions/0026-idle-dispatch-fallback.md) — `Accepted` (2026-05-06). Supersedes ADR-0022's *idle-task-location* axis only (Option A → Option B: dedicated `Scheduler::idle: Option` slot, dispatched via `ready.dequeue().or(s.idle)` only when the ready queue is empty). ADR-0022's *typed-error* axis (Option G — `SchedError::Deadlock` + `IpcError::PendingAfterResume` + `start`'s panic) stands. Implemented by T-014 (Done 2026-05-07). Includes a queue-state simulation table that ADR-0022 lacked; this discipline (simulation tables on multi-step state-machine ADRs) is the central learning of the [B1 smoke-regression arc](../analysis/reviews/business-reviews/2026-05-06-B1-smoke-regression.md). 
- - [ADR-0032 — Endpoint state rollback + `ipc_cancel_recv` primitive](../decisions/0032-endpoint-rollback-and-cancel-recv.md) — `Accepted` (2026-05-07). Adds a recovery primitive that reverses an `Idle → RecvWaiting` transition, called by `ipc_recv_and_yield`'s Phase 2 Deadlock branch so both *scheduler* and *endpoint* state restore to pre-call shape on `SchedError::Deadlock`. Kernel-internal in v1 (no userspace caller); future consumers are the userspace-driven endpoint destroy drain (B2+), multi-waiter wake (ADR-0019 §Open questions), and preemption-rollback (B5+). Implemented by T-015 (Done 2026-05-07). Includes a Phase-2 Deadlock simulation table; ADR-0017 §Revision notes rider records the additive recovery primitive (user-observable surface unchanged). The Accept commit is the first project-side application of [`write-adr` skill](../../.claude/skills/write-adr/SKILL.md) step 10's *careful re-read* discipline as a separate diff from the Propose commit. - - [ADR-0027 — Kernel virtual memory layout (B2 — identity-mapped MMU activation)](../decisions/0027-kernel-virtual-memory-layout.md) — **`Accepted` (2026-05-08)**. B2 commits to identity-only mapping (kernel in `TTBR0_EL1`; `TTBR1_EL1` reserved with `EPD1=1` for future high-half ADR-0033 placeholder when B5 surfaces per-task `TTBR0_EL1` swap), 4 KiB granule + 48-bit VA + 4-level translation, MAIR indices 0/1 for device-nGnRnE / normal-cached, four bootstrap page-table frames in a new `.boot_pt` section, and a typed [`MapperFlush`](../../hal/src/mmu.rs) flush-token discipline at the `Mmu` trait surface (additive change to `map`/`unmap` return types, recorded in ADR-0009 §Revision notes rider via T-016). Includes a five-row §Simulation table walking the SCTLR.M=1 transition (Steps 0–4). 
**First non-recovery-primitive state-machine ADR drafted under [`write-adr` skill §Simulation](../../.claude/skills/write-adr/SKILL.md) discipline** — ADR-0026's table was the empirical retro-source; ADR-0032's table was the first application but its subject is a recovery primitive; ADR-0027 is the first productive-design state machine to use the rule. Implementation: T-016 (Draft, opens with the Propose commit). Accept landed as a separate commit (`bb0a6ba`) per `write-adr` §10. -- **Next task to open:** **B4 milestone closure + B5 syscall-ABI ADR pair (ADR-0030 + ADR-0031)**. T-019 (above) closes B4's implementation half on merge; the B4 closure trio (business retro + consolidated security review + performance baseline) lands per the B3 closure precedent. The next implementation thread is the [Phase B §B4 §3 `task_create_from_image`](phases/phase-b.md#milestone-b4--task-loader) surface (which currently gates on B5/B6 — `LoadedImage` exists but is not yet wrapped into a runnable `CapHandle{CapObject::Task(...)}`); the B5 syscall ABI per ADR-0030 is the prerequisite, followed by B6 (first userspace "hello"). The B-phase plan in [phase-b.md §B5](phases/phase-b.md) describes the milestone shape. -- **Next review trigger:** **B4 milestone closure trio.** Produced when T-019 reaches `Done` and B4 reaches "implementation-complete". The trio shape mirrors the [2026-05-14 B3 closure](../analysis/reviews/business-reviews/2026-05-14-B3-closure.md): business retrospective + consolidated security review + performance baseline. Possible interim triggers: a mini-retro if T-019's loader implementation surfaces a learning worth capturing mid-arc; a maintainer-initiated review if a non-trivial follow-up surfaces before B4 implementation starts (e.g., the deferred B5+ MemoryRegion cap variant + per-operation rights set extension ADR — the T-018 review-round arc's F2 carry-forward — could escalate into an interim review if B4's loader exposes the gap operationally). 
Forward-flag audit notes (UNSAFE-2026-0019 / 0020 / 0021 / 0025 / 0026 `Pending QEMU smoke verification`) gate on T-019's smoke output: 0025 / 0026 likely lift via Amendment if T-019 exercises a real `cap_map` / `cap_create_address_space` call post-bootstrap; 0019 / 0020 / 0021 continue to gate on the first deadline-arming caller (B5+). + - [ADR-0032 — Endpoint state rollback + `ipc_cancel_recv` primitive](../decisions/0032-endpoint-rollback-and-cancel-recv.md) — `Accepted` (2026-05-07). Adds a recovery primitive that reverses an `Idle → RecvWaiting` transition, called by `ipc_recv_and_yield`'s Phase 2 Deadlock branch so both *scheduler* and *endpoint* state restore to pre-call shape on `SchedError::Deadlock`. Kernel-internal in v1 (no userspace caller); future consumers are the userspace-driven endpoint destroy drain (B2+), multi-waiter wake (ADR-0019 §Open questions), and preemption-rollback (B5+). Implemented by T-015 (Done 2026-05-07). Includes a Phase-2 Deadlock simulation table; ADR-0017 §Revision notes rider records the additive recovery primitive (user-observable surface unchanged). The Accept commit is the first project-side application of [`write-adr` skill](../../.agents/skills/write-adr/SKILL.md) step 10's *careful re-read* discipline as a separate diff from the Propose commit. + - [ADR-0027 — Kernel virtual memory layout (B2 — identity-mapped MMU activation)](../decisions/0027-kernel-virtual-memory-layout.md) — **`Accepted` (2026-05-08)**. 
B2 commits to identity-only mapping (kernel in `TTBR0_EL1`; `TTBR1_EL1` reserved with `EPD1=1` for future high-half ADR-0033 placeholder when B5 surfaces per-task `TTBR0_EL1` swap), 4 KiB granule + 48-bit VA + 4-level translation, MAIR indices 0/1 for device-nGnRnE / normal-cached, four bootstrap page-table frames in a new `.boot_pt` section, and a typed [`MapperFlush`](../../hal/src/mmu/mod.rs) flush-token discipline at the `Mmu` trait surface (additive change to `map`/`unmap` return types, recorded in ADR-0009 §Revision notes rider via T-016). Includes a five-row §Simulation table walking the SCTLR.M=1 transition (Steps 0–4). **First non-recovery-primitive state-machine ADR drafted under [`write-adr` skill §Simulation](../../.agents/skills/write-adr/SKILL.md) discipline** — ADR-0026's table was the empirical retro-source; ADR-0032's table was the first application but its subject is a recovery primitive; ADR-0027 is the first productive-design state machine to use the rule. Implementation: T-016 (Draft, opens with the Propose commit). Accept landed as a separate commit (`bb0a6ba`) per `write-adr` §10. +- **Next task to open:** **B4 milestone closure trio (now due) + B5 syscall-ABI ADR pair (ADR-0030 + ADR-0031)**. T-019 merged 2026-05-16, closing B4's implementation half; the B4 closure trio (business retro + consolidated security review + performance baseline) is now due per the B3 closure precedent but has **not** yet fired (the maintainer sequences it separately). The next implementation thread is the [Phase B §B4 §3 `task_create_from_image`](phases/phase-b.md#milestone-b4--task-loader) surface (which currently gates on B5/B6 — `LoadedImage` exists but is not yet wrapped into a runnable `CapHandle{CapObject::Task(...)}`); the B5 syscall ABI per ADR-0030 is the prerequisite, followed by B6 (first userspace "hello"). The B-phase plan in [phase-b.md §B5](phases/phase-b.md) describes the milestone shape. 
+- **Next review trigger:** **B4 milestone closure trio — now due (not yet fired).** T-019 reached `Done` and B4 reached "implementation-complete" at the 2026-05-16 merge, so the trio is now the active review trigger; it has **not** yet been produced. The trio shape mirrors the [2026-05-14 B3 closure](../analysis/reviews/business-reviews/2026-05-14-B3-closure.md): business retrospective + consolidated security review + performance baseline. Possible interim triggers: a mini-retro if a B4 follow-up surfaces a learning worth capturing; a maintainer-initiated review if a non-trivial follow-up surfaces (e.g., the deferred B5+ MemoryRegion cap variant + per-operation rights set extension ADR — the T-018 review-round arc's F2 carry-forward). Forward-flag audit notes: UNSAFE-2026-0025 / 0026's `Pending QEMU smoke verification` notes were lifted via Amendment by T-019 (first post-bootstrap `cap_map` / `cap_create_address_space` runtime exerciser); UNSAFE-2026-0019 / 0020 / 0021 continue to gate on the first deadline-arming caller (B5+). ## Notes diff --git a/docs/roadmap/phases/phase-b.md b/docs/roadmap/phases/phase-b.md index dcdb83e..78b0a7d 100644 --- a/docs/roadmap/phases/phase-b.md +++ b/docs/roadmap/phases/phase-b.md @@ -24,7 +24,7 @@ Cleans up the items the 2026-04-21 Phase-A code and security reviews surfaced. E 1. **ADR-0021 — Raw-pointer scheduler API.** Reshape `Scheduler::ipc_send_and_yield` / `Scheduler::ipc_recv_and_yield` so no `&mut` reference to `SCHED` / `EP_ARENA` / `IPC_QUEUES` / `TABLE_*` is live across the cooperative context switch. Resolves UNSAFE-2026-0012 (Security review §1 / §3 blocker #1). 2. **ADR-0022 — Idle task + typed `SchedError::Deadlock`.** Register a kernel idle task at boot so the ready queue is never empty; convert the `panic!("deadlock: …")` at [`kernel/src/sched/mod.rs:388-395`](../../../kernel/src/sched/mod.rs#L388-L395) to a typed error. 
Bundle: also convert `Scheduler::start`'s empty-queue panic at [sched/mod.rs:246-253](../../../kernel/src/sched/mod.rs#L246-L253) to `Err(SchedError::QueueEmpty)`; also harden the `debug_assert!` in the `ipc_recv_and_yield` resume path at [sched/mod.rs:417-421](../../../kernel/src/sched/mod.rs#L417-L421) to a release-mode `Err(...)`. Security review §4; code review §Correctness (Scheduler bullets 2, 4). 3. **ADR-0023 — Cross-table capability revocation policy.** Record the v1 single-table scope of the "Revocation is transitive" invariant (already qualified in [`docs/architecture/security-model.md`](../../architecture/security-model.md) by commit `de66d68`). 🚩 **Decision:** accept-deferred (option a; recommended — no code work, document the limitation and push cross-table CDT to Phase C) vs. implement-now (option b; substantial storage + IPC rewiring, only justified if a multi-task server appears in B3–B6 that needs post-transfer revocation). -4. **Architecture docs × 3** via the [`write-architecture-doc`](../../../.claude/skills/write-architecture-doc/SKILL.md) skill: `docs/architecture/kernel-objects.md` (ADR-0016 + Arena pattern), `docs/architecture/ipc.md` (ADR-0017 + ADR-0018 + state machine), `docs/architecture/scheduler.md` (ADR-0019 + ADR-0020 + IPC bridge + UNSAFE-2026-0008). Code review §Documentation follow-up #2. +4. **Architecture docs × 3** via the [`write-architecture-doc`](../../../.agents/skills/write-architecture-doc/SKILL.md) skill: `docs/architecture/kernel-objects.md` (ADR-0016 + Arena pattern), `docs/architecture/ipc.md` (ADR-0017 + ADR-0018 + state machine), `docs/architecture/scheduler.md` (ADR-0019 + ADR-0020 + IPC bridge + UNSAFE-2026-0008). Code review §Documentation follow-up #2. 5. 
**Timer initialisation** — populate `QemuVirtCpu`'s `Timer` trait impl with `CNTVCT_EL0` (virtual counter, register-family-aligned with the deferred `CNTV_*` deadline-arming registers per ADR-0010) and `CNTFRQ_EL0` reads; wire a free-running counter so IPC round-trip latency and context-switch overhead can be measured. Unlocks the first hypothesis-driven performance-review cycle (baseline at [`2026-04-21-A6-baseline.md`](../../analysis/reviews/performance-optimization-reviews/2026-04-21-A6-baseline.md) is blocked on this). *Note: the original phase-plan wording said "CNTPCT_EL0"; T-009 second-read review surfaced the register-family mismatch and switched to `CNTVCT_EL0`.* 6. **Scheduler / IPC hardening bundle.** Grouped in T-010 with ADR-0022's implementation: - `const { assert!(N > 0) }` on `SchedQueue::new` and `CapabilityTable::new` so zero-capacity constructions are a build-time error, matching `Arena::new`'s pattern. @@ -108,11 +108,11 @@ The scope of this milestone was extended on 2026-04-27 (after T-009 — the time Turn on the MMU with an identity map for the kernel image region and its stack. This is the foundation that per-task address spaces will layer atop. -**Status: B2 Closed 2026-05-09** via [the closure-trio](../../analysis/reviews/business-reviews/2026-05-09-B2-closure.md) ([business retro](../../analysis/reviews/business-reviews/2026-05-09-B2-closure.md) + [security review](../../analysis/reviews/security-reviews/2026-05-09-B2-closure.md) + [performance baseline](../../analysis/reviews/performance-optimization-reviews/2026-05-09-B2-closure.md)). ADR-0027 `Accepted` 2026-05-08; T-016 (MMU activation) `Done` 2026-05-08 (PR #23 merged 2026-05-09). 
The closure-trio confirms: 185/185 host + miri clean; release ELF `.text 22,384` (+364 vs post-T-015) / `.bss 40,208` (+17,952; dominantly the 16 KiB `.boot_pt` reservation); release-build harness band p10/p50/p90 = 4.262/4.642/6.456 ms (the first release-codegen baseline-of-record); UNSAFE-2026-0022 / 0023 / 0024 / 0025 introduced with bootstrap-Amendments + 2026-05-09 smoke-verification Amendments; smoke-trace adds exactly one new `tyrne: mmu activated` line (every other line byte-stable). **Carry-forward (post-closure):** UNSAFE-2026-0019 / 0020 / 0021 retain `Pending QEMU smoke verification` for the IRQ-take / dispatch path (gates on first deadline-arming caller); UNSAFE-2026-0025 gains a similar status note (gates on first B3+ post-bootstrap `Mmu::map` caller); pre-existing PL011 "data written to disabled UART" guest-errors noise queued as a follow-on B-phase BSP task. **Original status text (preserved as historical record):** [ADR-0027](../../decisions/0027-kernel-virtual-memory-layout.md) committed to identity-only mapping in B2 (kernel in `TTBR0_EL1`; `TTBR1_EL1` reserved for future high-half ADR-0033 placeholder), MAIR indices 0/1 for device-nGnRnE / normal-cached, four bootstrap page-table frames in `.boot_pt`, and the typed [`MapperFlush`](../../../hal/src/mmu/mod.rs) flush-token discipline at the `Mmu` trait surface (additive change to `map`/`unmap` return types). Companion [`docs/architecture/memory-management.md`](../../architecture/memory-management.md) landed in the same PR. ADR-0027 is the **first non-recovery-primitive state-machine ADR drafted under [`write-adr` skill §Simulation](../../../.claude/skills/write-adr/SKILL.md) discipline** (ADR-0026 was the retro-source; ADR-0032 was the first application but its subject is a recovery primitive). Accept landed as a separate commit per `write-adr` §10. 
T-016 implementation lands across six independently-bisectable commits; smoke trace gains a single new line (`tyrne: mmu activated`) and is otherwise byte-stable. UNSAFE-2026-0022 / 0023 / 0024 / 0025 introduced; UNSAFE-2026-0023 / 0024 each carry an Amendment block recording the bootstrap-site scope extension. +**Status: B2 Closed 2026-05-09** via [the closure-trio](../../analysis/reviews/business-reviews/2026-05-09-B2-closure.md) ([business retro](../../analysis/reviews/business-reviews/2026-05-09-B2-closure.md) + [security review](../../analysis/reviews/security-reviews/2026-05-09-B2-closure.md) + [performance baseline](../../analysis/reviews/performance-optimization-reviews/2026-05-09-B2-closure.md)). ADR-0027 `Accepted` 2026-05-08; T-016 (MMU activation) `Done` 2026-05-08 (PR #23 merged 2026-05-09). The closure-trio confirms: 185/185 host + miri clean; release ELF `.text 22,384` (+364 vs post-T-015) / `.bss 40,208` (+17,952; dominantly the 16 KiB `.boot_pt` reservation); release-build harness band p10/p50/p90 = 4.262/4.642/6.456 ms (the first release-codegen baseline-of-record); UNSAFE-2026-0022 / 0023 / 0024 / 0025 introduced with bootstrap-Amendments + 2026-05-09 smoke-verification Amendments; smoke-trace adds exactly one new `tyrne: mmu activated` line (every other line byte-stable). **Carry-forward (post-closure):** UNSAFE-2026-0019 / 0020 / 0021 retain `Pending QEMU smoke verification` for the IRQ-take / dispatch path (gates on first deadline-arming caller); UNSAFE-2026-0025 gains a similar status note (gates on first B3+ post-bootstrap `Mmu::map` caller); pre-existing PL011 "data written to disabled UART" guest-errors noise queued as a follow-on B-phase BSP task. 
**Original status text (preserved as historical record):** [ADR-0027](../../decisions/0027-kernel-virtual-memory-layout.md) committed to identity-only mapping in B2 (kernel in `TTBR0_EL1`; `TTBR1_EL1` reserved for future high-half ADR-0033 placeholder), MAIR indices 0/1 for device-nGnRnE / normal-cached, four bootstrap page-table frames in `.boot_pt`, and the typed [`MapperFlush`](../../../hal/src/mmu/mod.rs) flush-token discipline at the `Mmu` trait surface (additive change to `map`/`unmap` return types). Companion [`docs/architecture/memory-management.md`](../../architecture/memory-management.md) landed in the same PR. ADR-0027 is the **first non-recovery-primitive state-machine ADR drafted under [`write-adr` skill §Simulation](../../../.agents/skills/write-adr/SKILL.md) discipline** (ADR-0026 was the retro-source; ADR-0032 was the first application but its subject is a recovery primitive). Accept landed as a separate commit per `write-adr` §10. T-016 implementation lands across six independently-bisectable commits; smoke trace gains a single new line (`tyrne: mmu activated`) and is otherwise byte-stable. UNSAFE-2026-0022 / 0023 / 0024 / 0025 introduced; UNSAFE-2026-0023 / 0024 each carry an Amendment block recording the bootstrap-site scope extension. ### Sub-breakdown -1. ✅ **[ADR-0027 — Kernel virtual memory layout](../../decisions/0027-kernel-virtual-memory-layout.md)** *(Accepted 2026-05-08; Propose + careful-re-read separate-commit pair per [`write-adr` skill §10](../../../.claude/skills/write-adr/SKILL.md))*. 
Settled choice: identity-only mapping in B2 (`TTBR0_EL1` carries the kernel; `TTBR1_EL1` reserved with `EPD1=1` for future high-half ADR-0033 placeholder); 4 KiB granule + 48-bit VA + 4-level translation; MAIR indices 0 (device-nGnRnE) and 1 (normal-cached, write-back, write-allocate, inner+outer shareable); four bootstrap page-table frames in `.boot_pt` (statically reserved, pre-zeroed by the BSS-zero loop); typed [`MapperFlush`](../../../hal/src/mmu.rs) flush-token discipline (additive `Result` return type for `Mmu::map` / `unmap`). Includes the §Simulation table walking the SCTLR.M=1 transition end-to-end — first ADR to apply the rule forward. +1. ✅ **[ADR-0027 — Kernel virtual memory layout](../../decisions/0027-kernel-virtual-memory-layout.md)** *(Accepted 2026-05-08; Propose + careful-re-read separate-commit pair per [`write-adr` skill §10](../../../.agents/skills/write-adr/SKILL.md))*. Settled choice: identity-only mapping in B2 (`TTBR0_EL1` carries the kernel; `TTBR1_EL1` reserved with `EPD1=1` for future high-half ADR-0033 placeholder); 4 KiB granule + 48-bit VA + 4-level translation; MAIR indices 0 (device-nGnRnE) and 1 (normal-cached, write-back, write-allocate, inner+outer shareable); four bootstrap page-table frames in `.boot_pt` (statically reserved, pre-zeroed by the BSS-zero loop); typed [`MapperFlush`](../../../hal/src/mmu/mod.rs) flush-token discipline (additive `Result` return type for `Mmu::map` / `unmap`). Includes the §Simulation table walking the SCTLR.M=1 transition end-to-end — first ADR to apply the rule forward. 2. ✅ **[T-016 — MMU activation with identity-mapped kernel + `MapperFlush` token discipline](../../analysis/tasks/phase-b/T-016-mmu-activation.md)** *(Done 2026-05-08; six bisectable commits on branch `t-016-mmu-activation`)*. 
Bundled task (mirrors T-012 shape) covering: HAL `MapperFlush` token + ADR-0009 §Revision rider; pure VMSAv8 descriptor encoders in [`tyrne_hal::mmu::vmsav8`](../../../hal/src/mmu/vmsav8.rs) (host-tested); `QemuVirtMmu` impl in [`bsp-qemu-virt/src/mmu.rs`](../../../bsp-qemu-virt/src/mmu.rs); `linker.ld` `.boot_pt` reservation; `mmu_bootstrap` Rust routine in [`bsp-qemu-virt/src/mmu_bootstrap.rs`](../../../bsp-qemu-virt/src/mmu_bootstrap.rs); `kernel_entry` wiring; four audit-log entries (UNSAFE-2026-0022 through 0025) + Amendments to 0023/0024 for bootstrap-site scope extension; cross-references to [`docs/architecture/memory-management.md`](../../architecture/memory-management.md) verified byte-stable. Smoke trace gained one new `tyrne: mmu activated` line; every other line byte-stable from post-T-015 baseline. Host tests: 182/182 (was 159 — +12 vmsav8 + 6 MapperFlush + 5 round-trip-update). 3. **Initial page-table construction (covered by T-016).** Bootstrap-time identity-mapping of kernel image + RAM range (128 MiB at 0x4000_0000..0x4800_0000) + MMIO range (GIC + UART at 0x0800_0000..0x0902_0000); 2 MiB block descriptors at L2 keep the bootstrap to four page-table frames. Finer-grained per-section permissions (`.text` RX vs `.rodata` R vs `.bss/.data` RW) await a follow-on B-phase task that re-maps the kernel-image region into 4 KiB pages with section-specific flags — out of scope for T-016. 4. **MMU activation sequence (covered by T-016).** Exact `MAIR_EL1` / `TCR_EL1` / `TTBR0_EL1` / `TTBR1_EL1` / `SCTLR_EL1` writes per ADR-0027 §Decision outcome (a) + §Simulation. TLB + I-cache invalidate + barrier sequence (`TLBI VMALLE1; DSB ISH; IC IALLU; DSB ISH; ISB; SCTLR_EL1.{M,I,C} = 1; ISB`) lands as audit-tag UNSAFE-2026-0024. @@ -122,7 +122,7 @@ Turn on the MMU with an identity map for the kernel image region and its stack. ### Acceptance criteria -- ADR-0027 Accepted (separate Accept commit per [`write-adr` skill §10](../../../.claude/skills/write-adr/SKILL.md)). 
+- ADR-0027 Accepted (separate Accept commit per [`write-adr` skill §10](../../../.agents/skills/write-adr/SKILL.md)). - T-016 Done: kernel runs with the MMU on; identity-mapped kernel + RAM + MMIO; `MapperFlush` token discipline at the `Mmu` trait surface; UNSAFE-2026-0022 through 0025 audit entries land with full Operation / Invariants / Rejected-alternatives shape. - Smoke trace gains exactly one new line (`tyrne: mmu activated`); otherwise byte-stable; `-d int,unimp,guest_errors` empty. - (Subsequent B2 tasks, separate from T-016) Physical frame allocator has host-tested correctness and a QEMU integration smoke; `MemoryRegionCap` first real use; deliberate traps route through the exception-vector table. @@ -145,7 +145,7 @@ Multiple per-task translation tables. Capability-gated map / unmap. Activation o ### Sub-breakdown -1. ✅ **ADR-0035 — Physical Memory Manager (bitmap allocator).** B3 prerequisite. Settles allocation discipline + reservation tracking + `FrameProvider` impl shape. Includes the §Simulation table walking init / alloc / free / exhaustion / recovery state transitions. Forward-portable to high-half kernel (ADR-0033 placeholder) without algorithm rewrite. *Accepted 2026-05-09 (Propose + careful-re-read separate-commit pair per [`write-adr` skill §10](../../../.claude/skills/write-adr/SKILL.md)).* +1. ✅ **ADR-0035 — Physical Memory Manager (bitmap allocator).** B3 prerequisite. Settles allocation discipline + reservation tracking + `FrameProvider` impl shape. Includes the §Simulation table walking init / alloc / free / exhaustion / recovery state transitions. Forward-portable to high-half kernel (ADR-0033 placeholder) without algorithm rewrite. *Accepted 2026-05-09 (Propose + careful-re-read separate-commit pair per [`write-adr` skill §10](../../../.agents/skills/write-adr/SKILL.md)).* 2. 
✅ **ADR-0028 — Address-space data structure.** How a BSP-specific `AddressSpace` is represented; who owns its page tables; how it integrates with the `Mmu` trait's associated type. **Sits above ADR-0035** — consumes PMM frames for the root translation table + intermediate L1/L2/L3 frames via [`Mmu::map`](../../../hal/src/mmu/mod.rs)'s `&mut dyn FrameProvider`. *Accepted 2026-05-11 via PR #27.* 3. ✅ **`AddressSpace` kernel object** — a new kernel-object type, like those from A3, with `AddressSpaceCap`. *T-018 commits 1-2 (2026-05-11).* 4. ✅ **Map / unmap operations** — wrappers around [`Mmu::map`](../../../hal/src/mmu/mod.rs) / `Mmu::unmap` that validate the caller's capabilities. *T-018 commit 3 (2026-05-11) — `cap_create_address_space` / `cap_map` / `cap_unmap`.* @@ -175,17 +175,19 @@ Multiple per-task translation tables. Capability-gated map / unmap. Activation o Load a userspace binary into an address space. For B4 the binary is statically embedded in the kernel image (e.g., `include_bytes!`); the filesystem / dynamic loading comes later. +**Status: B4 implementation-complete 2026-05-16 (T-019 merged via PR #31; closure trio pending).** ADR-0029 Accepted; T-019 (task loader) merged to `main`. The closure trio (business + security + performance) is the next review trigger, following the B3 precedent. The §Sub-breakdown / §Acceptance-criteria / §Revision-notes below stand as the design record; the past-tense framing reflects that the implementation has landed. + ### Sub-breakdown -1. **ADR-0029 — Initial userspace image format.** Raw flat binary vs. minimal ELF subset. v1 favours raw flat (simplest). -2. **Loader** — maps the embedded binary into a fresh address space via `cap_create_address_space` + `cap_map` (the AddressSpace cap surface from [ADR-0028](../../decisions/0028-address-space-data-structure.md)), sets up the initial stack, marks the entry point. 
The `MemoryRegionCap` shape for per-frame ownership tracking is **deferred to B5+** per the [B3 closure §Adjustments](../../analysis/reviews/business-reviews/2026-05-14-B3-closure.md#adjustments); v1's loader operates with kernel-mode authority through the AS cap and accepts the rollback-leaks-frames v1 baseline T-019 documents. -3. **Task creation from a binary** — `task_create_from_image(image, as_cap, initial_caps) -> TaskCap`. -4. **Tests** — host-side loader correctness (given an image blob, produce the expected mapping); QEMU-side task creation without yet running the task (that's B6). +1. ✅ **[ADR-0029 — Initial userspace image format](../../decisions/0029-initial-userspace-image-format.md)** *(Accepted)*. Raw flat binary vs. minimal ELF subset. v1 favoured raw flat (simplest). +2. ✅ **Loader** — maps the embedded binary into a fresh address space via `cap_create_address_space` + `cap_map` (the AddressSpace cap surface from [ADR-0028](../../decisions/0028-address-space-data-structure.md)), sets up the initial stack, marks the entry point. The `MemoryRegionCap` shape for per-frame ownership tracking is **deferred to B5+** per the [B3 closure §Adjustments](../../analysis/reviews/business-reviews/2026-05-14-B3-closure.md#adjustments); v1's loader operates with kernel-mode authority through the AS cap and accepts the rollback-leaks-frames v1 baseline T-019 documents. +3. **Task creation from a binary** — `task_create_from_image(image, as_cap, initial_caps) -> TaskCap`. *Deferred to B5/B6 per the §Revision notes below; T-019 delivered the `LoadedImage` metadata struct half.* +4. ✅ **Tests** — host-side loader correctness (given an image blob, produce the expected mapping); QEMU-side task creation without yet running the task (that's B6). ### Acceptance criteria -- ADR-0029 Accepted. -- A kernel test can load the embedded userspace image into an address space and report the entry point and initial stack pointer. +- ✅ ADR-0029 Accepted. 
+- ✅ A kernel test loads the embedded userspace image into an address space and reports the entry point and initial stack pointer (the `LoadedImage` shape — see §Revision notes). ### Revision notes @@ -267,15 +269,15 @@ When B6 is Done, run a business review. Phase C becomes active after that review | ADR-0024 | EL drop policy | B1 (Accepted 2026-04-27) | was ADR-0021 in the pre-review plan | | ADR-0025 | ADR governance amendments (forward-reference, riders) | meta-process (Accepted 2026-04-27) | new — captures the rules T-006/T-009 retros surfaced; not B-phase content. Cool-down rule withdrawn pre-Accept; see ADR-0025 §Revision notes | | ADR-0026 | Idle dispatch via separate fallback slot (supersedes ADR-0022 Option A) | B1 (Accepted 2026-05-06) | **repurposed.** Originally reserved for T-012 exception-vector / dispatch shape, which T-012 absorbed without a separate ADR. Slot reassigned 2026-05-06 to the idle-dispatch supersession motivated by the [B1 smoke regression](../../analysis/reviews/business-reviews/2026-05-06-B1-smoke-regression.md). Drives [T-014](../../analysis/tasks/phase-b/T-014-idle-dispatch-fallback.md). | -| ADR-0027 | Kernel virtual memory layout (B2 — identity-mapped MMU activation) | B2 (**Accepted 2026-05-08**) | was ADR-0025 in the pre-2026-04-27 plan; renumbered down by 2 because ADR-0025 (governance) and ADR-0026 (T-012 reservation) consumed slots. Drives [T-016](../../analysis/tasks/phase-b/T-016-mmu-activation.md) (Draft 2026-05-08; moves to In Progress with this Accept). First ADR to apply [`write-adr` skill §Simulation](../../../.claude/skills/write-adr/SKILL.md) discipline forward (rather than retro-extracted as for ADR-0026 / ADR-0032). Accept landed as a separate commit per `write-adr` §10. Companion architecture doc: [`docs/architecture/memory-management.md`](../../architecture/memory-management.md). 
| -| ADR-0028 | Address-space data structure (B3 — kernel-object + capability-gated `Mmu::map` wrappers + activation-on-context-switch) | B3 (**Accepted 2026-05-11**) | was ADR-0026 in the pre-2026-04-27 plan. Drives [T-018 (Draft 2026-05-11; moves to In Progress with the same-day Accept)](../../analysis/tasks/phase-b/T-018-address-space-kernel-object.md). Chosen shape: **Option A — Generic `AddressSpace` wrapping `M::AddressSpace` inline; per-type `AddressSpaceArena`**. Reuses [ADR-0016](../../decisions/0016-kernel-object-storage.md)'s per-type fixed-size-block arena pattern; propagates the existing `M: Mmu` generic axis from [ADR-0019](../../decisions/0019-scheduler-shape.md) / [ADR-0020](../../decisions/0020-cpu-trait-v2-context-switch.md); zero new `unsafe` audit-log entries (the activation borrow rides UNSAFE-2026-0014's existing umbrella); zero HAL trait surface change (post-T-016 [`Mmu`](../../../hal/src/mmu/mod.rs) trait stays stable). Includes the §Simulation table walking bootstrap-AS wrap / create / map / activation-on-context-switch state transitions per [`write-adr` skill §Simulation](../../../.claude/skills/write-adr/SKILL.md). | +| ADR-0027 | Kernel virtual memory layout (B2 — identity-mapped MMU activation) | B2 (**Accepted 2026-05-08**) | was ADR-0025 in the pre-2026-04-27 plan; renumbered down by 2 because ADR-0025 (governance) and ADR-0026 (T-012 reservation) consumed slots. Drives [T-016](../../analysis/tasks/phase-b/T-016-mmu-activation.md) (Draft 2026-05-08; moves to In Progress with this Accept). First ADR to apply [`write-adr` skill §Simulation](../../../.agents/skills/write-adr/SKILL.md) discipline forward (rather than retro-extracted as for ADR-0026 / ADR-0032). Accept landed as a separate commit per `write-adr` §10. Companion architecture doc: [`docs/architecture/memory-management.md`](../../architecture/memory-management.md). 
| +| ADR-0028 | Address-space data structure (B3 — kernel-object + capability-gated `Mmu::map` wrappers + activation-on-context-switch) | B3 (**Accepted 2026-05-11**) | was ADR-0026 in the pre-2026-04-27 plan. Drives [T-018 (Draft 2026-05-11; moves to In Progress with the same-day Accept)](../../analysis/tasks/phase-b/T-018-address-space-kernel-object.md). Chosen shape: **Option A — Generic `AddressSpace` wrapping `M::AddressSpace` inline; per-type `AddressSpaceArena`**. Reuses [ADR-0016](../../decisions/0016-kernel-object-storage.md)'s per-type fixed-size-block arena pattern; propagates the existing `M: Mmu` generic axis from [ADR-0019](../../decisions/0019-scheduler-shape.md) / [ADR-0020](../../decisions/0020-cpu-trait-v2-context-switch.md); zero new `unsafe` audit-log entries (the activation borrow rides UNSAFE-2026-0014's existing umbrella); zero HAL trait surface change (post-T-016 [`Mmu`](../../../hal/src/mmu/mod.rs) trait stays stable). Includes the §Simulation table walking bootstrap-AS wrap / create / map / activation-on-context-switch state transitions per [`write-adr` skill §Simulation](../../../.agents/skills/write-adr/SKILL.md). | | ADR-0029 | Initial userspace image format | B4 | was ADR-0027 | | ADR-0030 | Syscall ABI (includes `IpcError` taxonomy per K2-5) | B5 | was ADR-0028; scope still enlarged to cover error taxonomy | | ADR-0031 | Initial syscall set | B5 | was ADR-0029 | | ADR-0032 | Endpoint state rollback on `ipc_recv_and_yield` Deadlock + `ipc_cancel_recv` primitive | B2 prep (**Accepted 2026-05-07**) | drove [T-015 (Done 2026-05-07)](../../analysis/tasks/phase-b/T-015-endpoint-rollback-cancel-recv.md) via PR #17. Surfaced as Track A non-blocker in the [2026-05-06 comprehensive review](../../analysis/reviews/code-reviews/2026-05-06-full-tree-comprehensive.md) and a forward-flagged item in the [2026-05-07 B1 closure security review](../../analysis/reviews/security-reviews/2026-05-07-B1-closure.md). 
Closed before B-phase task lands the first userspace-driven endpoint destroy. ADR-0017 §Revision notes rider records the additive recovery primitive (user-observable surface unchanged). | | ADR-0033 | Kernel high-half migration | B5+ (placeholder; named-but-unallocated) | named in [ADR-0027](../../decisions/0027-kernel-virtual-memory-layout.md) §Decision outcome (Option D) as the future home of the `TTBR0_EL1`-swap discipline that arrives with userspace. No file today; opens with the first B5 task whose userspace requires per-task address-space switching. Mirrors the slot-naming pattern of ADR-0028 / 0029 / 0030 / 0031. | | ADR-0034 | Kernel-image section permissions (.text RX / .rodata R / .bss/.data RW) | B-late (placeholder; named-but-unallocated) | named in [ADR-0027 §Decision outcome (a)](../../decisions/0027-kernel-virtual-memory-layout.md) as the future home of finer-grained kernel-image permissions. v1 maps the entire 128 MiB RAM range as kernel R/W/X via 2 MiB blocks; T-016 §Out of scope and [`memory-management.md` §"v1 layout"](../../architecture/memory-management.md) defer the re-map. Opens with the first B-phase task whose threat model includes a kernel R/W of `.text` as a meaningful surface — likely paired with the B5+ first userspace destroy that introduces an attacker-controlled execution context. | -| ADR-0035 | Physical Memory Manager (B3 prerequisite — bitmap allocator) | B3 (**Accepted 2026-05-09**) | new — drove the realisation that B3's "Address space abstraction" milestone has a foundational prerequisite (a real `FrameProvider` impl over physical RAM) which deserves its own ADR rather than being absorbed into ADR-0028 (address-space data structure). Drives [T-017 (Draft 2026-05-09; moves to In Progress with this Accept)](../../analysis/tasks/phase-b/T-017-physical-memory-manager.md). 
Bitmap allocator with hint pointer; 4 KiB metadata for QEMU virt's 32 K frames; reservation-list at init + cached for `free_frame` defensive validation per the §Simulation §Step 2 Critical row; forward-portable to high-half kernel without algorithm rewrite. Includes the §Simulation table walking init / alloc / free / exhaustion / recovery state transitions per [`write-adr` skill §Simulation](../../../.claude/skills/write-adr/SKILL.md). Accept landed as a separate commit per `write-adr` §10 after a careful re-read pass that surfaced and corrected three substantive drafting issues (broken anchor, safe-Rust-vs-`unsafe` zeroing contradiction, muddled "undefined-vs-error" wording in §Simulation row 2; the row-2 fix tightened the Pmm struct contract to add a cached reserved-range list for defensive `free_frame` validation, propagated to T-017). | +| ADR-0035 | Physical Memory Manager (B3 prerequisite — bitmap allocator) | B3 (**Accepted 2026-05-09**) | new — drove the realisation that B3's "Address space abstraction" milestone has a foundational prerequisite (a real `FrameProvider` impl over physical RAM) which deserves its own ADR rather than being absorbed into ADR-0028 (address-space data structure). Drives [T-017 (Draft 2026-05-09; moves to In Progress with this Accept)](../../analysis/tasks/phase-b/T-017-physical-memory-manager.md). Bitmap allocator with hint pointer; 4 KiB metadata for QEMU virt's 32 K frames; reservation-list at init + cached for `free_frame` defensive validation per the §Simulation §Step 2 Critical row; forward-portable to high-half kernel without algorithm rewrite. Includes the §Simulation table walking init / alloc / free / exhaustion / recovery state transitions per [`write-adr` skill §Simulation](../../../.agents/skills/write-adr/SKILL.md). 
Accept landed as a separate commit per `write-adr` §10 after a careful re-read pass that surfaced and corrected three substantive drafting issues (broken anchor, safe-Rust-vs-`unsafe` zeroing contradiction, muddled "undefined-vs-error" wording in §Simulation row 2; the row-2 fix tightened the Pmm struct contract to add a cached reserved-range list for defensive `free_frame` validation, propagated to T-017). | Numbers are tentative. Final numbers are assigned when the ADR is actually written, per [ADR-0013](../../decisions/0013-roadmap-and-planning.md). @@ -304,7 +306,9 @@ Numbers are tentative. Final numbers are assigned when the ADR is actually writt ## How to start Phase B -1. Open **T-006** (raw-pointer scheduler API refactor) via the [`start-task`](../../../.claude/skills/start-task/SKILL.md) skill. Writing ADR-0021 is the first step inside that task. +> **Historical onboarding record (B0–B4 complete).** Phase B implementation through B4 is done: B0, B1, B2, and B3 are closed and B4 is implementation-complete (closure trio pending — see the B4 status line above). The active next steps are the B4 closure trio and then B5 (syscall boundary); the live operational pointer for what to open next is [`docs/roadmap/current.md`](../current.md), not this section. The numbered procedure below is preserved as the entry procedure that was followed when Phase B opened — it is a record of how B0 was started, not live instructions. + +1. Open **T-006** (raw-pointer scheduler API refactor) via the [`start-task`](../../../.agents/skills/start-task/SKILL.md) skill. Writing ADR-0021 is the first step inside that task. 2. After T-006 is In Progress, parallel work on **T-008** (architecture docs) is safe — they do not touch the same code. 3. **T-007** (idle task + typed deadlock) should follow T-006 so both changes land on top of the settled `Scheduler` shape. 4.
**T-009** (timer init) can run in parallel with any of the above — it only touches `QemuVirtCpu` and does not intersect the scheduler refactor. diff --git a/docs/roadmap/phases/phase-c.md b/docs/roadmap/phases/phase-c.md index f4f14c4..30dcbc2 100644 --- a/docs/roadmap/phases/phase-c.md +++ b/docs/roadmap/phases/phase-c.md @@ -14,7 +14,7 @@ Bring secondary cores online via PSCI `CPU_ON`. Each core arrives at a kernel en ### Sub-breakdown -1. **ADR-0027 — Secondary core start protocol.** PSCI vs. spin-table. Entry point for secondaries (shared with primary or separate). Rendezvous semantics (when the primary considers a secondary "up"). +1. **ADR-0037 — Secondary core start protocol.** PSCI vs. spin-table. Entry point for secondaries (shared with primary or separate). Rendezvous semantics (when the primary considers a secondary "up"). 2. **`Cpu` trait v3 extension** — adds `start_core(core_id, entry, context)` and `core_count()`; probably as a sibling `MultiCore` trait to keep `Cpu` v2 stable. 3. **Secondary-core asm entry** in `bsp-qemu-virt` — minimal per-core stack setup before Rust. 4. **Per-core state struct** introduced here (fully fleshed out in C2). @@ -22,7 +22,7 @@ Bring secondary cores online via PSCI `CPU_ON`. Each core arrives at a kernel en ### Acceptance criteria -- ADR-0027 Accepted. +- ADR-0037 Accepted. - `Cpu::start_core` (or sibling trait) lands in `tyrne-hal`. - All configured cores reach the Rust-level rendezvous point on QEMU. @@ -34,14 +34,14 @@ Every online core needs its own current-task pointer, IRQ-mask shadow, and sched ### Sub-breakdown -1. **ADR-0028 — Per-core state access pattern.** `TPIDR_EL1` pointer vs. indexed lookup. Thread-local-like access to the current core's state. +1. **ADR-0038 — Per-core state access pattern.** `TPIDR_EL1` pointer vs. indexed lookup. Thread-local-like access to the current core's state. 2. **`PerCore` abstraction** — kernel-provided primitive for per-core state with interior synchronization. 3. 
**Current-task pointer** moved to per-core state. 4. **Tests** — each core sees its own state; no accidental cross-core access. ### Acceptance criteria -- ADR-0028 Accepted. +- ADR-0038 Accepted. - Per-core state accessible from any core via the chosen pattern. - Tests cover the access invariants. @@ -53,17 +53,17 @@ Replace the cooperative scheduler from A5 with a preemptive one driven by the ti ### Sub-breakdown -1. **ADR-0029 — Scheduler topology.** Per-core queues with work stealing, vs. a single global queue with locking, vs. hybrid. Real-time guarantees (or the lack thereof). +1. **ADR-0039 — Scheduler topology.** Per-core queues with work stealing, vs. a single global queue with locking, vs. hybrid. Real-time guarantees (or the lack thereof). 2. **Timer tick wiring** — [`Timer`](../../../hal/src/timer.rs) arm-deadline fires an IRQ; [`IrqController`](../../../hal/src/irq_controller.rs) delivers it; ISR triggers the scheduler's tick handler. 3. **Preemption points** — when and how a running task can be interrupted and the scheduler invoked. 4. **Time slice** — configurable per-task or global for v1. 5. **Idle-core behaviour** — WFI until IRQ, wake on timer or work-steal signal. 6. **Interrupt-masked critical-section primitive on [`tyrne-hal::Cpu`](../../../hal/src/cpu.rs).** Introduce a closure-based `Cpu::without_interrupts(|| { ... })` (equivalent of `x86_64::instructions::interrupts::without_interrupts`) backed by aarch64 `DAIF` manipulation. Every spin-locked kernel resource that an IRQ handler can touch must be acquired inside this closure to avoid handler-vs.-main-path deadlock. Discipline is mandatory, not optional; C3 makes it real because this is the phase where IRQs can interrupt kernel code. -6. **Tests** — two userspace tasks (from B6) time-slice; tick frequency observable; tasks that never yield still get preempted. +7. **Tests** — two userspace tasks (from B6) time-slice; tick frequency observable; tasks that never yield still get preempted. 
### Acceptance criteria -- ADR-0029 Accepted. +- ADR-0039 Accepted. - Preemption works: a CPU-bound userspace task is preempted by the tick and another runnable task gets CPU time. - Idle cores enter low-power WFI. - No scheduling-related deadlocks or priority inversions (v1 is single priority, so this is mostly vacuous; real-time concerns deferred). @@ -76,15 +76,15 @@ A sender on core 0 sending to a receiver on core 1 works. The receiver wakes on ### Sub-breakdown -1. **ADR-0030 — Cross-core wakeup.** IPI-based (inter-processor interrupt) vs. polling. Latency expectations. +1. **ADR-0040 — Cross-core wakeup.** IPI-based (inter-processor interrupt) vs. polling. Latency expectations. 2. **IPI support** — new primitive on `IrqController` (or a sibling trait) to send an IPI to another core. 3. **Endpoint rendezvous across cores** — the wait/wake path handles the cross-core case correctly. 4. **Tests** — cross-core IPC round trip; behaviour when the receiver's core is idle (WFI'd); behaviour when both cores are busy. ### Acceptance criteria -- ADR-0030 Accepted. -- IPI primitive implemented for QEMU virt (GICv3 SGI). +- ADR-0040 Accepted. +- IPI primitive implemented for QEMU virt (GICv2 SGI; QEMU virt is GICv2 and Pi 4's GIC-400 is also GICv2, so the SGI mechanism applies to both; no IOMMU in v1, per ADR-0036). - Cross-core IPC has the same correctness guarantees as same-core IPC (atomic cap transfer, etc.). --- @@ -95,14 +95,14 @@ When an address space is modified on one core, other cores with that address spa ### Sub-breakdown -1. **ADR-0031 — TLB shootdown protocol.** Broadcast IPI vs. per-address targeted; whether to extend `Mmu` trait or add a sibling. +1. **ADR-0041 — TLB shootdown protocol.** Broadcast IPI vs. per-address targeted; whether to extend `Mmu` trait or add a sibling. 2. **`invalidate_tlb_cross_core` primitive** — probably on a sibling trait, since `Mmu` v1 is single-core. 3. **Integration with address-space unmap paths.** 4.
**Tests** — cross-core unmap visibility is immediate; stale TLB entries never observed after shootdown. ### Acceptance criteria -- ADR-0031 Accepted. +- ADR-0041 Accepted. - Cross-core unmap is safely observable on all cores before the next memory access. ### Phase C closure @@ -113,13 +113,15 @@ Business review. Phase D (Pi 4) or Phase D + E overlap becomes active. ## ADR ledger for Phase C -| ADR | Purpose | Expected state | -|-----|---------|----------------| -| ADR-0027 | Secondary core start protocol | C1 | -| ADR-0028 | Per-core state access pattern | C2 | -| ADR-0029 | Scheduler topology (preemptive) | C3 | -| ADR-0030 | Cross-core wakeup (IPI) | C4 | -| ADR-0031 | TLB shootdown protocol | C5 | +| ADR | Purpose | Expected state | Note | +|-----|---------|----------------|------| +| ADR-0037 | Secondary core start protocol | C1 | renumbered 2026-05-22, was ADR-0027 (collided with the live Accepted ADR-0027 kernel-virtual-memory-layout; Phase C shifted above the Phase B ceiling and the ADR-0036 supersession slot) | +| ADR-0038 | Per-core state access pattern | C2 | renumbered 2026-05-22, was ADR-0028 (collided with the live Accepted ADR-0028 address-space-data-structure) | +| ADR-0039 | Scheduler topology (preemptive) | C3 | renumbered 2026-05-22, was ADR-0029 (collided with the live Accepted ADR-0029 initial-userspace-image-format) | +| ADR-0040 | Cross-core wakeup (IPI) | C4 | renumbered 2026-05-22, was ADR-0030 (reserved by phase-b.md §B5 ledger for the syscall ABI) | +| ADR-0041 | TLB shootdown protocol | C5 | renumbered 2026-05-22, was ADR-0031 (reserved by phase-b.md §B5 ledger for the initial syscall set) | + +Numbers are tentative; final numbers are assigned when the ADR is actually written, per [ADR-0013](../../decisions/0013-roadmap-and-planning.md). 
## Open questions carried into Phase C diff --git a/docs/roadmap/phases/phase-d.md b/docs/roadmap/phases/phase-d.md index 708344b..e944882 100644 --- a/docs/roadmap/phases/phase-d.md +++ b/docs/roadmap/phases/phase-d.md @@ -14,14 +14,14 @@ A new BSP crate that compiles for `aarch64-unknown-none` and provides a minimal ### Sub-breakdown -1. **ADR-0032 — Pi 4 boot flow.** Load address under Pi firmware (`kernel_address` in `config.txt`); Pi firmware's initial CPU mode; what `config.txt` settings Tyrne expects. +1. **ADR-0042 — Pi 4 boot flow.** Load address under Pi firmware (`kernel_address` in `config.txt`); Pi firmware's initial CPU mode; what `config.txt` settings Tyrne expects. 2. **New crate** `bsp-pi4/` with its own `Cargo.toml`, `build.rs`, `linker.ld`, `boot.s`, `main.rs`, `console.rs` — mirroring `bsp-qemu-virt` structure. 3. **Pi firmware interaction** — `config.txt` documentation and the expected load / entry addresses. 4. **Placeholder main** that just spins in `wfe`; no console yet (D3 adds that). ### Acceptance criteria -- ADR-0032 Accepted. +- ADR-0042 Accepted. - `cargo build --target aarch64-unknown-none -p tyrne-bsp-pi4` produces an ELF. - `config.txt` example committed alongside. @@ -29,11 +29,11 @@ A new BSP crate that compiles for `aarch64-unknown-none` and provides a minimal ## Milestone D2 — GIC-400 implementation -Pi 4 uses GIC-400 (GICv2-ish, compatible subset). The `IrqController` impl differs from `bsp-qemu-virt`'s GICv3. +Pi 4 uses GIC-400 (a GICv2 implementation). The `IrqController` impl differs from `bsp-qemu-virt`'s GICv2 only in base addresses and board specifics — QEMU virt is GICv2, Pi 4 is GIC-400 (also GICv2); no IOMMU in v1, per ADR-0036. ### Sub-breakdown -1. **ADR-0033 — GIC-400 register layout.** Distributor / CPU-interface base addresses on BCM2711; register offsets used; which features are used vs. ignored. +1. 
**ADR-0043 — GIC-400 register layout.** Distributor / CPU-interface base addresses on BCM2711; register offsets used; which features are used vs. ignored. 2. **`IrqController` impl** in `bsp-pi4/src/irq.rs`. 3. **Tests** — host-side register layout; the real verification is D8 on hardware. @@ -50,14 +50,14 @@ Pi 4 has both a mini-UART and a PL011 (UART0). We use the PL011 for diagnostic o ### Sub-breakdown -1. **ADR-0034 — Pi 4 console choice.** PL011 vs. mini-UART; which pins; what baud rate; whether GPIO pin-muxing is part of the BSP or out of scope. +1. **ADR-0044 — Pi 4 console choice.** PL011 vs. mini-UART; which pins; what baud rate; whether GPIO pin-muxing is part of the BSP or out of scope. 2. **PL011 init sequence** — baud-rate register programming (QEMU's PL011 is pre-initialized; Pi's is not). 3. **`Console` impl** in `bsp-pi4/src/console.rs` using the same trait as `bsp-qemu-virt` with the Pi-specific init. 4. **Tests** — host-side: none meaningful (hardware behaviour); D7 exercises it on real hardware. ### Acceptance criteria -- ADR-0034 Accepted. +- ADR-0044 Accepted. - `Console` impl compiles; the first real-hardware smoke will validate it. --- @@ -84,14 +84,14 @@ MMU activation on Pi 4. Memory layout is different (RAM at `0x0000_0000` on Pi v ### Sub-breakdown -1. **ADR-0035 — Pi 4 memory layout.** Kernel load address; peripheral window (`0xFE00_0000` class on BCM2711); identity vs. high-half choices here. +1. **ADR-0045 — Pi 4 memory layout.** Kernel load address; peripheral window (`0xFE00_0000` class on BCM2711); identity vs. high-half choices here. 2. **`Mmu` impl** — inherits VMSAv8 from QEMU's impl; differences in the linker script and the MMIO mapping tables. 3. **Cache maintenance** — Pi 4 specifics (cache lines, I/D separation, which invalidate sequences are necessary). 4. **Tests** — B2's test suite applied to Pi 4. ### Acceptance criteria -- ADR-0035 Accepted. +- ADR-0045 Accepted. - Kernel runs with the MMU on on Pi 4. 
--- @@ -102,7 +102,7 @@ A userspace-agnostic library crate that parses a flattened device tree into a ty ### Sub-breakdown -1. **ADR-0036 — DTB parsing scope.** Full FDT spec support vs. a minimal read-only subset; zero-copy vs. owned parsing; allocation strategy (probably `no_std + alloc` with an arena). +1. **ADR-0046 — DTB parsing scope.** Full FDT spec support vs. a minimal read-only subset; zero-copy vs. owned parsing; allocation strategy (probably `no_std + alloc` with an arena). 2. **New crate** `tyrne-dt/` — separate from `tyrne-hal` so BSPs opt in. 3. **Parser API** — `DeviceTree::from_bytes(ptr) -> Result`; iterators over nodes; property lookup. 4. **Pi 4 integration** — `kernel_entry` parses the DTB passed in `x0` and emits a `BootInfo` struct. @@ -110,7 +110,7 @@ A userspace-agnostic library crate that parses a flattened device tree into a ty ### Acceptance criteria -- ADR-0036 Accepted. +- ADR-0046 Accepted. - `tyrne-dt` parses a real DTB into typed records. - `bsp-pi4` uses it at boot; the kernel's `BootInfo` contains at least memory-map and UART-address entries read from the DTB. @@ -157,13 +157,16 @@ Business review; the phase is the most significant in terms of validating that " ## ADR ledger for Phase D -| ADR | Purpose | Expected state | -|-----|---------|----------------| -| ADR-0032 | Pi 4 boot flow | D1 | -| ADR-0033 | GIC-400 register layout | D2 | -| ADR-0034 | Pi 4 console choice (PL011 vs. 
mini-UART) | D3 | -| ADR-0035 | Pi 4 memory layout | D5 | -| ADR-0036 | DTB parsing scope | D6 | +| ADR | Purpose | Expected state | Note | +|-----|---------|----------------|------| +| ADR-0042 | Pi 4 boot flow | D1 | renumbered 2026-05-22, was ADR-0032 (collided with the live Accepted ADR-0032 endpoint-rollback-and-cancel-recv; Phase D shifted above Phase C's new ceiling) | +| ADR-0043 | GIC-400 register layout | D2 | renumbered 2026-05-22, was ADR-0033 (reserved by phase-b.md §B5 ledger for the kernel high-half migration) | +| ADR-0044 | Pi 4 console choice (PL011 vs. mini-UART) | D3 | renumbered 2026-05-22, was ADR-0034 (reserved by phase-b.md §B5 ledger for kernel-image section permissions) | +| _(none)_ | D4 — ARM generic timer on Pi 4 | D4 | implementation-only milestone; the generic-timer behaviour and the `Timer` trait are already settled by ADR-0010, so D4 requires no new ADR. The ledger jumps D3 → D5 for this reason. | +| ADR-0045 | Pi 4 memory layout | D5 | renumbered 2026-05-22, was ADR-0035 (collided with the live Accepted ADR-0035 physical-memory-manager) | +| ADR-0046 | DTB parsing scope | D6 | renumbered 2026-05-22, was ADR-0036 (avoids the ADR-0036 supersession slot reserved for the GICv2/no-IOMMU decision) | + +Numbers are tentative; final numbers are assigned when the ADR is actually written, per [ADR-0013](../../decisions/0013-roadmap-and-planning.md). ## Open questions carried into Phase D diff --git a/docs/roadmap/phases/phase-e.md b/docs/roadmap/phases/phase-e.md index da0b4ee..67de9f7 100644 --- a/docs/roadmap/phases/phase-e.md +++ b/docs/roadmap/phases/phase-e.md @@ -14,13 +14,13 @@ A template crate and guide for writing a userspace driver task. A driver holds a ### Sub-breakdown -1. **ADR-0037 — Driver task structure.** Single-threaded vs. multi-threaded; how does a driver receive IRQ notifications (endpoint + notify); error / restart semantics. +1. **ADR-0047 — Driver task structure.** Single-threaded vs. 
multi-threaded; how does a driver receive IRQ notifications (endpoint + notify); error / restart semantics. 2. **Template crate** `tyrne-driver-template/` — a skeleton a new driver copies from. 3. **Guide** `docs/guides/write-a-driver.md`. ### Acceptance criteria -- ADR-0037 Accepted. +- ADR-0047 Accepted. - Template compiles and documents the driver's service interface. ## Milestone E2 — Log service @@ -29,13 +29,13 @@ A userspace service that receives log records from kernel and other userspace ta ### Sub-breakdown -1. **ADR-0038 — Log wire format.** Binary (postcard / custom TLV); versioned; structured key-value per [logging-and-observability.md](../../standards/logging-and-observability.md). +1. **ADR-0048 — Log wire format.** Binary (postcard / custom TLV); versioned; structured key-value per [logging-and-observability.md](../../standards/logging-and-observability.md). 2. **`tyrne-log` facade** in the kernel — the `log!` / `info!` / `warn!` macros encoded in the facade. 3. **Log service task** — listens on its endpoint, reads records, renders to the console. ### Acceptance criteria -- ADR-0038 Accepted. +- ADR-0048 Accepted. - Kernel logs route through the service rather than direct UART writes (the boot console remains as emergency fallback). ## Milestone E3 — Service manager / supervisor @@ -44,13 +44,13 @@ A task that starts, watches, and restarts other tasks per a config. The foundati ### Sub-breakdown -1. **ADR-0039 — Supervision strategy.** Always-restart / N-failures-then-give-up / operator-controlled. +1. **ADR-0049 — Supervision strategy.** Always-restart / N-failures-then-give-up / operator-controlled. 2. **Supervisor task** that reads a config (compile-time initial, filesystem-based later). 3. **Fault-endpoint plumbing** — each supervised task has its fault endpoint held by the supervisor. ### Acceptance criteria -- ADR-0039 Accepted. +- ADR-0049 Accepted. - A deliberately-crashing test task is restarted by the supervisor per the configured policy. 
## Milestone E4 — Storage driver @@ -59,13 +59,13 @@ QEMU: virtio-blk. Pi 4: SD card via the SDHCI-like controller on BCM2711. The dr ### Sub-breakdown -1. **ADR-0040 — Block-device service interface.** Synchronous / asynchronous read-write; sector size; capability model. +1. **ADR-0050 — Block-device service interface.** Synchronous / asynchronous read-write; sector size; capability model. 2. **`tyrne-driver-virtio-blk`** — the first real non-trivial driver. 3. **`tyrne-driver-sdhci-bcm2711`** — the Pi 4 counterpart (may be stubbed until later). ### Acceptance criteria -- ADR-0040 Accepted. +- ADR-0050 Accepted. - A userspace client can read and write sectors through the storage service. ## Milestone E5 — Simple filesystem @@ -74,13 +74,13 @@ A read-mostly filesystem service on top of E4. Initial choice may be read-only ( ### Sub-breakdown -1. **ADR-0041 — Filesystem choice.** Build a simple one, port an existing crate (`littlefs`, `ext4`-via-crate, a log-structured FS like F2FS-style for flash-friendly wear-levelling), or start with a read-only block layout and add write support incrementally. Weighed against portability, `no_std + alloc` compatibility, crash-consistency guarantees, and the smart-home target's preference for flash-friendly wear-levelling. +1. **ADR-0051 — Filesystem choice.** Build a simple one, port an existing crate (`littlefs`, `ext4`-via-crate, a log-structured FS like F2FS-style for flash-friendly wear-levelling), or start with a read-only block layout and add write support incrementally. Weighed against portability, `no_std + alloc` compatibility, crash-consistency guarantees, and the smart-home target's preference for flash-friendly wear-levelling. 2. **Filesystem service task** implementing the chosen approach. 3. **Storage capability flow** — the filesystem service has the block-device capability; it grants named-file capabilities to clients. ### Acceptance criteria -- ADR-0041 Accepted. +- ADR-0051 Accepted. 
- A userspace client can open, read, and (at minimum) list files through the filesystem service. ## Milestone E6 — Network stack integration @@ -89,13 +89,13 @@ A read-mostly filesystem service on top of E4. Initial choice may be read-only ( ### Sub-breakdown -1. **ADR-0042 — Network stack choice.** smoltcp is the probable answer; this ADR commits to it or to an alternative, covering `no_std + alloc` compatibility, license, and maintenance. +1. **ADR-0052 — Network stack choice.** smoltcp is the probable answer; this ADR commits to it or to an alternative, covering `no_std + alloc` compatibility, license, and maintenance. 2. **`tyrne-driver-virtio-net`** driver. 3. **Network service task** wrapping the stack with a capability-gated interface. ### Acceptance criteria -- ADR-0042 Accepted. +- ADR-0052 Accepted. - Loopback works; a test client completes a TCP three-way handshake with a server on the host. ### Phase E closure @@ -104,14 +104,18 @@ Business review. The system now has enough plumbing to support a real end-user d ## ADR ledger for Phase E -| ADR | Purpose | Expected state | -|-----|---------|----------------| -| ADR-0037 | Driver task structure | E1 | -| ADR-0038 | Log wire format | E2 | -| ADR-0039 | Supervision strategy | E3 | -| ADR-0040 | Block-device service interface | E4 | -| ADR-0041 | Filesystem choice | E5 | -| ADR-0042 | Network stack choice | E6 | +| ADR | Purpose | Expected state | Note | +|-----|---------|----------------|------| +| ADR-0047 | Driver task structure | E1 | renumbered 2026-05-22, was ADR-0037 (cascade from the Phase C/D renumbering, which shifted onto Phase E's old range) | +| ADR-0048 | Log wire format | E2 | renumbered 2026-05-22, was ADR-0038 (cascade) | +| ADR-0049 | Supervision strategy | E3 | renumbered 2026-05-22, was ADR-0039 (cascade) | +| ADR-0050 | Block-device service interface | E4 | renumbered 2026-05-22, was ADR-0040 (cascade) | +| ADR-0051 | Filesystem choice | E5 | renumbered 2026-05-22, was ADR-0041 (cascade) 
| +| ADR-0052 | Network stack choice | E6 | renumbered 2026-05-22, was ADR-0042 (cascade). ADR-0052 is now uniquely Phase E's E6: the phase-h/phase-i cascade was completed in this same pass (H → 0063–0065, I → 0066–0068) — see the §Downstream-renumbering note below. | + +Numbers are tentative; final numbers are assigned when the ADR is actually written, per [ADR-0013](../../decisions/0013-roadmap-and-planning.md). + +> **Downstream-renumbering note (2026-05-22).** The Phase C/D ADR-number collision fix shifted the forward ADR chain up by ten slots for Phases C–F, and Phase F gained a new milestone (F5 — secure field update, ADR-0057), so Phases G–I shifted up by eleven slots. Phases **C, D, E, F, G, H, and I** were all renumbered/extended in this pass so the full forward chain is collision-free and ascends with phase order: Phase G's ceiling is **ADR-0062** (G5), and the cascade was carried through **H → 0063–0065** and **I → 0066–0068** (which also freed ADR-0057 for the new Phase F5 placeholder). The new overall ceiling is **ADR-0068** (Phase I's I3). All these numbers remain tentative per [ADR-0013](../../decisions/0013-roadmap-and-planning.md); none collides with a live ADR file (highest live is ADR-0035; the supersession ADR-0036 is the only newly-written one). ## Open questions carried into Phase E diff --git a/docs/roadmap/phases/phase-f.md b/docs/roadmap/phases/phase-f.md index 9f2a437..47460ef 100644 --- a/docs/roadmap/phases/phase-f.md +++ b/docs/roadmap/phases/phase-f.md @@ -14,13 +14,13 @@ GPIO control on BCM2711. Fundamental because most smart-home peripherals (sensor ### Sub-breakdown -1. **ADR-0043 — GPIO service interface.** Pin granularity, capability per pin vs. per bank; direction / pull-up / drive-strength configuration. +1. **ADR-0053 — GPIO service interface.** Pin granularity, capability per pin vs. per bank; direction / pull-up / drive-strength configuration. 2. **`tyrne-driver-gpio-bcm2711`** driver task. 3. **Client library** `tyrne-gpio` with typed pin handles.
### Acceptance criteria -- ADR-0043 Accepted. +- ADR-0053 Accepted. - Driver toggles a GPIO pin observable externally (an LED, a scope). ## Milestone F2 — I2C and SPI drivers @@ -29,8 +29,8 @@ Most smart-home sensors use one of these. Covers the BCM2711 peripherals. ### Sub-breakdown -1. **ADR-0044 — I2C service interface.** -2. **ADR-0045 — SPI service interface.** (Separate ADR because of different capability semantics — SPI has chip-select per device, I2C has addresses.) +1. **ADR-0054 — I2C service interface.** +2. **ADR-0055 — SPI service interface.** (Separate ADR because of different capability semantics — SPI has chip-select per device, I2C has addresses.) 3. **Drivers** `tyrne-driver-i2c-bcm2711`, `tyrne-driver-spi-bcm2711`. 4. **Test clients** that read a known sensor (e.g., BME280 on I2C, an MCP SPI flash) to verify end-to-end. @@ -46,13 +46,13 @@ The smart-home communication protocol. Matter is the modern open standard; MQTT ### Sub-breakdown -1. **ADR-0046 — Smart-home protocol.** Weighed by: open-source library availability, power profile, interop with the maintainer's existing hub, security posture. +1. **ADR-0056 — Smart-home protocol.** Weighed by: open-source library availability, power profile, interop with the maintainer's existing hub, security posture. 2. **Implementation** — either a port of an existing Rust crate (preferred) or a minimal subset implementation from scratch (accepted cost). 3. **Security review** of the protocol implementation per [`analysis/reviews/security-reviews/`](../../analysis/reviews/security-reviews/). ### Acceptance criteria -- ADR-0046 Accepted. +- ADR-0056 Accepted. - End-to-end: Tyrne device sends a heartbeat / state update to a real hub. ## Milestone F4 — First smart-home device @@ -73,21 +73,42 @@ A chosen device — e.g., a temperature sensor node, a smart plug, an environmen - Its state is reflected in the hub and reacts to commands. - Guide reproducible. 
+## Milestone F5 — Secure field update + +A deployed device runs Tyrne as its firmware (F4) with a 7-day-uptime expectation; there must be a way to deliver a new kernel/userspace image to an already-running device without a physical re-flash. This milestone establishes that path. Scope is sketched at a high level here; the detailed mechanism is deferred to its own ADR. + +### Sub-breakdown (high level — detail deferred to ADR) + +1. **ADR-0057 — Field-update / OTA scheme.** High-level scope only at plan time; the design is deferred to the ADR itself. It must cover, at minimum: + - **Image transport** — how a new image reaches the device (pulled over the network service from E6, or staged via the storage service from E4; the choice and its trust assumptions). + - **Image verification before activation** — signature and/or measurement verification of the candidate image against a trusted key/manifest before it is allowed to become the active image. Ties directly into the cryptographic primitives (Phase G's crypto ADR, ADR-0059) and measured-boot work in Phase G (G1 / G2); F5 may have to pull those forward or ship a minimal verifier and harden it in G. + - **A/B (dual-bank) image layout with automatic rollback** — two image slots so an update is written to the inactive slot and only made active after it boots and passes a health check; a failed boot rolls back to the last-known-good slot automatically (boot-counter / watchdog discipline). + - **Update-authority capability model** — which capability authorizes triggering an update and writing the inactive slot, so "who may push an image to this device" is an explicit, capability-gated decision rather than ambient authority. + +### Acceptance criteria (provisional) + +- ADR-0057 Accepted. +- A new image can be delivered to a running device, verified, activated on the next boot, and automatically rolled back if it fails to come up — demonstrated end-to-end on Pi 4 hardware. 
+ ### Phase F closure Milestone F4 is a genuine milestone: Tyrne becomes real when this ships. Subsequent phases tighten the security story (Phase G) and expand the platform base (Phase H). ## ADR ledger for Phase F -| ADR | Purpose | Expected state | -|-----|---------|----------------| -| ADR-0043 | GPIO service interface | F1 | -| ADR-0044 | I2C service interface | F2 | -| ADR-0045 | SPI service interface | F2 | -| ADR-0046 | Smart-home protocol | F3 | +| ADR | Purpose | Expected state | Note | +|-----|---------|----------------|------| +| ADR-0053 | GPIO service interface | F1 | renumbered 2026-05-22, was ADR-0043 (cascade from the Phase C/D renumbering) | +| ADR-0054 | I2C service interface | F2 | renumbered 2026-05-22, was ADR-0044 (cascade) | +| ADR-0055 | SPI service interface | F2 | renumbered 2026-05-22, was ADR-0045 (cascade) | +| ADR-0056 | Smart-home protocol | F3 | renumbered 2026-05-22, was ADR-0046 (cascade) | +| ADR-0057 | Field-update / OTA scheme | F5 | new 2026-05-22 (master-review MR-021 — light placeholder; detailed design deferred to the ADR). ADR-0057 was previously also used by phase-i.md (I3, power management); phase-i was renumbered in this same pass (I3 → ADR-0068), so ADR-0057 is now uniquely this F5 placeholder — see the §Downstream-renumbering note in [phase-e.md](phase-e.md). | + +Numbers are tentative; final numbers are assigned when the ADR is actually written, per [ADR-0013](../../decisions/0013-roadmap-and-planning.md). ## Open questions carried into Phase F - **Wi-Fi on Pi 4.** The Broadcom Wi-Fi chip requires proprietary firmware; Tyrne's policy rejects blobs. Options: use Ethernet instead on Pi 4 (simplest), use USB Wi-Fi dongles with open-source firmware, or accept a documented exception for firmware that lives outside the kernel (in-scope for an ADR). - **Battery operation.** Power-management is substantial; may belong in Phase I alongside mobile. - **Encryption at rest** on device storage — crosses into Phase G. 
+- **Field update / OTA (F5).** How much of the verification stack (signatures, measured boot) must land in F5 versus being pulled forward from Phase G (G1 / G2). Whether the A/B dual-bank layout is decided here or in the Pi 4 memory-layout ADR (ADR-0045). What the trust root for the update-signing key is, and where it lives on-device. Whether the update path reuses the E6 network service or the E4 storage service for transport. diff --git a/docs/roadmap/phases/phase-g.md b/docs/roadmap/phases/phase-g.md index 7c1037e..b0c131a 100644 --- a/docs/roadmap/phases/phase-g.md +++ b/docs/roadmap/phases/phase-g.md @@ -14,13 +14,13 @@ On hardware that supports it (Pi 4 via its secure-boot chain, or a future board ### Sub-breakdown -1. **ADR-0047 — Boot measurement scheme.** PCR-like registers, event log, chaining algorithm (SHA-256 vs. -384), where measurements are stored. +1. **ADR-0058 — Boot measurement scheme.** PCR-like registers, event log, chaining algorithm (SHA-256 vs. -384), where measurements are stored. 2. **BSP integration** — measurement computed for the kernel image before `kernel_entry`; recorded somewhere the kernel can query. 3. **Verification path** — a post-boot service that reads the measurement log and compares against expected values (signed manifest). ### Acceptance criteria -- ADR-0047 Accepted. +- ADR-0058 Accepted. - Measurement log produced on a supported board and inspectable from userspace. ## Milestone G1.5 — TEE support in the HAL @@ -50,14 +50,14 @@ A crypto crate with audited implementations: hash (SHA-2, SHA-3), AEAD (ChaCha20 ### Sub-breakdown -1. **ADR-0048 — Crypto crate choice.** RustCrypto crates vs. one curated alternative vs. in-tree impls. Auditability, formal guarantees where applicable, no-std support. +1. **ADR-0059 — Crypto crate choice.** RustCrypto crates vs. one curated alternative vs. in-tree impls. Auditability, formal guarantees where applicable, no-std support. 2. 
**`tyrne-crypto`** — the crate that wraps the chosen primitives with Tyrne-native types (`Hash`, `Key`, `Signature`, `Nonce`). 3. **Security review** per [`analysis/reviews/security-reviews/`](../../analysis/reviews/security-reviews/) — mandatory for every primitive. 4. **Constant-time audit** — where timing side channels matter, document which paths are constant-time and which are not. ### Acceptance criteria -- ADR-0048 Accepted. +- ADR-0059 Accepted. - Each primitive's security review recorded. - No primitive is in-tree hand-rolled without explicit justification. @@ -67,13 +67,13 @@ For network services (Phase E6) to talk securely. Probably `rustls` if `no_std + ### Sub-breakdown -1. **ADR-0049 — TLS library choice.** rustls (preferred), mbedtls, in-tree. Covers version-pinning, maintenance, supply-chain posture. +1. **ADR-0060 — TLS library choice.** rustls (preferred), mbedtls, in-tree. Covers version-pinning, maintenance, supply-chain posture. 2. **Integration** with the network service from E6. 3. **Test** — TLS 1.3 handshake with a known server. ### Acceptance criteria -- ADR-0049 Accepted. +- ADR-0060 Accepted. - Network service completes a TLS handshake and exchanges encrypted data. ## Milestone G4 — Formal verification pilot @@ -82,14 +82,14 @@ Pick one kernel primitive — probably the capability table's derivation/revocat ### Sub-breakdown -1. **ADR-0050 — Verification tool choice.** What tool for what scope; how it interacts with the build. +1. **ADR-0061 — Verification tool choice.** What tool for what scope; how it interacts with the build. 2. **Specification** of the chosen primitive's invariants in machine-checkable form. 3. **Proof** (or counterexamples that correct the implementation). 4. **CI integration** — verification runs per-commit on the verified subset. ### Acceptance criteria -- ADR-0050 Accepted. +- ADR-0061 Accepted. - At least one invariant proved on a real kernel primitive. - CI fails if the verification breaks. 
@@ -100,12 +100,12 @@ Revisit the threat model from [`security-model.md`](../../architecture/security- ### Sub-breakdown 1. **Review of the existing threat model** against Phase F deployment learnings. -2. **ADR-0051 — Threat model v2.** Explicit supersession of security-model's Phase-1 threat statements where warranted. +2. **ADR-0062 — Threat model v2.** Explicit supersession of security-model's Phase-1 threat statements where warranted. 3. **`security-model.md` update** to reflect v2 (standards-update skill + ADR-first discipline). ### Acceptance criteria -- ADR-0051 Accepted. +- ADR-0062 Accepted. - `security-model.md` updated. - Business review captures "what the deployment taught us about the model." @@ -115,13 +115,15 @@ Business review. Tyrne now has the security engineering to back the "high assura ## ADR ledger for Phase G -| ADR | Purpose | Expected state | -|-----|---------|----------------| -| ADR-0047 | Boot measurement scheme | G1 | -| ADR-0048 | Crypto crate choice | G2 | -| ADR-0049 | TLS library choice | G3 | -| ADR-0050 | Verification tool choice | G4 | -| ADR-0051 | Threat model v2 | G5 | +| ADR | Purpose | Expected state | Note | +|-----|---------|----------------|------| +| ADR-0058 | Boot measurement scheme | G1 | renumbered 2026-05-22, was ADR-0047 (cascade from the Phase C/D renumbering). G1.5's TEE-trait ADR remains an unnumbered placeholder. | +| ADR-0059 | Crypto crate choice | G2 | renumbered 2026-05-22, was ADR-0048 (cascade) | +| ADR-0060 | TLS library choice | G3 | renumbered 2026-05-22, was ADR-0049 (cascade) | +| ADR-0061 | Verification tool choice | G4 | renumbered 2026-05-22, was ADR-0050 (cascade) | +| ADR-0062 | Threat model v2 | G5 | renumbered 2026-05-22, was ADR-0051 (cascade). Phase G's ceiling is ADR-0062; phases H and I were renumbered in the same pass (H → 0063–0065, I → 0066–0068), completing the cascade — see the §Downstream-renumbering note in [phase-e.md](phase-e.md). 
| + +Numbers are tentative; final numbers are assigned when the ADR is actually written, per [ADR-0013](../../decisions/0013-roadmap-and-planning.md). ## Open questions carried into Phase G diff --git a/docs/roadmap/phases/phase-h.md b/docs/roadmap/phases/phase-h.md index 5cae846..27d90d1 100644 --- a/docs/roadmap/phases/phase-h.md +++ b/docs/roadmap/phases/phase-h.md @@ -14,14 +14,14 @@ Raspberry Pi 5 uses BCM2712 (Cortex-A76) with a new RP1 southbridge that handles ### Sub-breakdown -1. **ADR-0052 — Pi 5 differences.** RP1 southbridge, peripheral topology, console routing, GIC changes. +1. **ADR-0063 — Pi 5 differences.** RP1 southbridge, peripheral topology, console routing, GIC changes. 2. **New BSP** `bsp-pi5/` — mirrors `bsp-pi4`'s shape with Pi 5 specifics. 3. **QEMU parity** — Phase A / B features work on Pi 5. 4. **Additive expectation**: the HAL trait surfaces do not change; any change is a signal for an ADR that reviews whether the HAL was wrong. ### Acceptance criteria -- ADR-0052 Accepted. +- ADR-0063 Accepted. - Pi 5 boots and runs the test suite from Phase D's parity list. ## Milestone H2 — `bsp-jetson` (CPU-only) @@ -30,13 +30,13 @@ NVIDIA Jetson Orin Nano / Orin NX / AGX Orin as aarch64 boards. Per [ADR-0004](. ### Sub-breakdown -1. **ADR-0053 — Jetson boot chain.** CBoot / U-Boot sequence, where Tyrne inserts itself. +1. **ADR-0064 — Jetson boot chain.** CBoot / U-Boot sequence, where Tyrne inserts itself. 2. **New BSP** `bsp-jetson/` with the specific Jetson model(s) supported. 3. **`config` documentation** for users setting up Jetson hardware. ### Acceptance criteria -- ADR-0053 Accepted. +- ADR-0064 Accepted. - A Jetson board boots Tyrne to the Phase A / B exit bar. - Release notes are explicit: Jetson's GPU / NPU are inaccessible. @@ -46,14 +46,14 @@ Candidate: an MMU-capable RISC-V board — e.g., a SiFive HiFive Unmatched / Unl ### Sub-breakdown -1. **ADR-0054 — RISC-V target choice.** Specific board, specific ISA subset (RV32 vs. 
RV64, extensions). +1. **ADR-0065 — RISC-V target choice.** Specific board, specific ISA subset (RV32 vs. RV64, extensions). 2. **`Cpu` / `Mmu` / `IrqController` extensions or splits** if needed — e.g., RISC-V's PLIC differs from GIC enough that an adapter or sibling trait may be warranted. If so, an ADR captures the architectural separation. 3. **New BSP** `bsp-/`. 4. **Parity tests** on real hardware for the Phase A / B subset. ### Acceptance criteria -- ADR-0054 Accepted. +- ADR-0065 Accepted. - RISC-V BSP boots Tyrne; the test suite runs within the architecture's capabilities. ### Phase H closure @@ -62,11 +62,11 @@ Business review. The HAL abstraction has been tested by three architecturally di ## ADR ledger for Phase H -| ADR | Purpose | Expected state | -|-----|---------|----------------| -| ADR-0052 | Pi 5 differences | H1 | -| ADR-0053 | Jetson boot chain | H2 | -| ADR-0054 | RISC-V target choice | H3 | +| ADR | Purpose | Expected state | Notes | +|-----|---------|----------------|-------| +| ADR-0063 | Pi 5 differences | H1 | renumbered 2026-05-22, was ADR-0052 (cascade from the Phase C/D collision fix, MR-001) | +| ADR-0064 | Jetson boot chain | H2 | renumbered 2026-05-22, was ADR-0053 (cascade) | +| ADR-0065 | RISC-V target choice | H3 | renumbered 2026-05-22, was ADR-0054 (cascade) | ## Open questions carried into Phase H diff --git a/docs/roadmap/phases/phase-i.md b/docs/roadmap/phases/phase-i.md index 2bda41b..ea6e886 100644 --- a/docs/roadmap/phases/phase-i.md +++ b/docs/roadmap/phases/phase-i.md @@ -20,7 +20,7 @@ Pick a target SoC that is approachable (community-supported, documented) and com ### Sub-breakdown -1. **ADR-0055 — Mobile target.** Specific device, specific SoC, exhaustive list of required peripherals, policy on required blobs. +1. **ADR-0066 — Mobile target.** Specific device, specific SoC, exhaustive list of required peripherals, policy on required blobs. 2. **Availability survey** — prices, lead times, reliability of the supply. 3. 
**Prior-art survey** — who else has built non-Linux kernels on this SoC family; what they learned. @@ -30,7 +30,7 @@ A display panel writes pixels; a touch panel reports events; these compose into ### Sub-breakdown -1. **ADR-0056 — Display stack architecture.** Framebuffer vs. compositor vs. direct-panel. Probably direct-panel + a tiny software compositor for Phase I. +1. **ADR-0067 — Display stack architecture.** Framebuffer vs. compositor vs. direct-panel. Probably direct-panel + a tiny software compositor for Phase I. 2. **Display driver** for the chosen panel. 3. **Touch driver** (often I2C-attached; reuses F2 work). 4. **Input service** mapping raw touch events to typed events. @@ -41,7 +41,7 @@ Mobile requires battery-aware CPU scaling, suspend / resume, screen blanking, ag ### Sub-breakdown -1. **ADR-0057 — Power management scope.** What levels (idle, suspend-to-RAM, hibernate) and what invariants. +1. **ADR-0068 — Power management scope.** What levels (idle, suspend-to-RAM, hibernate) and what invariants. 2. **Scheduler integration** — CPU-frequency hints, big.LITTLE awareness if the SoC has it. 3. **Battery service** — SoC-specific PMIC driver + SOC / voltage monitoring. 4. **Wake sources** — timer, touch, modem (if applicable). @@ -63,11 +63,11 @@ The mobile milestone is explicitly a stretch goal. 
Reaching I4 makes Tyrne a cre ## ADR ledger for Phase I -| ADR | Purpose | Expected state | -|-----|---------|----------------| -| ADR-0055 | Mobile target | I1 | -| ADR-0056 | Display stack architecture | I2 | -| ADR-0057 | Power management scope | I3 | +| ADR | Purpose | Expected state | Notes | +|-----|---------|----------------|-------| +| ADR-0066 | Mobile target | I1 | renumbered 2026-05-22, was ADR-0055 (cascade from the Phase C/D collision fix, MR-001) | +| ADR-0067 | Display stack architecture | I2 | renumbered 2026-05-22, was ADR-0056 (cascade) | +| ADR-0068 | Power management scope | I3 | renumbered 2026-05-22, was ADR-0057 (cascade; frees ADR-0057 for the Phase F5 field-update placeholder) | ## Open questions carried into Phase I diff --git a/docs/standards/bsp-boot-checklist.md b/docs/standards/bsp-boot-checklist.md index 0520cef..6299368 100644 --- a/docs/standards/bsp-boot-checklist.md +++ b/docs/standards/bsp-boot-checklist.md @@ -215,7 +215,7 @@ after the first yield. ### Enabling QEMU exception logging -Add `--debug` to `tools/run-qemu.sh` or pass flags directly: +Pass `--int-log` to `tools/run-qemu.sh` (it adds `-d int -D /tmp/qemu_int.log` to the QEMU invocation), or pass the flags directly: ```sh qemu-system-aarch64 ... -d int -D /tmp/qemu_int.log diff --git a/docs/standards/code-style.md b/docs/standards/code-style.md index 08a2abf..b6466a7 100644 --- a/docs/standards/code-style.md +++ b/docs/standards/code-style.md @@ -55,7 +55,7 @@ Unforgeable capability types should be named with a `Cap` suffix when the bearer ## Documentation comments -- Every `pub` and `pub(crate)` item has a doc-comment. CI runs `#![deny(missing_docs)]` on public kernel crates. +- Every `pub` and `pub(crate)` item has a doc-comment. The workspace currently sets `missing_docs = "warn"` (see `Cargo.toml` `[workspace.lints.rust]`): undocumented public items produce a warning, not a hard CI failure. 
Tightening this to `deny` is a future option once every public item in the kernel crates is documented; until then a missing doc-comment is a review-rejection reason even though the lint does not block the build. - Doc-comments follow the standard Rust shape: - First line is a **one-sentence summary**. - Blank line. @@ -94,7 +94,7 @@ pub fn send(&self, msg: Message) -> Result<(), IpcError> { /* ... */ } ## `no_std` discipline - Kernel and HAL crates are `#![no_std]`. Do not depend, transitively or directly, on anything that pulls `std`. -- Heap allocation is **not** available in the kernel by default. When the allocator is added (see ADR-0006 when written), it will be a distinct crate and kernel code will opt in explicitly. +- Heap allocation is **not** available in the kernel by default. When the allocator is added (see the allocator ADR, to be written when the allocator is introduced — no existing ADR covers it), it will be a distinct crate and kernel code will opt in explicitly. - No `println!`, `print!`, `eprintln!`. Use the logging facade (see [logging-and-observability.md](logging-and-observability.md)). ## Capability type conventions @@ -117,17 +117,18 @@ See [unsafe-policy.md](unsafe-policy.md). 
Summary: every `unsafe` block and ever ## Lints -The project's `clippy.toml` / `#![deny]` set includes, at minimum: +The workspace lint set (in `Cargo.toml` `[workspace.lints.*]`) plus the per-crate `#![deny(...)]` attributes in each `lib.rs` includes, at minimum: -- `unsafe_op_in_unsafe_fn` -- `missing_docs` (on public crates) +- `unsafe_op_in_unsafe_fn` (deny, workspace) +- `missing_docs` (warn, workspace — see the documentation-comments section above) - `clippy::pedantic` (warn, not deny — reviewed case-by-case) -- `clippy::alloc_instead_of_core` +- `clippy::alloc_instead_of_core` (deny, workspace) +- `clippy::undocumented_unsafe_blocks`, `clippy::missing_safety_doc` (deny, workspace) - `clippy::arithmetic_side_effects` (deny in kernel; explicit wrapping math required) - `clippy::float_arithmetic` (deny in kernel) - `clippy::panic`, `clippy::unwrap_used`, `clippy::expect_used` (deny in kernel paths) -The full list is codified in `clippy.toml` once the workspace is created. +Lint **levels** are configured in `Cargo.toml` `[workspace.lints.clippy]` / `[workspace.lints.rust]` and in per-crate `#![deny(...)]` attributes in `lib.rs`. `clippy.toml` holds only per-lint configuration values (numeric thresholds and switches such as `avoid-breaking-exported-api`), not lint levels — do not look there for the authoritative list. ## Tooling diff --git a/docs/standards/commit-style.md b/docs/standards/commit-style.md index 1e00fbb..4cf6285 100644 --- a/docs/standards/commit-style.md +++ b/docs/standards/commit-style.md @@ -8,6 +8,8 @@ Every commit to every branch. Local work-in-progress commits are the author's bu ## Format +This format is enforced **by convention and review**, not by a git hook or a CI check: there is no `commit-msg` hook or commitlint gate at present (see the Tooling section — both are planned). Until one is added, conformance depends on authors and reviewers applying this standard. 
+ Each commit follows the Conventional Commits pattern with Tyrne-specific trailers: ``` @@ -40,11 +42,13 @@ Refs: ADR-0001 - `fix` — a bug fix. - `docs` — documentation changes only. - `refactor` — code change that neither fixes a bug nor adds a feature. + - `style` — non-behavioral, non-refactor code-style changes (e.g. `cargo fmt` reflow, comment formatting). - `perf` — a performance improvement with a measurement. - `test` — adding or correcting tests. - `build` — build system, dependency updates. - `ci` — continuous integration configuration. - `chore` — tooling, reformatting, licensing housekeeping. + - `audit` — `unsafe`-audit-log or security-audit bookkeeping (e.g. reconciliation rounds, audit-entry amendments). - `revert` — reverts a previous commit (subject: `revert: `). - **Scope** — the subsystem most affected. Choose from the short list: `kernel`, `hal`, `ipc`, `mm`, `sched`, `syscalls`, `drivers`, `userspace`, diff --git a/docs/standards/error-handling.md b/docs/standards/error-handling.md index d7b9e8b..57e915d 100644 --- a/docs/standards/error-handling.md +++ b/docs/standards/error-handling.md @@ -169,6 +169,8 @@ When introducing a new `Error` enum, ask: - `clippy::result_large_err` is `warn` — error types larger than ~128 bytes bloat the `Result` return value. - `clippy::missing_errors_doc` is `warn` — every public `fn -> Result` should document its errors. +> Both of the above are active workspace-wide through `clippy::pedantic = "warn"` (root `Cargo.toml`, `[workspace.lints.clippy]`) and clippy's default lint groups — not as standalone per-lint entries. A grep of `Cargo.toml` for the literal lint names will therefore not find them; this is expected, not a missing configuration. 
+ ## References - Rust API Guidelines, Error types: https://rust-lang.github.io/api-guidelines/interoperability.html#error-types diff --git a/docs/standards/infrastructure.md b/docs/standards/infrastructure.md index f84a3d9..7c27831 100644 --- a/docs/standards/infrastructure.md +++ b/docs/standards/infrastructure.md @@ -16,10 +16,12 @@ How Tyrne is built, how its dependencies are managed, what its CI gates are, and - **Pinned nightly Rust via `rust-toolchain.toml`** at the repository root. The file specifies the exact nightly date and the components required (`rust-src`, `rustfmt`, `clippy`, `llvm-tools-preview` as needed). - The pinned nightly is bumped deliberately, via a dedicated PR, with a commit message explaining the upgrade. Do not update the toolchain as a side effect of other changes. -- CI runs against the pinned toolchain only. Multiple-toolchain matrices are not currently useful for a `no_std` kernel. +- `Cargo.lock` is format `version = 4`, which requires a reasonably recent Cargo (Rust 1.78+). The pinned nightly is far newer than that floor, so this is not a constraint today; it is recorded here only so that, should an external contributor on an older Cargo ever appear, the minimum is known. +- The kernel jobs run against the pinned nightly only — the same toolchain `rust-toolchain.toml` selects for every in-repo `cargo` invocation (its override beats `rustup default`). The `lint-and-host-test`, `kernel-build`, `miri`, and `coverage` jobs all select the pin explicitly. Multiple-toolchain matrices are not useful for a `no_std` kernel that requires nightly. +- One additional job, `host-stable-check`, runs `cargo +stable build` and `cargo +stable host-test` over the host-buildable crates (workspace `default-members`: kernel, hal, test-hal; the bare-metal BSP is excluded because it needs nightly). It is a deliberate "host crates compile and pass tests on stable Rust" gate, not a kernel build. 
It deliberately does **not** run clippy/fmt with `-D warnings`: `clippy::pedantic` is `warn` workspace-wide and stable is a rolling toolchain, so a future stable release could add a pedantic lint that reddens the gate with no code change of ours — lint/format enforcement therefore lives only on the pinned-nightly jobs. See [ci.md](../guides/ci.md). - Cross-compile targets are installed with `rustup target add` per CI job: - - `aarch64-unknown-none` (primary kernel target). - - `aarch64-unknown-none-softfloat` (variants where needed). + - `aarch64-unknown-none` (primary kernel target; the only target pinned in `rust-toolchain.toml`). + - `aarch64-unknown-none-softfloat` (added on demand if FP-trap behaviour ever requires it; not pinned and not used by any current job). - Additional targets added as tiers 2+ come online. ## Dependency policy @@ -60,28 +62,44 @@ Removing a dependency (replacing with in-tree code or dropping the feature it en ## Continuous integration -CI is expected to be set up early in Phase 4 (Rust toolchain + workspace skeleton). The gates below define the bar. +CI was set up in Phase 4 (completed 2026-04-23); the gates below define the bar. The list distinguishes gates that are **enforced today** from gates that are **planned but not yet enforced** so a reader can tell what actually blocks a merge. -### Required gates (block merge) +### Required gates (enforced today, block merge) -- `cargo fmt --all -- --check` -- `cargo clippy --workspace --all-targets -- -D warnings` -- `cargo test --workspace` — host-runnable unit and integration tests. -- `cargo build --workspace --target aarch64-unknown-none` — kernel builds clean. -- QEMU smoke — kernel boots under `qemu-system-aarch64 -machine virt` and reaches the success marker. *(As of 2026-05: maintainer-launched only; no `qemu-smoke` CI job yet — tracked as a B2-or-later roadmap follow-up.)* -- `cargo audit` — fails on known advisories. `cargo-audit` database is updated weekly in CI. 
*(Conditional — currently dormant: `Cargo.lock` carries zero external dependencies, so the gate would be a no-op. The job is wired in once the first external dependency lands per [add-dependency](../../.claude/skills/add-dependency/SKILL.md).)* -- `cargo vet check` — fails if any dependency is not audited. *(Same conditional — see `cargo audit` above.)* +These map directly to jobs that exist in [`.github/workflows/ci.yml`](../../.github/workflows/ci.yml): + +- `cargo fmt --all -- --check` (`lint-and-host-test`). +- Clippy with `-D warnings`, run as the two aliases CI executes: `host-clippy` (`clippy --all-targets -- -D warnings` over `default-members`) for the host crates and `kernel-clippy` (`clippy --target aarch64-unknown-none -p tyrne-bsp-qemu-virt -- -D warnings`) for the bare-metal BSP. The two aliases together cover the whole workspace; their combination is the executed equivalent of `cargo clippy --workspace --all-targets -- -D warnings`. +- `cargo test` — host-runnable unit and integration tests over `default-members` (`lint-and-host-test`; re-run on stable by `host-stable-check`). +- `cargo kernel-build` — the kernel ELF builds clean for `aarch64-unknown-none` (`kernel-build`). +- Host crates build and test clean on **stable** Rust (`host-stable-check`); lint/format enforcement is nightly-only (see the job note above). +- Miri (Stacked Borrows) over the host-test suite (`miri`) — see §"Miri as a blocking gate" below. + +### Planned gates (not yet enforced) + +These appear in the release/standards prose but have **no CI job yet**; do not assume a green CI exercised them. + +- QEMU smoke — kernel boots under `qemu-system-aarch64 -machine virt` and reaches the success marker. *(As of 2026-05: maintainer-launched only; no `qemu-smoke` CI job yet — tracked as a B2-or-later roadmap follow-up. 
A behavioural gate, not a no-op — its absence means boot behaviour is verified by the maintainer, not by CI.)* +- `cargo audit` — fails on known advisories; `cargo-audit` database refreshed weekly in CI. *(Currently dormant: `Cargo.lock` carries zero external dependencies, so the gate would be a no-op today. Wired in once the first external dependency lands per [add-dependency](../../.agents/skills/add-dependency/SKILL.md).)* +- `cargo vet check` — fails if any dependency is not audited. *(Same dormant conditional as `cargo audit` above — zero external deps today.)* ### Advisory gates (warn, do not block) - `cargo-geiger` report — records `unsafe` counts, compared against the audit log. -- Coverage delta (via `cargo llvm-cov`) — not a gate yet; informational. +- Coverage delta (via `cargo llvm-cov`) — **informational, not a gate.** The `coverage` job runs with `continue-on-error: true`, so a coverage drop never blocks a merge today; the job exists to make trends visible. The plan is to flip it to enforce a floor after T-011 settles the workspace shape — at which point `continue-on-error` is removed first and only then is the job added to branch protection (see [ci.md](../guides/ci.md) §"Branch protection"). Until then it must **not** be in the required-checks list (a `continue-on-error` job reports a neutral verdict that never satisfies `required == passing`). - Binary size delta (`cargo bloat`) — informational; large increases prompt a question. +### Miri as a blocking gate + +The `miri` job runs the host-test suite under Stacked Borrows on the pinned nightly and does **not** set `continue-on-error` — a Miri regression is a hard stop. It is intended as a blocking gate, with particular weight on changes under `kernel/src/sched/**` and `kernel/src/ipc/**`, where aliasing and `unsafe` invariants are densest (see ADR-0021 / UNSAFE-2026-0014). 
Ideally a lightweight `unsafe`-audit-log reconciliation (every `unsafe` block has a current `SAFETY:` comment, no undocumented `unsafe`) runs alongside it; that reconciliation is not yet a CI job and is performed at review time per [unsafe-policy.md](unsafe-policy.md). + +**Required-status enforcement lives in GitHub branch protection (a UI setting), not in this repository.** The in-tree workflow makes the `miri` job correct and gating-by-construction; whether a green Miri is actually *required* to merge is configured in the repository settings and is outside version control. The branch-protection checklist in §"Branch protection and merge rules" below names the jobs that must be marked required. + ### CI platform - GitHub Actions is the default. Workflows live under `.github/workflows/`. - Jobs are reusable — shared setup (install toolchain, cache cargo registry) is a composite action. +- A top-level `permissions: contents: read` block applies least privilege to the auto-provisioned `GITHUB_TOKEN`: the pipeline only reads the repo and runs builds, so it gets read scope and nothing more. Any future job that genuinely needs a wider scope (e.g. publishing artifacts) declares its own `permissions:` block at the job level rather than widening the workflow default. - CI caches `~/.cargo/registry` and `target/` keyed by the toolchain hash. - Secrets never enter CI. If a future workflow needs a secret (e.g. publishing artifacts), it is scoped and rotated. @@ -106,7 +124,7 @@ tools/perf-harness.sh --report=CONTEXT # also emit a ma # docs/analysis/reports/perf-baseline-YYYY-MM-DD-CONTEXT.md ``` -A run aborts non-zero if fewer than 50 % of iterations produced a valid sample — that threshold is treated as environmental (kernel image missing, QEMU not in PATH, host under heavy load) rather than a measurement worth aggregating. +A run aborts non-zero if fewer than ⌈n/2⌉ of the `n` iterations produced a valid sample (i.e. 
the valid count must be at least half of `n`, rounded up — so e.g. `n=5` requires ≥3 valid runs and `n=4` requires ≥2). Falling below that is treated as environmental (kernel image missing, QEMU not in PATH, host under heavy load) rather than a measurement worth aggregating. ### Reporting discipline @@ -121,6 +139,15 @@ The harness measures the kernel's `now_ns()` delta. Under QEMU TCG that counter ## Supply-chain security +### GitHub Actions pinning + +The dependency graph in `Cargo.lock` is the obvious supply-chain surface, but the CI pipeline itself runs third-party code on every push, so it gets the same discipline. + +- **Every third-party action is pinned to a full 40-character commit SHA**, with the human-readable version in a trailing comment, e.g. `uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2`. Tags (`@v4`, `@v2`) are mutable — a tag repoint silently changes what runs without any commit in this repo. A SHA cannot be moved. +- This matters most for actions that **download and execute a binary** into the build. `taiki-e/install-action` (used to install the pinned `cargo-llvm-cov`) is the load-bearing case: a tag repoint there would be arbitrary code execution in CI. SHA-pinning closes that gap, mirroring the `NIGHTLY_PIN` / `cargo-llvm-cov` version-pin discipline already applied to the tools those actions wrap. +- **Refresh path.** Action SHAs are bumped deliberately, not silently — the same posture as the toolchain pin. Bump them via Dependabot for GitHub Actions (planned — see [`.github/dependabot.yml`](#planned-when-first-external-dependency-lands) below) or a `pinact`-style tool that re-resolves each pinned tag to its current SHA in a reviewable PR. Each bump moves the SHA and updates the trailing `# vX.Y.Z` comment together, so the comment never drifts from the pinned commit. 
+- **No `permissions:` widening to satisfy an action.** If a third-party action asks for write scope, that is a signal to scrutinise it, not to widen the workflow token (see §"CI platform"). + ### `cargo-vet` - Tracks, per dependency, whether it has been audited and by whom. @@ -156,7 +183,7 @@ When the project moves out of solo phase: - `main` is protected. - PRs to `main` require at least one approval (two for security-sensitive changes — see [security-review.md](security-review.md)). -- Required status checks: the CI gates listed under "Required gates" above. +- Required status checks — **GitHub matches the job's display `name`, not its id**, so add these exact strings: `fmt + clippy + host tests (nightly)` (job `lint-and-host-test`), `aarch64-unknown-none kernel build (nightly)` (job `kernel-build`), `host crates on stable` (job `host-stable-check`), and `miri (Stacked Borrows)` (job `miri`). The `coverage` job is **not** in this list (it is `continue-on-error`; a neutral verdict would block every push). This is a GitHub branch-protection (UI) setting and is not stored in the repository. - Force-push to `main` disabled. - Force-push to protected `release/*` branches disabled. @@ -179,7 +206,7 @@ The lint set is canonical at [`code-style.md` §Lints](code-style.md#lints); eve | `rustfmt.toml` | Formatter config. | | `clippy.toml` | Linter thresholds and allowed lints. | | `.cargo/config.toml` | Target triples, linker flags per target. | -| `.github/workflows/*.yml` | CI pipelines. Active jobs at HEAD: `lint-and-host-test`, `kernel-build`, `miri`, `coverage`. | +| `.github/workflows/*.yml` | CI pipelines. Active jobs at HEAD: `lint-and-host-test`, `kernel-build`, `host-stable-check`, `miri`, `coverage`. Third-party actions are SHA-pinned (see §"GitHub Actions pinning"). 
| ### Planned (when first external dependency lands) @@ -187,9 +214,9 @@ The lint set is canonical at [`code-style.md` §Lints](code-style.md#lints); eve |------|---------| | `supply-chain/config.toml` | `cargo-vet` trust imports and thresholds. | | `supply-chain/audits.toml` | Local audits. | -| `.github/dependabot.yml` | Dependency PR automation (to be enabled once standards are enforced in CI). | +| `.github/dependabot.yml` | Dependency PR automation. Covers two ecosystems: `cargo` (enabled once the first external crate lands) and `github-actions` (refreshes the SHA pins described in §"GitHub Actions pinning"). | -The `supply-chain/` directory does not exist at HEAD — see [add-dependency](../../.claude/skills/add-dependency/SKILL.md) for the trigger that creates it. +The `supply-chain/` directory does not exist at HEAD — see [add-dependency](../../.agents/skills/add-dependency/SKILL.md) for the trigger that creates it. ## Anti-patterns to reject diff --git a/docs/standards/release.md b/docs/standards/release.md index 105eb5a..c2de785 100644 --- a/docs/standards/release.md +++ b/docs/standards/release.md @@ -58,7 +58,8 @@ Before a version is tagged, **every one of these must be true**. A gate failure ### Process gates -- [ ] All CI gates green on the commit being released (format, clippy, tests, build, QEMU smoke, `cargo-audit`, `cargo-vet`). +- [ ] **All enforced CI gates green** on the commit being released: `cargo fmt --check`, clippy (`host-clippy` + `kernel-clippy`), host tests, `kernel-build`, host crates on stable (`host-stable-check`), and Miri. +- [ ] **Manual verification gates passed** — these gates are required by this standard but are **not yet CI jobs**, so they must be checked manually until they are wired up (see [infrastructure.md](infrastructure.md) §"Planned gates"): QEMU smoke *(maintainer-launched; no CI job yet)*, `cargo-audit` *(dormant until the first external dependency lands)*, `cargo-vet` *(same dormant conditional)*. 
- [ ] Hardware smoke tests passed on every Tier 2+ target that this release claims to support. - [ ] No `#[ignore]`d tests without a tracking issue. - [ ] No open `Security` advisories that this release does not fix. diff --git a/docs/standards/security-review.md b/docs/standards/security-review.md index 10c6c2e..d2315f8 100644 --- a/docs/standards/security-review.md +++ b/docs/standards/security-review.md @@ -85,7 +85,7 @@ The security reviewer works through every applicable item. "Not applicable" is a ### Threat model impact -- [ ] The change is reconciled with the documented threat model (once `docs/architecture/security-model.md` exists — Phase 3). +- [ ] The change is reconciled with the documented threat model in [`docs/architecture/security-model.md`](../architecture/security-model.md). - [ ] If the change reshapes the threat model, the PR includes the threat-model update or links to a follow-up PR scheduled imminently. ## Outcome diff --git a/docs/standards/testing.md b/docs/standards/testing.md index 5ae3f3d..f2b40fc 100644 --- a/docs/standards/testing.md +++ b/docs/standards/testing.md @@ -99,10 +99,17 @@ Coverage is measured with `cargo llvm-cov` on host-runnable tests. Hardware-only ## CI gates -- `cargo test --workspace` — must pass. -- `cargo clippy --workspace --all-targets -- -D warnings` — must pass. +Enforced in CI today (block merge): + +- `cargo test` (host crates, `default-members`) — must pass. +- Clippy with `-D warnings` — must pass (run as the `host-clippy` + `kernel-clippy` aliases, which together cover the workspace; see [infrastructure.md](infrastructure.md) §"Required gates"). - `cargo fmt --all -- --check` — must pass. -- QEMU smoke tests — must pass on the primary target. +- Host crates on stable Rust (`host-stable-check`) — must build and test clean (clippy/fmt are deliberately not run on stable; lint/format enforcement is nightly-only). +- Miri (Stacked Borrows) over the host-test suite — must pass. 
+ +Not yet enforced in CI: + +- QEMU smoke tests — run by the maintainer before merge on the primary target; there is **no QEMU-smoke CI job yet** (tracked as a B2-or-later follow-up per [infrastructure.md](infrastructure.md) §"Planned gates"). A failing smoke test does not currently turn CI red — it is caught at maintainer review. - Hardware smoke tests — run periodically, not per-PR. A red CI is never ignored. Flaky tests are bugs, not facts of life. diff --git a/docs/standards/unsafe-policy.md b/docs/standards/unsafe-policy.md index f730170..78a9789 100644 --- a/docs/standards/unsafe-policy.md +++ b/docs/standards/unsafe-policy.md @@ -75,7 +75,9 @@ The audit log is **append-only**. The original body of an entry (fields written `cargo-geiger` output is periodically reconciled against the log. In-place editing of an entry's original body is a policy violation; reviewers should reject PRs that rewrite rather than amend. -**Mechanical-edit exemption.** Repository-wide find-and-replace sweeps that change *terminology* without altering *meaning* — e.g. URL renames (`cemililik/TyrneOS` → `cemililik/Tyrne`), localisation passes (`Yüksek` → `High`), or trademark / project-name updates — may touch an entry's body in-place provided the PR description names the sweep, the substitution is uniform across the repository, and no semantic field of the entry (Operation, Invariants, Rejected alternatives' substance, Status) changes. Rationale: the append-only invariant exists to preserve verification-state-over-time history, not to ossify terminology that drifts at the project / language level. The PR-side record (commit message + reviewer note) is enough to reconstruct what changed; an Amendment block would carry no information beyond what `git blame` already provides for these classes of edit. The 2026-05-07 [PR #14 multi-axis review](../analysis/reviews/code-reviews/2026-05-07-pr-12-to-17-multi-axis-review/track-h-audit.md) §MIN-1 is the codifying source. 
+**Test-only `unsafe` exemption.** `unsafe` confined to `#[cfg(test)]` modules — test doubles (`Fake*` / `Failing*` HAL impls, harness `unsafe impl Send`/`Sync` on ZST or single-thread-owned types) and test harness code that touches no production type — still requires a conforming `// SAFETY:` comment (this is the norm and is clippy-enforced via `undocumented_unsafe_blocks`), but is **exempt from individual audit-log entries** and from the `Audit:` reference required by §1 and §2. Logging every test double's `Send` impl or every fake's constructor would drown the log and obscure the real trusted computing base. Production `unsafe` reachable from a non-test build — anything in a shipped crate's non-`#[cfg(test)]` code, including `unsafe fn`s that tests merely call — remains fully logged with an `Audit:` tag, with no exemption. This codifies the project's existing practice; see the 2026-05-22 master review tracks `X3-unsafe-audit` §X3-003 and `D5b-audits-reports` §D5b-006 for the reconciliation that confirmed the implicit exemption was already in effect. + +**Mechanical-edit exemption.** Repository-wide find-and-replace sweeps that change *terminology* without altering *meaning* — e.g. URL renames (`cemililik/TyrneOS` → `cemililik/Tyrne`), a localisation pass that translates a stray non-English token into its English equivalent, or trademark / project-name updates — may touch an entry's body in-place provided the PR description names the sweep, the substitution is uniform across the repository, and no semantic field of the entry (Operation, Invariants, Rejected alternatives' substance, Status) changes. Rationale: the append-only invariant exists to preserve verification-state-over-time history, not to ossify terminology that drifts at the project / language level. The PR-side record (commit message + reviewer note) is enough to reconstruct what changed; an Amendment block would carry no information beyond what `git blame` already provides for these classes of edit. 
The 2026-05-07 [PR #14 multi-axis review](../analysis/reviews/code-reviews/2026-05-07-pr-12-to-17-multi-axis-review/track-h-audit.md) §MIN-1 is the codifying source. ### 4. `unsafe impl` and `unsafe trait` follow the same discipline diff --git a/hal/src/console.rs b/hal/src/console.rs index e45f683..8e4fcbd 100644 --- a/hal/src/console.rs +++ b/hal/src/console.rs @@ -2,7 +2,7 @@ //! //! See [ADR-0007] for the design rationale behind the trait shape. //! -//! [ADR-0007]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0007-console-trait.md +//! [ADR-0007]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0007-console-trait.md use core::fmt; diff --git a/hal/src/context_switch.rs b/hal/src/context_switch.rs index ec70151..f600fa2 100644 --- a/hal/src/context_switch.rs +++ b/hal/src/context_switch.rs @@ -5,7 +5,7 @@ //! the scheduler is generic over `C: ContextSwitch` and does not use //! dynamic dispatch. //! -//! [ADR-0020]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0020-cpu-trait-v2-context-switch.md +//! [ADR-0020]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0020-cpu-trait-v2-context-switch.md /// Context-switch extension for BSPs that support cooperative task switching. /// @@ -17,11 +17,18 @@ /// /// Implementations must ensure that `context_switch` atomically saves /// all callee-saved registers of the current execution context and -/// restores all callee-saved registers of the next context. On aarch64 -/// that is `x19`–`x28`, `x29` (fp), `x30` (lr), and `sp`. From the -/// perspective of both call sites, `context_switch` appears to return -/// normally — the saving side resumes here when it is later selected as -/// `next`. +/// restores all callee-saved registers of the next context — i.e. the +/// target ABI's full callee-saved register set. 
On aarch64 (AAPCS64) +/// that is the general-purpose callee-saved registers `x19`–`x28`, +/// `x29` (fp), `x30` (lr), `sp`, **and the SIMD/FP callee-saved +/// registers `d8`–`d15` (the lower 64 bits of `v8`–`v15`) whenever FP +/// is enabled (`CPACR_EL1.FPEN ≠ 0`)**. Omitting `d8`–`d15` silently +/// corrupts FP state across a yield: the compiler may allocate those +/// registers for any task and does not emit callee-save spills across a +/// cooperative `context_switch` call, so the corruption is +/// data-dependent and survives smoke testing. From the perspective of +/// both call sites, `context_switch` appears to return normally — the +/// saving side resumes here when it is later selected as `next`. pub trait ContextSwitch: Send + Sync { /// The saved register state for one cooperative task. /// @@ -47,6 +54,12 @@ pub trait ContextSwitch: Send + Sync { /// - `next` must contain a context previously written by /// `context_switch` or fully initialised by `init_context`. /// Restoring an uninitialised context is undefined behaviour. + /// - The implementation must save and restore the **full** callee- + /// saved register set for the target ABI — see the trait-level + /// `# Safety contract`. On aarch64 this includes the SIMD/FP + /// callee-saved registers `d8`–`d15` (lower 64 bits of `v8`–`v15`) + /// whenever FP is enabled (`CPACR_EL1.FPEN ≠ 0`), not only the + /// general-purpose `x19`–`x28` / `x29` / `x30` / `sp`. unsafe fn context_switch(&self, current: &mut Self::TaskContext, next: &Self::TaskContext); /// Write an initial register state into `ctx` so that the first diff --git a/hal/src/cpu.rs b/hal/src/cpu.rs index c1cb2c5..c10fc11 100644 --- a/hal/src/cpu.rs +++ b/hal/src/cpu.rs @@ -2,7 +2,7 @@ //! //! See [ADR-0008] for the trait-signature rationale. //! -//! [ADR-0008]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0008-cpu-trait.md +//! 
[ADR-0008]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0008-cpu-trait.md /// Identifier for a CPU core. /// @@ -14,10 +14,21 @@ pub type CoreId = u32; /// The state of the CPU's interrupt mask, as saved by /// [`Cpu::disable_irqs`] and consumed by [`Cpu::restore_irq_state`]. /// -/// Callers should treat the inner value as opaque — pass it back -/// unmodified to `restore_irq_state`. The inner field is `pub` so that BSP -/// implementations can construct the value from raw architecture bits; -/// it is not an invitation for callers to inspect or synthesize bits. +/// A value returned by `disable_irqs` is otherwise opaque: pass it back +/// unmodified to `restore_irq_state` and do not inspect or recombine its +/// bits. The inner field is `pub` so that BSP implementations can construct +/// the value from raw architecture bits. +/// +/// # Canonical zero value +/// +/// `IrqState(0)` is the one value callers may legitimately synthesize: it +/// is defined as the **IRQs-enabled (unmasked)** state, and every [`Cpu`] +/// implementor must honour `restore_irq_state(IrqState(0))` as "enable +/// interrupts." This is the natural aarch64 encoding (a zero `DAIF` mask +/// leaves IRQs unmasked); the scheduler relies on it when dispatching a +/// freshly-created task that must begin with interrupts enabled. Test +/// doubles (`tyrne_test_hal::FakeCpu`) follow the same polarity, so a +/// shared fake cannot invert production interrupt semantics. #[derive(Copy, Clone)] pub struct IrqState(pub usize); @@ -55,9 +66,11 @@ pub trait Cpu: Send + Sync { /// Restore the CPU interrupt mask to the given saved state. /// /// `state` must be a value previously returned by - /// [`Cpu::disable_irqs`]; passing any other value is a violation of - /// the calling contract and the resulting behaviour is - /// implementation-defined. 
+ /// [`Cpu::disable_irqs`], with one canonical exception: callers may + /// synthesize `IrqState(0)`, which every implementation must treat as + /// "enable interrupts" (see [`IrqState`] § Canonical zero value). + /// Passing any other synthesized value is a violation of the calling + /// contract and the resulting behaviour is implementation-defined. fn restore_irq_state(&self, state: IrqState); /// Halt the CPU until the next interrupt wakes it. diff --git a/hal/src/irq_controller.rs b/hal/src/irq_controller.rs index 75b9db6..fcc940b 100644 --- a/hal/src/irq_controller.rs +++ b/hal/src/irq_controller.rs @@ -2,7 +2,7 @@ //! //! See [ADR-0011] for the v1 scope and the list of deferred capabilities. //! -//! [ADR-0011]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0011-irq-controller-trait.md +//! [ADR-0011]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0011-irq-controller-trait.md /// A hardware interrupt line number. /// diff --git a/hal/src/lib.rs b/hal/src/lib.rs index 415063c..3c02b95 100644 --- a/hal/src/lib.rs +++ b/hal/src/lib.rs @@ -9,8 +9,8 @@ //! full responsibilities of each trait and [ADR-0006][adr-0006] for the //! crate-boundary rationale. //! -//! [hal-doc]: https://github.com/cemililik/Tyrne/blob/main/docs/architecture/hal.md -//! [adr-0006]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0006-workspace-layout.md +//! [hal-doc]: https://github.com/HodeTech/Tyrne/blob/main/docs/architecture/hal.md +//! [adr-0006]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0006-workspace-layout.md //! //! ## Status //! @@ -58,5 +58,5 @@ pub use timer::Timer; /// [`docs/architecture/security-model.md`][sec-doc] for the trust-boundary /// implications.
/// -/// [sec-doc]: https://github.com/cemililik/Tyrne/blob/main/docs/architecture/security-model.md +/// [sec-doc]: https://github.com/HodeTech/Tyrne/blob/main/docs/architecture/security-model.md pub trait Iommu {} diff --git a/hal/src/mmu/mod.rs b/hal/src/mmu/mod.rs index 32d26c0..cfe3c4c 100644 --- a/hal/src/mmu/mod.rs +++ b/hal/src/mmu/mod.rs @@ -8,8 +8,8 @@ //! [ADR-0009 §Revision notes][adr-0009-rev] for the additive-extension //! record. //! -//! [ADR-0009]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0009-mmu-trait.md -//! [adr-0009-rev]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0009-mmu-trait.md#revision-notes +//! [ADR-0009]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0009-mmu-trait.md +//! [adr-0009-rev]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0009-mmu-trait.md#revision-notes pub mod vmsav8; @@ -106,6 +106,17 @@ impl MappingFlags { /// /// Callers should prefer combining the named constants; `from_raw` /// exists so BSP implementations can pass bits across ABI boundaries. + /// + /// **Unknown bits are accepted but ignored by design.** Only bits 0–4 + /// (the five named flags) carry meaning; any bit ≥ 5 is preserved in + /// the raw value (and round-trips through [`Self::raw`]) but is + /// silently dropped by every consumer that interprets flags — notably + /// [`vmsav8::flags_to_descriptor_bits`], which consults the five named + /// flags via [`Self::contains`] and never inspects higher bits. There + /// is intentionally no validation here: `from_raw` is the ABI escape + /// hatch, and the descriptor encoder is "locked-shut by default" so an + /// unknown bit cannot grant permission. Callers that need to reject + /// stray bits should mask against the five named constants first. 
#[must_use] pub const fn from_raw(bits: u32) -> Self { Self(bits) @@ -227,13 +238,20 @@ pub trait FrameProvider { /// [`docs/architecture/memory-management.md` §"The MapperFlush flush-token /// discipline"][mm-doc] for the full rationale. /// -/// The token does not bind the minting [`Mmu`] instance — `flush` accepts -/// any `Mmu` impl. v1 has a single `Mmu` instance so the absence of an -/// instance-identity check is harmless; future multi-CPU / multi-address- -/// space topologies may grow the shape (flagged in ADR-0027). +/// The token does not bind the minting [`Mmu`] instance **or the address +/// space it was minted for** — `flush` accepts any `Mmu` impl and the +/// token carries only a [`VirtAddr`]. v1 has a single `Mmu` instance and a +/// single address space, so the absence of an instance/AS-identity check +/// is harmless. This is a future-soundness cliff, not just a style note: +/// once more than one address space exists, flushing a token minted for +/// AS-A against AS-B would invalidate the wrong TLB entry, and nothing in +/// the type system prevents it. The multi-AS step (flagged in ADR-0027) +/// must add an AS/ASID discriminant to `MapperFlush` (e.g. a +/// `PhantomData` AS-id or a stored ASID) and make `flush` reject a +/// mismatch. 
/// -/// [adr-0027]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md -/// [mm-doc]: https://github.com/cemililik/Tyrne/blob/main/docs/architecture/memory-management.md +/// [adr-0027]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md +/// [mm-doc]: https://github.com/HodeTech/Tyrne/blob/main/docs/architecture/memory-management.md #[must_use = "MapperFlush carries a TLB-invalidation responsibility — \ call .flush(mmu) to invalidate the per-address TLB entry, \ or .ignore() if a bulk invalidate_tlb_all() will follow"] @@ -318,9 +336,9 @@ impl MapperFlush { /// [ADR-0009 §Revision notes][adr-0009-rev] for the additive-extension /// record. /// -/// [ADR-0009]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0009-mmu-trait.md -/// [adr-0027]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md -/// [adr-0009-rev]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0009-mmu-trait.md#revision-notes +/// [ADR-0009]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0009-mmu-trait.md +/// [adr-0027]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md +/// [adr-0009-rev]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0009-mmu-trait.md#revision-notes pub trait Mmu: Send + Sync { /// Per-BSP address-space structure. type AddressSpace: Send; @@ -389,7 +407,13 @@ pub trait Mmu: Send + Sync { /// /// # Errors /// - /// - [`MmuError::AlreadyMapped`] if `va` already has a mapping. + /// - [`MmuError::AlreadyMapped`] if `va` already has a mapping. Note + /// that a `va` falling inside an existing large block (e.g. a 2 MiB + /// block at L1/L2 from the bootstrap mapping) also returns + /// `AlreadyMapped` on `map` — **not** [`MmuError::BlockMapped`]: + /// the requested 4 KiB slot is structurally occupied, and + /// block-split is deferred to B3+. 
(`unmap` *does* distinguish the + /// block case as `BlockMapped`; the asymmetry is deliberate.) /// - [`MmuError::MisalignedAddress`] if `va` is not /// [`PAGE_SIZE`]-aligned. /// - [`MmuError::OutOfFrames`] if an intermediate table needed a frame @@ -397,8 +421,12 @@ pub trait Mmu: Send + Sync { /// frames already pulled from `frames` may have been installed /// into `as_` and are not returned to the caller; `pa` itself /// (the leaf frame) is unchanged per (2). - /// - [`MmuError::InvalidFlags`] if `flags` cannot be applied (for - /// example, user + kernel-only combinations). + /// - [`MmuError::InvalidFlags`] if `flags` requests an unrepresentable + /// combination. In v1 the only such case is any mapping with **both + /// [`MappingFlags::DEVICE`] and [`MappingFlags::EXECUTE`]** set, + /// because MMIO is never executable (ADR-0027 §Decision outcome (b); + /// `flags_to_descriptor_bits` forces `PXN = UXN = 1` for DEVICE). + /// Both shipped implementors reject exactly this combination. fn map( &self, as_: &mut Self::AddressSpace, diff --git a/hal/src/mmu/vmsav8.rs b/hal/src/mmu/vmsav8.rs index ccfb2ea..8d4c5a4 100644 --- a/hal/src/mmu/vmsav8.rs +++ b/hal/src/mmu/vmsav8.rs @@ -11,7 +11,7 @@ //! [`docs/architecture/memory-management.md` §"Page-table entry encoding"][mm-doc] //! for the field-by-field bit map this module reifies. //! -//! Lands with [T-016](https://github.com/cemililik/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-016-mmu-activation.md). +//! Lands with [T-016](https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-016-mmu-activation.md). //! //! ## Scope //! @@ -29,8 +29,8 @@ //! safety contract) live in the BSP's `Mmu::map` / `Mmu::unmap` impl //! and are audited under UNSAFE-2026-0025. //! -//! [adr-0027]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md -//! [mm-doc]: https://github.com/cemililik/Tyrne/blob/main/docs/architecture/memory-management.md +//! 
[adr-0027]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md +//! [mm-doc]: https://github.com/HodeTech/Tyrne/blob/main/docs/architecture/memory-management.md use super::MappingFlags; @@ -43,6 +43,15 @@ use super::MappingFlags; /// MAIR index for **device-nGnRnE** memory (encoding `0x00`). /// /// Used for every `MappingFlags::DEVICE` mapping (GIC + UART MMIO). +/// +/// Note (C7-002): the device attribute byte `0x00` is *both* the +/// architecture's device-nGnRnE encoding *and* the all-zero "unset MAIR +/// slot" value, so there is no positive bit pattern that distinguishes +/// "device attribute correctly programmed" from "MAIR slot is zero". The +/// device-attribute correctness therefore rests on [`ATTR_IDX_NORMAL`] +/// being `1` (selecting the non-zero `Attr1 = 0xFF`): a regression that +/// zeroed `Attr1` would be caught by the `Attr1 == 0xFF` half of the host +/// test, even though the `Attr0 == 0x00` half passes for any all-zero MAIR. pub const ATTR_IDX_DEVICE: u8 = 0; /// MAIR index for **normal cached** memory (encoding `0xFF` — write-back, @@ -58,8 +67,8 @@ pub const ATTR_IDX_NORMAL: u8 = 1; /// Per [ADR-0027 §Decision outcome (a)][adr-0027] / [`memory-management.md` /// §"`MAIR_EL1` attribute encoding"][mm-doc]. /// -/// [adr-0027]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md -/// [mm-doc]: https://github.com/cemililik/Tyrne/blob/main/docs/architecture/memory-management.md +/// [adr-0027]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md +/// [mm-doc]: https://github.com/HodeTech/Tyrne/blob/main/docs/architecture/memory-management.md pub const MAIR_EL1_VALUE: u64 = 0x0000_0000_0000_FF00; // ── Shareability + access-permission encodings ───────────────────────────────── @@ -126,7 +135,7 @@ pub const AP_USER_RO: u8 = 0b11; /// /// Per [ADR-0027 §Decision outcome (a)][adr-0027]. 
/// -/// [adr-0027]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md +/// [adr-0027]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md #[allow( clippy::unreadable_literal, reason = "system-register bit-pattern; field-by-field decomposition lives in the surrounding comment" @@ -170,7 +179,7 @@ pub const TCR_EL1_VALUE: u64 = { /// bits are read-modify-written: the bootstrap reads the current value, /// ORs in this mask, and writes back. /// -/// [adr-0027]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md +/// [adr-0027]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md pub const SCTLR_EL1_MMU_ENABLE_MASK: u64 = (1 << 0) | (1 << 2) | (1 << 12); // ── Descriptor field bit positions ───────────────────────────────────────────── @@ -246,8 +255,18 @@ pub struct DescriptorBits { /// (`PXN = UXN = 1`) because the v1 attack surface gains nothing from /// MMIO execute. /// -/// [adr-0027]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md -/// [mm-doc]: https://github.com/cemililik/Tyrne/blob/main/docs/architecture/memory-management.md +/// **Unknown flag bits are ignored by design.** Only the five named +/// [`MappingFlags`] (bits 0–4) are consulted, each via +/// [`MappingFlags::contains`]; any bit ≥ 5 set via +/// [`MappingFlags::from_raw`] is silently dropped. This is intentional +/// and safe-by-construction: an unrecognised bit cannot *grant* a +/// permission (the encoder is locked-shut by default), only the named +/// flags can. The `flags_to_descriptor_bits_ignores_bits_above_four` +/// test pins this behaviour. Callers that must reject stray bits should +/// mask the value against the named constants before calling.
+/// +/// [adr-0027]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md +/// [mm-doc]: https://github.com/HodeTech/Tyrne/blob/main/docs/architecture/memory-management.md #[must_use] pub const fn flags_to_descriptor_bits(flags: MappingFlags) -> DescriptorBits { let device = flags.contains(MappingFlags::DEVICE); @@ -314,10 +333,18 @@ pub const fn flags_to_descriptor_bits(flags: MappingFlags) -> DescriptorBits { /// Encode an L2 block descriptor (2 MiB block at level 2 with 4 KiB /// granule) per ARM ARM §D5.3. /// -/// `pa` must be 2 MiB-aligned (bottom 21 bits zero); behaviour for -/// unaligned inputs is to mask the address into the OA field, dropping -/// the low bits — callers are expected to validate alignment upstream -/// via [`crate::PhysFrame`] or equivalent. +/// `pa` must be 2 MiB-aligned (bottom 21 bits zero). **Unaligned inputs +/// are silently truncated** into the OA field: the low bits below bit 21 +/// are dropped, so an unaligned `pa` encodes a descriptor pointing at a +/// *different* physical frame than the caller intended, with **no +/// diagnostic at all** (this `const fn` deliberately performs no +/// runtime check — the `block_descriptor_drops_low_bits_for_unaligned_pa` +/// test pins the truncation as the boundary contract). This is a +/// memory-safety-adjacent hazard (it maps the wrong physical page), so +/// callers MUST validate 2 MiB alignment upstream — e.g. via a typed +/// 2 MiB-block-frame newtype or an explicit check before calling. The +/// in-tree bootstrap caller relies on its 2 MiB-strided loop arithmetic +/// for alignment (C6-004). #[must_use] pub const fn block_descriptor(pa: u64, bits: DescriptorBits) -> u64 { DESC_VALID_BIT @@ -338,6 +365,14 @@ pub const fn block_descriptor(pa: u64, bits: DescriptorBits) -> u64 { /// `pa` must be 4 KiB-aligned. 
The encoding is identical to /// [`block_descriptor`] except for bit 1 (which is 1 for an L3 page, /// 0 for an L2 block) and the OA mask (which uses bits `[47:12]`). +/// +/// As with [`block_descriptor`], an unaligned `pa` is **silently +/// truncated** into the OA field (low 12 bits dropped) with no runtime +/// check, encoding the wrong frame +/// (`page_descriptor_drops_low_bits_for_unaligned_pa` pins this). The BSP +/// feeds this a [`crate::PhysFrame`] (alignment-guaranteed by the +/// newtype) so the in-tree call site is safe; any future raw-`u64` +/// caller must validate 4 KiB alignment before calling (C6-004). #[must_use] pub const fn page_descriptor(pa: u64, bits: DescriptorBits) -> u64 { DESC_VALID_BIT @@ -423,6 +458,24 @@ mod tests { // ── flags_to_descriptor_bits ─────────────────────────────────────────────── + #[test] + fn flags_to_descriptor_bits_ignores_bits_above_four() { + // Unknown bits (≥ 5), set via `from_raw`, must not perturb the + // descriptor — only bits 0–4 (the five named flags) are meaningful, + // and an unrecognised bit can never grant a permission. Compare a + // known flag set against the same set OR'd with stray high bits. + let known = MappingFlags::WRITE | MappingFlags::EXECUTE; + let with_stray = MappingFlags::from_raw(known.raw() | (1 << 5) | (1 << 17) | (1 << 31)); + let a = flags_to_descriptor_bits(known); + let b = flags_to_descriptor_bits(with_stray); + assert_eq!(a.attr_idx, b.attr_idx); + assert_eq!(a.ap, b.ap); + assert_eq!(a.sh, b.sh); + assert_eq!(a.pxn, b.pxn); + assert_eq!(a.uxn, b.uxn); + assert_eq!(a.ng, b.ng); + } + #[test] fn empty_flags_kernel_ro_normal_no_execute_global_inverted() { let bits = flags_to_descriptor_bits(MappingFlags::empty()); diff --git a/hal/src/timer.rs b/hal/src/timer.rs index f38967c..8035053 100644 --- a/hal/src/timer.rs +++ b/hal/src/timer.rs @@ -2,7 +2,7 @@ //! //! See [ADR-0010] for the v1 scope and the list of deferred capabilities. //! -//! 
[ADR-0010]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0010-timer-trait.md +//! [ADR-0010]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0010-timer-trait.md /// Monotonic time source and one-shot deadline support. /// @@ -86,6 +86,15 @@ pub const NANOS_PER_SECOND: u64 = 1_000_000_000; /// if a caller forgets, rather than degrading to an implicit /// divide-by-zero (which produces an unfriendly compile-time error in /// const context and an unfriendly runtime error otherwise). +/// +/// **Caller contract.** This helper is on the `Timer::now_ns` hot path, +/// where `error-handling.md` §4 forbids panics outside one-shot init. +/// Callers must therefore pass a `frequency_hz` they validated as +/// non-zero **at init** (the BSP `QemuVirtCpu::new` pattern: read + +/// assert + cache `CNTFRQ_EL0`), so the assertion is provably +/// unreachable on the hot path. Any caller that cannot guarantee a +/// non-zero frequency at the call site must validate it before calling, +/// not rely on this assert as runtime error-handling. #[allow( clippy::cast_possible_truncation, reason = "saturating cast handled explicitly by the if/else guard at the end of this function" diff --git a/kernel/src/cap/mod.rs b/kernel/src/cap/mod.rs index 6ff4037..3a11338 100644 --- a/kernel/src/cap/mod.rs +++ b/kernel/src/cap/mod.rs @@ -10,16 +10,18 @@ //! [ADR-0014][adr-0014]. The architectural role of capabilities lives in //! [`security-model.md`][sec] and [architectural principle P1][p1]. //! -//! [adr-0014]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0014-capability-representation.md -//! [adr-0016]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0016-kernel-object-storage.md -//! [sec]: https://github.com/cemililik/Tyrne/blob/main/docs/architecture/security-model.md -//! [p1]: https://github.com/cemililik/Tyrne/blob/main/docs/standards/architectural-principles.md#p1--no-ambient-authority +//! 
[adr-0014]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0014-capability-representation.md +//! [adr-0016]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0016-kernel-object-storage.md +//! [sec]: https://github.com/HodeTech/Tyrne/blob/main/docs/architecture/security-model.md +//! [p1]: https://github.com/HodeTech/Tyrne/blob/main/docs/standards/architectural-principles.md#p1--no-ambient-authority //! //! ## Status (T-001 + T-002) //! //! - [`Capability`] is move-only (not `Copy`, not `Clone`). -//! - [`CapRights`] carries four v1 rights (`DUPLICATE`, `DERIVE`, `REVOKE`, -//! `TRANSFER`); more rights land with their subsystems. +//! - [`CapRights`] carries the table-management rights (`DUPLICATE`, +//! `DERIVE`, `REVOKE`, `TRANSFER`) plus the IPC rights that landed +//! with their subsystems (`SEND`, `RECV`, `NOTIFY`); reserved bits are +//! masked away by [`CapRights::from_raw`] at the future ABI boundary. //! - [`CapObject`] is a typed enum that names a kernel object by its //! typed handle — [`super::obj::TaskHandle`] / [`super::obj::EndpointHandle`] //! / [`super::obj::NotificationHandle`] / [`super::mm::AddressSpaceHandle`] @@ -52,7 +54,7 @@ use crate::obj::{EndpointHandle, NotificationHandle, TaskHandle}; /// the live [`AddressSpace`][crate::mm::AddressSpace] kernel-object /// landing per [ADR-0028][adr-0028]. /// -/// [adr-0028]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md +/// [adr-0028]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum CapKind { /// Refers to a task kernel object. @@ -66,7 +68,7 @@ pub enum CapKind { /// [`CapObject::AddressSpace`] variant carries the typed /// [`AddressSpaceHandle`]. 
/// - /// [adr-0028]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md + /// [adr-0028]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md AddressSpace, /// Refers to a physical memory region (Phase B4+). MemoryRegion, @@ -82,7 +84,7 @@ pub enum CapKind { /// introduces frame-ownership semantics. `AddressSpace` landed with /// T-018 (per [ADR-0028][adr-0028]). /// -/// [adr-0028]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md +/// [adr-0028]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum CapObject { /// Capability naming a [`Task`][crate::obj::Task] kernel object. @@ -94,7 +96,7 @@ pub enum CapObject { /// Capability naming an [`AddressSpace`][crate::mm::AddressSpace] /// kernel object (per [ADR-0028][adr-0028]; T-018 commit 2). /// - /// [adr-0028]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md + /// [adr-0028]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md AddressSpace(AddressSpaceHandle), } diff --git a/kernel/src/cap/rights.rs b/kernel/src/cap/rights.rs index eea0c48..ce18dfa 100644 --- a/kernel/src/cap/rights.rs +++ b/kernel/src/cap/rights.rs @@ -3,7 +3,7 @@ //! Hand-rolled rather than using the `bitflags` crate to keep the kernel //! dependency-free for now; see the open question in [ADR-0014][adr-0014]. //! -//! [adr-0014]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0014-capability-representation.md +//! 
[adr-0014]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0014-capability-representation.md use core::ops::{BitAnd, BitOr, BitOrAssign}; @@ -40,6 +40,16 @@ impl CapRights { /// through [`from_raw`][Self::from_raw], which silently masks reserved /// bits away so an untrusted caller cannot smuggle unknown rights past /// `contains` / subset checks. + /// + /// **Forward-API note (C1-007).** `KNOWN_BITS`, [`from_raw`][Self::from_raw], + /// [`raw`][Self::raw], [`difference`][Self::difference], and + /// [`is_empty`][Self::is_empty] are exercised only by the rights unit + /// tests today — they are *intended* surface for the future + /// syscall / userspace ABI boundary (where untrusted raw bits cross + /// into the kernel), not accidental dead code. A future dead-code + /// audit should not strip them; removing and re-adding at the ABI + /// boundary would be churn, and the masking behaviour is the right + /// design to land now (pinned by `from_raw_masks_unknown_bits`). pub const KNOWN_BITS: Self = Self( Self::DUPLICATE.0 | Self::DERIVE.0 @@ -106,6 +116,16 @@ impl CapRights { } } +// Operator surface (C1-006): `BitOr` / `BitAnd` provide the by-value +// union / intersection sugar; the only in-place operator is +// `BitOrAssign` (widening). The narrowing directions are deliberately +// **method-only** — [`intersection`][CapRights::intersection] and +// [`difference`][CapRights::difference] — rather than `BitAndAssign` / +// `SubAssign`. Narrowing is the security-relevant direction for a rights +// bitfield, so keeping it as a named call (not operator sugar) makes +// every rights-reduction explicit and grep-able at the call site. The +// asymmetry is intentional, not an oversight. + impl BitOr for CapRights { type Output = Self; fn bitor(self, rhs: Self) -> Self { diff --git a/kernel/src/cap/table.rs b/kernel/src/cap/table.rs index e926e41..8e21522 100644 --- a/kernel/src/cap/table.rs +++ b/kernel/src/cap/table.rs @@ -6,7 +6,7 @@ //! 
`cap_derive`, `cap_revoke`, `cap_drop` — plus `insert_root` for //! bootstrapping. No `unsafe`, no heap. //! -//! [adr-0014]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0014-capability-representation.md +//! [adr-0014]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0014-capability-representation.md use super::{CapError, CapObject, CapRights, Capability}; @@ -15,7 +15,7 @@ use super::{CapError, CapObject, CapRights, Capability}; /// Per [ADR-0014][adr-0014]; revisit when a real use-case demands more. /// For v1 this is a compile-time constant. /// -/// [adr-0014]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0014-capability-representation.md +/// [adr-0014]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0014-capability-representation.md pub const CAP_TABLE_CAPACITY: usize = 64; /// Hard cap on derivation depth. @@ -196,16 +196,11 @@ impl CapabilityTable { let new_index = self.pop_free().ok_or(CapError::CapsExhausted)?; let generation = self.slots[new_index as usize].generation; - // Splice into the parent's child list (or leave as a root if the - // source has no parent). Read the parent's current first_child - // *before* writing the new entry. - let former_first_child = match parent { - Some(parent_idx) => match &self.slots[parent_idx as usize].entry { - Some(parent_entry) => parent_entry.first_child, - None => return Err(CapError::InvalidHandle), - }, - None => None, - }; + // Splice `new_index` into the parent's child list as the new + // head (or leave it a root if the source has no parent), and + // recover the former head to chain as `next_sibling`. Shared with + // `cap_derive` via `link_child` (C1-008). + let former_first_child = self.link_child(parent, new_index)?; self.slots[new_index as usize].entry = Some(SlotEntry { capability: Capability::new(new_rights, object), @@ -215,13 +210,6 @@ impl CapabilityTable { depth, }); - // Update the parent's first_child to point at us. 
- if let Some(parent_idx) = parent { - if let Some(parent_entry) = self.slots[parent_idx as usize].entry.as_mut() { - parent_entry.first_child = Some(new_index); - } - } - Ok(CapHandle { index: new_index, generation, @@ -272,7 +260,13 @@ impl CapabilityTable { if new_depth_usize > MAX_DERIVATION_DEPTH { return Err(CapError::DerivationTooDeep); } - // `new_depth_usize` fits in `u8` because MAX_DERIVATION_DEPTH ≤ u8::MAX. + // `new_depth_usize` fits in `u8` because MAX_DERIVATION_DEPTH ≤ + // u8::MAX. The const-assert (C1-009) converts a future ADR that + // raises the cap above 255 — which the `depth: u8` field and + // this cast would otherwise silently truncate — into a hard + // build error, matching the `const { assert!(...) }` idiom in + // `CapabilityTable::new`. + const { assert!(MAX_DERIVATION_DEPTH <= u8::MAX as usize) }; #[allow( clippy::cast_possible_truncation, reason = "bounded by MAX_DERIVATION_DEPTH" @@ -282,12 +276,10 @@ impl CapabilityTable { let new_index = self.pop_free().ok_or(CapError::CapsExhausted)?; let generation = self.slots[new_index as usize].generation; - // Read the parent's current first_child (cap_derive always has a - // concrete parent: `src` itself). - let former_first_child = match &self.slots[parent_index as usize].entry { - Some(parent_entry) => parent_entry.first_child, - None => return Err(CapError::InvalidHandle), - }; + // Splice `new_index` as the new head of the parent's child list + // (cap_derive always has a concrete parent: `src` itself) and + // recover the former head. Shared with `cap_copy` (C1-008). 
+ let former_first_child = self.link_child(Some(parent_index), new_index)?; self.slots[new_index as usize].entry = Some(SlotEntry { capability: Capability::new(new_rights, new_object), @@ -297,10 +289,6 @@ impl CapabilityTable { depth: new_depth, }); - if let Some(parent_entry) = self.slots[parent_index as usize].entry.as_mut() { - parent_entry.first_child = Some(new_index); - } - Ok(CapHandle { index: new_index, generation, @@ -417,7 +405,7 @@ impl CapabilityTable { /// conservative choice of refusing to drop interior nodes keeps the /// contract auditable and leaves cascade semantics to `cap_revoke`. /// - /// [adr-0014]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0014-capability-representation.md + /// [adr-0014]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0014-capability-representation.md /// /// # Errors /// @@ -441,8 +429,8 @@ impl CapabilityTable { } /// Remove the capability at `handle` from this table and return it, - /// transferring ownership to the caller. Behaves like [`cap_drop`] but - /// gives the caller the capability value instead of discarding it. + /// transferring ownership to the caller. Behaves like [`Self::cap_drop`] + /// but gives the caller the capability value instead of discarding it. /// /// Used by the IPC layer ([`crate::ipc`]) to atomically move a capability /// from a sender's table into an in-flight message during `ipc_send`. @@ -523,10 +511,20 @@ impl CapabilityTable { /// [ADR-0016][adr-0016] — callers pass their watcher tables and /// refuse destruction if any of them reports a reference. /// - /// The check is linear in [`CAP_TABLE_CAPACITY`]; acceptable at - /// Phase A's scale. + /// The check is linear in [`CAP_TABLE_CAPACITY`] (a full-table scan, + /// no early reverse index). The [`crate::obj`] destroy paths call it + /// **per candidate-destroy against every watcher table**, so the + /// aggregate cost is `O(watcher_tables × CAP_TABLE_CAPACITY)` — i.e. 
+ /// bounded by the total capability space, not by a small constant or + /// a derivation subtree. This is the one operation in the module + /// whose complexity is not subtree-bounded; it is acceptable at + /// Phase A's scale (C1-002). When the watcher-set grows, replace the + /// scan with a reverse index (object → referencing slots) or a + /// per-object refcount maintained at `insert_root` / `cap_derive` / + /// `free_slot`; this pairs with the ADR-0023 cross-table-CDT + /// question (both ask "who references this object across tables"). /// - /// [adr-0016]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0016-kernel-object-storage.md + /// [adr-0016]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0016-kernel-object-storage.md #[must_use] pub fn references_object(&self, target: CapObject) -> bool { self.slots @@ -570,18 +568,65 @@ impl CapabilityTable { Some(head) } + /// Splice `new_index` in as the **head** of `parent`'s child list and + /// return the former head (to be chained as `new_index`'s + /// `next_sibling`). For a `None` parent (`new_index` is a root / + /// peer-of-root), returns `Ok(None)` and repoints nothing. + /// + /// Factored out of `cap_copy` and `cap_derive` (C1-008), which must + /// stay in lockstep so the linked-list invariants `cap_revoke` / + /// `unlink_from_siblings` rely on cannot diverge. The caller is + /// responsible for writing `new_index`'s `SlotEntry` (with + /// `next_sibling` set to the returned former head) *after* this call; + /// there is no intervening fallible step, so the parent pointer and + /// the child entry are always published together. + /// + /// # Errors + /// + /// [`CapError::InvalidHandle`] if `parent` names a slot whose entry is + /// absent (an internal inconsistency — every caller passes a parent + /// it has already resolved to a live slot). 
+ fn link_child( + &mut self, + parent: Option<Index>, + new_index: Index, + ) -> Result<Option<Index>, CapError> { + let Some(parent_idx) = parent else { + // Root / peer-of-root: no parent to splice into. + return Ok(None); + }; + let Some(parent_entry) = self.slots[parent_idx as usize].entry.as_mut() else { + return Err(CapError::InvalidHandle); + }; + let former_first_child = parent_entry.first_child; + parent_entry.first_child = Some(new_index); + Ok(former_first_child) + } + /// Free the slot at `index`: clear the entry, bump the generation, /// prepend to the free list. + /// + /// The `free_head` publish is the **last** write, gated on the + /// bounds check succeeding (C1-001): if `index` were ever + /// out-of-range, an early-published head would orphan the prior free + /// list and leave `free_head` pointing at a non-existent slot (the + /// next `pop_free` would then index out of bounds). Every current + /// caller derives `index` from a validated handle / tree walk, so + /// the out-of-range branch is unreachable — the `debug_assert!` + /// surfaces a future mis-call loudly in tests while release builds + /// fail safe as a clean no-op that leaves the free list intact. fn free_slot(&mut self, index: Index) { let old_free_head = self.free_head; - self.free_head = Some(index); - let Some(slot) = self.slots.get_mut(index as usize) else { + debug_assert!(false, "free_slot called with out-of-range index"); return; }; slot.entry = None; slot.generation = slot.generation.wrapping_add(1); slot.next_free = old_free_head; + // Publish the new head only now that the slot is confirmed + // in-range and wired into the free list. + self.free_head = Some(index); } /// Remove the slot at `index` from its parent's child list. @@ -598,10 +643,18 @@ impl CapabilityTable { }; // Walk the parent's child list to find us and remove. 
- let mut cursor = match &self.slots[parent_idx as usize].entry { - Some(entry) => entry.first_child, - None => return Err(CapError::InvalidHandle), + let Some(parent_entry) = &self.slots[parent_idx as usize].entry else { + // C1-003: we hold a parent index but the parent slot is + // empty. Callers reach `unlink_from_siblings` only after + // `resolve_handle` has validated the handle, so this is + // internal tree corruption, not a stale handle. Surface it + // loudly in tests; release still returns conservatively + // without mutating state. (Mirrors the `cap_revoke` + // cycle/duplicate guard.) + debug_assert!(false, "unlink_from_siblings: parent slot is empty"); + return Err(CapError::InvalidHandle); }; + let mut cursor = parent_entry.first_child; // Case 1: we are the head of the list. if cursor == Some(index) { @@ -626,8 +679,17 @@ impl CapabilityTable { cursor = c_next; } - // Not found — either the slot was never linked or the parent's - // child list is inconsistent. The latter is an internal bug. + // Not found — the slot claims this parent, but it is absent from + // the parent's child list. Since the caller already validated the + // handle via `resolve_handle`, this can only mean the derivation + // tree's bookkeeping is inconsistent — an internal bug, not a + // stale handle (C1-003). The `debug_assert!` distinguishes the + // two in tests/CI; release returns `InvalidHandle` conservatively + // (no state mutation) rather than corrupting the tree further. 
+ debug_assert!( + false, + "unlink_from_siblings: slot not found in parent's child list" + ); Err(CapError::InvalidHandle) } } @@ -1185,4 +1247,47 @@ mod tests { assert_eq!(t.lookup(first).unwrap_err(), CapError::InvalidHandle); assert_eq!(t.lookup(middle).unwrap_err(), CapError::InvalidHandle); } + + #[test] + fn cap_copy_of_root_then_revoke_root_leaves_peer_alive() { + // C1-004: a peer produced by `cap_copy` of a *root* capability + // is itself an independent root (same tree position = no parent, + // depth 0). `cap_revoke` is subtree-only — it walks the source's + // descendants — so revoking the original root does NOT reach a + // peer-of-a-root (the peer is a sibling root, not a descendant). + // This is the within-table analogue of the cross-table + // revocation gap ADR-0023 defers, and the documented asymmetry + // vs. `copy_of_a_child_shares_parent` (where the peer shares the + // child's parent and so dies with the parent). + let mut t = CapabilityTable::new(); + let root = t.insert_root(root_cap()).unwrap(); + let peer = t.cap_copy(root, all_rights()).unwrap(); + + // Revoking the original root must NOT invalidate the peer-of-root. + t.cap_revoke(root).unwrap(); + assert!( + t.lookup(peer).is_ok(), + "a peer of a root survives cap_revoke of the original root \ + (peers are siblings; revoke is subtree-only)" + ); + assert!(t.lookup(root).is_ok(), "the revoked root itself survives"); + } + + #[test] + fn slot_entry_size_matches_adr_0023() { + // X2-N4: pin `SlotEntry`'s in-memory size so a future field + // addition / reorder that bloats the `CapabilityTable`'s L1-cache + // footprint (64 slots × slot size) is a visible test change, and + // to confirm the ADR-0023:47 cross-reference ("Capability slot is + // currently 32 bytes"). 
The value is layout-dependent + // (`Capability` = `CapRights(u32)` + `CapObject` enum; plus three + // `Option` tree links + a `u8` depth with padding); assert + // the concrete number rather than an upper bound so any drift is + // caught. + assert_eq!( + core::mem::size_of::<SlotEntry>(), + 32, + "SlotEntry size drifted from ADR-0023's 32 bytes" + ); + } } diff --git a/kernel/src/ipc/mod.rs b/kernel/src/ipc/mod.rs index b83aadf..1311e83 100644 --- a/kernel/src/ipc/mod.rs +++ b/kernel/src/ipc/mod.rs @@ -43,9 +43,9 @@ //! schedules tasks. `ipc_notify` sets bits on the notification word; waiter //! wakeup is wired in A5. //! -//! [t003]: https://github.com/cemililik/Tyrne/blob/main/docs/analysis/tasks/phase-a/T-003-ipc-primitives.md -//! [adr-0017]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0017-ipc-primitive-set.md -//! [adr-0032]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0032-endpoint-rollback-and-cancel-recv.md +//! [t003]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-a/T-003-ipc-primitives.md +//! [adr-0017]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0017-ipc-primitive-set.md +//! [adr-0032]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0032-endpoint-rollback-and-cancel-recv.md use crate::cap::{CapHandle, CapObject, CapRights, Capability, CapabilityTable}; use crate::obj::endpoint::{EndpointArena, EndpointHandle}; @@ -63,7 +63,14 @@ use crate::obj::ENDPOINT_ARENA_CAPACITY; /// /// Shape and rationale: [ADR-0017][adr-0017]. /// -/// [adr-0017]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0017-ipc-primitive-set.md +/// `Default` (all-zero) is derived for ergonomics (tests, register-frame +/// initialisation). Note (C3-007): a zero `Message` is a perfectly valid +/// payload — the rendezvous state machine distinguishes "no message" +/// *structurally* via the [`EndpointState`] variant, never via a sentinel +/// field. 
The future syscall ABI (Phase B) must therefore **not** treat a +/// `Message::default()` as an "empty / absent message" convention. +/// +/// [adr-0017]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0017-ipc-primitive-set.md #[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] pub struct Message { /// Caller-defined discriminator. The kernel does not interpret this field. @@ -97,7 +104,7 @@ pub enum IpcError { /// instead of silently decoding as `Ok(Pending)` which the caller would /// turn into a downstream panic. /// - /// [ADR-0022]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0022-idle-task-and-typed-scheduler-deadlock.md + /// [ADR-0022]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0022-idle-task-and-typed-scheduler-deadlock.md PendingAfterResume, } @@ -120,7 +127,13 @@ pub enum SendOutcome { } /// Outcome of a successful [`ipc_recv`]. -#[derive(Debug)] +/// +/// Derives the full `Copy + Clone + Debug + Eq + PartialEq` set, matching +/// [`SendOutcome`] (C3-002). Both inner payloads are themselves `Copy + Eq` +/// ([`Message`] and [`CapHandle`]), so the derive is free and lets tests / +/// the future syscall layer assert on a whole outcome with `assert_eq!` +/// rather than destructuring with `let-else { panic! }`. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum RecvOutcome { /// A message was available — either a waiting sender or a prior delivery /// from a sender that found a registered receiver. Returns the message @@ -316,6 +329,18 @@ pub fn ipc_send( } EndpointState::SendPending { .. } | EndpointState::RecvComplete { .. } => { // Excluded by the pre-check above; unreachable in correct code. + // + // Note (C3-009): this unreachability is a *temporal* invariant — + // the `peek_state` queue-full check above runs, then this commit + // match runs, with nothing mutating the state in between because + // v1 is single-threaded cooperative (no interleaving between peek + // and commit). 
It is NOT a structural invariant. If a future + // change splits peek and commit across a yield / await / preemption + // point (B5+), a second sender could land a SendPending/RecvComplete + // in this window and make this branch reachable, panicking in + // release. Re-audit when preemption lands (cross-ref ADR-0032 + // §Context's preemption note); the defensive alternative is to + // return `Err(IpcError::QueueFull)` here. unreachable!() } } @@ -402,6 +427,18 @@ pub fn ipc_recv( /// (silent deadlock). Tracked for Phase B alongside the scheduler/IPC /// wait-set design. /// +/// # Table borrow (intentional `&`, not `&mut`) +/// +/// `caller_table` is taken as a **shared** `&CapabilityTable` (C3-005): +/// `ipc_notify` only `lookup`s the cap for validation, never `cap_take`s or +/// `insert_root`s, so it needs no mutable borrow. This mirrors +/// [`ipc_cancel_recv`] and is **deliberately asymmetric** with +/// [`ipc_send`] / [`ipc_recv`] (which take `&mut` because they move caps). +/// The asymmetry is load-bearing for the scheduler bridge: it lets the +/// bridge re-borrow the table as `&` while the arena/queues are borrowed +/// `&mut` (see `sched::ipc_recv_and_yield`'s Deadlock rollback). A "tidy to +/// uniform `&mut`" cleanup here would break that non-aliasing borrow split. +/// /// # Errors /// /// [`IpcError::InvalidCapability`] — `notif_cap` is stale or lacks `NOTIFY`. @@ -470,15 +507,26 @@ pub fn ipc_notify( /// signature gains a `caller: TaskHandle` parameter to remove the named /// caller from the waiter list rather than blanket-clearing the slot. /// +/// # Table borrow (intentional `&`, not `&mut`) +/// +/// `caller_table` is taken as a **shared** `&CapabilityTable` (C3-005): +/// cancel only `lookup`s the cap to re-validate `RECV`; it never moves a +/// cap. This matches [`ipc_notify`] and is **deliberately asymmetric** with +/// [`ipc_send`] / [`ipc_recv`]. 
The shared borrow is *depended upon* by the +/// scheduler bridge's Deadlock rollback (`sched::ipc_recv_and_yield`), which +/// re-borrows the table `&` while holding `&mut` on the arena and queues — +/// a uniform-`&mut` "cleanup" would re-introduce an aliasing borrow there +/// and surface as a Miri failure on the bridge test. +/// /// # Errors /// /// [`IpcError::InvalidCapability`] — `ep_cap` is stale, refers to a /// non-endpoint object, or lacks `RECV`. The endpoint state is not /// touched on this error. /// -/// [adr-0032]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0032-endpoint-rollback-and-cancel-recv.md -/// [ADR-0017]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0017-ipc-primitive-set.md -/// [ADR-0019]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0019-scheduler-shape.md +/// [adr-0032]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0032-endpoint-rollback-and-cancel-recv.md +/// [ADR-0017]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0017-ipc-primitive-set.md +/// [ADR-0019]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0019-scheduler-shape.md pub fn ipc_cancel_recv( ep_arena: &mut EndpointArena, queues: &mut IpcQueues, @@ -533,6 +581,20 @@ fn validate_notif_cap( } } +/// Take the cap at `handle` (if any) out of `table` for in-flight transfer. +/// +/// Note (C3-008): every `cap_take` failure (`InvalidHandle`, `HasChildren`) +/// is collapsed into a single [`IpcError::InvalidTransferCap`]. This is +/// *currently* defensible — from the sender's perspective both mean "this +/// handle is not transferable", and the pre-flight `lookup` in [`ipc_send`] +/// makes a bare `InvalidHandle` improbable (the realistic failure is +/// `HasChildren` for a derived-from cap). 
It is lossy, though: a future +/// userspace caller cannot distinguish "stale handle (retry pointless)" from +/// "has children (revoke first, then retry)" — a *handleable* distinction per +/// error-handling.md. `IpcError` is `#[non_exhaustive]`, so when a userspace +/// caller exists this can grow a distinct `TransferCapHasChildren` variant +/// (or carry the inner `CapError`, mirroring `SchedError::Ipc`'s nesting) +/// without a breaking change. fn take_cap_if_some( table: &mut CapabilityTable, handle: Option, @@ -577,7 +639,9 @@ mod tests { use crate::cap::{CapHandle, CapObject, CapRights, Capability, CapabilityTable}; use crate::obj::arena::SlotId; use crate::obj::endpoint::{create_endpoint, Endpoint, EndpointArena, EndpointHandle}; - use crate::obj::notification::{create_notification, Notification, NotificationArena}; + use crate::obj::notification::{ + create_notification, destroy_notification, Notification, NotificationArena, + }; use crate::obj::TaskHandle; // ── Setup helpers ──────────────────────────────────────────────────────── @@ -1038,6 +1102,47 @@ mod tests { ); } + #[test] + fn notify_with_stale_handle_after_slot_reuse_fails() { + // Pin C3-003: the notification analogue of + // `stale_queue_state_reset_on_slot_reuse`. A cap whose underlying + // notification was destroyed (and a new one re-allocated in the same + // slot with a bumped generation) must make `ipc_notify` return + // `InvalidCapability` via the arena `get_mut(...).ok_or(...)` branch — + // the realistic adversarial case where the cap's rights check still + // passes but the handle is stale. The endpoint side already pins this; + // this closes the notification-side gap at the `ipc_notify` boundary. + let mut table = CapabilityTable::new(); + let mut notif_arena = NotificationArena::default(); + + // Install a fully-valid NOTIFY cap, then destroy its notification. 
+ let notif_cap = setup_notif(&mut table, &mut notif_arena); + let stale_handle = match table.lookup(notif_cap).unwrap().object() { + CapObject::Notification(h) => h, + _ => panic!("wrong kind"), + }; + destroy_notification(&mut notif_arena, stale_handle).unwrap(); + + // The cap still satisfies the rights/kind check (it was minted with + // NOTIFY), so the failure must come from the arena staleness lookup, + // not the rights gate — proving the `ok_or(InvalidCapability)` mapping + // at the IPC boundary fires. (No cap_drop is even needed to provoke it.) + assert_eq!( + ipc_notify(&mut notif_arena, notif_cap, &table, 0xFF).unwrap_err(), + IpcError::InvalidCapability, + "ipc_notify on a stale notification handle must return InvalidCapability" + ); + + // Re-allocating reuses the slot with a bumped generation; the stale + // cap must still fail (it names the predecessor's generation). + let _new_handle = create_notification(&mut notif_arena, Notification::new(1)).unwrap(); + assert_eq!( + ipc_notify(&mut notif_arena, notif_cap, &table, 0xFF).unwrap_err(), + IpcError::InvalidCapability, + "stale cap must still fail after the slot is reused by a new notification" + ); + } + // ── blocked-sender wake (sender-first round-trip) ───────────────────────── #[test] diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index 9b20a77..1629e27 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -13,24 +13,34 @@ //! `#![cfg_attr(not(test), no_std)]` disables `std` for production builds //! while allowing the standard test harness in host-side `cargo test` runs. //! -//! [adr-0006]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0006-workspace-layout.md -//! [p6]: https://github.com/cemililik/Tyrne/blob/main/docs/standards/architectural-principles.md#p6--hal-separation +//! [adr-0006]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0006-workspace-layout.md +//! 
[p6]: https://github.com/HodeTech/Tyrne/blob/main/docs/standards/architectural-principles.md#p6--hal-separation //! //! ## Subsystems //! //! - [`obj`] — kernel-object subsystem (Phase A3 / [T-002]): per-type -//! arenas holding the concrete entities that capabilities name. +//! arenas holding the concrete entities that capabilities name. Also +//! the home of the Phase-B4 task loader ([`obj::task_loader`] / +//! [T-019]), which composes [`mm`] + [`cap`] to stage and map a task +//! image. //! - [`cap`] — capability subsystem (Phase A2 / [T-001]), the substrate every //! later subsystem refers through for authority. //! - [`ipc`] — IPC subsystem (Phase A4 / [T-003]): `send` / `recv` / `notify` //! primitives over the A3 kernel objects, gated by capabilities. +//! - [`mm`] — memory-management subsystem (Phase B / [T-017] + [T-018]): +//! the bitmap physical-memory manager ([`mm::Pmm`], ADR-0035) and the +//! cap-gated `AddressSpace` kernel object ([`mm::AddressSpace`], +//! ADR-0028) with its `cap_map` / `cap_unmap` wrappers. //! - [`sched`] — cooperative scheduler (Phase A5 / [T-004]): bounded FIFO //! ready queue, per-task state, and IPC bridge. //! -//! [T-001]: https://github.com/cemililik/Tyrne/blob/main/docs/analysis/tasks/phase-a/T-001-capability-table-foundation.md -//! [T-002]: https://github.com/cemililik/Tyrne/blob/main/docs/analysis/tasks/phase-a/T-002-kernel-object-storage.md -//! [T-003]: https://github.com/cemililik/Tyrne/blob/main/docs/analysis/tasks/phase-a/T-003-ipc-primitives.md -//! [T-004]: https://github.com/cemililik/Tyrne/blob/main/docs/analysis/tasks/phase-a/T-004-cooperative-scheduler.md +//! [T-001]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-a/T-001-capability-table-foundation.md +//! [T-002]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-a/T-002-kernel-object-storage.md +//! [T-003]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-a/T-003-ipc-primitives.md +//! 
[T-004]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-a/T-004-cooperative-scheduler.md +//! [T-017]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-017-physical-memory-manager.md +//! [T-018]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-018-address-space-kernel-object.md +//! [T-019]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-019-task-loader.md #![cfg_attr(not(test), no_std)] // Kernel-specific stricter lints — these layer onto `[workspace.lints]` diff --git a/kernel/src/mm/address_space.rs b/kernel/src/mm/address_space.rs index 9cb4f9c..bb00ab6 100644 --- a/kernel/src/mm/address_space.rs +++ b/kernel/src/mm/address_space.rs @@ -27,7 +27,7 @@ //! commit 4. The bootstrap-AS wrap + arena `StaticCell` publication //! land in T-018 commit 5. //! -//! [adr-0028]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md +//! [adr-0028]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md use crate::cap::{ CapError, CapHandle, CapKind, CapObject, CapRights, CapabilityTable, MAX_DERIVATION_DEPTH, @@ -55,7 +55,7 @@ pub const ADDRESS_SPACE_ARENA_CAPACITY: usize = 8; /// slot) so this handle's `(index=0, generation=0)` deterministically /// matches the live arena slot. /// -/// [adr-0028]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md +/// [adr-0028]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md pub const BOOTSTRAP_ADDRESS_SPACE_HANDLE: AddressSpaceHandle = AddressSpaceHandle::from_slot(SlotId::first_slot()); @@ -73,8 +73,8 @@ pub const BOOTSTRAP_ADDRESS_SPACE_HANDLE: AddressSpaceHandle = /// land here additively when ADR-0033 (high-half migration) opens — /// not added today (CLAUDE.md non-negotiable #6, no speculative design). 
/// -/// [adr-0016]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0016-kernel-object-storage.md -/// [adr-0028]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md +/// [adr-0016]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0016-kernel-object-storage.md +/// [adr-0028]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md pub struct AddressSpace { inner: M::AddressSpace, } @@ -97,7 +97,7 @@ impl AddressSpace { /// via `cap_create_address_space` → `PMM.alloc_frame()` → /// `Mmu::create_address_space(root)` (T-018 commit 3). /// - /// [adr-0028]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md + /// [adr-0028]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md #[must_use] pub const fn wrap_bootstrap(inner: M::AddressSpace) -> Self { Self { inner } @@ -200,7 +200,7 @@ impl AddressSpaceHandle { /// the kernel inherits this generic from the scheduler surface /// (ADR-0019 / ADR-0020) rather than introducing a parallel axis. /// -/// [adr-0028]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md +/// [adr-0028]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md pub type AddressSpaceArena = Arena, ADDRESS_SPACE_ARENA_CAPACITY>; /// Errors returned by address-space operations. @@ -241,8 +241,8 @@ pub enum AddressSpaceError { /// underlying [`AddressSpaceArena`] is in use. ArenaFull, /// The handle does not name a live slot — either never allocated, - /// already freed, or stale after reuse. Returned by - /// [`destroy_address_space`] and by the cap-gated `cap_map` / + /// already freed, or stale after reuse. Returned by the crate-internal + /// `destroy_address_space` and by the cap-gated `cap_map` / /// `cap_unmap` wrappers when the cap's handle has gone stale. 
StaleHandle, /// PMM exhausted: the underlying [`FrameProvider::alloc_frame`] @@ -296,11 +296,17 @@ pub fn create_address_space( /// for one v1 caller — the rollback path in [`cap_create_address_space`] /// when cap-table minting fails after a successful arena allocation. /// +/// `pub(crate)`: its only v1 caller is the in-module rollback path, so +/// it is not part of the kernel-facing free-function surface that +/// `mm/mod.rs` re-exports (C2-004 API-coherence fix). Promote to `pub` +/// — and add it to the `mm/mod.rs` `pub use` — when the B4+ per-AS +/// destroy path gives it an out-of-module caller. +/// /// # Errors /// /// [`AddressSpaceError::StaleHandle`] when `handle` is stale or /// already freed. -pub fn destroy_address_space( +pub(crate) fn destroy_address_space( arena: &mut AddressSpaceArena, handle: AddressSpaceHandle, ) -> Result, AddressSpaceError> { @@ -321,7 +327,13 @@ pub fn get_address_space( /// Return a mutable reference to the address space at `handle`, or /// `None` if stale / freed. -pub fn get_address_space_mut( +/// +/// `pub(crate)`: used in-module by [`cap_map`] / [`cap_unmap`] and not +/// re-exported from `mm/mod.rs`. Narrowed from `pub` for API coherence +/// (C2-004) — its sibling [`get_address_space`] stays `pub` because it +/// *is* re-exported; the mutable form has no out-of-module caller, and +/// keeping it crate-internal shrinks the cap-bypass-able surface. +pub(crate) fn get_address_space_mut( arena: &mut AddressSpaceArena, handle: AddressSpaceHandle, ) -> Option<&mut AddressSpace> { @@ -394,7 +406,7 @@ pub fn activate_address_space_handle( // [`AddressSpaceError`] return type that wraps the underlying // [`CapError`] / [`MmuError`] taxonomies without flattening. 
// -// [adr-0028]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md +// [adr-0028]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md /// Resolve a capability handle to an [`AddressSpaceHandle`]. /// @@ -511,9 +523,9 @@ fn resolve_address_space_cap( /// - [`CapError(InvalidHandle)`][`CapError(CapError)`] — /// `parent_cap_handle` lookup failed at step 1. /// -/// [adr-0014]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0014-capability-representation.md -/// [adr-0028]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md -/// [UNSAFE-2026-0026]: https://github.com/cemililik/Tyrne/blob/main/docs/audits/unsafe-log.md +/// [adr-0014]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0014-capability-representation.md +/// [adr-0028]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md +/// [UNSAFE-2026-0026]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md /// [`OutOfFrames`]: AddressSpaceError::OutOfFrames /// [`ArenaFull`]: AddressSpaceError::ArenaFull /// [`CapError(CapError)`]: AddressSpaceError::CapError @@ -635,8 +647,8 @@ pub fn cap_create_address_space( // way of forcing every caller (not just BSPs with side-effecting // bodies) through an audit-disciplined site. // - // [ADR-0009]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0009-mmu-trait.md - // [UNSAFE-2026-0026]: https://github.com/cemililik/Tyrne/blob/main/docs/audits/unsafe-log.md + // [ADR-0009]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0009-mmu-trait.md + // [UNSAFE-2026-0026]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md let inner = unsafe { mmu.create_address_space(root) }; // Step 6: arena slot. 
Preflight at step 3 guarantees `arena` @@ -703,8 +715,8 @@ pub fn cap_create_address_space( /// (`OutOfFrames` for intermediate-table allocs, `AlreadyMapped`, /// `MisalignedAddress`, `InvalidFlags`, ...). /// -/// [adr-0028]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md -/// [UNSAFE-2026-0025]: https://github.com/cemililik/Tyrne/blob/main/docs/audits/unsafe-log.md +/// [adr-0028]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md +/// [UNSAFE-2026-0025]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md /// [`CapError(_)`]: AddressSpaceError::CapError /// [`StaleHandle`]: AddressSpaceError::StaleHandle /// [`MmuMapError(MmuError)`]: AddressSpaceError::MmuMapError @@ -759,7 +771,7 @@ pub fn cap_map( /// - [`MmuUnmapError(MmuError)`] — pass-through from `Mmu::unmap` /// (`NotMapped`, `MisalignedAddress`, ...). /// -/// [adr-0028]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md +/// [adr-0028]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md /// [`CapError(_)`]: AddressSpaceError::CapError /// [`StaleHandle`]: AddressSpaceError::StaleHandle /// [`MmuUnmapError(MmuError)`]: AddressSpaceError::MmuUnmapError @@ -795,7 +807,7 @@ mod tests { }; use crate::cap::{CapError, CapObject, CapRights, Capability, CapabilityTable}; use tyrne_hal::{mmu::Mmu, MappingFlags, MmuError, PhysAddr, PhysFrame, VirtAddr}; - use tyrne_test_hal::{FakeMmu, VecFrameProvider}; + use tyrne_test_hal::{BlockMappedMmu, FakeMmu, OutOfFramesMmu, VecFrameProvider}; fn frame(addr: usize) -> PhysFrame { PhysFrame::from_aligned(PhysAddr(addr)).expect("test addr must be page-aligned") @@ -1267,6 +1279,183 @@ mod tests { )); } + // ── MR-018: pin the `Mmu::map` failure contract through `cap_map` ────────── + // + // `FakeMmu` has a flat-HashMap design with no intermediate page + // tables, so it 
can never return `MmuError::OutOfFrames` (mid-walk + // intermediate-table allocation failure) or `MmuError::BlockMapped` + // (walk hits a 2 MiB block descriptor) — the two clauses of the + // `Mmu::map` failure contract that `cap_map` rides for unsafe-free + // callers (C2-006). The sibling test-hal stream's `OutOfFramesMmu` / + // `BlockMappedMmu` decorators inject exactly those failures *before* + // any address-space mutation and *without* consuming `pa`. These + // tests drive `cap_map` through both clauses and assert the wrapper + // surfaces the error, does not consume the leaf `pa`, and leaves no + // mapping behind. + + /// Generic cap-wrapper setup over an arbitrary `M: Mmu`. Builds a + /// bootstrap address space in `arena`, mints a full-rights cap for + /// it in a fresh `CapabilityTable`, and returns + /// `(table, cap_handle, arena)`. Mirrors `bootstrap_setup` but is + /// parametric in the MMU type so the failure-injecting decorators + /// can be threaded through `cap_map` as `M`. + fn bootstrap_setup_generic( + mmu: &M, + ) -> (CapabilityTable, crate::cap::CapHandle, AddressSpaceArena) { + let mut arena: AddressSpaceArena = AddressSpaceArena::new(); + let mut table = CapabilityTable::new(); + + // SAFETY: + // (a) `Mmu::create_address_space` is `unsafe` at the trait boundary, + // so this call site discharges the contract manually. + // (b) `frame(0x4000_0000)` is page-aligned, and these decorators + // delegate to `FakeMmu`, a host-side mock that only stores the + // `PhysFrame` and never dereferences `root` — no UB. + // (c) A local safe shim would hide, not remove, the trait-level + // `unsafe` contract and weaken audit visibility at the call site. 
+ let bootstrap_inner = unsafe { mmu.create_address_space(frame(0x4000_0000)) }; + let bootstrap_as = AddressSpace::wrap_bootstrap(bootstrap_inner); + let as_handle = create_address_space(&mut arena, bootstrap_as).unwrap(); + + let bootstrap_cap = Capability::new( + CapRights::DUPLICATE | CapRights::DERIVE | CapRights::REVOKE | CapRights::TRANSFER, + CapObject::AddressSpace(as_handle), + ); + let cap_handle = table.insert_root(bootstrap_cap).unwrap(); + + (table, cap_handle, arena) + } + + #[test] + fn cap_map_propagates_intermediate_out_of_frames_and_does_not_consume_pa() { + // C2-006: drive `cap_map` through the `Mmu::map` clause-3 + // intermediate-frame `OutOfFrames` path. `OutOfFramesMmu::map` + // pulls one frame from the provider (modelling an intermediate + // page-table allocation); with an empty provider it returns + // `OutOfFrames` before any AS mutation and without consuming + // `pa`. + let mmu = OutOfFramesMmu::new(); + let (table, bootstrap_cap, mut arena) = bootstrap_setup_generic(&mmu); + + // Empty provider → the intermediate-frame alloc fails. + let mut empty = VecFrameProvider::new(vec![]); + let va = VirtAddr(0x0001_0000); + let pa = frame(0x7000_0000); + + let result = cap_map( + &table, + bootstrap_cap, + &mmu, + &mut empty, + &mut arena, + va, + pa, + MappingFlags::WRITE, + ); + + // The wrapper surfaces the HAL error verbatim. + assert!( + matches!( + result, + Err(AddressSpaceError::MmuMapError(MmuError::OutOfFrames)) + ), + "cap_map must propagate the intermediate-frame OutOfFrames \ + from Mmu::map" + ); + + // Rollback contract: `pa` (the leaf frame) was NOT consumed — + // the caller may safely return it to its provider. The provider + // started empty, so a successful re-alloc would mean a frame was + // wrongly handed back; we instead confirm the leaf `pa` is still + // mappable (no mapping was installed for `va`). 
+ let handle = resolve_address_space_cap(&table, bootstrap_cap).unwrap(); + let as_ref = get_address_space(&arena, handle).unwrap(); + assert_eq!( + as_ref.inner().mapping_count(), + 0, + "no mapping must be installed on the OutOfFrames error path" + ); + + // The flush token is never produced on the error path, so no TLB + // invalidation was issued (`?` returns before `token.flush`). + assert!( + mmu.inner().tlb_address_invalidations().is_empty(), + "no TLB flush must be issued when map fails" + ); + } + + #[test] + fn cap_map_propagates_block_mapped_and_leaves_no_mapping() { + // C2-006: drive `cap_map` through the `Mmu::map` `BlockMapped` + // clause. `BlockMappedMmu` returns `BlockMapped` for a blocked + // VA before any AS mutation and without consuming `pa`. + let va = VirtAddr(0x0001_0000); + let mmu = BlockMappedMmu::with_blocked([va]); + let (table, bootstrap_cap, mut arena) = bootstrap_setup_generic(&mmu); + + // A populated provider proves the failure is the block descriptor, + // not frame exhaustion: even with frames available, the blocked VA + // must short-circuit to BlockMapped. + let mut pmm = VecFrameProvider::new(vec![frame(0x6000_0000)]); + let pa = frame(0x7000_0000); + let pmm_before = pmm.remaining(); + + let result = cap_map( + &table, + bootstrap_cap, + &mmu, + &mut pmm, + &mut arena, + va, + pa, + MappingFlags::WRITE, + ); + + assert!( + matches!( + result, + Err(AddressSpaceError::MmuMapError(MmuError::BlockMapped)) + ), + "cap_map must propagate BlockMapped from Mmu::map" + ); + + // `pa` not consumed and no intermediate frame pulled: the block + // check fires before any allocation, so the provider is untouched. + assert_eq!( + pmm.remaining(), + pmm_before, + "BlockMapped must fire before any frame is consumed" + ); + + // No mapping installed and no TLB flush issued. 
+ let handle = resolve_address_space_cap(&table, bootstrap_cap).unwrap(); + let as_ref = get_address_space(&arena, handle).unwrap(); + assert_eq!( + as_ref.inner().mapping_count(), + 0, + "no mapping must be installed on the BlockMapped error path" + ); + assert!( + mmu.inner().tlb_address_invalidations().is_empty(), + "no TLB flush must be issued when map fails" + ); + + // Sanity: a non-blocked VA on the same MMU still maps (the + // decorator is faithful for the success path). + let ok_va = VirtAddr(0x0002_0000); + let ok = cap_map( + &table, + bootstrap_cap, + &mmu, + &mut pmm, + &mut arena, + ok_va, + pa, + MappingFlags::WRITE, + ); + assert!(ok.is_ok(), "a non-blocked VA must still map successfully"); + } + // ── Revocation cascade regression (Fix 1) ──────────────────────────────── #[test] diff --git a/kernel/src/mm/mod.rs b/kernel/src/mm/mod.rs index f1a5c3f..82e0016 100644 --- a/kernel/src/mm/mod.rs +++ b/kernel/src/mm/mod.rs @@ -8,10 +8,10 @@ //! [`docs/architecture/memory-management.md`] for the synthesised //! architecture chapter. //! -//! [ADR-0028]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md -//! [ADR-0035]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0035-physical-memory-manager.md -//! [T-017]: https://github.com/cemililik/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-017-physical-memory-manager.md -//! [T-018]: https://github.com/cemililik/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-018-address-space-kernel-object.md +//! [ADR-0028]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md +//! [ADR-0035]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0035-physical-memory-manager.md +//! [T-017]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-017-physical-memory-manager.md +//! 
[T-018]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-018-address-space-kernel-object.md pub mod address_space; pub mod pmm; @@ -35,7 +35,7 @@ use tyrne_hal::{PhysAddr, PAGE_SIZE}; /// panicking; the validation layer at the BSP is the canonical /// source for "well-formed range". /// -/// [adr-0035]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0035-physical-memory-manager.md#simulation +/// [adr-0035]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0035-physical-memory-manager.md#simulation /// [`Pmm::new`]: pmm::Pmm::new #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub struct PhysFrameRange { @@ -49,6 +49,18 @@ pub struct PhysFrameRange { impl PhysFrameRange { /// Construct a range from raw bounds. + /// + /// Performs **no** alignment or ordering validation: the soft + /// `start <= end` invariant and page-alignment are the caller's + /// responsibility (canonically [`Pmm::new`], which validates both + /// before trusting [`frame_count`][Self::frame_count] / + /// [`len_bytes`][Self::len_bytes]). [`frame_count`][Self::frame_count] + /// and [`len_bytes`][Self::len_bytes] are only meaningful for + /// page-aligned, non-inverted bounds — an inverted range reads as + /// zero-length and an unaligned range yields a truncating frame + /// count (C2-010). + /// + /// [`Pmm::new`]: pmm::Pmm::new #[must_use] pub const fn new(start: PhysAddr, end: PhysAddr) -> Self { Self { start, end } @@ -158,10 +170,10 @@ pub use pmm::{Pmm, PmmError, PmmStats}; /// commit ADR-0033 lands (kept inline today to avoid churning the /// audit-log entries that landed with T-017). 
/// -/// [UNSAFE-2026-0026]: https://github.com/cemililik/Tyrne/blob/main/docs/audits/unsafe-log.md +/// [UNSAFE-2026-0026]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md /// -/// [adr-0027]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md -/// [UNSAFE-2026-0027]: https://github.com/cemililik/Tyrne/blob/main/docs/audits/unsafe-log.md +/// [adr-0027]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md +/// [UNSAFE-2026-0027]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md #[must_use] #[inline] pub(crate) fn phys_frame_kernel_ptr(frame: tyrne_hal::PhysFrame) -> *mut u8 { diff --git a/kernel/src/mm/pmm.rs b/kernel/src/mm/pmm.rs index 5608f20..f37ea42 100644 --- a/kernel/src/mm/pmm.rs +++ b/kernel/src/mm/pmm.rs @@ -7,16 +7,27 @@ //! //! See [ADR-0035] for the design (bitmap vs. free-list trade-offs, //! reservation tracking, forward-portability to high-half kernel) and -//! [T-017] for the implementation arc this file lands across four +//! [T-017] for the implementation arc this file landed across four //! bisectable commits. //! -//! Commit 1 (this file, initial landing): `Pmm` struct + bitmap -//! arithmetic + `Pmm::new` constructor + four host tests pinning -//! `Pmm::new`'s contract. No `unsafe`. The next commit adds -//! `alloc_frame` / `free_frame` / `stats`. +//! Responsibilities (steady state): track the managed physical extent +//! via a one-bit-per-frame bitmap; reserve init-time ranges (kernel +//! image / `.boot_pt` / boot stack) so they are never handed out; +//! implement [`tyrne_hal::FrameProvider`] (`alloc_frame`) for runtime +//! [`Mmu::map`] callers; account frame state via cached counters +//! cross-checked against the bitmap; and zero-fill every allocated +//! frame before return under [UNSAFE-2026-0026] (the sole `unsafe` +//! site in this file — the [`core::ptr::write_bytes`] call in +//! 
[`Pmm::alloc_frame`]). [`Pmm::free_frame`] returns a frame to the +//! free pool, and [`Pmm::could_yield_pa_overlapping`] answers the +//! "could `alloc_frame` ever return a PA aliasing this range" query +//! the task loader uses to discharge [UNSAFE-2026-0027]. //! -//! [ADR-0035]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0035-physical-memory-manager.md -//! [T-017]: https://github.com/cemililik/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-017-physical-memory-manager.md +//! [UNSAFE-2026-0026]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md +//! [UNSAFE-2026-0027]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md +//! +//! [ADR-0035]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0035-physical-memory-manager.md +//! [T-017]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-017-physical-memory-manager.md //! [`Mmu::map`]: tyrne_hal::Mmu::map use tyrne_hal::{FrameProvider, PhysAddr, PhysFrame, PAGE_SIZE}; @@ -54,7 +65,7 @@ pub enum PmmError { /// the bit-set but twice to the cached counter), leaving /// `stats()` inconsistent with the bitmap and risking /// `free_count = 0` while `alloc_frame()` still has free - /// frames. See [PR #26 review-round 1](https://github.com/cemililik/Tyrne/pull/26). + /// frames. See [PR #26 review-round 1](https://github.com/HodeTech/Tyrne/pull/26). OverlappingReservedRanges, /// `free_frame` rejected an attempt to free a frame that is /// already Free, or whose PA falls in a Reserved range (the @@ -62,7 +73,7 @@ pub enum PmmError { /// the cached reserved-range list is the discrimination /// mechanism, per [ADR-0035 §Simulation §Step 2 Critical row]). 
/// - /// [ADR-0035 §Simulation §Step 2 Critical row]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0035-physical-memory-manager.md#simulation + /// [ADR-0035 §Simulation §Step 2 Critical row]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0035-physical-memory-manager.md#simulation DoubleFree, } @@ -104,7 +115,7 @@ pub struct PmmStats { /// /// Per [ADR-0035 §Decision outcome][adr-0035]. /// -/// [adr-0035]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0035-physical-memory-manager.md#decision-outcome +/// [adr-0035]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0035-physical-memory-manager.md#decision-outcome pub struct Pmm { /// One bit per frame; bit `i` set ⇔ frame `i` is Allocated or /// Reserved (single-bit collapse per [ADR-0035 §Negative @@ -152,11 +163,16 @@ impl Pmm { /// small an `N` for its extent). /// 3. `reserved.len() <= R` — returns [`PmmError::TooManyReservedRanges`] /// otherwise. - /// 4. Each reserved range is page-aligned, fits inside + /// 4. Each reserved range is page-aligned (returns + /// [`PmmError::MisalignedAddress`] otherwise), fits inside /// `[extent.start, extent.end)`, and is non-inverted - /// (`range.end >= range.start`) — returns - /// [`PmmError::MisalignedAddress`] or [`PmmError::OutOfRange`] - /// otherwise. + /// (`range.end >= range.start`) — out-of-bounds **or inverted** + /// ranges return [`PmmError::OutOfRange`]. The inversion check is + /// defence-in-depth: [`PhysFrameRange::frame_count`] already + /// treats an inverted range as zero-length (saturating + /// arithmetic), so even without the guard an inverted range would + /// cover zero frames — but rejecting malformed input outright is + /// the more honest fail-fast stance. /// 5. No two reserved ranges overlap (pairwise half-open check) — /// returns [`PmmError::OverlappingReservedRanges`] otherwise. /// Touching boundaries (`[a, b)` + `[b, c)`) are accepted. 
@@ -179,7 +195,7 @@ impl Pmm { /// `extent.frame_count() <= N * 8` and reported as /// [`PmmError::OutOfRange`] (no kernel-static panic). /// - /// [adr-0035]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0035-physical-memory-manager.md#simulation + /// [adr-0035]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0035-physical-memory-manager.md#simulation pub fn new(extent: PhysFrameRange, reserved: &[PhysFrameRange]) -> Result { // Validation (i): extent page-aligned. if !extent.is_aligned() { @@ -328,7 +344,7 @@ impl Pmm { /// forward-compat note, a wrap-then-scan-prefix step would land /// when SMP per-core caches arrive; not v1. /// - /// [adr-0035]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0035-physical-memory-manager.md#simulation + /// [adr-0035]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0035-physical-memory-manager.md#simulation pub fn alloc_frame(&mut self) -> Option { // Test-only failure injection (see `alloc_failure_after`'s // doc-comment). Runs before the production body so a forced @@ -353,6 +369,17 @@ impl Pmm { // scaffolding for SMP per-core-caches, which v1 doesn't // need but we leave the wrap in place to keep the // future-extension path one-line clean. + // + // Complexity (X2-002): the forward pass is O(total_frames - + // hint) and the wrap pass is O(hint), so the combined scan is + // O(total_frames) worst-case (max 32 768 frames per ADR-0035 + // §Simulation §Step 1). The wrap pass is dead in v1's + // single-core cooperative model — `free_frame`'s `hint = + // min(hint, idx)` rewind keeps `hint <= lowest-free-index`, so + // a free frame is always found on the forward pass — and is + // preserved only for future SMP free-then-alloc interleaving; + // do not remove it as a "simplification" without restoring an + // equivalent fallback. 
let mut idx_opt: Option = (self.hint..total_frames).find(|&idx| !read_bit(&self.bitmap, idx)); if idx_opt.is_none() && self.hint > 0 { @@ -362,21 +389,29 @@ impl Pmm { } let idx = idx_opt?; - // Mark allocated. + // Compute the frame's PA and mint the PhysFrame *before* any + // state mutation (C2-002). `from_aligned` is the only fallible + // step; computing it first and bailing via `?` on `None` makes + // the "bit set, no handle handed out → permanent leak" window + // structurally impossible rather than merely currently + // unreachable. Validation (i) on `extent` at `Pmm::new` time + // guarantees `extent.start` is page-aligned and `idx * + // PAGE_SIZE` preserves alignment, so `from_aligned` is + // provably-`Some` today; the `?` is the fail-safe a future + // maintainer inherits if that proof is ever weakened. + let pa_off = idx.saturating_mul(PAGE_SIZE); + let pa_usize = self.extent.start.0.saturating_add(pa_off); + let frame = PhysFrame::from_aligned(PhysAddr(pa_usize))?; + let pa_ptr = pa_usize as *mut u8; + + // Mark allocated. Reached only after the fallible `from_aligned` + // above succeeded, so the counters/bitmap and the handed-out + // handle move together — no leak window. set_bit(&mut self.bitmap, idx); self.hint = idx.saturating_add(1); self.free_count = self.free_count.saturating_sub(1); self.allocated_count = self.allocated_count.saturating_add(1); - // Compute the frame's PA. Validation (i) on `extent` at - // `Pmm::new` time guarantees `extent.start` is page-aligned; - // `idx * PAGE_SIZE` preserves alignment. The - // `from_aligned` unwrap_or(unreachable!) pair is therefore - // structurally provable. 
- let pa_off = idx.saturating_mul(PAGE_SIZE); - let pa_usize = self.extent.start.0.saturating_add(pa_off); - let pa_ptr = pa_usize as *mut u8; - // SAFETY: // **Why unsafe is needed.** The FrameProvider contract // ("Returned frames must be page-aligned and @@ -437,27 +472,12 @@ impl Pmm { core::ptr::write_bytes(pa_ptr, 0u8, PAGE_SIZE); } - // Return the page-aligned PhysFrame. `from_aligned` is - // provably-Some here: validation (i) on Pmm::new guarantees - // `extent.start` is page-aligned, and `idx * PAGE_SIZE` - // preserves that alignment. Returning the Option directly - // (rather than unwrap / expect) keeps `clippy::unwrap_used` - // happy without adding a panic path. - // - // **Unreachable-leak caveat.** Mutation of the bitmap, hint, - // and counters above happens BEFORE this call. If a future - // change ever weakens the alignment proof (e.g., a BSP whose - // extent.start is not page-aligned and the validation is - // bypassed), `from_aligned` could return `None` and this - // function would return `None` to the caller while the - // bitmap state has already moved — the frame would be - // permanently leaked (bit set, no PhysFrame handed out). The - // path is structurally unreachable in v1; a future - // maintainer who alters Pmm::new's validation set must - // either preserve the alignment proof or move the mutation - // block below this call to keep the leak structurally - // impossible. - PhysFrame::from_aligned(PhysAddr(pa_usize)) + // Return the page-aligned PhysFrame minted above. The frame was + // computed via the fallible `from_aligned` *before* any bitmap / + // counter mutation (see C2-002 reorder), so there is no + // mutate-then-fail leak window: a `None` from `from_aligned` + // bails via `?` while the PMM state is still byte-stable. + Some(frame) } /// Free a previously-allocated frame. 
@@ -564,19 +584,36 @@ impl Pmm { /// /// # Algorithm /// - /// Clip `pa_range` to `extent`, then walk the covered frame - /// indices linearly; return `true` on the first frame whose PA is - /// not inside any populated `Some(_)` slot of `reserved_ranges`. - /// Worst-case `O((pa_range.len() / PAGE_SIZE) × populated_reserved)`; - /// for the loader's v1 placeholder (8-byte image, 1 frame of - /// coverage) this is a single iteration over at most `R` slots - /// (`R = 8` for `bsp-qemu-virt`). + /// Pure interval arithmetic over **frame-index space** — no + /// per-frame enumeration. Clip `pa_range` to `extent` to obtain a + /// half-open frame-index window `[start_idx, end_idx)`, then walk + /// that window with a cursor: at each step, look for a populated + /// reserved range that *covers* the current cursor frame; if one + /// exists, jump the cursor past it; if none does, the cursor frame + /// is itself non-reserved (a residue) and the query overlaps a + /// yieldable frame → return `true`. If the cursor reaches `end_idx` + /// the window was fully covered by reserved ranges → return + /// `false`. + /// + /// Each cursor jump lands strictly past the *end* of some reserved + /// range, and the reserved ranges are pairwise non-overlapping + /// (`Pmm::new` validation (iv)), so the cursor advances through at + /// most `R` distinct reserved intervals before either finding a + /// residue or exhausting the window. Cost is therefore + /// `O(populated_reserved²)` worst-case (each step scans the ≤ `R` + /// slots) and — critically — **independent of `pa_range`'s length** + /// (`R = 8` for `bsp-qemu-virt`, so ≤ 64 comparisons regardless of + /// whether the caller passes a 1-frame span or the full 128 MiB + /// extent). This replaces the former + /// `O((pa_range.len() / PAGE_SIZE) × populated_reserved)` per-frame + /// walk; the `could_yield_pa_overlapping_interval_equals_perframe` + /// host test pins that the two formulations agree on every input. 
/// - /// [adr-0027]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md - /// [unsafe-27]: https://github.com/cemililik/Tyrne/blob/main/docs/audits/unsafe-log.md + /// [adr-0027]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md + /// [unsafe-27]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md #[must_use] pub fn could_yield_pa_overlapping(&self, pa_range: core::ops::Range) -> bool { - // Empty range cannot overlap anything. + // Empty (or inverted) range cannot overlap anything. if pa_range.start >= pa_range.end { return false; } @@ -597,31 +634,60 @@ impl Pmm { } else { extent_end }; - // Frame-index bounds: any frame whose PA range - // `[f, f + PAGE_SIZE)` overlaps `[clipped_start, clipped_end)`. - // Equivalently: start_idx is the frame containing `clipped_start`; - // end_idx is one past the frame containing `clipped_end - 1`. + // Frame-index window `[start_idx, end_idx)`: every frame whose + // PA range `[f, f + PAGE_SIZE)` overlaps `[clipped_start, + // clipped_end)`. `start_idx` is the frame containing + // `clipped_start`; `end_idx` is one past the frame containing + // `clipped_end - 1`. Identical to the former per-frame loop's + // bounds — see the equivalence test. let start_idx = clipped_start .saturating_sub(extent_start) .wrapping_div(PAGE_SIZE); - let end_idx = clipped_end - .saturating_sub(extent_start) - .saturating_add(PAGE_SIZE) - .saturating_sub(1) - .wrapping_div(PAGE_SIZE); - // Walk frame PAs; return true on first non-reserved frame. 
- for idx in start_idx..end_idx { - let frame_pa = extent_start.saturating_add(idx.saturating_mul(PAGE_SIZE)); - let frame_addr = PhysAddr(frame_pa); - let in_reserved = self - .reserved_ranges - .iter() - .flatten() - .any(|r| r.contains(frame_addr)); - if !in_reserved { - return true; + let end_idx = clipped_end.saturating_sub(extent_start).div_ceil(PAGE_SIZE); + + // Interval-coverage walk in frame-index space. `cursor` is the + // first frame index in the window not yet proven reserved. + let mut cursor = start_idx; + while cursor < end_idx { + // Find a populated reserved range whose frame-index span + // `[r_start_idx, r_end_idx)` covers `cursor`. A frame is + // reserved iff its base PA falls inside the range — the same + // `contains(frame_base)` test the old per-frame loop used, + // lifted to index space (reserved ranges are page-aligned and + // fit inside the extent per `Pmm::new` validation, so each + // range maps cleanly onto a frame-index interval). + let mut covering_end: Option = None; + for range in self.reserved_ranges.iter().flatten() { + // Frame indices covered by this reserved range, clamped + // into the extent (defensive: `Pmm::new` already proved + // `range.start >= extent_start`, so the saturating_sub + // never truncates a well-formed range). + let r_start_idx = range + .start + .0 + .saturating_sub(extent_start) + .wrapping_div(PAGE_SIZE); + let r_end_idx = range + .end + .0 + .saturating_sub(extent_start) + .wrapping_div(PAGE_SIZE); + if r_start_idx <= cursor && cursor < r_end_idx { + covering_end = Some(r_end_idx); + break; + } + } + match covering_end { + // `cursor` is inside a reserved range; jump past it and + // keep scanning. The jump always strictly advances + // (`r_end_idx > cursor`), guaranteeing termination. + Some(r_end_idx) => cursor = r_end_idx, + // `cursor` is a non-reserved frame within the window — a + // residue. The query overlaps a yieldable frame. 
+ None => return true, } } + // The whole window was covered by reserved ranges. false } } @@ -646,8 +712,8 @@ impl Pmm { /// [ADR-0035 §Decision drivers][adr-0035], this is the canonical /// surface the PMM layer satisfies. /// -/// [adr-0009]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0009-mmu-trait.md -/// [adr-0035]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0035-physical-memory-manager.md +/// [adr-0009]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0009-mmu-trait.md +/// [adr-0035]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0035-physical-memory-manager.md impl FrameProvider for Pmm { fn alloc_frame(&mut self) -> Option { Pmm::alloc_frame(self) @@ -656,11 +722,21 @@ impl FrameProvider for Pmm { // ── Bitmap helpers (private) ────────────────────────────────────────────────── +// Each helper indexes `bitmap[idx / 8]`, which panics on out-of-bounds. +// The kernel crate denies `clippy::panic`, so the bound is a contract: +// every caller stays within `idx < bitmap.len() * 8` via the +// `total_frames <= N * 8` invariant `Pmm::new` enforces (C2-005). The +// `debug_assert!` in each helper documents that contract and catches a +// future mis-call in debug / Miri builds without any release cost +// (C2-008) — mirroring the BSP MMU walker's `debug_assert!(idx < +// ENTRIES_PER_TABLE)` discipline under UNSAFE-2026-0025. + /// Set bit `idx` in `bitmap`. Caller's responsibility to ensure /// `idx < bitmap.len() * 8`. 
fn set_bit(bitmap: &mut [u8], idx: usize) { let byte = idx / 8; let bit = idx % 8; + debug_assert!(byte < bitmap.len(), "set_bit: bitmap index out of range"); bitmap[byte] |= 1 << bit; } @@ -669,6 +745,7 @@ fn set_bit(bitmap: &mut [u8], idx: usize) { fn read_bit(bitmap: &[u8], idx: usize) -> bool { let byte = idx / 8; let bit = idx % 8; + debug_assert!(byte < bitmap.len(), "read_bit: bitmap index out of range"); (bitmap[byte] >> bit) & 1 == 1 } @@ -677,11 +754,20 @@ fn read_bit(bitmap: &[u8], idx: usize) -> bool { fn clear_bit(bitmap: &mut [u8], idx: usize) { let byte = idx / 8; let bit = idx % 8; + debug_assert!(byte < bitmap.len(), "clear_bit: bitmap index out of range"); bitmap[byte] &= !(1u8 << bit); } /// Return the first `0` bit in `bitmap` over the range /// `[0, frame_count)`, or `None` if every bit is `1`. +/// +/// Used by [`Pmm::new`] to compute the initial allocation hint (the +/// first frame not covered by a reserved range). It scans from index 0 +/// and is **hint-unaware** by design — `alloc_frame`'s steady-state +/// path deliberately does *not* call it, scanning inline from `hint` +/// instead so it stays amortised-O(1) (X2-004). Keep it for the +/// constructor's one-shot scan; a future caller wanting a hint-aware +/// scan must add the start index, not reuse this helper. fn first_zero_bit(bitmap: &[u8], frame_count: usize) -> Option { (0..frame_count).find(|&idx| !read_bit(bitmap, idx)) } @@ -1192,4 +1278,148 @@ mod tests { assert_eq!(stats.allocated_frames, 0); assert_eq!(stats.free_frames, 16); } + + // ── MR-010: interval-arithmetic refactor is behaviour-preserving ────────── + + /// Reference implementation of the *former* per-frame + /// `could_yield_pa_overlapping` algorithm (the one MR-010 replaced). + /// Clips `pa_range` to the extent and walks every covered frame + /// index, returning `true` on the first frame whose base PA is not + /// inside any populated reserved range. 
The equivalence test below + /// asserts the production interval-arithmetic implementation agrees + /// with this byte-for-byte across a spread of inputs. + fn could_yield_pa_overlapping_per_frame( + pmm: &Pmm, + pa_range: core::ops::Range, + ) -> bool { + const PAGE: usize = 4096; + if pa_range.start >= pa_range.end { + return false; + } + let extent_start = pmm.extent.start.0; + let extent_end = pmm.extent.end.0; + if pa_range.end <= extent_start || pa_range.start >= extent_end { + return false; + } + let clipped_start = pa_range.start.max(extent_start); + let clipped_end = pa_range.end.min(extent_end); + let start_idx = (clipped_start - extent_start) / PAGE; + let end_idx = (clipped_end - extent_start).div_ceil(PAGE); + for idx in start_idx..end_idx { + let frame_pa = extent_start + idx * PAGE; + let frame_addr = PhysAddr(frame_pa); + let in_reserved = pmm + .reserved_ranges + .iter() + .flatten() + .any(|r| r.contains(frame_addr)); + if !in_reserved { + return true; + } + } + false + } + + #[test] + fn could_yield_pa_overlapping_interval_equals_perframe() { + // MR-010 (master review 2026-05-22): the per-frame loop was + // replaced with O(R) interval arithmetic. This pins that the + // new implementation is *behaviour-preserving* — it must return + // the identical answer as the old per-frame walk for every + // query, so the security-relevant non-overlap guarantee + // task_loader rides (UNSAFE-2026-0027) is unchanged. + let (_buf, ptr) = aligned_backing(16); + let base = ptr as usize; + let page = 4096usize; + + // A spread of reserved-range layouts to exercise every branch: + // none reserved; head reserved; tail reserved; an interior + // island; two disjoint islands; and the whole extent reserved. 
+ let layouts: &[&[(usize, usize)]] = &[ + &[], + &[(0, 1)], + &[(15, 16)], + &[(4, 6)], + &[(2, 4), (8, 12)], + &[(0, 16)], + ]; + + for layout in layouts { + let pmm = pmm_over_backing(ptr, 16, layout); + + // A spread of query ranges, expressed as byte offsets from + // `base` so the cases are layout-independent: empty, + // single frame, sub-page, page-straddling, full extent, + // a range entirely below / above the extent, a range + // straddling the lower / upper extent boundary, an inverted + // range, and unaligned endpoints. + let queries: &[core::ops::Range] = &[ + // Empty range (start == end). + base..base, + // Inverted range (start > end). + (base + page)..base, + // Single first frame. + base..(base + page), + // Single last frame. + (base + 15 * page)..(base + 16 * page), + // Interior single frame. + (base + 5 * page)..(base + 6 * page), + // Sub-page span inside frame 5. + (base + 5 * page + 8)..(base + 5 * page + 16), + // Two-frame span straddling a page boundary. + (base + 3 * page + 100)..(base + 5 * page - 100), + // The whole extent. + base..(base + 16 * page), + // Wholly below the extent. + (base - 8 * page)..(base - 4 * page), + // Wholly above the extent. + (base + 20 * page)..(base + 24 * page), + // Straddling the lower extent boundary. + (base - 2 * page)..(base + 2 * page), + // Straddling the upper extent boundary. + (base + 14 * page)..(base + 18 * page), + // Unaligned endpoints inside the extent. 
+ (base + page + 1)..(base + 7 * page - 1), + ]; + + for q in queries { + let interval = pmm.could_yield_pa_overlapping(q.clone()); + let per_frame = could_yield_pa_overlapping_per_frame(&pmm, q.clone()); + assert_eq!( + interval, per_frame, + "interval-arithmetic result must equal the per-frame \ + reference for layout {layout:?} query {q:?}" + ); + } + } + } + + // ── C2-005: Pmm::new bitmap-size invariant ──────────────────────────────── + + #[test] + fn new_rejects_extent_larger_than_bitmap() { + // The `total_frames <= N * 8` guard is the single invariant that + // keeps `set_bit`/`read_bit`/`clear_bit` from indexing + // `bitmap[byte]` out of bounds. `Pmm<1, _>` holds 1 byte = 8 + // bits; a 16-frame extent needs 16 bits → must be rejected with + // OutOfRange (an init-time "BSP picked too small an N" error). + let extent = PhysFrameRange::new(PhysAddr(0x4000_0000), PhysAddr(0x4001_0000)); + assert_eq!(extent.frame_count(), 16); + let result: Result, _> = Pmm::new(extent, &[]); + assert_eq!( + result.err(), + Some(PmmError::OutOfRange), + "an extent larger than N*8 bits must be rejected" + ); + + // Exact-fit boundary: an 8-frame extent fills `Pmm<1, _>`'s 8 + // bits exactly and must succeed. + let exact = PhysFrameRange::new(PhysAddr(0x4000_0000), PhysAddr(0x4000_8000)); + assert_eq!(exact.frame_count(), 8); + let ok: Result, _> = Pmm::new(exact, &[]); + assert!( + ok.is_ok(), + "an extent of exactly N*8 frames must fit the bitmap" + ); + } } diff --git a/kernel/src/obj/arena.rs b/kernel/src/obj/arena.rs index 83549d3..99c8048 100644 --- a/kernel/src/obj/arena.rs +++ b/kernel/src/obj/arena.rs @@ -11,7 +11,7 @@ //! audited arena shape, now generic, instantiated three times in the //! per-kind kernel-object modules. //! -//! [adr-0016]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0016-kernel-object-storage.md +//! 
[adr-0016]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0016-kernel-object-storage.md /// Index into an [`Arena`]'s backing array. type Index = u16; diff --git a/kernel/src/obj/endpoint.rs b/kernel/src/obj/endpoint.rs index 7be7222..d07eda7 100644 --- a/kernel/src/obj/endpoint.rs +++ b/kernel/src/obj/endpoint.rs @@ -6,7 +6,7 @@ //! Milestone A4 populates them with real waiter lists when `send` / //! `recv` / `reply_recv` arrive. //! -//! [adr-0016]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0016-kernel-object-storage.md +//! [adr-0016]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0016-kernel-object-storage.md use super::arena::{Arena, SlotId}; use super::{ObjError, ENDPOINT_ARENA_CAPACITY}; @@ -77,9 +77,35 @@ pub fn create_endpoint( /// Free the endpoint at `handle`. /// +/// # In-flight capability hazard (C3-001 — deliberate v1 deferral) +/// +/// `destroy_endpoint` is **cap-blind**: it frees the arena slot and bumps +/// the generation but does **not** consult [`IpcQueues`][crate::ipc::IpcQueues]. +/// If an endpoint is destroyed while its queue slot holds a +/// `SendPending { cap: Some(_) }` or `RecvComplete { cap: Some(_) }`, the +/// parked move-only [`Capability`][crate::cap::Capability] is owned solely by +/// that state. On the next IPC op against a *new* endpoint allocated in the +/// same slot, `IpcQueues::reset_if_stale_generation` overwrites the state with +/// `Idle` and the parked cap is dropped on the floor — a silently leaked +/// authority. In **debug** builds the `debug_assert!` in +/// `reset_if_stale_generation` fires on exactly this case; in **release** it +/// is compiled out, so the leak is silent. 
+/// +/// This is *currently benign and intentional*, not unhandled: no production +/// code calls `destroy_endpoint` on a cap-bearing pending state, and the +/// destroy-drain primitive (which must *return* the parked cap to its origin +/// or destroy it) is deferred to the Phase B2+ endpoint-destroy ADR per +/// [ADR-0032] §Consequences. The conservative future improvement is to have +/// this (or a thin IPC-layer wrapper) take `&mut IpcQueues` and return a typed +/// `ObjError::HasPendingTransfer` when the slot is cap-bearing — converting the +/// debug-only assert into a release-safe refusal. Until that ADR lands, a +/// caller that frees a cap-bearing endpoint is the one path to the leak. +/// /// # Errors /// /// [`ObjError::InvalidHandle`] when `handle` is stale or already freed. +/// +/// [ADR-0032]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0032-endpoint-rollback-and-cancel-recv.md pub fn destroy_endpoint( arena: &mut EndpointArena, handle: EndpointHandle, diff --git a/kernel/src/obj/mod.rs b/kernel/src/obj/mod.rs index 2837fdc..cc9c7c1 100644 --- a/kernel/src/obj/mod.rs +++ b/kernel/src/obj/mod.rs @@ -18,9 +18,9 @@ //! [`Arena`][arena::Arena] is the audited pattern generalised and //! instantiated three times. //! -//! [adr-0014]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0014-capability-representation.md -//! [adr-0016]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0016-kernel-object-storage.md -//! [adr-0029]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0029-initial-userspace-image-format.md +//! [adr-0014]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0014-capability-representation.md +//! [adr-0016]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0016-kernel-object-storage.md +//! [adr-0029]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0029-initial-userspace-image-format.md //! //! ## Public surface (v1) //! @@ -57,7 +57,7 @@ //! 
[UNSAFE-2026-0027][unsafe-27]. No other `unsafe` lives in this //! subsystem. //! -//! [unsafe-27]: https://github.com/cemililik/Tyrne/blob/main/docs/audits/unsafe-log.md +//! [unsafe-27]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md pub mod arena; pub mod endpoint; diff --git a/kernel/src/obj/notification.rs b/kernel/src/obj/notification.rs index 9baa501..25e6214 100644 --- a/kernel/src/obj/notification.rs +++ b/kernel/src/obj/notification.rs @@ -6,7 +6,7 @@ //! `notify` / `wait` operations will OR bits into and read back; the //! waiter list arrives in A4. //! -//! [adr-0016]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0016-kernel-object-storage.md +//! [adr-0016]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0016-kernel-object-storage.md use super::arena::{Arena, SlotId}; use super::{ObjError, NOTIFICATION_ARENA_CAPACITY}; diff --git a/kernel/src/obj/task.rs b/kernel/src/obj/task.rs index bdf23f2..ee6ee76 100644 --- a/kernel/src/obj/task.rs +++ b/kernel/src/obj/task.rs @@ -6,7 +6,7 @@ //! state and the context-save frame arrive in Milestone A5 as layered //! additions. //! -//! [adr-0016]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0016-kernel-object-storage.md +//! [adr-0016]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0016-kernel-object-storage.md use super::arena::{Arena, SlotId}; use super::{ObjError, TASK_ARENA_CAPACITY}; @@ -20,7 +20,7 @@ use crate::mm::AddressSpaceHandle; /// [`Mmu::activate`][tyrne_hal::Mmu::activate] before the /// architectural context switch. 
/// -/// [adr-0028]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md +/// [adr-0028]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md #[derive(Debug)] pub struct Task { id: u32, diff --git a/kernel/src/obj/task_loader.rs b/kernel/src/obj/task_loader.rs index 1e99ee9..2a9d7b6 100644 --- a/kernel/src/obj/task_loader.rs +++ b/kernel/src/obj/task_loader.rs @@ -60,9 +60,9 @@ //! and B6 (first userspace "hello") per //! [phase-b §B4 §Revision-notes][phase-b-b4-rider]. //! -//! [adr-0029]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0029-initial-userspace-image-format.md -//! [t-019]: https://github.com/cemililik/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-019-task-loader.md -//! [phase-b-b4-rider]: https://github.com/cemililik/Tyrne/blob/main/docs/roadmap/phases/phase-b.md#milestone-b4--task-loader +//! [adr-0029]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0029-initial-userspace-image-format.md +//! [t-019]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-019-task-loader.md +//! [phase-b-b4-rider]: https://github.com/HodeTech/Tyrne/blob/main/docs/roadmap/phases/phase-b.md#milestone-b4--task-loader use crate::cap::{CapError, CapHandle, CapKind, CapRights, CapabilityTable}; use crate::mm::{ @@ -94,6 +94,20 @@ use tyrne_hal::{MappingFlags, Mmu, VirtAddr, PAGE_SIZE}; /// aggressively would observe this count as a lower bound. v1 has /// the single aarch64 BSP, so the count is exact in practice. /// +/// FORWARD (C4-004): this is a kernel-core module encoding both a +/// page-table-format constant (the 21/30/39 shifts) **and** a BSP +/// allocation-policy assumption (lazy allocation). 
A second BSP — Pi 4 / +/// Jetson, or a RISC-V `Sv39` port — that pre-allocates more, or uses a +/// different index decomposition, would make this an **undercount**: the +/// [`FrameBudgetExceeded`][LoadError::FrameBudgetExceeded] preflight would +/// pass, then `cap_map` would fail mid-loop with +/// [`MapFailed`][LoadError::MapFailed]`(MmuMapError(OutOfFrames))` — the +/// exact path the new MR-018 test pins — turning the preflight (whose whole +/// purpose is to make that mid-loop failure structurally unreachable) +/// ineffective. Before a second BSP / translation regime lands, move the +/// exact-budget contract to a HAL method (`Mmu::intermediate_frames_for_span`) +/// or re-derive it per format. +/// /// Returns 0 for an empty span (defensive; row 1 preflight rejects /// zero-page requests before this helper is reached). /// @@ -121,8 +135,8 @@ use tyrne_hal::{MappingFlags, Mmu, VirtAddr, PAGE_SIZE}; /// `Mmu::intermediate_frames_for_span` HAL method. v1's single /// aarch64 BSP keeps the constants inline. /// -/// [adr-0009]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0009-mmu-trait.md -/// [adr-0027]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md +/// [adr-0009]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0009-mmu-trait.md +/// [adr-0027]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md #[must_use] pub fn intermediate_frame_count( image_base_va: VirtAddr, @@ -173,7 +187,7 @@ pub fn intermediate_frame_count( /// is unusual but not architecturally forbidden); the loader does /// not impose a stylistic "no-null-page" policy in v1. 
/// -/// [adr-0027]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md +/// [adr-0027]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md pub const USERSPACE_VA_LIMIT: usize = 1usize << 48; /// Metadata describing a freshly populated address space produced by @@ -211,7 +225,7 @@ pub const USERSPACE_VA_LIMIT: usize = 1usize << 48; /// - `stack_bytes == stack_size_pages * PAGE_SIZE` (always a multiple /// of `PAGE_SIZE`). /// -/// [unsafe-26]: https://github.com/cemililik/Tyrne/blob/main/docs/audits/unsafe-log.md +/// [unsafe-26]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub struct LoadedImage { /// Cap handle for the newly-minted address space. Backed by a @@ -252,7 +266,7 @@ pub struct LoadedImage { /// — e.g. a per-section permission failure that lands with ADR-0034 /// (placeholder; B5+). /// -/// [t-019-rollback]: https://github.com/cemililik/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-019-task-loader.md#rollback-contract-explicit +/// [t-019-rollback]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-019-task-loader.md#rollback-contract-explicit #[derive(Copy, Clone, Debug, Eq, PartialEq)] #[non_exhaustive] pub enum LoadError { @@ -292,7 +306,7 @@ pub enum LoadError { /// path; non-sentinel `end` values name the offending /// saturated-add result for diagnostics. /// - /// [adr-0027]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md + /// [adr-0027]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md InvalidImageBaseVa { /// The caller-supplied `image_base_va`. base: VirtAddr, @@ -331,9 +345,13 @@ pub enum LoadError { /// `cap_create_address_space` returned `Err`. 
Covers /// `CapError::InsufficientRights` if `parent_as_cap` lacks DERIVE, - /// plus the T-018-guarded `CapsExhausted` / `DerivationTooDeep` / - /// `ArenaFull` paths. No rollback needed at this layer (T-018's - /// preflight ensures no committed state on failure). + /// `CapError::WidenedRights` if `new_rights ⊄ parent_cap.rights` + /// (the no-widening rule enforced by `cap_create_address_space` step + /// 2b — asking for rights the parent does not hold is rejected here, + /// not in [`InvalidParentCap`][LoadError::InvalidParentCap]), plus the + /// T-018-guarded `CapsExhausted` / `DerivationTooDeep` / `ArenaFull` + /// paths. No rollback needed at this layer (T-018's preflight ensures + /// no committed state on failure). AddressSpaceCreationFailed(AddressSpaceError), /// The `image` byte slice's PA range overlaps a frame @@ -351,10 +369,13 @@ pub enum LoadError { /// retained as a defensive variant so a misconfigured BSP fails /// fast with a typed error instead of UB. /// - /// [unsafe-27]: https://github.com/cemililik/Tyrne/blob/main/docs/audits/unsafe-log.md + /// [unsafe-27]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md ImageOverlapsAllocatableMemory, - /// `pmm.alloc_frame()` returned `None` mid-image-or-stack-loop. + /// `pmm.alloc_frame()` (the loader's *own* leaf/root alloc) returned + /// `None` mid-image-or-stack-loop. This is distinct from a mid-walk + /// `Mmu::map` intermediate-table failure, which surfaces as + /// [`MapFailed`][LoadError::MapFailed]`(MmuMapError(OutOfFrames))`. /// Structurally unreachable post-[`FrameBudgetExceeded`][LoadError::FrameBudgetExceeded] /// preflight under v1's single-thread cooperative model; retained /// as a defensive variant for budget-calculation bugs and future- @@ -362,7 +383,11 @@ pub enum LoadError { /// [T-019 §"Rollback contract"][t-019-rollback] (leaf frames + /// `cap_unmap` undo + `cap_drop(loaded_as_cap)`). 
/// - /// [t-019-rollback]: https://github.com/cemililik/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-019-task-loader.md#rollback-contract-explicit + /// Exercised in tests via `Pmm::force_alloc_failure_after` (the loader's + /// leaf-alloc path); the sibling mid-walk `MapFailed(OutOfFrames)` clause + /// is exercised via `tyrne_test_hal::OutOfFramesMmu`. + /// + /// [t-019-rollback]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-019-task-loader.md#rollback-contract-explicit OutOfFrames, /// `cap_map` returned `Err` mid-loop. Wraps the underlying @@ -410,7 +435,7 @@ pub enum LoadError { /// 7. Stack-page loop under `USER | WRITE` (same). /// 8. Construct and return [`LoadedImage`]. /// -/// [unsafe-27]: https://github.com/cemililik/Tyrne/blob/main/docs/audits/unsafe-log.md +/// [unsafe-27]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md /// /// # Arguments /// @@ -460,9 +485,9 @@ pub enum LoadError { /// `cap_drop` `free_slot`s the leaf directly, is rights-agnostic, and /// fails only with `HasChildren` (impossible here). 
/// -/// [adr-0029]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0029-initial-userspace-image-format.md -/// [t-019]: https://github.com/cemililik/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-019-task-loader.md -/// [t-019-rollback]: https://github.com/cemililik/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-019-task-loader.md#rollback-contract-explicit +/// [adr-0029]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0029-initial-userspace-image-format.md +/// [t-019]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-019-task-loader.md +/// [t-019-rollback]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-b/T-019-task-loader.md#rollback-contract-explicit #[allow( clippy::too_many_arguments, reason = "load_image threads the full kernel-state surface (pmm + mmu + \ @@ -658,9 +683,9 @@ pub fn load_image( // step; `Mmu::copy_into_frame`-style HAL relocation just moves // the audit point without removing it. // - // [UNSAFE-2026-0026]: https://github.com/cemililik/Tyrne/blob/main/docs/audits/unsafe-log.md - // [adr-0027]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md - // [audit]: https://github.com/cemililik/Tyrne/blob/main/docs/audits/unsafe-log.md + // [UNSAFE-2026-0026]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md + // [adr-0027]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0027-kernel-virtual-memory-layout.md + // [audit]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md unsafe { let src = chunk.as_ptr(); let dst = crate::mm::phys_frame_kernel_ptr(frame); @@ -842,7 +867,7 @@ mod tests { FrameProvider, MapperFlush, MappingFlags, Mmu, MmuError, PhysAddr, PhysFrame, VirtAddr, PAGE_SIZE, }; - use tyrne_test_hal::{FakeAddressSpace, FakeMmu}; + use tyrne_test_hal::{BlockMappedMmu, FakeAddressSpace, FakeMmu, OutOfFramesMmu}; // ── Pmm-over-backing helper (mirrors 
kernel/src/mm/pmm.rs::tests) ───────── // @@ -1526,8 +1551,81 @@ mod tests { drop(backing); // explicit lifetime extension } + #[test] + fn widened_rights_surfaces_via_address_space_creation_failed() { + // Pin C4-001/C4-002: the no-widening rule is *also* delegated to + // cap_create_address_space (step 2b) and surfaces as + // AddressSpaceCreationFailed(CapError::WidenedRights), distinct from + // the InsufficientRights (missing-DERIVE) leg. The fixture's parent + // cap holds DUPLICATE|DERIVE|REVOKE|TRANSFER but NOT SEND; requesting + // `new_rights` containing SEND is a widening request and is rejected + // here — not in InvalidParentCap. + let (mut table, parent_cap, mmu, mut arena, mut pmm, _b) = fixture(16); + let pmm_before = pmm.stats().free_frames; + + let result = load_image::( + &[0xAAu8; 2 * PAGE_SIZE], + &mut pmm, + &mmu, + &mut table, + &mut arena, + parent_cap, + // SEND is not held by the parent (DUPLICATE|DERIVE|REVOKE|TRANSFER). + CapRights::SEND, + VirtAddr(0x0080_0000), + 1, + ); + + assert!( + matches!( + result, + Err(LoadError::AddressSpaceCreationFailed( + AddressSpaceError::CapError(CapError::WidenedRights) + )) + ), + "expected AddressSpaceCreationFailed(WidenedRights), got {result:?}" + ); + // PMM byte-stable — step 2b rejects before any pre-alloc. + assert_eq!(pmm.stats().free_frames, pmm_before); + } + // ── Happy path: §Simulation rows 6 / 7 / 8 ──────────────────────────────── + #[test] + fn mints_address_space_cap_with_requested_non_empty_rights() { + // Pin C4-002's happy-path gap: every other happy-path test mints with + // `CapRights::empty()`. The realistic B5+ shape mints an AS cap that + // actually carries rights — exercise that the requested (non-empty, + // parent-held) `new_rights` survive the mint and land on the returned + // `as_cap`. 
+ let (mut table, parent_cap, mmu, mut arena, mut pmm, _b) = fixture(32); + // Subset of the parent's DUPLICATE|DERIVE|REVOKE|TRANSFER → passes the + // no-widening check; non-empty so the assertion is meaningful. + let requested = CapRights::DUPLICATE | CapRights::DERIVE; + + let loaded = load_image::( + &[0xAAu8; PAGE_SIZE], + &mut pmm, + &mmu, + &mut table, + &mut arena, + parent_cap, + requested, + VirtAddr(0x0080_0000), + 1, + ) + .expect("load_image must succeed minting a non-empty-rights AS cap"); + + let minted = table + .lookup(loaded.as_cap) + .expect("the minted AS cap must resolve in the table"); + assert_eq!( + minted.rights(), + requested, + "the minted AS cap must carry exactly the requested non-empty rights" + ); + } + #[test] fn returns_loaded_image_with_correct_metadata() { // Pin §Simulation row 8: the LoadedImage struct returned by @@ -2004,6 +2102,167 @@ mod tests { ); } + // ── MR-018: mid-walk `Mmu::map` failure clauses (OutOfFrames / BlockMapped) ── + // + // The two tests above (`rolls_back_on_pmm_exhausted_mid_image_loop` and the + // `FailingMapMmu` pair) drive the loader's *own* `pmm.alloc_frame() -> None` + // path and a fake `AlreadyMapped`. They do **not** exercise the real + // `Mmu::map` failure-split for the two variants the flat `FakeMmu` cannot + // produce: mid-walk intermediate-table `OutOfFrames` and `BlockMapped`. + // These tests use the shared `tyrne_test_hal::{OutOfFramesMmu, BlockMappedMmu}` + // decorators (which inject those variants *before* any AS mutation, + // honouring the `Mmu::map` failure-semantics contract clause 2: `pa` not + // consumed) and assert the loader's rollback frees the leaf frame and + // installs no mapping (master-review MR-018 / X4c-003). + + /// Generic fixture for any `Mmu` whose `AddressSpace` is the shared + /// [`FakeAddressSpace`] (covers `FakeMmu`, `OutOfFramesMmu`, + /// `BlockMappedMmu`). 
Mirrors [`fixture`] but takes the MMU instance so a + /// failure-injecting decorator can be supplied by the caller. + fn fixture_with_mmu>( + mmu: M, + frames: usize, + ) -> ( + CapabilityTable, + crate::cap::CapHandle, + M, + AddressSpaceArena, + TestPmm, + Vec, + ) { + let mut arena: AddressSpaceArena = AddressSpaceArena::new(); + let mut table = CapabilityTable::new(); + + // SAFETY: same (a)/(b)/(c) argument as the `fixture` helper's + // `mmu.create_address_space` call — every `M` here delegates the + // body to `FakeMmu`'s pure host code (or stores the frame without + // dereferencing it); the frame is page-aligned by construction; + // single-threaded test. See the `fixture` helper's full SAFETY + // discipline for the complete argument. + let bootstrap_inner = unsafe { mmu.create_address_space(frame(0x4000_0000)) }; + let bootstrap_handle = crate::mm::create_address_space( + &mut arena, + crate::mm::AddressSpace::wrap_bootstrap(bootstrap_inner), + ) + .unwrap(); + + let parent_cap = Capability::new( + CapRights::DUPLICATE | CapRights::DERIVE | CapRights::REVOKE | CapRights::TRANSFER, + CapObject::AddressSpace(bootstrap_handle), + ); + let parent_cap_handle = table.insert_root(parent_cap).unwrap(); + + let (backing, ptr) = aligned_backing(frames); + let pmm = pmm_over_backing(ptr, frames); + + (table, parent_cap_handle, mmu, arena, pmm, backing) + } + + #[test] + fn rolls_back_on_intermediate_out_of_frames_mid_image_loop() { + // Pin the mid-walk `Mmu::map` -> `OutOfFrames` clause (intermediate + // page-table allocation failure), distinct from the loader's own + // `pmm.alloc_frame() -> None` path. `OutOfFramesMmu::map` pulls one + // frame from its `FrameProvider` (here the loader's `pmm`) per call, + // standing in for an intermediate table; when the provider is empty it + // returns `OutOfFrames` BEFORE mutating the AS and WITHOUT consuming + // `pa`. The loader surfaces this as + // `MapFailed(MmuMapError(OutOfFrames))` and frees the leaf frame. 
+ let (mut table, parent_cap, mmu, mut arena, mut pmm, _b) = + fixture_with_mmu(OutOfFramesMmu::new(), 32); + let pmm_before = pmm.stats().free_frames; + let image_base = VirtAddr(0x0080_0000); + + // alloc sequence (single image page, single stack page): + // alloc #1: cap_create_address_space root L0 — succeeds. + // alloc #2: image-page idx 0 leaf — succeeds. + // alloc #3: intermediate table inside OutOfFramesMmu::map — + // returns None → OutOfFrames (mid-walk). + pmm.force_alloc_failure_after(2); + + let result = load_image::( + &[0xAAu8; PAGE_SIZE], + &mut pmm, + &mmu, + &mut table, + &mut arena, + parent_cap, + CapRights::empty(), + image_base, + 1, + ); + + assert_eq!( + result, + Err(LoadError::MapFailed(AddressSpaceError::MmuMapError( + MmuError::OutOfFrames + ))), + "mid-walk intermediate alloc failure must surface as MapFailed(OutOfFrames), got {result:?}" + ); + + // Rollback accounting: the failing iteration's leaf frame (alloc #2) + // is freed directly via the MapFailed clause's `pmm.free_frame(frame)` + // (clause 2: `pa` not consumed by the failed map). No mapping was + // committed before the failure, so the cap_unmap loop frees nothing. + // Only the root L0 (alloc #1) leaks per the v1 baseline. + // + // NOTE on the count: `force_alloc_failure_after` permanently fails + // alloc once tripped, but it leaves the bitmap byte-stable, so the + // freed leaf (alloc #2) is reflected in `free_frames`. Net result is + // `pmm_before - 1` (only the leaked root L0). The AS-level state was + // never mutated — the decorator failed before the inner FakeMmu insert + // — so this PMM byte-stability minus the root leak is the load-bearing + // "no mapping committed" signal, matching the sibling `rolls_back_on_*` + // tests. 
+ assert_eq!( + pmm.stats().free_frames, + pmm_before - 1, + "rollback must free the failing iteration's leaf; only the root L0 leaks in v1" + ); + } + + #[test] + fn rolls_back_on_block_mapped_mid_image_loop() { + // Pin the `Mmu::map` -> `BlockMapped` clause (the walk hits a 2 MiB + // block descriptor). `BlockMappedMmu` injects `BlockMapped` for a + // configured VA *before* any AS mutation and without consuming `pa`, + // so the loader rolls back and frees the leaf frame, surfacing + // `MapFailed(MmuMapError(BlockMapped))`. + let image_base = VirtAddr(0x0080_0000); + let (mut table, parent_cap, mmu, mut arena, mut pmm, _b) = + fixture_with_mmu(BlockMappedMmu::with_blocked([image_base]), 32); + let pmm_before = pmm.stats().free_frames; + + let result = load_image::( + &[0xAAu8; PAGE_SIZE], + &mut pmm, + &mmu, + &mut table, + &mut arena, + parent_cap, + CapRights::empty(), + image_base, + 1, + ); + + assert_eq!( + result, + Err(LoadError::MapFailed(AddressSpaceError::MmuMapError( + MmuError::BlockMapped + ))), + "blocked VA must surface as MapFailed(BlockMapped), got {result:?}" + ); + + // Rollback accounting: the failing iteration's image-page leaf frame + // is freed directly via the MapFailed clause (clause 2: `pa` not + // consumed). No mapping was committed; only the root L0 leaks. + assert_eq!( + pmm.stats().free_frames, + pmm_before - 1, + "rollback must free the failing iteration's leaf; only the root L0 leaks in v1" + ); + } + #[test] fn rejects_misaligned_image_base_va_with_pmm_byte_stable() { // Pin PR #31 review-round 4 P2: a misaligned `image_base_va` diff --git a/kernel/src/sched/mod.rs b/kernel/src/sched/mod.rs index 6ce447b..eeb52eb 100644 --- a/kernel/src/sched/mod.rs +++ b/kernel/src/sched/mod.rs @@ -38,13 +38,13 @@ //! [`docs/analysis/reviews/business-reviews/2026-05-06-B1-smoke-regression.md`] //! for the incident report and ADR-0026 for the structural fix. //! -//! 
[ADR-0022]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0022-idle-task-and-typed-scheduler-deadlock.md -//! [ADR-0026]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0026-idle-dispatch-fallback.md -//! [`docs/analysis/reviews/business-reviews/2026-05-06-B1-smoke-regression.md`]: https://github.com/cemililik/Tyrne/blob/main/docs/analysis/reviews/business-reviews/2026-05-06-B1-smoke-regression.md +//! [ADR-0022]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0022-idle-task-and-typed-scheduler-deadlock.md +//! [ADR-0026]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0026-idle-dispatch-fallback.md +//! [`docs/analysis/reviews/business-reviews/2026-05-06-B1-smoke-regression.md`]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/reviews/business-reviews/2026-05-06-B1-smoke-regression.md //! -//! [T-004]: https://github.com/cemililik/Tyrne/blob/main/docs/analysis/tasks/phase-a/T-004-cooperative-scheduler.md -//! [ADR-0019]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0019-scheduler-shape.md -//! [ADR-0020]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0020-cpu-trait-v2-context-switch.md +//! [T-004]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/tasks/phase-a/T-004-cooperative-scheduler.md +//! [ADR-0019]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0019-scheduler-shape.md +//! [ADR-0020]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0020-cpu-trait-v2-context-switch.md use tyrne_hal::{ContextSwitch, Cpu, IrqGuard}; @@ -173,6 +173,15 @@ pub enum SchedError { /// No task is currently running; the operation requires a current task. NoCurrentTask, /// The ready queue is full. + /// + /// Produced **only** by [`add_task`] at registration time, where the + /// enqueue runs before dispatch and is not invariant-guaranteed (a BSP + /// can register more tasks than fit). 
The bridge entry points + /// ([`yield_now`] / [`ipc_send_and_yield`] / [`ipc_recv_and_yield`]) + /// never surface this: their re-enqueues are infallible by the + /// no-double-enqueue invariant and route through + /// [`Scheduler::enqueue_ready`], which `panic!`s on the impossible full + /// case rather than returning a typed error. QueueFull, /// IPC operation failed. Ipc(IpcError), @@ -217,9 +226,9 @@ pub enum SchedError { /// land, this symmetric rollback keeps the recovery path /// well-defined. /// - /// [ADR-0022]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0022-idle-task-and-typed-scheduler-deadlock.md - /// [ADR-0026]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0026-idle-dispatch-fallback.md - /// [ADR-0032]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0032-endpoint-rollback-and-cancel-recv.md + /// [ADR-0022]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0022-idle-task-and-typed-scheduler-deadlock.md + /// [ADR-0026]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0026-idle-dispatch-fallback.md + /// [ADR-0032]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0032-endpoint-rollback-and-cancel-recv.md Deadlock, } @@ -255,7 +264,7 @@ pub struct Scheduler { /// every registered task so B5+ multi-AS tasks slot in /// additively. /// - /// [ADR-0028 §Simulation row 3]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md#simulation + /// [ADR-0028 §Simulation row 3]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md#simulation /// [`Mmu::activate`]: tyrne_hal::Mmu::activate task_address_space_handles: [Option; TASK_ARENA_CAPACITY], current: Option, @@ -270,13 +279,23 @@ pub struct Scheduler { /// dispatching idle is mechanically identical to dispatching any /// other task once the handle is selected. 
/// - /// [ADR-0026]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0026-idle-dispatch-fallback.md + /// [ADR-0026]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0026-idle-dispatch-fallback.md idle: Option, /// Saved register contexts, one per task arena slot. /// /// Invariant: `contexts[i]` is valid for every slot `i` that has - /// `task_states[i] != Idle` — either zero-initialised by `Default` and - /// then filled by `init_context`, or saved by a prior `context_switch`. + /// `task_states[i] != Idle` — either default-initialised by + /// `C::TaskContext::default()` and then filled by `init_context`, or + /// saved by a prior `context_switch`. + /// + /// Note: [`ContextSwitch::TaskContext`] is bound only by `Default + Send`; + /// `Default` is not contractually all-zero. The QEMU BSP's + /// `Aarch64TaskContext` derives an all-zero `Default` *and its + /// `init_context` relies on that* (it writes only `lr`/`sp`, leaving the + /// other callee-saved slots zero), so a second BSP whose + /// `TaskContext::default()` left garbage in those slots would have its + /// first restore load that garbage. That coupling lives on the BSP side + /// (X4c-013); the scheduler only requires "valid after `init_context`". contexts: [C::TaskContext; TASK_ARENA_CAPACITY], } @@ -287,7 +306,9 @@ impl Default for Scheduler { } impl Scheduler { - /// Construct an empty scheduler with all contexts zero-initialised. + /// Construct an empty scheduler with all contexts default-initialised + /// (via `C::TaskContext::default()`; see the `contexts` field doc for why + /// "default" is stated rather than "zero"). #[must_use] pub fn new() -> Self { Self { @@ -359,6 +380,45 @@ impl Scheduler { } } + /// Enqueue `handle` onto the ready queue, treating a full queue as an + /// unreachable invariant violation rather than a recoverable error. 
+ /// + /// This centralises the "infallible enqueue" pattern shared by the two + /// bridge paths that re-enqueue a task they have just removed from the + /// running slot ([`unblock_receiver_on`] when waking a receiver, and + /// [`yield_now`] when re-enqueueing the yielding task). In both cases the + /// load-bearing invariant is the same: the ready-queue capacity equals + /// [`TASK_ARENA_CAPACITY`], and the task being enqueued is **not already + /// in the ready queue** (it was the running task, or it was `Blocked`), so + /// at most `TASK_ARENA_CAPACITY - 1` other tasks are queued and a free slot + /// always exists. A full queue here is therefore a kernel-programming-error, + /// handled by `panic!` exactly as the two original call sites did — this is + /// a behaviour-preserving centralisation, not a semantic change. + /// + /// Distinct from [`add_task`]'s enqueue, which runs **before** dispatch and + /// is *not* invariant-guaranteed (a BSP can register more tasks than fit); + /// that path maps the failure to a typed [`SchedError::QueueFull`] and is + /// deliberately left as-is. + /// + /// FORWARD: a future preemption / multi-waiter / SMP change can violate the + /// no-double-enqueue invariant this panic rests on (e.g. re-enqueueing a + /// preempted task that is still queued, or waking several waiters at once). + /// Such a change must revisit this helper and the [`unblock_receiver_on`] + /// O(N) scan together — and should prefer an endpoint-indexed waiter list + /// over widening the linear scan. 
+ #[allow( + clippy::panic, + reason = "ready-queue capacity equals task-arena capacity; the enqueued \ + task is not already in the ready queue, so at least one free \ + slot always exists — a full queue here is a kernel-programming \ + error, not a recoverable condition (see fn doc)" + )] + fn enqueue_ready(&mut self, handle: TaskHandle) { + let Ok(()) = self.ready.enqueue(handle) else { + panic!("scheduler invariant: ready queue full on infallible enqueue"); + }; + } + /// Scan `task_states` for a task blocked on `ep` and re-enqueue it. /// /// **Single-waiter semantics.** Only the first blocked task found is @@ -366,22 +426,29 @@ impl Scheduler { /// most one task waits per endpoint at a time (ADR-0019), so this is /// correct. Multi-waiter wake-up is deferred to a future ADR. /// - /// O(N) scan — acceptable at `TASK_ARENA_CAPACITY ≤ 16` (ADR-0019). + /// O(N) scan over all [`TASK_ARENA_CAPACITY`] slots — bounded and + /// acceptable at `N ≤ 16` (ADR-0019). It runs on the IPC send hot path + /// ([`ipc_send_and_yield`] → `unblock_receiver_on`), so it is the + /// scheduler's per-delivered-message cost. FORWARD: when + /// `TASK_ARENA_CAPACITY` grows or multi-waiter endpoints land, prefer an + /// endpoint-indexed waiter list over widening this scan (see + /// [`enqueue_ready`]'s forward note, which the same future change must + /// revisit in lockstep). fn unblock_receiver_on(&mut self, ep: EndpointHandle) { for idx in 0..TASK_ARENA_CAPACITY { if let TaskState::Blocked { on } = self.task_states[idx] { if on == ep { + // A `Blocked` slot is always occupied, so `task_handles[idx]` + // is `Some` by the add_task / register_idle write-together + // invariant. The `if let` is defensive; assert the invariant + // loudly in debug to match this file's loud-on-violation style. 
+ debug_assert!( + self.task_handles[idx].is_some(), + "scheduler invariant: a Blocked slot must have a stored TaskHandle" + ); if let Some(handle) = self.task_handles[idx] { self.task_states[idx] = TaskState::Ready; - #[allow( - clippy::panic, - reason = "ready-queue capacity equals task-arena capacity; \ - the running task is not enqueued, so at least one \ - free slot always exists when unblocking a receiver" - )] - let Ok(()) = self.ready.enqueue(handle) else { - panic!("scheduler invariant: ready queue full on unblock"); - }; + self.enqueue_ready(handle); return; } } @@ -469,8 +536,8 @@ impl Scheduler { // shared rationale for the "why not safer Rust" half of its justification, // alongside the block-local invariants it states inline. // -// [ADR-0021]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0021-raw-pointer-scheduler-ipc-bridge.md -// [UNSAFE-2026-0012]: https://github.com/cemililik/Tyrne/blob/main/docs/audits/unsafe-log.md +// [ADR-0021]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0021-raw-pointer-scheduler-ipc-bridge.md +// [UNSAFE-2026-0012]: https://github.com/HodeTech/Tyrne/blob/main/docs/audits/unsafe-log.md /// Register the BSP-owned idle task in the dispatcher's fallback slot. /// @@ -512,7 +579,7 @@ impl Scheduler { /// 16-byte aligned, at least 512 bytes of backing memory, valid for the /// idle task's entire lifetime. /// -/// [ADR-0026]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0026-idle-dispatch-fallback.md +/// [ADR-0026]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0026-idle-dispatch-fallback.md pub unsafe fn register_idle( sched: *mut Scheduler, cpu: &C, @@ -638,6 +705,27 @@ unsafe fn start_prelude(sched: *mut Scheduler) -> usi /// `cpu.restore_irq_state(IrqState(0))` explicitly. Revisited when Phase B /// introduces a timer or other interrupt source. 
/// +/// Note (WFI vs mask): the idle task's body parks the CPU with +/// `Cpu::wait_for_interrupt` (WFI) even though it boots masked. WFI exits on +/// a *pending* IRQ regardless of the DAIF mask — the mask only gates whether +/// the *handler* runs, not whether WFI wakes — and idle is reached via a +/// `context_switch` whose own `IrqGuard` *does* drop on return, restoring the +/// prior IRQ state. So a masked-on-entry idle still wakes correctly. +/// +/// **`IrqState(0)` convention (DAIF polarity).** The synthesised +/// `IrqState(0)` token means **IRQs ENABLED**. This is the canonical +/// project convention — the BSP `QemuVirtCpu` stores the raw DAIF mask in +/// `IrqState.0` (a *set* DAIF bit means *masked*, so all-zero means +/// unmasked / enabled), and `tyrne_test_hal::FakeCpu` was reconciled to the +/// same polarity (master-review MR-017 / X4c-002). Both `Cpu` implementors +/// therefore agree on `IrqState(0)`, so the synthesised token is no longer +/// ambiguous: a freshly-dispatched task that calls +/// `cpu.restore_irq_state(IrqState(0))` ends up with IRQs enabled under the +/// BSP and under the host test fake alike. (Synthesising other `IrqState` +/// literals remains out-of-contract: the value is otherwise opaque per +/// [`Cpu`]'s trait doc; only `0`-means-enabled is a guaranteed cross-impl +/// invariant.) +/// /// # Panics /// /// Panics if no task **and** no idle has been registered — see @@ -693,7 +781,7 @@ pub unsafe fn start( // per-task AS without the kernel mapping would translation-fault // any IRQ taken in this window. 
// - // [adr-0028]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md#simulation + // [adr-0028]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0028-address-space-data-structure.md#simulation if let Some(target) = first_as { activate_address_space(target); } else { @@ -777,16 +865,11 @@ pub unsafe fn yield_now( // running, it returns to the fallback slot via `s.idle`, not // through `ready.enqueue`. Cannot be full: the running task was // not in the ready queue (it was dequeued when it started running), - // so at most TASK_ARENA_CAPACITY-1 other tasks are queued. + // so at most TASK_ARENA_CAPACITY-1 other tasks are queued — the + // exact invariant `enqueue_ready` encapsulates (shared with + // `unblock_receiver_on`; see its doc for the panic rationale). if !current_is_idle { - #[allow( - clippy::panic, - reason = "the running task is not in the ready queue, so at most \ - TASK_ARENA_CAPACITY-1 tasks are enqueued; enqueue cannot fail" - )] - let Ok(()) = s.ready.enqueue(current_handle) else { - panic!("scheduler invariant: ready queue full on yield re-enqueue"); - }; + s.enqueue_ready(current_handle); } // Pick next: head of ready queue, or fall back to idle when empty. @@ -931,9 +1014,14 @@ pub unsafe fn ipc_send_and_yield( ) -> Result { // Pre-switch work — momentary &muts, dropped before the switch. // SAFETY: caller contract — all four pointers are valid, distinct, and - // exclusively-owned for the duration of this inner block. Each `&mut` - // materialised in the tuple below lives only inside this block and is - // dropped before the `yield_now` call site. Rejected alternatives: see + // exclusively-owned for the duration of this inner block. 
Distinctness is + // load-bearing here: this block is the one site that holds *four* live + // `&mut` referents at once (`s`/`arena_ref`/`queues_ref`/`table_ref`), so + // soundness requires `sched`, `ep_arena`, `queues`, and `caller_table` to + // point at four DISJOINT objects (the Shared safety contract's "must not + // alias each other" clause) — otherwise two of these `&mut`s would alias. + // Each `&mut` materialised in the tuple below lives only inside this block + // and is dropped before the `yield_now` call site. Rejected alternatives: see // §Rejected safer alternatives in the Shared safety contract above — // `&mut` parameter receivers would pin the borrow across the switch // (reproducing UNSAFE-2026-0012); ADR-0021 §Decision outcome enumerates @@ -1015,8 +1103,8 @@ pub unsafe fn ipc_send_and_yield( /// *Pointer validity*. The four pointers must not alias each other or any /// live `&mut` in the caller's scope. /// -/// [ADR-0022]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0022-idle-task-and-typed-scheduler-deadlock.md -/// [ADR-0032]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0032-endpoint-rollback-and-cancel-recv.md +/// [ADR-0022]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0022-idle-task-and-typed-scheduler-deadlock.md +/// [ADR-0032]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0032-endpoint-rollback-and-cancel-recv.md #[allow( clippy::too_many_arguments, reason = "IPC bridge must forward all parameters that ipc_recv requires \ @@ -1084,7 +1172,7 @@ pub unsafe fn ipc_recv_and_yield( // any instant. With idle registered (the v1 expected configuration), // Deadlock is structurally unreachable. 
// - // [ADR-0032]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0032-endpoint-rollback-and-cancel-recv.md + // [ADR-0032]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0032-endpoint-rollback-and-cancel-recv.md let dispatch = { // SAFETY: caller contract — `sched` valid and exclusive for this // block; `&mut` does not cross the switch below. Rejected @@ -1246,51 +1334,96 @@ mod tests { use crate::mm::BOOTSTRAP_ADDRESS_SPACE_HANDLE; use crate::obj::arena::SlotId; use crate::obj::endpoint::EndpointHandle; + use tyrne_test_hal::{FakeContextSwitch, FakeTaskContext}; // ── FakeCpu ─────────────────────────────────────────────────────────────── + // + // The scheduler's `C: ContextSwitch + Cpu` bound requires a single type + // implementing both traits. `tyrne_test_hal` deliberately splits these into + // `FakeCpu` (Cpu) and `FakeContextSwitch` (ContextSwitch) — matching the + // production ADR-0020 split — so this module composes them into one type + // and delegates each trait to the matching shared fake. This replaces the + // former hand-rolled inline `FakeCpu`/`FakeCtx` doubles that had drifted + // from the shared ones (master-review X4c-005 "no shared ContextSwitch + // fake" + X4c-002 "IrqState polarity drift"): the IRQ-mask state is now the + // shared, DAIF-polarity `tyrne_test_hal::FakeCpu` (`IrqState(0)` = enabled), + // so scheduler tests can observe IRQ-mask transitions across a switch, and + // the context-switch counter is the shared `FakeContextSwitch`. Both inner + // fakes use `std::sync::Mutex` for their interior state, so `FakeCpu` is + // `Send + Sync` automatically — no test-only `unsafe impl Send/Sync` is + // needed (closing X3-004 for this type). + + /// Task context modelled by the shared fakes. Carries a `switched` flag + /// (set by `FakeContextSwitch::context_switch`) plus the recorded + /// init-context arguments; the scheduler stores one per slot in + /// `Scheduler::contexts`. 
+ type FakeCtx = FakeTaskContext; + + /// Composite test `Cpu` + `ContextSwitch`, delegating to the shared + /// `tyrne_test_hal` fakes (see the module-section comment above). + #[derive(Default)] + struct FakeCpu { + cpu: tyrne_test_hal::FakeCpu, + cs: FakeContextSwitch, + } + + impl FakeCpu { + fn new() -> Self { + Self::default() + } - struct FakeCpu; + /// Whether IRQs are currently enabled on the underlying shared fake. + /// Lets a test assert IRQ-mask transitions across a `context_switch`. + fn irqs_enabled(&self) -> bool { + self.cpu.irqs_enabled() + } - #[derive(Default, Debug, PartialEq)] - struct FakeCtx { - switched: bool, + /// Number of `context_switch` calls recorded by the shared fake. + fn switch_count(&self) -> u64 { + self.cs.switch_count() + } } - // SAFETY: FakeCpu is a zero-size marker with no interior mutability - // and no shared mutable state. Send + Sync are safe. - unsafe impl Send for FakeCpu {} - // SAFETY: same reasoning as Send impl above. - unsafe impl Sync for FakeCpu {} - impl Cpu for FakeCpu { fn current_core_id(&self) -> tyrne_hal::CoreId { - 0 + self.cpu.current_core_id() } fn disable_irqs(&self) -> tyrne_hal::IrqState { - tyrne_hal::IrqState(0) + self.cpu.disable_irqs() + } + fn restore_irq_state(&self, state: tyrne_hal::IrqState) { + self.cpu.restore_irq_state(state); + } + fn wait_for_interrupt(&self) { + self.cpu.wait_for_interrupt(); + } + fn instruction_barrier(&self) { + self.cpu.instruction_barrier(); } - fn restore_irq_state(&self, _: tyrne_hal::IrqState) {} - fn wait_for_interrupt(&self) {} - fn instruction_barrier(&self) {} } impl ContextSwitch for FakeCpu { type TaskContext = FakeCtx; - unsafe fn context_switch( - &self, - current: &mut Self::TaskContext, - _next: &Self::TaskContext, - ) { - current.switched = true; + unsafe fn context_switch(&self, current: &mut Self::TaskContext, next: &Self::TaskContext) { + // SAFETY: delegates to the shared `FakeContextSwitch`, which + // performs no real register save/restore — 
it only records. The + // trait contract is inherited; the fake never dereferences a real + // context. (Test-only; exempt from audit-log entries per + // unsafe-policy §3 / X3-003.) + unsafe { self.cs.context_switch(current, next) }; } unsafe fn init_context( &self, - _ctx: &mut Self::TaskContext, - _entry: fn() -> !, - _stack_top: *mut u8, + ctx: &mut Self::TaskContext, + entry: fn() -> !, + stack_top: *mut u8, ) { + // SAFETY: delegates to the shared `FakeContextSwitch::init_context`, + // which records the requested entry/stack and never dereferences + // `stack_top` or calls `entry`. (Test-only; see above.) + unsafe { self.cs.init_context(ctx, entry, stack_top) }; } } @@ -1378,7 +1511,7 @@ mod tests { #[test] fn add_task_sets_ready_state_and_stores_handle() { - let cpu = FakeCpu; + let cpu = FakeCpu::new(); let mut sched: Scheduler = Scheduler::new(); let h = task_handle(0); let mut stack = AlignedStack::<512>::new(); @@ -1402,7 +1535,7 @@ mod tests { #[test] fn yield_now_switches_context_and_updates_current() { - let cpu = FakeCpu; + let cpu = FakeCpu::new(); let mut sched: Scheduler = Scheduler::new(); let h0 = task_handle(0); let h1 = task_handle(1); @@ -1451,9 +1584,73 @@ mod tests { assert!(sched.contexts[0].switched); } + /// `yield_now` masks IRQs across the `context_switch` (via the `IrqGuard` + /// it constructs in the switch window) and restores them on return. This + /// is only observable now that the scheduler tests use the shared + /// DAIF-polarity `tyrne_test_hal::FakeCpu` (master-review MR-017): the old + /// inline `FakeCpu` made `disable_irqs`/`restore_irq_state` no-ops, so the + /// bridge's critical-section discipline could not be asserted at the host + /// level. The bridge's whole soundness rests on IRQs actually being masked + /// across the switch (ADR-0021), so pinning the round-trip here closes the + /// "no host fake verifies IRQ state across the switch" gap (X4c-002). 
+ #[test] + fn yield_now_masks_irqs_across_switch_and_restores_on_return() { + let cpu = FakeCpu::new(); + let mut sched: Scheduler = Scheduler::new(); + let h0 = task_handle(0); + let h1 = task_handle(1); + let mut s0 = AlignedStack::<512>::new(); + let mut s1 = AlignedStack::<512>::new(); + // SAFETY: 16-byte aligned 512-byte stacks; init_context only records. + unsafe { + sched + .add_task( + &cpu, + h0, + BOOTSTRAP_ADDRESS_SPACE_HANDLE, + spin_entry(), + s0.top(), + ) + .unwrap(); + sched + .add_task( + &cpu, + h1, + BOOTSTRAP_ADDRESS_SPACE_HANDLE, + spin_entry(), + s1.top(), + ) + .unwrap(); + } + sched.ready.dequeue(); // h0 starts running + sched.current = Some(h0); + + // Pre-condition: the shared fake boots with IRQs enabled. + assert!(cpu.irqs_enabled(), "FakeCpu starts with IRQs enabled"); + assert_eq!(cpu.switch_count(), 0); + + // SAFETY: stack-local `sched`; single-threaded test, no aliasing. The + // shared `FakeContextSwitch` only records, so the switch never alters + // host control flow. + unsafe { + yield_now(core::ptr::from_mut(&mut sched), &cpu, |_| {}).unwrap(); + } + + // A switch happened, and the `IrqGuard` that masked IRQs across it has + // dropped — IRQs are enabled again. With DAIF polarity, the guard's + // `disable_irqs` returns `IrqState(0)` (was-enabled) and its `Drop` + // restores `IrqState(0)` → re-enabled. Under the old no-op inline fake + // both halves were vacuous; here the round-trip is real and asserted. + assert_eq!(cpu.switch_count(), 1, "exactly one context_switch occurred"); + assert!( + cpu.irqs_enabled(), + "IrqGuard restored IRQs to enabled after the switch window" + ); + } + #[test] fn yield_now_with_no_current_returns_error() { - let cpu = FakeCpu; + let cpu = FakeCpu::new(); let mut sched: Scheduler = Scheduler::new(); // SAFETY: same reasoning as the test above — `sched` is stack-local, // single-threaded test; no aliasing. 
@@ -1468,7 +1665,7 @@ mod tests { // Pin ADR-0028 §Simulation row 3's same-AS short-circuit: when // current and next tasks have the same `AddressSpaceHandle`, the // activation closure must not fire. - let cpu = FakeCpu; + let cpu = FakeCpu::new(); let mut sched: Scheduler = Scheduler::new(); let h0 = task_handle(0); @@ -1522,7 +1719,7 @@ mod tests { // Pin ADR-0028 §Simulation row 3's switch-AS path: when current // and next tasks have distinct `AddressSpaceHandle`s, the // activation closure fires with the next task's handle. - let cpu = FakeCpu; + let cpu = FakeCpu::new(); let mut sched: Scheduler = Scheduler::new(); let h0 = task_handle(0); @@ -1627,7 +1824,7 @@ mod tests { task: TaskHandle, stack: &mut AlignedStack<512>, ) -> CapHandle { - let cpu = FakeCpu; + let cpu = FakeCpu::new(); // SAFETY: 16-byte aligned, 512-byte stack; FakeCpu::init_context is // a no-op — stack is never actually used. unsafe { @@ -1656,7 +1853,7 @@ mod tests { // T-007 / ADR-0022: without an idle task, blocking the sole ready // task on IPC must return Err(SchedError::Deadlock) — not panic — // and the scheduler state must be restored to its pre-call shape. - let cpu = FakeCpu; + let cpu = FakeCpu::new(); let mut sched: Scheduler = Scheduler::new(); let mut ep_arena = EndpointArena::default(); let mut queues = IpcQueues::new(); @@ -1723,7 +1920,7 @@ mod tests { /// ADR-0032's Simulation table row 3b. #[test] fn ipc_recv_and_yield_deadlock_rolls_back_endpoint_state() { - let cpu = FakeCpu; + let cpu = FakeCpu::new(); let mut sched: Scheduler = Scheduler::new(); let mut ep_arena = EndpointArena::default(); let mut queues = IpcQueues::new(); @@ -1777,7 +1974,7 @@ mod tests { /// with no rollback path. 
#[test] fn ipc_recv_and_yield_with_no_current_task_leaves_endpoint_idle() { - let cpu = FakeCpu; + let cpu = FakeCpu::new(); let mut sched: Scheduler = Scheduler::new(); let mut ep_arena = EndpointArena::default(); let mut queues = IpcQueues::new(); @@ -1830,7 +2027,7 @@ mod tests { /// the self-dispatch and falls back to the Deadlock rollback. #[test] fn ipc_recv_and_yield_with_idle_as_current_returns_deadlock() { - let cpu = FakeCpu; + let cpu = FakeCpu::new(); let mut sched: Scheduler = Scheduler::new(); let mut ep_arena = EndpointArena::default(); let mut queues = IpcQueues::new(); @@ -1899,29 +2096,54 @@ mod tests { ); } - /// `FakeCpu` variant that resets the `IpcQueues` state to `Idle` during - /// `context_switch`, simulating the pathological "resumed without a - /// delivery" scenario that `SchedError::Ipc(IpcError::PendingAfterResume)` - /// is designed to catch. + /// `Cpu` + `ContextSwitch` test double that resets the `IpcQueues` state + /// to `Idle` during `context_switch`, simulating the pathological + /// "resumed without a delivery" scenario that + /// `SchedError::Ipc(IpcError::PendingAfterResume)` is designed to catch. + /// + /// Like [`FakeCpu`], the [`Cpu`] surface delegates to the shared + /// `tyrne_test_hal::FakeCpu` (DAIF-polarity IRQ state); only the + /// `context_switch` body is bespoke (it must zero the queues, which the + /// shared `FakeContextSwitch` cannot do). struct ResetQueuesCpu { + cpu: tyrne_test_hal::FakeCpu, queues: *mut IpcQueues, } - // SAFETY: test-only; the pointer refers to a stack-local IpcQueues the - // test thread exclusively owns. No cross-thread sharing. + // SAFETY: test-only; the raw `*mut IpcQueues` refers to a stack-local + // `IpcQueues` the single test thread exclusively owns, so transferring + // `ResetQueuesCpu` is safe. The raw pointer is the only non-`Send`/`Sync` + // field — `tyrne_test_hal::FakeCpu` is already `Send + Sync` via its + // `Mutex`. 
(Test-only `unsafe impl`: per the unsafe-policy §3 test-only + // exemption it needs a `// SAFETY:` but no `Audit:` tag — X3-003 / X3-004.) unsafe impl Send for ResetQueuesCpu {} - // SAFETY: same reasoning as Send. + // SAFETY: same reasoning as the Send impl above. unsafe impl Sync for ResetQueuesCpu {} + impl ResetQueuesCpu { + fn new(queues: *mut IpcQueues) -> Self { + Self { + cpu: tyrne_test_hal::FakeCpu::new(), + queues, + } + } + } + impl Cpu for ResetQueuesCpu { fn current_core_id(&self) -> tyrne_hal::CoreId { - 0 + self.cpu.current_core_id() } fn disable_irqs(&self) -> tyrne_hal::IrqState { - tyrne_hal::IrqState(0) + self.cpu.disable_irqs() + } + fn restore_irq_state(&self, state: tyrne_hal::IrqState) { + self.cpu.restore_irq_state(state); + } + fn wait_for_interrupt(&self) { + self.cpu.wait_for_interrupt(); + } + fn instruction_barrier(&self) { + self.cpu.instruction_barrier(); } - fn restore_irq_state(&self, _: tyrne_hal::IrqState) {} - fn wait_for_interrupt(&self) {} - fn instruction_barrier(&self) {} } impl ContextSwitch for ResetQueuesCpu { @@ -1936,7 +2158,8 @@ mod tests { // ipc_recv observes Pending (RecvWaiting would yield QueueFull // instead, which is covered by the existing IPC tests). // SAFETY: `queues` is valid per the test's construction and - // not concurrently accessed. + // not concurrently accessed. (Test-only; exempt from audit-log + // entries per unsafe-policy §3 / X3-003.) unsafe { let q = &mut *self.queues; *q = IpcQueues::new(); @@ -1995,7 +2218,7 @@ mod tests { let queues_ptr = core::ptr::from_mut(&mut queues); let table_ptr = core::ptr::from_mut(&mut table); - let cpu = ResetQueuesCpu { queues: queues_ptr }; + let cpu = ResetQueuesCpu::new(queues_ptr); // SAFETY: 16-byte aligned 512-byte stacks; init_context is a no-op. 
// add_task uses `&mut self` on Scheduler, which temporarily // reborrows through `sched_ptr` — fine because the reborrow @@ -2054,7 +2277,7 @@ mod tests { #[test] fn start_prelude_dispatches_head_and_marks_ready() { - let cpu = FakeCpu; + let cpu = FakeCpu::new(); let mut sched: Scheduler = Scheduler::new(); let h0 = task_handle(0); let h1 = task_handle(1); @@ -2120,11 +2343,11 @@ mod tests { // Option A and verifies the post-fix dispatcher routes correctly. // Refs: ADR-0026, T-014, [B1 smoke regression mini-retro]. // - // [B1 smoke regression mini-retro]: https://github.com/cemililik/Tyrne/blob/main/docs/analysis/reviews/business-reviews/2026-05-06-B1-smoke-regression.md + // [B1 smoke regression mini-retro]: https://github.com/HodeTech/Tyrne/blob/main/docs/analysis/reviews/business-reviews/2026-05-06-B1-smoke-regression.md #[test] fn register_idle_stores_handle_in_idle_slot_and_not_in_ready_queue() { - let cpu = FakeCpu; + let cpu = FakeCpu::new(); let mut sched: Scheduler = Scheduler::new(); let h_idle = task_handle(7); let mut s_idle = AlignedStack::<512>::new(); @@ -2166,7 +2389,7 @@ mod tests { // registered. start_prelude must select A first; after A blocks, // the dispatcher (via ipc_recv_and_yield's fallback chain) must // select idle. - let cpu = FakeCpu; + let cpu = FakeCpu::new(); let mut sched: Scheduler = Scheduler::new(); let mut ep_arena = EndpointArena::default(); let mut queues = IpcQueues::new(); @@ -2256,7 +2479,7 @@ mod tests { // the ready queue), so step 5 selected idle and the kernel hung. // ADR-0026's fix removes idle from the FIFO entirely, so even with // idle registered, step 5 picks the just-unblocked B. - let cpu = FakeCpu; + let cpu = FakeCpu::new(); let mut sched: Scheduler = Scheduler::new(); let mut ep_arena = EndpointArena::default(); let mut queues = IpcQueues::new(); @@ -2413,7 +2636,7 @@ mod tests { // Setup: h0 (sender, current), h1 (receiver, Blocked on ep). 
// The endpoint's IPC queue is in RecvWaiting because h1 already // ran ipc_recv before being context-switched out. - let cpu = FakeCpu; + let cpu = FakeCpu::new(); let mut sched: Scheduler = Scheduler::new(); let mut ep_arena = EndpointArena::default(); let mut queues = IpcQueues::new(); @@ -2507,7 +2730,7 @@ mod tests { // Setup: h0 alone, no receiver. ipc_send returns Enqueued; the // bridge must NOT yield (needs_yield = false), so the scheduler // is structurally unchanged after the call. - let cpu = FakeCpu; + let cpu = FakeCpu::new(); let mut sched: Scheduler = Scheduler::new(); let mut ep_arena = EndpointArena::default(); let mut queues = IpcQueues::new(); @@ -2571,7 +2794,7 @@ mod tests { // this typed error and leave the scheduler exactly as it was. // Symmetric to T-007's `ipc_recv_and_yield_returns_deadlock_…` // state-restore guarantee. - let cpu = FakeCpu; + let cpu = FakeCpu::new(); let mut sched: Scheduler = Scheduler::new(); let mut ep_arena = EndpointArena::default(); let mut queues = IpcQueues::new(); diff --git a/test-hal/src/context_switch.rs b/test-hal/src/context_switch.rs new file mode 100644 index 0000000..adf0f4f --- /dev/null +++ b/test-hal/src/context_switch.rs @@ -0,0 +1,241 @@ +//! Deterministic fake [`tyrne_hal::ContextSwitch`] for host-side tests. +//! +//! A real context switch saves/restores CPU registers and swaps stacks, +//! which a host process cannot perform meaningfully. `FakeContextSwitch` +//! instead **records** that a switch (or an `init_context`) happened, so +//! scheduler unit tests can assert the scheduler invoked the switch the +//! expected number of times and seeded each new task's context exactly +//! once — without actually changing the host's control flow. +//! +//! Pair with [`crate::FakeCpu`] when a test needs both the [`Cpu`] surface +//! (IRQ-mask save/restore with DAIF polarity) and the [`ContextSwitch`] +//! surface (e.g. asserting that interrupts are masked across a switch): +//! 
a single test type can hold one of each, or a test can construct both +//! and drive them in concert. +//! +//! [`Cpu`]: tyrne_hal::Cpu +//! [`ContextSwitch`]: tyrne_hal::ContextSwitch + +use std::sync::Mutex; +use tyrne_hal::ContextSwitch; + +/// Saved register state for one cooperative task, as modelled by +/// [`FakeContextSwitch`]. +/// +/// Carries no real registers. `switched` flips to `true` the first time +/// this context is passed as the `current` argument of +/// [`ContextSwitch::context_switch`] (i.e. its owning task was suspended); +/// `initialized` flips to `true` when [`ContextSwitch::init_context`] +/// seeds it. Tests can assert on both. The `entry_addr` / `stack_top` +/// fields record the last `init_context` arguments so a test can confirm +/// the scheduler seeded the intended entry point and stack. +#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub struct FakeTaskContext { + /// Set when this context was saved as `current` by `context_switch`. + pub switched: bool, + /// Set when this context was seeded by `init_context`. + pub initialized: bool, + /// Entry-point address from the last `init_context` call (as `usize`). + pub entry_addr: usize, + /// Stack-top pointer from the last `init_context` call (as `usize`). + pub stack_top: usize, +} + +/// A [`ContextSwitch`] that records switch / init call counts for test +/// assertions instead of performing a real register save/restore. +/// +/// # Example +/// +/// ``` +/// use tyrne_test_hal::{FakeContextSwitch, FakeTaskContext}; +/// use tyrne_hal::ContextSwitch; +/// +/// fn never_returns() -> ! 
{ +/// panic!("the fake never calls task entry points") +/// } +/// +/// let cs = FakeContextSwitch::new(); +/// let mut a = FakeTaskContext::default(); +/// let mut stack = [0u8; 512]; +/// let top = stack.as_mut_ptr().wrapping_add(stack.len()); +/// +/// // SAFETY: +/// // (a) `ContextSwitch::init_context` is `unsafe` — for a real CPU it would +/// // install `top` as the stack pointer and `never_returns` as the entry. +/// // (b) `top` is one-past a live 512-byte stack and `never_returns` diverges; +/// // `FakeContextSwitch` only records the arguments, dereferencing neither. +/// // (c) No safe shim exists: the trait method is `unsafe` by contract, so the +/// // call site must discharge it even for the recording fake. +/// unsafe { cs.init_context(&mut a, never_returns, top) }; +/// assert!(a.initialized); +/// assert_eq!(cs.init_count(), 1); +/// +/// let b = FakeTaskContext::default(); +/// // SAFETY: +/// // (a) `ContextSwitch::context_switch` is `unsafe` — for a real CPU it +/// // saves/restores callee-saved state through the context pointers. +/// // (b) `a`/`b` are live `FakeTaskContext`s; the fake performs no real switch +/// // and never dereferences register state — it only records. +/// // (c) The trait method is `unsafe` by contract; no safe alternative exists. +/// unsafe { cs.context_switch(&mut a, &b) }; +/// assert!(a.switched); +/// assert_eq!(cs.switch_count(), 1); +/// ``` +pub struct FakeContextSwitch { + state: Mutex<FakeContextSwitchState>, +} + +#[derive(Default)] +struct FakeContextSwitchState { + switch_count: u64, + init_count: u64, +} + +impl FakeContextSwitch { + /// Construct a `FakeContextSwitch` with zeroed call counts. + #[must_use] + pub fn new() -> Self { + Self { + state: Mutex::new(FakeContextSwitchState::default()), + } + } + + /// Return the number of [`ContextSwitch::context_switch`] calls so far. + /// + /// # Panics + /// + /// Panics if the internal mutex has been poisoned.
+ #[must_use] + pub fn switch_count(&self) -> u64 { + self.locked().switch_count + } + + /// Return the number of [`ContextSwitch::init_context`] calls so far. + /// + /// # Panics + /// + /// Panics if the internal mutex has been poisoned. + #[must_use] + pub fn init_count(&self) -> u64 { + self.locked().init_count + } + + fn locked(&self) -> std::sync::MutexGuard<'_, FakeContextSwitchState> { + self.state.lock().expect("FakeContextSwitch mutex poisoned") + } +} + +impl Default for FakeContextSwitch { + fn default() -> Self { + Self::new() + } +} + +impl ContextSwitch for FakeContextSwitch { + type TaskContext = FakeTaskContext; + + /// # Safety + /// + /// Inherits the [`ContextSwitch::context_switch`] trait contract, but + /// the fake performs **no** register save/restore and no stack swap — + /// it only marks `current.switched` and increments a counter. It + /// therefore never dereferences a real saved context and cannot + /// corrupt host state regardless of the caller's invariants. Callers + /// in tests still satisfy the contract (IRQs masked, valid contexts) + /// to mirror production call sequences. + unsafe fn context_switch(&self, current: &mut Self::TaskContext, _next: &Self::TaskContext) { + current.switched = true; + self.locked().switch_count += 1; + } + + /// # Safety + /// + /// Inherits the [`ContextSwitch::init_context`] trait contract. The + /// fake records the requested `entry` / `stack_top` (as `usize`) and + /// marks the context initialised; it neither dereferences `stack_top` + /// nor calls `entry`, so no real stack or function pointer is touched. 
+ unsafe fn init_context( + &self, + ctx: &mut Self::TaskContext, + entry: fn() -> !, + stack_top: *mut u8, + ) { + ctx.initialized = true; + ctx.entry_addr = entry as *const () as usize; + ctx.stack_top = stack_top as usize; + self.locked().init_count += 1; + } +} + +#[cfg(test)] +mod tests { + use super::{FakeContextSwitch, FakeTaskContext}; + use tyrne_hal::ContextSwitch; + + fn never_returns() -> ! { + panic!("FakeContextSwitch never calls task entry points") + } + + #[test] + fn init_context_records_entry_stack_and_marks_initialized() { + let cs = FakeContextSwitch::new(); + let mut ctx = FakeTaskContext::default(); + let mut stack = [0u8; 512]; + let top = stack.as_mut_ptr().wrapping_add(stack.len()); + + // SAFETY: + // (a) `init_context` is `unsafe` — for a real CPU it installs `top` + // as the stack pointer and `never_returns` as the entry point. + // (b) `top` is one-past a live 512-byte stack and `never_returns` + // diverges; the fake only records the pointer, never derefs it. + // (c) The trait method is `unsafe` by contract; no safe shim exists. + unsafe { cs.init_context(&mut ctx, never_returns, top) }; + + assert!(ctx.initialized); + // Under Miri a function pointer cast to an integer is given a + // synthetic, non-stable address: two separate `fn as usize` + // exposures of the same function need not be equal (they are on real + // hardware). Assert exact equality only off-Miri; under Miri confirm + // a non-zero value was recorded. 
+ #[cfg(not(miri))] + assert_eq!(ctx.entry_addr, never_returns as *const () as usize); + #[cfg(miri)] + assert_ne!(ctx.entry_addr, 0); + assert_eq!(ctx.stack_top, top as usize); + assert_eq!(cs.init_count(), 1); + assert_eq!(cs.switch_count(), 0); + } + + #[test] + fn context_switch_marks_current_and_counts() { + let cs = FakeContextSwitch::new(); + let mut a = FakeTaskContext::default(); + let b = FakeTaskContext::default(); + + // SAFETY: + // (a) `context_switch` is `unsafe` — for a real CPU it saves/restores + // callee-saved state through the two context pointers. + // (b) `a`/`b` are live `FakeTaskContext`s; the fake performs no real + // switch and only records, dereferencing no register state. + // (c) The trait method is `unsafe` by contract; no safe alternative. + unsafe { cs.context_switch(&mut a, &b) }; + assert!(a.switched); + assert!(!b.switched); + assert_eq!(cs.switch_count(), 1); + + // SAFETY: as the first `context_switch` call above — (a) the trait + // method is `unsafe`, (b) the fake only records over live contexts and + // derefs no register state, (c) no safe alternative to the `unsafe` API. + unsafe { cs.context_switch(&mut a, &b) }; + assert_eq!(cs.switch_count(), 2); + } + + #[test] + fn default_context_is_uninitialized_and_unswitched() { + let ctx = FakeTaskContext::default(); + assert!(!ctx.initialized); + assert!(!ctx.switched); + assert_eq!(ctx.entry_addr, 0); + assert_eq!(ctx.stack_top, 0); + } +} diff --git a/test-hal/src/cpu.rs b/test-hal/src/cpu.rs index f1be73b..5304fd9 100644 --- a/test-hal/src/cpu.rs +++ b/test-hal/src/cpu.rs @@ -98,13 +98,20 @@ impl Cpu for FakeCpu { fn disable_irqs(&self) -> IrqState { let mut state = self.locked(); - let prev = IrqState(usize::from(state.irqs_enabled)); + // DAIF-compatible polarity (matches the BSP `QemuVirtCpu`): the + // saved `IrqState.0` mirrors the DAIF mask convention where a + // *set* bit means IRQs are *masked*. 
So `IrqState(0)` = IRQs were + // ENABLED, non-zero = IRQs were DISABLED. Using boolean polarity + // here (0 = disabled) would make a shared fake invert production + // IRQ semantics — see master-review MR-017 / X4c-002. + let prev = IrqState(usize::from(!state.irqs_enabled)); state.irqs_enabled = false; prev } fn restore_irq_state(&self, state: IrqState) { - self.locked().irqs_enabled = state.0 != 0; + // DAIF polarity: `IrqState(0)` means IRQs enabled. + self.locked().irqs_enabled = state.0 == 0; } fn wait_for_interrupt(&self) { @@ -119,7 +126,7 @@ impl Cpu for FakeCpu { #[cfg(test)] mod tests { use super::FakeCpu; - use tyrne_hal::{Cpu, IrqGuard}; + use tyrne_hal::{Cpu, IrqGuard, IrqState}; #[test] fn default_cpu_reports_core_zero_with_irqs_enabled() { @@ -128,6 +135,26 @@ mod tests { assert!(cpu.irqs_enabled()); } + #[test] + fn irq_state_uses_daif_polarity_zero_means_enabled() { + // Canonical convention (matches the BSP `QemuVirtCpu`): + // `IrqState(0)` == IRQs enabled; non-zero == IRQs disabled. + let cpu = FakeCpu::new(); + // Starts enabled → disable_irqs saves "was enabled" as IrqState(0). + let prev = cpu.disable_irqs(); + assert_eq!(prev.0, 0, "saved state for previously-enabled IRQs is 0"); + assert!(!cpu.irqs_enabled()); + + // Restoring IrqState(0) re-enables; a non-zero token disables. 
+ cpu.restore_irq_state(IrqState(0)); + assert!(cpu.irqs_enabled(), "IrqState(0) restores IRQs to enabled"); + cpu.restore_irq_state(IrqState(1)); + assert!( + !cpu.irqs_enabled(), + "non-zero IrqState restores to disabled" + ); + } + #[test] fn with_core_id_sets_reported_id() { let cpu = FakeCpu::with_core_id(3); diff --git a/test-hal/src/irq_controller.rs b/test-hal/src/irq_controller.rs index cfa13c4..b324698 100644 --- a/test-hal/src/irq_controller.rs +++ b/test-hal/src/irq_controller.rs @@ -4,6 +4,13 @@ use std::collections::{HashSet, VecDeque}; use std::sync::Mutex; use tyrne_hal::{IrqController, IrqNumber}; +/// Architectural maximum INTID; mirrors `QemuVirtGic`'s `GIC_MAX_IRQ` +/// (= 1020). Any `enable` / `disable` call with `irq.0 >= FAKE_MAX_IRQ` +/// panics, matching the real BSP, so kernel logic that miscomputes an +/// out-of-range IRQ number fails on the host just as it would on hardware +/// (where the register offset would escape the distributor MMIO window). +const FAKE_MAX_IRQ: u32 = 1020; + /// A [`IrqController`] whose enable set, pending queue, and EOI history /// are visible to tests. /// @@ -92,10 +99,25 @@ impl Default for FakeIrqController { impl IrqController for FakeIrqController { fn enable(&self, irq: IrqNumber) { + // Mirror QemuVirtGic::enable's architectural range guard so a + // kernel bug that constructs an out-of-range IrqNumber fails on + // the host instead of giving false confidence (C8-004 / X4c-009). 
+ assert!( + irq.0 < FAKE_MAX_IRQ, + "FakeIrqController::enable: irq.0 = {} exceeds architectural max {}", + irq.0, + FAKE_MAX_IRQ, + ); self.locked().enabled.insert(irq); } fn disable(&self, irq: IrqNumber) { + assert!( + irq.0 < FAKE_MAX_IRQ, + "FakeIrqController::disable: irq.0 = {} exceeds architectural max {}", + irq.0, + FAKE_MAX_IRQ, + ); self.locked().enabled.remove(&irq); } @@ -176,6 +198,22 @@ mod tests { assert_eq!(ic.eoi_history(), vec![IrqNumber(30)]); } + #[test] + #[should_panic(expected = "exceeds architectural max")] + fn enable_panics_on_out_of_range_irq() { + let ic = FakeIrqController::new(); + // 1020 is the first out-of-range INTID (== FAKE_MAX_IRQ); the + // spurious sentinel 1023 is also above the bound. + ic.enable(IrqNumber(1020)); + } + + #[test] + #[should_panic(expected = "exceeds architectural max")] + fn disable_panics_on_out_of_range_irq() { + let ic = FakeIrqController::new(); + ic.disable(IrqNumber(1023)); + } + #[test] fn disabled_irq_can_still_be_injected_for_test_purposes() { // The fake does not enforce enable-gating on inject; it's up to diff --git a/test-hal/src/lib.rs b/test-hal/src/lib.rs index c4cb26e..b054c3a 100644 --- a/test-hal/src/lib.rs +++ b/test-hal/src/lib.rs @@ -10,23 +10,32 @@ //! [`docs/standards/testing.md`][testing-doc] for the test discipline this //! supports. //! -//! [adr-0006]: https://github.com/cemililik/Tyrne/blob/main/docs/decisions/0006-workspace-layout.md -//! [testing-doc]: https://github.com/cemililik/Tyrne/blob/main/docs/standards/testing.md +//! [adr-0006]: https://github.com/HodeTech/Tyrne/blob/main/docs/decisions/0006-workspace-layout.md +//! [testing-doc]: https://github.com/HodeTech/Tyrne/blob/main/docs/standards/testing.md //! //! ## Status //! -//! All five Phase 4b HAL traits now have fakes: +//! All six accepted HAL traits now have fakes: //! [`FakeConsole`] (ADR-0007), [`FakeCpu`] (ADR-0008), [`FakeMmu`] -//! (ADR-0009), [`FakeTimer`] (ADR-0010), [`FakeIrqController`] (ADR-0011). +//! 
(ADR-0009), [`FakeTimer`] (ADR-0010), [`FakeIrqController`] (ADR-0011), +//! and [`FakeContextSwitch`] (ADR-0020). +//! +//! Two failure-injecting [`tyrne_hal::Mmu`] decorators wrap [`FakeMmu`] +//! so kernel rollback tests can exercise the two error variants the flat +//! `FakeMmu` cannot itself produce: [`OutOfFramesMmu`] +//! ([`tyrne_hal::MmuError::OutOfFrames`]) and [`BlockMappedMmu`] +//! ([`tyrne_hal::MmuError::BlockMapped`]). mod console; +mod context_switch; mod cpu; mod irq_controller; mod mmu; mod timer; pub use console::FakeConsole; +pub use context_switch::{FakeContextSwitch, FakeTaskContext}; pub use cpu::FakeCpu; pub use irq_controller::FakeIrqController; -pub use mmu::{FakeAddressSpace, FakeMmu, VecFrameProvider}; +pub use mmu::{BlockMappedMmu, FakeAddressSpace, FakeMmu, OutOfFramesMmu, VecFrameProvider}; pub use timer::FakeTimer; diff --git a/test-hal/src/mmu.rs b/test-hal/src/mmu.rs index d6ccdc9..53bb519 100644 --- a/test-hal/src/mmu.rs +++ b/test-hal/src/mmu.rs @@ -11,6 +11,25 @@ use tyrne_hal::{ /// Pops from the end, so the order in which frames are consumed is the /// reverse of insertion order. Tests can query [`Self::remaining`] to /// check how many frames were used. +/// +/// # Contract note — frames are NOT zero-filled +/// +/// The [`FrameProvider::alloc_frame`] contract requires zero-initialised +/// frames (the real [`Pmm`][pmm] zero-fills before returning, and the BSP +/// page-table walker *reads* the resulting zeroed descriptor slots). +/// `VecFrameProvider` does **not** zero-fill: a [`PhysFrame`] in the fake +/// is a typed *address*, not a region of backing bytes, so there is +/// nothing to zero. This satisfies the contract only **vacuously** — +/// [`FakeMmu`] (and the [`OutOfFramesMmu`] / [`BlockMappedMmu`] +/// decorators) never dereference a frame's physical memory. +/// +/// If a future fake is added that *reads* frame contents (e.g. 
one that +/// walks a simulated page-table tree), the caller is responsible for +/// ensuring the inserted frames point at genuinely zero-initialised +/// backing memory; pairing such a fake with `VecFrameProvider` as-is +/// would feed it non-zero descriptor bytes. +/// +/// [pmm]: https://github.com/HodeTech/Tyrne/blob/main/kernel/src/mm/pmm.rs pub struct VecFrameProvider { available: Vec, } @@ -61,6 +80,34 @@ impl FakeAddressSpace { /// A [`Mmu`] that records activations, TLB invalidations, and mapping /// operations for test assertions. +/// +/// # Intrinsic fidelity gap +/// +/// `FakeMmu` models mappings as a **flat `HashMap`** keyed by virtual +/// address; it has no multi-level page-table structure. Two `MmuError` +/// variants the real [`QemuVirtMmu`][bsp] can return are therefore +/// **never** produced by `FakeMmu`: +/// +/// - [`MmuError::OutOfFrames`] — raised by the real walker when an +/// intermediate-table allocation fails mid-walk. `FakeMmu::map` +/// ignores its `FrameProvider` (no intermediate tables to allocate), +/// so it cannot exhaust it. Use [`OutOfFramesMmu`] to exercise the +/// kernel's mid-walk `OutOfFrames` rollback path (`load_image` / +/// `cap_map` failure-semantics clause (2): `pa` is not consumed). +/// - [`MmuError::BlockMapped`] — raised by the real walker when a walk +/// hits a 2 MiB block descriptor at L1/L2 (e.g. the bootstrap block +/// mappings). `FakeMmu` has no block descriptors. Use +/// [`BlockMappedMmu`] to exercise kernel code that distinguishes +/// `BlockMapped` from `NotMapped`. +/// +/// Everything `FakeMmu` *does* model is bit-for-bit faithful to the real +/// impl (VA-alignment rejection, `DEVICE | EXECUTE` rejection, double-map +/// → `AlreadyMapped`, unmap-missing → `NotMapped`, the `MapperFlush` +/// token discipline). The injecting decorators above wrap a `FakeMmu` and +/// add exactly one failure mode each, delegating the success path +/// unchanged. 
+/// +/// [bsp]: https://github.com/HodeTech/Tyrne/blob/main/bsp-qemu-virt/src/mmu.rs pub struct FakeMmu { state: Mutex, } @@ -130,7 +177,23 @@ impl Default for FakeMmu { impl Mmu for FakeMmu { type AddressSpace = FakeAddressSpace; + /// # Safety + /// + /// Inherits the [`Mmu::create_address_space`] trait-declaration + /// contract (`root` page-aligned, exclusively owned, zero-filled). + /// `FakeMmu` upholds it *vacuously*: the body never dereferences + /// `root`'s physical memory — it stores the `PhysFrame` value (an + /// aligned address) into a host-side `HashMap`-backed + /// [`FakeAddressSpace`]. The zero-fill and exclusive-ownership + /// pre-conditions therefore cannot be observed; alignment is enforced + /// upstream by [`PhysFrame::from_aligned`]. unsafe fn create_address_space(&self, root: PhysFrame) -> FakeAddressSpace { + // SAFETY: no unsafe operation in this body — `root` is stored, not + // dereferenced. Per unsafe-policy §4, this alloc-free trait-impl + // `unsafe fn` inherits the trait declaration's `# Safety` contract; + // it is a host-only test double and warrants no audit-log entry + // (test-harness `unsafe` is exempt from individual log entries when + // confined to test doubles — see unsafe-policy §3 / X3-003). FakeAddressSpace { root, mappings: HashMap::new(), @@ -151,6 +214,12 @@ impl Mmu for FakeMmu { va: VirtAddr, pa: PhysFrame, flags: MappingFlags, + // `frames` is accepted for trait-signature compatibility but not + // consumed: `FakeMmu` uses a flat `HashMap` and has no + // intermediate page-table structure to allocate, so it never + // returns `MmuError::OutOfFrames` regardless of how many frames + // are available. See the `FakeMmu` struct-doc fidelity gap and + // `OutOfFramesMmu` for the decorator that exercises that path. _frames: &mut dyn FrameProvider, ) -> Result { // Mirror the real `Mmu` contract: VA must be `PAGE_SIZE`-aligned. 
@@ -201,9 +270,281 @@ impl Mmu for FakeMmu { } } +// ── Failure-injecting decorator MMUs ────────────────────────────────────────── +// +// `FakeMmu`'s flat-HashMap design cannot reproduce two `MmuError` variants the +// real `QemuVirtMmu` returns: `OutOfFrames` (mid-walk intermediate-table +// allocation failure) and `BlockMapped` (walk hits a 2 MiB block descriptor). +// Kernel rollback logic (`load_image`, `cap_map`, `cap_unmap`) rides those +// clauses, so the two decorators below let host tests drive both failure paths. +// Each wraps a `FakeMmu`, reuses `FakeAddressSpace`, and delegates the success +// path verbatim — adding exactly one injected failure mode. + +/// A [`Mmu`] decorator over [`FakeMmu`] that returns +/// [`MmuError::OutOfFrames`] from [`Mmu::map`] once its +/// [`FrameProvider`] is exhausted, modelling the real walker's mid-walk +/// intermediate-table allocation failure. +/// +/// Each successful `map` call consumes **one** frame from the provider +/// passed to `map` (standing in for one intermediate page-table frame). +/// When the provider returns `None`, `map` returns `OutOfFrames` +/// after rolling back its provisional insert, honouring the [`Mmu::map`] +/// failure-semantics contract: no mapping at `va`, and `pa` is **not** +/// consumed (the caller may safely return it to its provider). All other +/// methods delegate to the inner [`FakeMmu`] unchanged. +/// +/// # Example +/// +/// ``` +/// use tyrne_test_hal::{OutOfFramesMmu, VecFrameProvider}; +/// use tyrne_hal::{MappingFlags, Mmu, MmuError, PhysAddr, PhysFrame, VirtAddr}; +/// +/// let frame = |a| PhysFrame::from_aligned(PhysAddr(a)).unwrap(); +/// let mmu = OutOfFramesMmu::new(); +/// // SAFETY: the inner FakeMmu never dereferences `root`. +/// let mut as_ = unsafe { mmu.create_address_space(frame(0x1000)) }; +/// +/// // A provider with zero frames → the first map fails with OutOfFrames.
+/// let mut empty = VecFrameProvider::new(vec![]); +/// let err = mmu +/// .map(&mut as_, VirtAddr(0x4000), frame(0x8000), MappingFlags::WRITE, &mut empty) +/// .unwrap_err(); +/// assert_eq!(err, MmuError::OutOfFrames); +/// // pa was not consumed and no mapping was installed. +/// assert_eq!(as_.mapping_count(), 0); +/// ``` +pub struct OutOfFramesMmu { + inner: FakeMmu, +} + +impl OutOfFramesMmu { + /// Construct an `OutOfFramesMmu` wrapping a fresh [`FakeMmu`]. + #[must_use] + pub fn new() -> Self { + Self { + inner: FakeMmu::new(), + } + } + + /// Borrow the inner [`FakeMmu`] for activation / TLB introspection + /// (e.g. [`FakeMmu::activated_root`], + /// [`FakeMmu::tlb_address_invalidations`]). + #[must_use] + pub fn inner(&self) -> &FakeMmu { + &self.inner + } +} + +impl Default for OutOfFramesMmu { + fn default() -> Self { + Self::new() + } +} + +impl Mmu for OutOfFramesMmu { + type AddressSpace = FakeAddressSpace; + + /// # Safety + /// + /// Inherits [`Mmu::create_address_space`]; delegates to the inner + /// [`FakeMmu`], which never dereferences `root`. + unsafe fn create_address_space(&self, root: PhysFrame) -> FakeAddressSpace { + // SAFETY: forwards to FakeMmu::create_address_space, an alloc-free + // store of an aligned `PhysFrame`. See FakeMmu's `# Safety`. + unsafe { self.inner.create_address_space(root) } + } + + fn address_space_root(&self, as_: &Self::AddressSpace) -> PhysFrame { + self.inner.address_space_root(as_) + } + + fn activate(&self, as_: &Self::AddressSpace) { + self.inner.activate(as_); + } + + fn map( + &self, + as_: &mut FakeAddressSpace, + va: VirtAddr, + pa: PhysFrame, + flags: MappingFlags, + frames: &mut dyn FrameProvider, + ) -> Result { + // Validate + insert via the inner FakeMmu FIRST (it runs the + // alignment / flag / double-map checks and ignores its own `frames` + // argument). 
A non-OutOfFrames rejection therefore returns WITHOUT + // consuming a provider frame — matching the real walker, which + // validates before allocating any intermediate table. + let flush = self.inner.map(as_, va, pa, flags, frames)?; + // Then model one intermediate-table allocation. If the provider is + // empty, roll the just-inserted mapping back and report OutOfFrames, + // so the only path that returns OutOfFrames leaves no mapping and the + // only path that consumes a provider frame is a fully successful map. + if frames.alloc_frame().is_none() { + // Undo the insert; unmap cannot fail for a VA mapped one line above. + let _ = self.inner.unmap(as_, va); + return Err(MmuError::OutOfFrames); + } + Ok(flush) + } + + fn unmap( + &self, + as_: &mut FakeAddressSpace, + va: VirtAddr, + ) -> Result<(MapperFlush, PhysFrame), MmuError> { + self.inner.unmap(as_, va) + } + + fn invalidate_tlb_address(&self, va: VirtAddr) { + self.inner.invalidate_tlb_address(va); + } + + fn invalidate_tlb_all(&self) { + self.inner.invalidate_tlb_all(); + } +} + +/// A [`Mmu`] decorator over [`FakeMmu`] that injects +/// [`MmuError::BlockMapped`] for a configured set of virtual addresses, +/// modelling the real walker hitting a 2 MiB block descriptor at L1/L2. +/// +/// A VA registered via [`Self::block`] (or [`Self::with_blocked`]) makes +/// both [`Mmu::map`] and [`Mmu::unmap`] return `BlockMapped` for that VA +/// (checked **before** any address-space mutation, so the failure +/// semantics — no state change, `pa` not consumed — hold). Any VA not in +/// the blocked set delegates to the inner [`FakeMmu`] unchanged, so the +/// success path and the `NotMapped` / `AlreadyMapped` / alignment +/// behaviours stay faithful. 
+/// +/// # Example +/// +/// ``` +/// use tyrne_test_hal::BlockMappedMmu; +/// use tyrne_hal::{MappingFlags, Mmu, MmuError, PhysAddr, PhysFrame, VirtAddr}; +/// +/// let frame = |a| PhysFrame::from_aligned(PhysAddr(a)).unwrap(); +/// let mmu = BlockMappedMmu::with_blocked([VirtAddr(0x4000)]); +/// // SAFETY: the inner FakeMmu never dereferences `root`. +/// let mut as_ = unsafe { mmu.create_address_space(frame(0x1000)) }; +/// +/// // unmap of a blocked VA surfaces BlockMapped, distinct from NotMapped. +/// let err = mmu.unmap(&mut as_, VirtAddr(0x4000)).unwrap_err(); +/// assert_eq!(err, MmuError::BlockMapped); +/// // A non-blocked VA falls through to the inner FakeMmu (NotMapped here). +/// let err = mmu.unmap(&mut as_, VirtAddr(0x5000)).unwrap_err(); +/// assert_eq!(err, MmuError::NotMapped); +/// ``` +pub struct BlockMappedMmu { + inner: FakeMmu, + blocked: std::collections::HashSet<VirtAddr>, +} + +impl BlockMappedMmu { + /// Construct a `BlockMappedMmu` with no blocked addresses (delegates + /// everything to the inner [`FakeMmu`] until [`Self::block`] is + /// called). + #[must_use] + pub fn new() -> Self { + Self { + inner: FakeMmu::new(), + blocked: std::collections::HashSet::new(), + } + } + + /// Construct a `BlockMappedMmu` pre-loaded with the given blocked + /// virtual addresses. + #[must_use] + pub fn with_blocked(addrs: impl IntoIterator<Item = VirtAddr>) -> Self { + Self { + inner: FakeMmu::new(), + blocked: addrs.into_iter().collect(), + } + } + + /// Register `va` so that subsequent `map` / `unmap` on it return + /// [`MmuError::BlockMapped`]. + pub fn block(&mut self, va: VirtAddr) { + self.blocked.insert(va); + } + + /// Borrow the inner [`FakeMmu`] for activation / TLB introspection.
+ #[must_use] + pub fn inner(&self) -> &FakeMmu { + &self.inner + } + + fn is_blocked(&self, va: VirtAddr) -> bool { + self.blocked.contains(&va) + } +} + +impl Default for BlockMappedMmu { + fn default() -> Self { + Self::new() + } +} + +impl Mmu for BlockMappedMmu { + type AddressSpace = FakeAddressSpace; + + /// # Safety + /// + /// Inherits [`Mmu::create_address_space`]; delegates to the inner + /// [`FakeMmu`], which never dereferences `root`. + unsafe fn create_address_space(&self, root: PhysFrame) -> FakeAddressSpace { + // SAFETY: forwards to FakeMmu::create_address_space, an alloc-free + // store of an aligned `PhysFrame`. See FakeMmu's `# Safety`. + unsafe { self.inner.create_address_space(root) } + } + + fn address_space_root(&self, as_: &Self::AddressSpace) -> PhysFrame { + self.inner.address_space_root(as_) + } + + fn activate(&self, as_: &Self::AddressSpace) { + self.inner.activate(as_); + } + + fn map( + &self, + as_: &mut FakeAddressSpace, + va: VirtAddr, + pa: PhysFrame, + flags: MappingFlags, + frames: &mut dyn FrameProvider, + ) -> Result { + // Inject BlockMapped before any state change: no mapping at `va`, + // `pa` not consumed — honours the Mmu::map failure contract. 
+ if self.is_blocked(va) { + return Err(MmuError::BlockMapped); + } + self.inner.map(as_, va, pa, flags, frames) + } + + fn unmap( + &self, + as_: &mut FakeAddressSpace, + va: VirtAddr, + ) -> Result<(MapperFlush, PhysFrame), MmuError> { + if self.is_blocked(va) { + return Err(MmuError::BlockMapped); + } + self.inner.unmap(as_, va) + } + + fn invalidate_tlb_address(&self, va: VirtAddr) { + self.inner.invalidate_tlb_address(va); + } + + fn invalidate_tlb_all(&self) { + self.inner.invalidate_tlb_all(); + } +} + #[cfg(test)] mod tests { - use super::{FakeMmu, VecFrameProvider}; + use super::{BlockMappedMmu, FakeMmu, OutOfFramesMmu, VecFrameProvider}; use tyrne_hal::{MapperFlush, MappingFlags, Mmu, MmuError, PhysAddr, PhysFrame, VirtAddr}; fn frame(addr: usize) -> PhysFrame { @@ -536,4 +877,102 @@ mod tests { assert_eq!(err, MmuError::InvalidFlags); assert_eq!(as_.mapping_count(), 0); } + + // ── Failure-injecting decorators ────────────────────────────────────────── + + #[test] + fn out_of_frames_mmu_maps_while_frames_available() { + let mmu = OutOfFramesMmu::new(); + // SAFETY: the inner FakeMmu never dereferences `root`. + let mut as_ = unsafe { mmu.create_address_space(frame(0x1000)) }; + let mut fp = VecFrameProvider::new(vec![frame(0x2000)]); + + mmu.map( + &mut as_, + VirtAddr(0x4000), + frame(0x8000), + MappingFlags::WRITE, + &mut fp, + ) + .expect("map must succeed while a frame is available") + .flush(mmu.inner()); + assert_eq!(as_.mapping_count(), 1); + assert_eq!(fp.remaining(), 0, "one frame must have been consumed"); + } + + #[test] + fn out_of_frames_mmu_returns_out_of_frames_when_provider_empty() { + let mmu = OutOfFramesMmu::new(); + // SAFETY: the inner FakeMmu never dereferences `root`. 
+ let mut as_ = unsafe { mmu.create_address_space(frame(0x1000)) }; + let mut fp = VecFrameProvider::new(vec![]); + + let err = mmu + .map( + &mut as_, + VirtAddr(0x4000), + frame(0x8000), + MappingFlags::WRITE, + &mut fp, + ) + .expect_err("empty provider must yield OutOfFrames"); + assert_eq!(err, MmuError::OutOfFrames); + // Failure semantics: no mapping at va, pa not consumed. + assert_eq!(as_.mapping_count(), 0, "failed map must not mutate the AS"); + } + + #[test] + fn block_mapped_mmu_injects_block_mapped_on_map_and_unmap() { + let mmu = BlockMappedMmu::with_blocked([VirtAddr(0x4000)]); + // SAFETY: the inner FakeMmu never dereferences `root`. + let mut as_ = unsafe { mmu.create_address_space(frame(0x1000)) }; + let mut fp = VecFrameProvider::new(vec![]); + + let map_err = mmu + .map( + &mut as_, + VirtAddr(0x4000), + frame(0x8000), + MappingFlags::WRITE, + &mut fp, + ) + .expect_err("blocked VA must fail map with BlockMapped"); + assert_eq!(map_err, MmuError::BlockMapped); + assert_eq!(as_.mapping_count(), 0); + + let unmap_err = mmu + .unmap(&mut as_, VirtAddr(0x4000)) + .expect_err("blocked VA must fail unmap with BlockMapped"); + assert_eq!(unmap_err, MmuError::BlockMapped); + } + + #[test] + fn block_mapped_mmu_delegates_unblocked_addresses() { + let mut mmu = BlockMappedMmu::new(); + mmu.block(VirtAddr(0x4000)); + // SAFETY: the inner FakeMmu never dereferences `root`. + let mut as_ = unsafe { mmu.create_address_space(frame(0x1000)) }; + let mut fp = VecFrameProvider::new(vec![]); + + // An unblocked VA falls through to the inner FakeMmu: a successful + // map, then unmap-missing → NotMapped (distinct from BlockMapped). 
+ mmu.map( + &mut as_, + VirtAddr(0x5000), + frame(0x9000), + MappingFlags::WRITE, + &mut fp, + ) + .expect("unblocked map must succeed") + .flush(mmu.inner()); + let (_flush, returned) = mmu + .unmap(&mut as_, VirtAddr(0x5000)) + .expect("unblocked unmap must succeed"); + assert_eq!(returned, frame(0x9000)); + + let err = mmu + .unmap(&mut as_, VirtAddr(0x6000)) + .expect_err("missing VA must be NotMapped, not BlockMapped"); + assert_eq!(err, MmuError::NotMapped); + } } diff --git a/tools/perf-harness.sh b/tools/perf-harness.sh index 310da60..c9c96f8 100755 --- a/tools/perf-harness.sh +++ b/tools/perf-harness.sh @@ -29,13 +29,13 @@ # is unusually slow on the host. # # Failure handling: a run that does not emit a boot-to-end line within the -# timeout is counted as a failure. If fewer than 50 % of runs produced a -# valid sample, the harness exits non-zero — that threshold is treated as -# environmental (kernel image missing, QEMU not in PATH, host under heavy -# load). If 50-100 % of runs are valid, statistics are computed over the -# valid samples only and the failure count is reported alongside. +# timeout is counted as a failure. If fewer than ⌈n/2⌉ of the n runs produced +# a valid sample (i.e. fewer than half, rounding up), the harness exits +# non-zero — that threshold is treated as environmental (kernel image missing, +# QEMU not in PATH, host under heavy load). Otherwise statistics are computed +# over the valid samples only and the failure count is reported alongside. # -# Exits 0 on success (>= 50 % valid runs), 1 on environmental failure, +# Exits 0 on success (>= ⌈n/2⌉ valid runs), 1 on environmental failure, # 2 on argument errors. set -euo pipefail @@ -316,9 +316,12 @@ if [[ "$VALID_COUNT" -eq 0 ]]; then exit 1 fi -# 50 % failure-rate threshold: below it, we treat the run as environmental -# rather than a measurement worth aggregating. The brief explicitly asked -# for a clear error in this case. 
+# Half-of-iterations threshold: the run must produce at least ⌈n/2⌉ valid +# samples (i.e. half, rounding UP) or we treat it as environmental rather +# than a measurement worth aggregating. The brief explicitly asked for a +# clear error in this case. Note for odd n the round-up means slightly more +# than 50 % is required (e.g. n=5 ⇒ HALF=3 ⇒ needs 3/5 = 60 %); this matches +# the "at least ⌈n/2⌉ valid runs" wording in docs/standards/infrastructure.md. HALF=$(( (ITERATIONS + 1) / 2 )) if [[ "$VALID_COUNT" -lt "$HALF" ]]; then echo "error: only $VALID_COUNT/$ITERATIONS iterations produced a boot-to-end sample" >&2 @@ -471,6 +474,18 @@ if [[ -n "$REPORT_CONTEXT" ]]; then exit 1 fi + # Baseline reports are append-only artefacts (see + # docs/standards/infrastructure.md §"Reporting discipline"): re-baselines + # land as fresh reports with a new context slug, never by overwriting an + # existing one. Refuse to clobber so the discipline is enforced rather than + # merely conventional. (The stats are already on stdout above, so nothing + # is lost — only the file write is skipped.) + if [[ -e "$REPORT_PATH" ]]; then + echo "error: refusing to overwrite existing report: ${REPORT_PATH#${REPO_ROOT}/}" >&2 + echo "hint: baseline reports are append-only; pick a fresh --report=CONTEXT slug" >&2 + exit 1 + fi + # Strip a leading `YYYY-MM-DD-` from the context for the title so we don't # render `2026-05-08 — 2026-05-08-foo` on dated contexts. 
case "$REPORT_CONTEXT" in diff --git a/tools/run-qemu.sh b/tools/run-qemu.sh index 808def3..3baf6c4 100755 --- a/tools/run-qemu.sh +++ b/tools/run-qemu.sh @@ -4,15 +4,18 @@ # Usage: # tools/run-qemu.sh — debug build # tools/run-qemu.sh --release — release build -# tools/run-qemu.sh --int-log — log exceptions to /tmp/qemu_int.log +# tools/run-qemu.sh --int-log — log exceptions (PID-suffixed temp file) # tools/run-qemu.sh <kernel-elf> — explicit ELF path +# tools/run-qemu.sh -h | --help — show this usage # -# --int-log adds -d int -D /tmp/qemu_int.log to the QEMU invocation. +# --int-log adds `-d int -D <path>` to the QEMU invocation, where +# <path> is ${TMPDIR:-/tmp}/qemu_int.<pid>.log (printed at startup). # Use it when the kernel hangs silently to see what exception fired. -# After the run: grep "Taking exception" /tmp/qemu_int.log +# After the run: grep "Taking exception" <path> # # See docs/guides/run-under-qemu.md for the full walkthrough and the -# manual invocation used under the hood. +# manual invocation used under the hood. The QEMU invocation below is +# kept in sync with the `runner` line in .cargo/config.toml. set -euo pipefail @@ -20,6 +23,12 @@ BUILD_PROFILE="debug" KERNEL="" INT_LOG="" +usage() { + # Echo the usage block above (lines after the shebang up to the first + # blank line), stripping the leading "# ".
+ sed -n '2,/^$/p' "$0" | sed 's/^# \{0,1\}//' >&2 +} + for arg in "$@"; do case "$arg" in --release) @@ -28,7 +37,22 @@ for arg in "$@"; do --int-log) INT_LOG="yes" ;; + -h|--help) + usage + exit 0 + ;; + --*) + echo "error: unknown flag: $arg" >&2 + usage + exit 2 + ;; *) + if [[ -n "$KERNEL" ]]; then + echo "error: unexpected extra argument: $arg" >&2 + echo " (the kernel path was already set to: $KERNEL)" >&2 + usage + exit 2 + fi KERNEL="$arg" ;; esac @@ -53,8 +77,12 @@ fi INT_LOG_FLAGS=() if [[ -n "$INT_LOG" ]]; then - INT_LOG_FLAGS=(-d int -D /tmp/qemu_int.log) - echo "exception log → /tmp/qemu_int.log (grep 'Taking exception' to inspect)" >&2 + # PID-suffix the log so concurrent runs (or two users on a shared host) + # do not clobber each other's exception traces. ${TMPDIR:-/tmp} honours a + # per-user temp dir when one is set. + INT_LOG_PATH="${TMPDIR:-/tmp}/qemu_int.$$.log" + INT_LOG_FLAGS=(-d int -D "$INT_LOG_PATH") + echo "exception log → ${INT_LOG_PATH} (grep 'Taking exception' to inspect)" >&2 fi exec qemu-system-aarch64 \